| <?xml version="1.0" encoding="UTF-8"?> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "xhtml1-transitional.dtd"> |
| <!-- saved from url=(0013)about:internet --> |
| <?xml-stylesheet type="text/xsl" href="mathml.xsl"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:pref="http://www.w3.org/2002/Math/preference" xmlns:xlink="http://www.w3.org/1999/xlink" pref:renderer="mathplayer-dl"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> |
| <style xmlns="" type="text/css"> |
| /* This style sets a margin around the entire page */ |
| html, body { |
| margin: 10px; |
| } |
| |
| p { |
| font: normal 16px verdana, sans-serif; |
| margin: 0; |
| padding-bottom:12px; |
| } |
| |
| h1 { |
| font: bold 25px verdana, sans-serif; |
| margin-top: 0; |
| margin-bottom: 3px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| h2 { |
| font: bold 19px verdana, sans-serif; |
| margin-top: 28px; |
| margin-bottom: 3px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| h3 { |
| font: bold 19px verdana, sans-serif !important; |
| margin-top: 28px; |
| margin-bottom: 3px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| li { |
| font: normal 16px verdana, sans-serif; |
| margin-top: 0; |
| margin-bottom: 18px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| .pdparam { |
| font: italic 16px verdana, sans-serif; |
| } |
| |
| .term { |
| font: italic 16px verdana, sans-serif; |
| font-weight: normal; |
| } |
| |
| .type { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| .parameter { |
| font-style: italic; |
| } |
| |
| a:link, a:visited { |
| color: blue; |
| text-decoration: none; |
| font: normal 16px; |
| } |
| |
| a:hover { |
| background-color: #FFFF99; |
| font: normal 16px; |
| } |
| |
| div.funcsynopsis { |
| text-align: left; |
| background-color: #e6e6e6; |
| font: normal 16px verdana, sans-serif; |
| padding-top: 10px; |
| padding-bottom: 10px; |
| } |
| |
| div.funcsynopsis table { |
| border-collapse: separate; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.funcsynopsis td { |
| background-color: #e6e6e6; |
| border: 0 solid #000; |
| padding: 1px; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 { |
| font-family: verdana, sans-serif; |
| font-size: 16px; |
| } |
| |
| code.constant { |
| font: normal 16px courier new, monospace !important; |
| } |
| |
| span.errorname { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| code.function { |
| font: bold 16px verdana, sans-serif !important; |
| } |
| |
| b.fsfunc { |
| font: bold 16px verdana, sans-serif !important; |
| } |
| |
| code.varname { |
| font: italic 16px verdana, sans-serif; |
| } |
| |
| code.replaceable { |
| font: italic 16px courier new, monospace; |
| } |
| |
| code.funcdef { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| .citerefentry { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| .parameter { |
| font-style: italic; |
| } |
| |
| code.fsfunc { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| /* PARAMETER: This style controls spacing between the terms in Parameter section */ |
| dt { |
| margin-top: 15px; |
| } |
| |
| /* TABLES: These styles apply to all tables OTHER than the Synopsis and Example tables */ |
| div.refsect1 table { |
| width: 100%; |
| margin-top: 10px; |
| background-color: #FFF; |
| border-collapse: collapse; |
| border-color: #000; |
| border-width: 1px; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 th { |
| border-collapse: collapse; |
| border-color: #000; |
| border-width: 1px; |
| font: bold 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 td { |
| background-color: #FFF; |
| padding: 5px; |
| vertical-align: text-top; |
| border-collapse: collapse; |
| border-color: #000; |
| border-width: 1px; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 p{ |
| font: normal 16px verdana, sans-serif; |
| margin-top: 8px; |
| margin-bottom: 8px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| |
| /* EXAMPLE: These styles apply only to the Example section */ |
| div.refsect2 { |
| font: normal 16px courier new, monospace !important; |
| } |
| |
| div.refsect2 table { |
| margin-top: 0; |
| background-color: #e6e6e6; |
| width: 100%; |
| border: 0 solid #000; |
| padding: 2px; |
| font: normal 16px courier new, monospace !important; |
| } |
| |
| div.refsect2 td { |
| background-color: #e6e6e6; |
| font: normal 16px courier new, monospace !important; |
| white-space:pre; |
| } |
| |
| /* COPYRIGHT: This style formats the text of the copyright statement at the bottom of the page */ |
| div.refsect3 { |
| font: normal 11px verdana, sans-serif; |
| margin-top: 50px; |
| margin-bottom: 20px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| </style> |
| <title>Function Qualifiers</title> |
| <meta name="generator" content="DocBook XSL Stylesheets V1.79.1" /> |
| <meta name="keywords" content="Function Qualifiers" /> |
| </head> |
| <body> |
| <div class="refentry"> |
| <a id="id-1"></a> |
| <div class="titlepage"></div> |
| <div xmlns="" class="refnamediv"> |
| <a xmlns="http://www.w3.org/1999/xhtml" id="FunctionQualifiers"></a> |
| <h1>Function Qualifiers</h1> |
| <p> |
| Qualifiers for kernel functions. |
| </p> |
| </div> |
| <div class="refsect2"> |
| <a id="synopsis"></a> |
| <h3> |
| </h3> |
| <div class="informaltable"> |
| <table class="informaltable" border="0"> |
| <colgroup> |
| <col align="left" class="col1" /> |
| </colgroup> |
| <tbody> |
| <tr> |
| <td align="left"> |
| __kernel |
| kernel |
| |
| __attribute__((vec_type_hint(&lt;type&gt;))) |
| __attribute__((work_group_size_hint(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>))) |
| __attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>))) |
| __attribute__((nosvm)) |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="refsect1"> |
| <a id="description"></a> |
| <h2>Description</h2> |
| <p> |
| The <code class="function">__kernel</code> (or <code class="function">kernel</code>) qualifier declares a |
| function to be a kernel that can be executed by an application on an OpenCL device(s). |
| The following rules apply to functions that are declared with this qualifier: |
| </p> |
| <div class="itemizedlist"> |
| <ul class="itemizedlist" style="list-style-type: bullet; "> |
| <li class="listitem" style="list-style-type: disc"> |
| <p> |
| It can be executed on the device only |
| </p> |
| </li> |
| <li class="listitem" style="list-style-type: disc"> |
| <p> |
| It can be called by the host |
| </p> |
| </li> |
| <li class="listitem" style="list-style-type: disc"> |
| <p> |
| It is just a regular function call if a <code class="function">__kernel</code> function |
| is called by another kernel function. |
| </p> |
| </li> |
| </ul> |
| </div> |
| <p> |
| Kernel functions with variables declared inside the function with the |
| <a class="citerefentry" href="local.html"><span class="citerefentry"><span class="refentrytitle">__local</span></span></a> |
| or <a class="citerefentry" href="local.html"><span class="citerefentry"><span class="refentrytitle">local</span></span></a> |
| qualifier can be called by the host using appropriate APIs such as |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. |
| </p> |
| <p> |
| The <code class="function">__kernel</code> and <code class="function">kernel</code> names are reserved |
| for use as function qualifiers and shall not be used otherwise. |
| </p> |
| <h4><a id="id-1.5.6"></a>Optional Attribute Qualifiers</h4> |
| <p> |
| The <code class="function">__kernel</code> qualifier can be used with the keyword <a class="citerefentry" href="attribute.html"><span class="citerefentry"><span class="refentrytitle">__attribute__</span></span></a> to |
| declare additional information about the kernel function as described below. |
| </p> |
| <p> |
| The optional <code class="code">__attribute__((vec_type_hint(&lt;type&gt;)))</code> |
| is a hint to the compiler and is intended to be a representation of the computational |
| <span class="emphasis"><em>width</em></span> of the <code class="function">__kernel</code>, and should serve as the |
| basis for calculating processor bandwidth utilization when the compiler is looking to |
| autovectorize the code. In the <code class="code">__attribute__((vec_type_hint(&lt;type&gt;)))</code> |
| qualifier &lt;type&gt; is one of the built-in vector types or the constituent scalar |
| element types. If <code class="code">vec_type_hint (&lt;type&gt;)</code> is not specified, the |
| kernel is assumed to have the <code class="code">__attribute__((vec_type_hint(int)))</code> qualifier. |
| </p> |
| <p> |
| Implicit in autovectorization is the assumption that any libraries called from |
| the <code class="function">__kernel</code> must be recompilable at run time to handle cases |
| where the compiler decides to merge or separate work-items. This probably means that |
| such libraries can never be hard coded binaries or that hard coded binaries must be |
| accompanied either by source or some retargetable intermediate representation. This |
| may be a code security question for some. |
| </p> |
| <p> |
| For example, where the developer specified a width of <span class="type">float4</span>, the compiler |
| should assume that the computation usually uses up 4 lanes of a float vector, and would |
| decide to merge work-items or possibly even separate one work-item into many threads |
| to better match the hardware capabilities. A conforming implementation is not required |
| to autovectorize code, but shall support the hint. A compiler may autovectorize, even |
| if no hint is provided. If an implementation merges <code class="constant">N</code> work-items |
| into one thread, it is responsible for correctly handling cases where the number of |
| global or local work-items in any dimension modulo <code class="constant">N</code> is not zero. |
| </p> |
| <p> |
| If, for example, a <code class="function">__kernel</code> function is declared with |
| <code class="code">__attribute__(( vec_type_hint (float4)))</code> (meaning that most |
| operations in the <code class="function">__kernel</code> are explicitly vectorized using |
| <span class="type">float4</span>) and the kernel is running using Intel® Advanced Vector |
| Instructions (Intel® AVX) which implements an 8-float-wide vector unit, the |
| autovectorizer might choose to merge two work-items to one thread, running a second |
| work-item in the high half of the 256-bit AVX register. |
| </p> |
| <p> |
| As another example, a Power4 machine has two scalar double precision floating-point |
| units with a 6-cycle deep pipe. An autovectorizer for the Power4 machine might choose |
| to interleave six kernels declared with the <code class="code">__attribute__(( vec_type_hint |
| (double2)))</code> qualifier into one hardware thread, to ensure that there is |
| always 12-way parallelism available to saturate the FPUs. It might also choose to |
| merge 4 or 8 work-items (or some other number) if it concludes that these are better |
| choices, due to resource utilization concerns or some preference for divisibility by 2. |
| </p> |
| <p> |
| The optional <code class="code">__attribute__((work_group_size_hint(<span class="emphasis"><em>X</em></span>, |
| <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code> is a hint to the |
| compiler and is intended to specify the work-group size that may be used, i.e. the value |
| most likely to be specified by the <code class="varname">local_work_size</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. |
| For example, the <code class="code">__attribute__((work_group_size_hint(1, 1, 1)))</code> is |
| a hint to the compiler that the kernel will most likely be executed with a work-group |
| size of 1. |
| </p> |
| <p> |
| The optional <code class="code">__attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, |
| <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code> is the work-group |
| size that must be used as the <code class="varname">local_work_size</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. This |
| allows the compiler to optimize the generated code appropriately for this kernel. |
| </p> |
| <p> |
| If <code class="varname">Z</code> is one, the <code class="varname">work_dim</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> |
| can be 2 or 3. If <code class="varname">Y</code> and <code class="varname">Z</code> |
| are one, the <code class="varname">work_dim</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> |
| can be 1, 2 or 3. |
| </p> |
| <p> |
| The optional <code class="code">__attribute__((nosvm))</code> |
| qualifier can be used with a pointer variable to |
| inform the compiler that the pointer does not |
| refer to a shared virtual memory region. |
| </p> |
| </div> |
| <div class="refsect2"> |
| <a id="example1"></a> |
| <h3> |
| Example |
| </h3> |
| <div class="informaltable"> |
| <table class="informaltable" border="0"> |
| <colgroup> |
| <col align="left" class="col1" /> |
| </colgroup> |
| <tbody> |
| <tr> |
| <td align="left"> |
| // autovectorize assuming float4 as the |
| // basic computation width |
| __kernel __attribute__((vec_type_hint(float4))) |
| void foo( __global float4 *p ) { .... } |
| |
| // autovectorize assuming double as the |
| // basic computation width |
| __kernel __attribute__((vec_type_hint(double))) |
| void foo( __global float4 *p ){ .... } |
| |
| // autovectorize assuming int (default) |
| // as the basic computation width |
| __kernel |
| void foo( __global float4 *p ){ .... } |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="refsect1"> |
| <a id="specification"></a> |
| <h2>Specification</h2> |
| <p> |
| <img src="pdficon_small1.gif" alt="" /> |
| |
| <a href="https://www.khronos.org/registry/cl/specs/opencl-2.1-openclc.pdf#page=47" target="OpenCL Spec">OpenCL Specification</a> |
| </p> |
| </div> |
| <div class="refsect1"> |
| <a id="seealso"></a> |
| <h2>Also see</h2> |
| <p> |
| <a class="citerefentry" href="attribute.html"><span class="citerefentry"><span class="refentrytitle">__attribute__</span></span></a>, |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>, |
| <a class="citerefentry" href="qualifiers.html"><span class="citerefentry"><span class="refentrytitle">qualifiers</span></span></a> |
| </p> |
| </div> |
| <div xmlns="" class="refsect3" lang="en" xml:lang="en"><a xmlns="http://www.w3.org/1999/xhtml" id="Copyright"></a><h4 xmlns="http://www.w3.org/1999/xhtml"></h4><img xmlns="http://www.w3.org/1999/xhtml" src="KhronosLogo.jpg" /><p xmlns="http://www.w3.org/1999/xhtml"></p>Copyright © 2007-2015 The Khronos Group Inc. |
| Permission is hereby granted, free of charge, to any person obtaining a |
| copy of this software and/or associated documentation files (the |
| "Materials"), to deal in the Materials without restriction, including |
| without limitation the rights to use, copy, modify, merge, publish, |
| distribute, sublicense, and/or sell copies of the Materials, and to |
| permit persons to whom the Materials are furnished to do so, subject to |
| the condition that this copyright notice and permission notice shall be included |
| in all copies or substantial portions of the Materials. |
| </div> |
| </div> |
| </body> |
| </html> |