|  | <?xml version="1.0" encoding="UTF-8"?> | 
|  | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "xhtml1-transitional.dtd"> | 
|  | <!-- saved from url=(0013)about:internet --> | 
|  | <?xml-stylesheet type="text/xsl" href="mathml.xsl"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:pref="http://www.w3.org/2002/Math/preference" xmlns:xlink="http://www.w3.org/1999/xlink" pref:renderer="mathplayer-dl"> | 
|  | <head> | 
|  | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> | 
|  | <style xmlns="" type="text/css"> | 
|  | /* Stylesheet for this reference page. Most rules set the complete font via the | 
|  |    "font" shorthand; rules marked !important override fonts set by other rules. */ | 
|  |  | 
|  | /* This style sets a margin around the entire page */ | 
|  | html, body { | 
|  | margin: 10px; | 
|  | } | 
|  |  | 
|  | /* TEXT: Base styles for paragraphs, headings and list items */ | 
|  | p { | 
|  | font: normal 16px verdana, sans-serif; | 
|  | margin: 0; | 
|  | padding-bottom:12px; | 
|  | } | 
|  |  | 
|  | h1 { | 
|  | font: bold 25px verdana, sans-serif; | 
|  | margin-top: 0; | 
|  | margin-bottom: 3px; | 
|  | padding-top: 0; | 
|  | padding-bottom: 0; | 
|  | } | 
|  |  | 
|  | h2 { | 
|  | font: bold 19px verdana, sans-serif; | 
|  | margin-top: 28px; | 
|  | margin-bottom: 3px; | 
|  | padding-top: 0; | 
|  | padding-bottom: 0; | 
|  | } | 
|  |  | 
|  | /* h3 uses the same metrics as h2; !important keeps the heading font inside the | 
|  |    monospace div.refsect2 sections */ | 
|  | h3 { | 
|  | font: bold 19px verdana, sans-serif !important; | 
|  | margin-top: 28px; | 
|  | margin-bottom: 3px; | 
|  | padding-top: 0; | 
|  | padding-bottom: 0; | 
|  | } | 
|  |  | 
|  | li { | 
|  | font: normal 16px verdana, sans-serif; | 
|  | margin-top: 0; | 
|  | margin-bottom: 18px; | 
|  | padding-top: 0; | 
|  | padding-bottom: 0; | 
|  | } | 
|  |  | 
|  | /* INLINE CLASSES: Font styles selected by class name */ | 
|  | .pdparam { | 
|  | font: italic 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | .term { | 
|  | font: italic 16px verdana, sans-serif; | 
|  | font-weight: normal; | 
|  | } | 
|  |  | 
|  | .type { | 
|  | font: normal 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | .parameter { | 
|  | font-style: italic; | 
|  | } | 
|  |  | 
|  | /* LINKS: The font shorthand is invalid without a font-family (the whole | 
|  |    declaration would be dropped), so style, weight and size are set with | 
|  |    longhand properties and the family is inherited from the surrounding text */ | 
|  | a:link, a:visited { | 
|  | color: blue; | 
|  | text-decoration: none; | 
|  | font-style: normal; | 
|  | font-weight: normal; | 
|  | font-size: 16px; | 
|  | } | 
|  |  | 
|  | a:hover { | 
|  | background-color: #FFFF99; | 
|  | font-style: normal; | 
|  | font-weight: normal; | 
|  | font-size: 16px; | 
|  | } | 
|  |  | 
|  | /* SYNOPSIS: These styles apply to div.funcsynopsis and the table inside it */ | 
|  | div.funcsynopsis { | 
|  | text-align: left; | 
|  | background-color: #e6e6e6; | 
|  | font: normal 16px verdana, sans-serif; | 
|  | padding-top: 10px; | 
|  | padding-bottom: 10px; | 
|  | } | 
|  |  | 
|  | div.funcsynopsis table { | 
|  | border-collapse: separate; | 
|  | font: normal 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | div.funcsynopsis td { | 
|  | background-color: #e6e6e6; | 
|  | border: 0 solid #000; | 
|  | padding: 1px; | 
|  | font: normal 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | div.refsect1 { | 
|  | font-family: verdana, sans-serif; | 
|  | font-size: 16px; | 
|  | } | 
|  |  | 
|  | /* INLINE CODE: Fonts for code, span and b elements selected by class */ | 
|  | code.constant { | 
|  | font: normal 16px courier new, monospace !important; | 
|  | } | 
|  |  | 
|  | span.errorname { | 
|  | font: normal 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | code.function { | 
|  | font: bold 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | b.fsfunc { | 
|  | font: bold 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | code.varname { | 
|  | font: italic 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | code.replaceable { | 
|  | font: italic 16px courier new, monospace; | 
|  | } | 
|  |  | 
|  | code.funcdef { | 
|  | font: normal 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | .citerefentry { | 
|  | font: normal 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | code.fsfunc { | 
|  | font: normal 16px verdana, sans-serif !important; | 
|  | } | 
|  |  | 
|  | /* PARAMETER: This style controls spacing between the terms in Parameter section */ | 
|  | dt { | 
|  | margin-top: 15px; | 
|  | } | 
|  |  | 
|  | /* TABLES: These styles apply to all tables OTHER than the Synopsis and Example tables */ | 
|  | div.refsect1 table { | 
|  | width: 100%; | 
|  | margin-top: 10px; | 
|  | background-color: #FFF; | 
|  | border-collapse: collapse; | 
|  | border-color: #000; | 
|  | border-width: 1px; | 
|  | font: normal 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | div.refsect1 th { | 
|  | border-collapse: collapse; | 
|  | border-color: #000; | 
|  | border-width: 1px; | 
|  | font: bold 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | div.refsect1 td { | 
|  | background-color: #FFF; | 
|  | padding: 5px; | 
|  | vertical-align: text-top; | 
|  | border-collapse: collapse; | 
|  | border-color: #000; | 
|  | border-width: 1px; | 
|  | font: normal 16px verdana, sans-serif; | 
|  | } | 
|  |  | 
|  | div.refsect1 p{ | 
|  | font: normal 16px verdana, sans-serif; | 
|  | margin-top: 8px; | 
|  | margin-bottom: 8px; | 
|  | padding-top: 0; | 
|  | padding-bottom: 0; | 
|  | } | 
|  |  | 
|  |  | 
|  | /* EXAMPLE: These styles apply only to the Example section */ | 
|  | div.refsect2 { | 
|  | font: normal 16px courier new, monospace !important; | 
|  | } | 
|  |  | 
|  | div.refsect2 table { | 
|  | margin-top: 0; | 
|  | background-color: #e6e6e6; | 
|  | width: 100%; | 
|  | border: 0 solid #000; | 
|  | padding: 2px; | 
|  | font: normal 16px courier new, monospace !important; | 
|  | } | 
|  |  | 
|  | /* white-space: pre keeps the line breaks and spacing of the cell text as written */ | 
|  | div.refsect2 td { | 
|  | background-color: #e6e6e6; | 
|  | font: normal 16px courier new, monospace !important; | 
|  | white-space:pre; | 
|  | } | 
|  |  | 
|  | /* COPYRIGHT: This style formats the text of the copyright statement at the bottom of the page */ | 
|  | div.refsect3 { | 
|  | font: normal 11px verdana, sans-serif; | 
|  | margin-top: 50px; | 
|  | margin-bottom: 20px; | 
|  | padding-top: 0; | 
|  | padding-bottom: 0; | 
|  | } | 
|  |  | 
|  | </style> | 
|  | <title>Function Qualifiers</title> | 
|  | <meta name="generator" content="DocBook XSL Stylesheets V1.78.1" /> | 
|  | <meta name="keywords" content="Function Qualifiers" /> | 
|  | </head> | 
|  | <body> | 
|  | <div class="refentry"> | 
|  | <a id="id-1"></a> | 
|  | <div class="titlepage"></div> | 
|  | <div xmlns="" class="refnamediv"> | 
|  | <a xmlns="http://www.w3.org/1999/xhtml" id="FunctionQualifiers"></a> | 
|  | <h1>Function Qualifiers</h1> | 
|  | <p> | 
|  | Qualifiers for kernel functions. | 
|  | </p> | 
|  | </div> | 
|  | <div class="refsect2"> | 
|  | <a id="synopsis"></a> | 
|  | <h3> | 
|  | </h3> | 
|  | <div class="informaltable"> | 
|  | <table border="0"> | 
|  | <colgroup> | 
|  | <col align="left" class="col1" /> | 
|  | </colgroup> | 
|  | <tbody> | 
|  | <tr> | 
|  | <td align="left"> | 
|  | __kernel | 
|  | kernel | 
|  |  | 
|  | __attribute__((vec_type_hint(&lt;type&gt;))) | 
|  | __attribute__((work_group_size_hint(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>))) | 
|  | __attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>))) | 
|  | __attribute__((nosvm)) | 
|  | </td> | 
|  | </tr> | 
|  | </tbody> | 
|  | </table> | 
|  | </div> | 
|  | </div> | 
|  | <div class="refsect1"> | 
|  | <a id="description"></a> | 
|  | <h2>Description</h2> | 
|  | <p> | 
|  | The <code class="function">__kernel</code> (or <code class="function">kernel</code>) qualifier declares a | 
|  | function to be a kernel that can be executed by an application on one or more OpenCL devices. | 
|  | The following rules apply to functions that are declared with this qualifier: | 
|  | </p> | 
|  | <div class="itemizedlist"> | 
|  | <ul class="itemizedlist" style="list-style-type: disc; "> | 
|  | <li class="listitem" style="list-style-type: disc"> | 
|  | <p> | 
|  | It can be executed on the device only | 
|  | </p> | 
|  | </li> | 
|  | <li class="listitem" style="list-style-type: disc"> | 
|  | <p> | 
|  | It can be called by the host | 
|  | </p> | 
|  | </li> | 
|  | <li class="listitem" style="list-style-type: disc"> | 
|  | <p> | 
|  | It is just a regular function call if a <code class="function">__kernel</code> function | 
|  | is called by another kernel function. | 
|  | </p> | 
|  | </li> | 
|  | </ul> | 
|  | </div> | 
|  | <p> | 
|  | Kernel functions with variables declared inside the function with the | 
|  | <a class="citerefentry" href="local.html"><span class="citerefentry"><span class="refentrytitle">__local</span></span></a> | 
|  | or <a class="citerefentry" href="local.html"><span class="citerefentry"><span class="refentrytitle">local</span></span></a> | 
|  | qualifier can be called by the host using appropriate APIs such as | 
|  | <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. | 
|  | </p> | 
|  | <p> | 
|  | The <code class="function">__kernel</code> and <code class="function">kernel</code> names are reserved | 
|  | for use as function qualifiers and shall not be used otherwise. | 
|  | </p> | 
|  | <h4><a id="id-1.5.6"></a>Optional Attribute Qualifiers</h4> | 
|  | <p> | 
|  | The <code class="function">__kernel</code> qualifier can be used with the keyword <a class="citerefentry" href="attribute.html"><span class="citerefentry"><span class="refentrytitle">__attribute__</span></span></a> to | 
|  | declare additional information about the kernel function as described below. | 
|  | </p> | 
|  | <p> | 
|  | The optional <code class="code">__attribute__((vec_type_hint(&lt;type&gt;)))</code> | 
|  | is a hint to the compiler and is intended to be a representation of the computational | 
|  | <span class="emphasis"><em>width</em></span> of the <code class="function">__kernel</code>, and should serve as the | 
|  | basis for calculating processor bandwidth utilization when the compiler is looking to | 
|  | autovectorize the code.  In the <code class="code">__attribute__((vec_type_hint(&lt;type&gt;)))</code> | 
|  | qualifier, &lt;type&gt; is one of the built-in vector types or the constituent scalar | 
|  | element types.  If <code class="code">vec_type_hint (&lt;type&gt;)</code> is not specified, the | 
|  | kernel is assumed to have the <code class="code">__attribute__((vec_type_hint(int)))</code> qualifier. | 
|  | </p> | 
|  | <p> | 
|  | Implicit in autovectorization is the assumption that any libraries called from | 
|  | the <code class="function">__kernel</code> must be recompilable at run time to handle cases | 
|  | where the compiler decides to merge or separate work-items. This probably means that | 
|  | such libraries can never be hard coded binaries or that hard coded binaries must be | 
|  | accompanied either by source or some retargetable intermediate representation. This | 
|  | may be a code security question for some. | 
|  | </p> | 
|  | <p> | 
|  | For example, where the developer specified a width of <span class="type">float4</span>, the compiler | 
|  | should assume that the computation usually uses up to 4 lanes of a float vector, and would | 
|  | decide to merge work-items or possibly even separate one work-item into many threads | 
|  | to better match the hardware capabilities. A conforming implementation is not required | 
|  | to autovectorize code, but shall support the hint. A compiler may autovectorize, even | 
|  | if no hint is provided. If an implementation merges <code class="constant">N</code> work-items | 
|  | into one thread, it is responsible for correctly handling cases where the number of | 
|  | global or local work-items in any dimension modulo <code class="constant">N</code> is not zero. | 
|  | </p> | 
|  | <p> | 
|  | If, for example, a <code class="function">__kernel</code> function is declared with | 
|  | <code class="code">__attribute__(( vec_type_hint (float4)))</code> (meaning that most | 
|  | operations in the <code class="function">__kernel</code> are explicitly vectorized using | 
|  | <span class="type">float4</span>) and the kernel is running using Intel® Advanced Vector | 
|  | Extensions (Intel® AVX), which implements an 8-float-wide vector unit, the | 
|  | autovectorizer might choose to merge two work-items into one thread, running a second | 
|  | work-item in the high half of the 256-bit AVX register. | 
|  | </p> | 
|  | <p> | 
|  | As another example, a Power4 machine has two scalar double precision floating-point | 
|  | units with a 6-cycle deep pipe. An autovectorizer for the Power4 machine might choose | 
|  | to interleave six kernels declared with the <code class="code">__attribute__(( vec_type_hint | 
|  | (double2)))</code> qualifier into one hardware thread, to ensure that there is | 
|  | always 12-way parallelism available to saturate the FPUs. It might also choose to | 
|  | merge 4 or 8 work-items (or some other number) if it concludes that these are better | 
|  | choices, due to resource utilization concerns or some preference for divisibility by 2. | 
|  | </p> | 
|  | <p> | 
|  | The optional <code class="code">__attribute__((work_group_size_hint(<span class="emphasis"><em>X</em></span>, | 
|  | <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code> is a hint to the | 
|  | compiler and is intended to specify the work-group size that may be used, i.e., the value | 
|  | most likely to be specified by the <code class="varname">local_work_size</code> argument to | 
|  | <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. | 
|  | For example, <code class="code">__attribute__((work_group_size_hint(1, 1, 1)))</code> is | 
|  | a hint to the compiler that the kernel will most likely be executed with a work-group | 
|  | size of 1. | 
|  | </p> | 
|  | <p> | 
|  | The optional <code class="code">__attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, | 
|  | <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code> is the work-group | 
|  | size that must be used as the <code class="varname">local_work_size</code> argument to | 
|  | <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>.  This | 
|  | allows the compiler to optimize the generated code appropriately for this kernel. | 
|  | </p> | 
|  | <p> | 
|  | If <code class="varname">Z</code> is one, the <code class="varname">work_dim</code> argument to | 
|  | <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> | 
|  | can be 2 or 3. If <code class="varname">Y</code> and <code class="varname">Z</code> | 
|  | are one, the <code class="varname">work_dim</code> argument to | 
|  | <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> | 
|  | can be 1, 2 or 3. | 
|  | </p> | 
|  | <p> | 
|  | The optional <code class="code">__attribute__((nosvm))</code> | 
|  | qualifier can be used with a pointer variable to | 
|  | inform the compiler that the pointer does not | 
|  | refer to a shared virtual memory region. | 
|  | </p> | 
|  | </div> | 
|  | <div class="refsect2"> | 
|  | <a id="example1"></a> | 
|  | <h3> | 
|  | Example | 
|  | </h3> | 
|  | <div class="informaltable"> | 
|  | <table border="0"> | 
|  | <colgroup> | 
|  | <col align="left" class="col1" /> | 
|  | </colgroup> | 
|  | <tbody> | 
|  | <tr> | 
|  | <td align="left"> | 
|  | // autovectorize assuming float4 as the | 
|  | // basic computation width | 
|  | __kernel __attribute__((vec_type_hint(float4))) | 
|  | void foo( __global float4 *p ) { .... } | 
|  |  | 
|  | // autovectorize assuming double as the | 
|  | // basic computation width | 
|  | __kernel __attribute__((vec_type_hint(double))) | 
|  | void foo( __global float4 *p ){ .... } | 
|  |  | 
|  | // autovectorize assuming int (default) | 
|  | // as the basic computation width | 
|  | __kernel | 
|  | void foo( __global float4 *p ){ .... } | 
|  | </td> | 
|  | </tr> | 
|  | </tbody> | 
|  | </table> | 
|  | </div> | 
|  | </div> | 
|  | <div class="refsect1"> | 
|  | <a id="specification"></a> | 
|  | <h2>Specification</h2> | 
|  | <p> | 
|  | <img src="pdficon_small1.gif" alt="PDF" /> | 
|  |  | 
|  | <a href="https://www.khronos.org/registry/cl/specs/opencl-2.0-openclc.pdf#page=47" target="OpenCL Spec">OpenCL Specification</a> | 
|  | </p> | 
|  | </div> | 
|  | <div class="refsect1"> | 
|  | <a id="seealso"></a> | 
|  | <h2>Also see</h2> | 
|  | <p> | 
|  | <a class="citerefentry" href="attribute.html"><span class="citerefentry"><span class="refentrytitle">__attribute__</span></span></a>, | 
|  | <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>, | 
|  | <a class="citerefentry" href="qualifiers.html"><span class="citerefentry"><span class="refentrytitle">qualifiers</span></span></a> | 
|  | </p> | 
|  | </div> | 
|  | <div xmlns="" class="refsect3" lang="en" xml:lang="en"><a xmlns="http://www.w3.org/1999/xhtml" id="Copyright"></a><h4 xmlns="http://www.w3.org/1999/xhtml"></h4><img xmlns="http://www.w3.org/1999/xhtml" src="KhronosLogo.jpg" alt="Khronos Group logo" /><p xmlns="http://www.w3.org/1999/xhtml"></p>Copyright © 2007-2013 The Khronos Group Inc. | 
|  | Permission is hereby granted, free of charge, to any person obtaining a | 
|  | copy of this software and/or associated documentation files (the | 
|  | "Materials"), to deal in the Materials without restriction, including | 
|  | without limitation the rights to use, copy, modify, merge, publish, | 
|  | distribute, sublicense, and/or sell copies of the Materials, and to | 
|  | permit persons to whom the Materials are furnished to do so, subject to | 
|  | the condition that this copyright notice and permission notice shall be included | 
|  | in all copies or substantial portions of the Materials. | 
|  | </div> | 
|  | </div> | 
|  | </body> | 
|  | </html> |