| <?xml version="1.0" encoding="UTF-8"?> |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "xhtml1-transitional.dtd"> |
| <!-- saved from url=(0013)about:internet --> |
| <?xml-stylesheet type="text/xsl" href="mathml.xsl"?><html xmlns="http://www.w3.org/1999/xhtml" xmlns:pref="http://www.w3.org/2002/Math/preference" xmlns:xlink="http://www.w3.org/1999/xlink" pref:renderer="mathplayer-dl"> |
| <head> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> |
| <style xmlns="" type="text/css"> |
| /* This style sets a margin around the entire page */ |
| html, body { |
| margin: 10px; |
| } |
| |
| p { |
| font: normal 16px verdana, sans-serif; |
| margin: 0; |
| padding-bottom:12px; |
| } |
| |
| h1 { |
| font: bold 25px verdana, sans-serif; |
| margin-top: 0; |
| margin-bottom: 3px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| h2 { |
| font: bold 19px verdana, sans-serif; |
| margin-top: 28px; |
| margin-bottom: 3px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| h3 { |
| font: bold 19px verdana, sans-serif !important; |
| margin-top: 28px; |
| margin-bottom: 3px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| li { |
| font: normal 16px verdana, sans-serif; |
| margin-top: 0; |
| margin-bottom: 18px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| .pdparam { |
| font: italic 16px verdana, sans-serif; |
| } |
| |
| .term { |
| font: italic 16px verdana, sans-serif; |
| font-weight: normal; |
| } |
| |
| .type { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| .parameter { |
| font-style: italic; |
| } |
| |
| a:link, a:visited { |
| color: blue; |
| text-decoration: none; |
| font: normal 16px; |
| } |
| |
| a:hover { |
| background-color: #FFFF99; |
| font: normal 16px; |
| } |
| |
| div.funcsynopsis { |
| text-align: left; |
| background-color: #e6e6e6; |
| font: normal 16px verdana, sans-serif; |
| padding-top: 10px; |
| padding-bottom: 10px; |
| } |
| |
| div.funcsynopsis table { |
| border-collapse: separate; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.funcsynopsis td { |
| background-color: #e6e6e6; |
| border: 0 solid #000; |
| padding: 1px; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 { |
| font-family: verdana, sans-serif; |
| font-size: 16px; |
| } |
| |
| code.constant { |
| font: normal 16px courier new, monospace !important; |
| } |
| |
| span.errorname { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| code.function { |
| font: bold 16px verdana, sans-serif !important; |
| } |
| |
| b.fsfunc { |
| font: bold 16px verdana, sans-serif !important; |
| } |
| |
| code.varname { |
| font: italic 16px verdana, sans-serif; |
| } |
| |
| code.replaceable { |
| font: italic 16px courier new, monospace; |
| } |
| |
| code.funcdef { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| .citerefentry { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| .parameter { |
| font-style: italic; |
| } |
| |
| code.fsfunc { |
| font: normal 16px verdana, sans-serif !important; |
| } |
| |
| /* PARAMETER: This style controls spacing between the terms in Parameter section */ |
| dt { |
| margin-top: 15px; |
| } |
| |
| /* TABLES: These styles apply to all tables OTHER than the Synopsis and Example tables */ |
| div.refsect1 table { |
| width: 100%; |
| margin-top: 10px; |
| background-color: #FFF; |
| border-collapse: collapse; |
| border-color: #000; |
| border-width: 1px; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 th { |
| border-collapse: collapse; |
| border-color: #000; |
| border-width: 1px; |
| font: bold 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 td { |
| background-color: #FFF; |
| padding: 5px; |
| vertical-align: text-top; |
| border-collapse: collapse; |
| border-color: #000; |
| border-width: 1px; |
| font: normal 16px verdana, sans-serif; |
| } |
| |
| div.refsect1 p{ |
| font: normal 16px verdana, sans-serif; |
| margin-top: 8px; |
| margin-bottom: 8px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| |
| /* EXAMPLE: These styles apply only to the Example section */ |
| div.refsect2 { |
| font: normal 16px courier new, monospace !important; |
| } |
| |
| div.refsect2 table { |
| margin-top: 0; |
| background-color: #e6e6e6; |
| width: 100%; |
| border: 0 solid #000; |
| padding: 2px; |
| font: normal 16px courier new, monospace !important; |
| } |
| |
| div.refsect2 td { |
| background-color: #e6e6e6; |
| font: normal 16px courier new, monospace !important; |
| white-space:pre; |
| } |
| |
| /* COPYRIGHT: This style formats the text of the copyright statement at the bottom of the page */ |
| div.refsect3 { |
| font: normal 11px verdana, sans-serif; |
| margin-top: 50px; |
| margin-bottom: 20px; |
| padding-top: 0; |
| padding-bottom: 0; |
| } |
| |
| </style> |
| <title>Function Qualifiers</title> |
| <meta name="generator" content="DocBook XSL Stylesheets V1.79.1" /> |
| <meta name="keywords" content=" Function Qualifiers " /> |
| </head> |
| <body> |
| <div class="refentry"> |
| <a id="idm1"></a> |
| <div class="titlepage"></div> |
| <div xmlns="" class="refnamediv"> |
| <a xmlns="http://www.w3.org/1999/xhtml" id="Function Qualifiers"></a> |
| <h1>Function Qualifiers</h1> |
| <p> |
| Qualifiers for kernel functions. |
| </p> |
| </div> |
| <div class="refsect2"> |
| <a id="synopsis"></a> |
| <h3> |
| </h3> |
| <div class="informaltable"> |
| <table class="informaltable" border="0"> |
| <colgroup> |
| <col align="left" class="col1" /> |
| </colgroup> |
| <tbody> |
| <tr> |
| <td align="left"> |
| __kernel |
| kernel |
| |
| __attribute__((vec_type_hint(&lt;type<span class="emphasis"><em>n</em></span>&gt;))) |
| __attribute__((work_group_size_hint(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>))) |
| __attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>))) |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="refsect1"> |
| <a id="description"></a> |
| <h2>Description</h2> |
| <p> |
| The <code class="function">__kernel</code> (or <code class="function">kernel</code>) qualifier |
| declares a function to be a kernel that can be |
| executed by an application on one or more OpenCL devices. |
| The following rules apply to functions that |
| are declared with this qualifier: |
| </p> |
| <div class="itemizedlist"> |
| <ul class="itemizedlist" style="list-style-type: disc; "> |
| <li class="listitem" style="list-style-type: disc"> |
| <p> |
| It can be executed on the device only |
| </p> |
| </li> |
| <li class="listitem" style="list-style-type: disc"> |
| <p> |
| It can be called by the host |
| </p> |
| </li> |
| <li class="listitem" style="list-style-type: disc"> |
| <p> |
| It is just a regular function call if a <code class="function">__kernel</code> |
| function is called by another kernel function. |
| </p> |
| </li> |
| </ul> |
| </div> |
| <p> |
| The <code class="function">__kernel</code> qualifier can be used with the keyword |
| <a class="citerefentry" href="attribute.html"><span class="citerefentry"><span class="refentrytitle">__attribute__</span></span></a> to declare additional |
| information about the kernel function as described below. |
| </p> |
| <p> |
| The optional |
| <code class="constant">__attribute__((vec_type_hint(&lt;type<span class="emphasis"><em>n</em></span>&gt;)))</code> |
| is a hint to the |
| compiler and is intended to be a representation of the computational |
| <span class="emphasis"><em>width</em></span> of the |
| <code class="function">__kernel</code>, |
| and should serve as the basis for calculating processor |
| bandwidth utilization when the compiler |
| is looking to autovectorize the code. |
| <code class="constant">vec_type_hint (&lt;type<span class="emphasis"><em>n</em></span>&gt;)</code> |
| shall be one of the built-in scalar or vector data types described in |
| tables 6.1 and 6.2. |
| If |
| <code class="constant">vec_type_hint (&lt;type<span class="emphasis"><em>n</em></span>&gt;)</code> |
| is not specified, the default value is <span class="type">int</span>. |
| </p> |
| <p> |
| The |
| <code class="constant">__attribute__((vec_type_hint(int)))</code> |
| is the default type. |
| </p> |
| <p> |
| For example, where the developer specified a width of <span class="type">float4</span>, |
| the compiler should assume |
| that the computation usually uses up to 4 lanes of a float vector, |
| and would decide to merge work-items or possibly even separate |
| one work-item into many threads to better match the hardware |
| capabilities. A conforming implementation is not required |
| to autovectorize code, but shall |
| support the hint. A compiler may autovectorize, even if no |
| hint is provided. If an |
| implementation merges <code class="constant">N</code> work-items into one thread, |
| it is responsible for correctly handling |
| cases where the number of global or local work-items |
| in any dimension modulo <code class="constant">N</code> is not zero. |
| </p> |
| <p> |
| If, for example, a <code class="function">__kernel</code> is declared with |
| <code class="constant">__attribute__(( vec_type_hint (float4)))</code> |
| (meaning that most operations in the <code class="function">__kernel</code> |
| are explicitly vectorized using |
| <span class="type">float4</span>) and the kernel is running using |
| Intel® Advanced Vector Instructions |
| (Intel® AVX) |
| which implements an 8-float-wide vector unit, |
| the autovectorizer might choose to merge two |
| work-items to one thread, running a second |
| work-item in the high half of the 256-bit AVX |
| register. |
| </p> |
| <p> |
| As another example, a Power4 machine has two scalar |
| double precision floating-point units with |
| a 6-cycle deep pipe. An autovectorizer for the |
| Power4 machine might choose to interleave six |
| <code class="constant">__attribute__(( vec_type_hint (double2))) __kernel</code>s |
| into one hardware |
| thread, to ensure that there is always 12-way |
| parallelism available to saturate the FPUs. It might |
| also choose to merge 4 or 8 work-items (or some |
| other number) if it concludes that these are |
| better choices, due to resource utilization |
| concerns or some preference for divisibility by 2. |
| </p> |
| <p> |
| The optional |
| <code class="constant">__attribute__((work_group_size_hint(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code> |
| is a hint to the |
| compiler and is intended to specify the work-group size |
| that may be used, i.e. the value most likely to |
| be specified by the <code class="varname">local_work_size</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. |
| For example, the |
| <code class="constant">__attribute__((work_group_size_hint(1, 1, 1)))</code> |
| is a hint to the compiler |
| that the kernel will most likely be executed |
| with a work-group size of 1. |
| </p> |
| <p> |
| The optional |
| <code class="constant">__attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code> |
| is the work-group size that must be used as the |
| <code class="varname">local_work_size</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a>. |
| This allows the compiler to optimize the generated |
| code appropriately for this kernel. The optional |
| <code class="constant">__attribute__((reqd_work_group_size(<span class="emphasis"><em>X</em></span>, <span class="emphasis"><em>Y</em></span>, <span class="emphasis"><em>Z</em></span>)))</code>, |
| if specified, must be (1, 1, 1) if the kernel is executed via |
| <a class="citerefentry" href="clEnqueueTask.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueTask</span></span></a>. |
| </p> |
| <p> |
| If <code class="varname">Z</code> is one, the <code class="varname">work_dim</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> |
| can be 2 or 3. If <code class="varname">Y</code> and <code class="varname">Z</code> are |
| one, the <code class="varname">work_dim</code> argument to |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> |
| can be 1, 2 or 3. |
| </p> |
| </div> |
| <div class="refsect1"> |
| <a id="notes"></a> |
| <h2>Notes</h2> |
| <p> |
| Implicit in autovectorization is the assumption that |
| any libraries called from the |
| __kernel must be recompilable at |
| run time to handle cases where the compiler decides to |
| merge or separate work-items. This probably means that such |
| libraries can never be hard coded binaries or that hard |
| coded binaries must be accompanied either by source or some |
| retargetable intermediate representation. This may be |
| a code security question for some. |
| |
| </p> |
| </div> |
| <div class="refsect2"> |
| <a id="example1"></a> |
| <h3> |
| Example |
| </h3> |
| <div class="informaltable"> |
| <table class="informaltable" border="0"> |
| <colgroup> |
| <col align="left" class="col1" /> |
| </colgroup> |
| <tbody> |
| <tr> |
| <td align="left"> |
| // autovectorize assuming float4 as the |
| // basic computation width |
| __kernel __attribute__((vec_type_hint(float4))) |
| void foo( __global float4 *p ) { .... |
| |
| // autovectorize assuming double as the |
| // basic computation width |
| __kernel __attribute__((vec_type_hint(double))) |
| void foo( __global float4 *p ){ .... |
| |
| // autovectorize assuming int (default) |
| // as the basic computation width |
| __kernel |
| void foo( __global float4 *p ){ .... |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="refsect1"> |
| <a id="specification"></a> |
| <h2>Specification</h2> |
| <p> |
| <img src="pdficon_small1.gif" alt="" /> |
| |
| <a href="http://www.khronos.org/registry/cl/specs/opencl-1.0.pdf#page=154" target="OpenCL Spec">OpenCL Specification</a> |
| </p> |
| </div> |
| <div class="refsect1"> |
| <a id="seealso"></a> |
| <h2>Also see</h2> |
| <p> |
| <a class="citerefentry" href="clEnqueueNDRangeKernel.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueNDRangeKernel</span></span></a> |
| <a class="citerefentry" href="clEnqueueTask.html"><span class="citerefentry"><span class="refentrytitle">clEnqueueTask</span></span></a> |
| </p> |
| </div> |
| <div xmlns="" class="refsect3" lang="en" xml:lang="en"><a xmlns="http://www.w3.org/1999/xhtml" id="Copyright"></a><h4 xmlns="http://www.w3.org/1999/xhtml"></h4><img xmlns="http://www.w3.org/1999/xhtml" src="KhronosLogo.jpg" alt="Khronos Group logo" /><p xmlns="http://www.w3.org/1999/xhtml"></p>Copyright © 2007-2009 The Khronos Group Inc. |
| Permission is hereby granted, free of charge, to any person obtaining a |
| copy of this software and/or associated documentation files (the |
| "Materials"), to deal in the Materials without restriction, including |
| without limitation the rights to use, copy, modify, merge, publish, |
| distribute, sublicense, and/or sell copies of the Materials, and to |
| permit persons to whom the Materials are furnished to do so, subject to |
| the condition that this copyright notice and permission notice shall be included |
| in all copies or substantial portions of the Materials. |
| </div> |
| </div> |
| </body> |
| </html> |