Merge pull request #5 from bashbaug/cl_intel_planar_yuv

cl_intel_planar_yuv
diff --git a/api/cl.xml b/api/cl.xml
index 375183a..39075e7 100644
--- a/api/cl.xml
+++ b/api/cl.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <registry>
     <comment>
-Copyright (c) 2013-2016 The Khronos Group Inc.
+Copyright (c) 2013-2017 The Khronos Group Inc.
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and/or associated documentation files (the
@@ -322,7 +322,8 @@
         <enum value="(1 << 11)"   name="CL_MEM_SVM_ATOMICS"/>
         <enum value="(1 << 12)"   name="CL_MEM_KERNEL_READ_AND_WRITE"/>
             <unused start="1<<13" end="1<<23"/>
-            <!-- 1 << (24,25) reserved for Intel (bug 16049) -->
+        <enum value="(1 << 24)"   name="CL_MEM_NO_ACCESS_INTEL"/>
+        <enum value="(1 << 25)"   name="CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL"/>
         <enum value="(1 << 26)"   name="CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG"/>
         <enum value="(1 << 27)"   name="CL_MEM_USE_CACHED_CPU_MEMORY_IMG"/>
             <!-- 1 << 28 reserved for IMG (bug 12428) -->
@@ -1061,7 +1062,9 @@
         <enum value="0x4108"      name="CL_DEVICE_SUB_GROUP_SIZES_INTEL"/>
         <enum value="0x4109"      name="CL_KERNEL_SPILL_MEM_SIZE_INTEL"/>
         <enum value="0x410A"      name="CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL"/>
-            <unused start="0x410B" end="0x410F"/>
+            <unused start="0x410B" end="0x410D"/>
+        <enum value="0x410E"      name="CL_NV12_INTEL"/>
+            <unused start="0x410F" end="0x410F"/>
     </enums>
 
     <enums namespace="CL" start="0x4110" end="0x411F" vendor="Qualcomm" comment="Per Bug 13929">
@@ -1081,7 +1084,9 @@
     </enums>
 
     <enums namespace="CL" start="0x4170" end="0x417F" vendor="Intel" comment="Per bug 16067.">
-            <unused start="0x4170" end="0x417F"/>
+            <unused start="0x4170" end="0x417D"/>
+        <enum value="0x417E"      name="CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL"/>
+        <enum value="0x417F"      name="CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL"/>
     </enums>
 
     <enums namespace="CL" start="0x4180" end="0x4FFF" comment="Reserved for vendor extensions. Allocate in groups of 16.">
@@ -1148,5 +1153,6 @@
     <extension number="46" name="cl_img_yuv_image"/>
     <extension number="47" name="cl_intel_driver_diagnostics"/>
     <extension number="48" name="cl_intel_subgroups_short"/>
+    <extension number="49" name="cl_intel_planar_yuv"/>
         <!-- Next free extension number is assigned sequentially here -->
 </registry>
diff --git a/extensions/intel/cl_intel_planar_yuv.txt b/extensions/intel/cl_intel_planar_yuv.txt
new file mode 100644
index 0000000..9108d34
--- /dev/null
+++ b/extensions/intel/cl_intel_planar_yuv.txt
@@ -0,0 +1,545 @@
+Name String

+

+    cl_intel_planar_yuv

+

+Contributors

+

+    Dan Petre, Intel

+    Krzysztof Laskowski, Intel

+    Bartosz Sochacki, Intel

+    Ben Ashbaugh, Intel

+    Biju George, Intel

+

+Contact

+   

+    Krzysztof Laskowski, Intel (krzysztof.laskowski 'at' intel.com)

+

+Version

+

+    Version 1, November 22, 2016

+

+Number

+

+    OpenCL Extension #49

+

+Status

+

+    Final Draft

+

+Extension Type

+

+    OpenCL platform extension

+

+Dependencies

+

+    OpenCL 1.2 is required. The concept of using clCreateImage to create a 2D image from

+    another 2D image object is based on OpenCL 2.0. This extension is written against

+    revision 29 of the OpenCL 2.0 specification and revision 30 of the OpenCL C 2.0

+    specification.

+

+Overview

+

+    The purpose of this extension is to provide OpenCL support for the Planar YUV (YCbCr)

+    image formats. NV12 format must be supported; support for other Planar YUV formats

+    that may be defined in this extension is optional.

+

+    The extension introduces two new cl_mem_flags:

+    - CL_MEM_NO_ACCESS_INTEL, which should be used together with image formats that the

+      device cannot read from or write to in OpenCL kernels, but which are still useful

+      in other use cases.

+    - CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, which may be used to relax the memory access

+      rights specified in cl_mem_flags at memory object creation time and allow the

+      contents of the underlying data storage to be accessed and modified in an

+      unrestricted way, e.g. by creating another memory object from that memory object

+      or by using dedicated device mechanisms.

+

+New Procedures and Functions

+

+    None

+

+New Tokens

+

+    Accepted as <cl_mem_flags>:

+

+    CL_MEM_NO_ACCESS_INTEL                                 (1 << 24)

+    CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL                 (1 << 25)

+

+    Accepted as <image_channel_order> of <cl_image_format>:

+

+    CL_NV12_INTEL                                          0x410E

+

+    Accepted as arguments passed to clGetDeviceInfo:

+

+    CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL                   0x417E

+    CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL                  0x417F

+

+New Types

+

+    None    

+

+Additions to Chapter 4 of the OpenCL 2.0 Specification:

+

+    In section 4.2 "Querying Devices" modify the description of function clGetDeviceInfo:

+

+    Table 4.3 must be extended to include the following enumeration constants:

+

++---------------------------------------+-------------+---------------------------------+

+| cl_device_info                        | Return Type | Description                     |

++---------------------------------------+-------------+---------------------------------+

+| CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL  | size_t      | Max width of Planar YUV image   |

+|                                       |             | in pixels.                      |

++---------------------------------------+-------------+---------------------------------+

+| CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL | size_t      | Max height of Planar YUV image  |

+|                                       |             | in pixels.                      |

++---------------------------------------+-------------+---------------------------------+

+

+Additions to Chapter 5 of the OpenCL 2.0 Specification:

+

+    In section 5.2.1 "Creating Buffer Objects", extend table 5.3 "List of supported

+    <cl_mem_flags> values" with:

+

++-------------------------+-------------------------------------------------------------+

+|      cl_mem_flags       |                       Description                           |

++-------------------------+-------------------------------------------------------------+

+|                         | This flag specifies that the device will not read or write  |

+| CL_MEM_NO_ACCESS_INTEL  | to the memory object.                                       |

+|                         | CL_MEM_READ_WRITE or                                        |

+|                         | CL_MEM_WRITE_ONLY or                                        |

+|                         | CL_MEM_READ_ONLY and                                        |

+|                         | CL_MEM_NO_ACCESS_INTEL                                      |

+|                         | are mutually exclusive.                                     |

++-------------------------+-------------------------------------------------------------+

+|                         | This flag indicates that the host and device access flags   |

+| CL_MEM_ACCESS_FLAGS_    | used together with this flag do not strictly prohibit       |

+| UNRESTRICTED_INTEL      | reading or modifying the contents of this memory object.    |

+|                         | Memory objects created from this memory object may          |

+|                         | re-specify the host and device access capabilities of the   |

+|                         | created memory object with new access capabilities, and any |

+|                         | mechanisms provided by the implementation which explicitly  |

+|                         | support certain operations on memory objects of this type   |

+|                         | are allowed to access this memory object without any        |

+|                         | restrictions.                                               |

++-------------------------+-------------------------------------------------------------+

+

+    In section 5.3.1 "Creating Image Objects", in the table that lists the image types

+    change the CL_MEM_OBJECT_IMAGE2D entry description to:

+

++-----------------------+-------------------------------------------+

+|      Image Type       | Size of buffer that host_ptr points to    |

++-----------------------+-------------------------------------------+

+|                       | >= image_row_pitch * image_height +       |

+| CL_MEM_OBJECT_IMAGE2D |    image_row_pitch * image_height / 2     |

+|                       |    for images with CL_NV12_INTEL          |

+|                       |    image_channel_order.                   |

++-----------------------+-------------------------------------------+

+

+    Extend the description of clCreateImage with:

+

+    "<flags> is a bit-field [...]. If <image_channel_order> of <image_format> is a Planar

+     YUV format then <flags> must include CL_MEM_HOST_NO_ACCESS.

+     [...]

+     If CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL is specified in the memory access

+     qualifier values associated with <mem_object> then <flags> can specify any host and

+     device access capabilities regardless of the memory access qualifier values

+     associated with <mem_object>."

+

+    "A 2D image can be created from another 2D image object [...].

+     Creating a 2D image from a Planar YUV image object allows users to create a new

+     image object that shares the Planar YUV image object's data store but represents

+     only the specified plane. The restrictions are:

+     - all the values specified in image_desc except for mem_object must match the image

+       descriptor information associated with mem_object, with exception where mem_object

+       is a Planar YUV image object then image_width and image_height are ignored and

+       derived from the mem_object and image_depth specifies the index of the target

+       plane the image will be created against and must be one of the following:

+

+     +---------------------+-------+-----------------------+

+     | image_channel_order | plane | image_depth specified |

+     | of mem_object       |       | in image_desc         |

+     +---------------------+-------+-----------------------+

+     | CL_NV12_INTEL       | Y     | 0                     |

+     | CL_NV12_INTEL       | UV    | 1                     |

+     +---------------------+-------+-----------------------+

+

+     The derived values of image_width and image_height can be later queried using

+     clGetImageInfo.

+     - the channel data type specified in image_format must match the channel data type

+       associated with mem_object with exception to the following list of supported

+       combinations:

+

+     +---------------------+---------------------------+

+     | image_channel_order | image_channel_data_type   |

+     | of mem_object       | specified in image_format |

+     +---------------------+---------------------------+

+     | CL_NV12_INTEL       | CL_UNORM_INT8             |

+     | CL_NV12_INTEL       | CL_UNSIGNED_INT8          |

+     +---------------------+---------------------------+

+

+     - the channel order specified in image_format must match the channel order

+       associated with mem_object with exception to the following list of supported

+       combinations:

+

+     +---------------------------+-------------------------+

+     | image_channel_order       | image_channel_order of  |

+     | specified in image_format | mem_object              |

+     +---------------------------+-------------------------+

+     | CL_sBGRA                  | CL_BGRA                 |

+     | CL_BGRA                   | CL_sBGRA                |

+     | CL_sRGBA                  | CL_RGBA                 |

+     | CL_RGBA                   | CL_sRGBA                |

+     | CL_sRGB                   | CL_RGB                  |

+     | CL_RGB                    | CL_sRGB                 |

+     | CL_sRGBx                  | CL_RGBx                 |

+     | CL_RGBx                   | CL_sRGBx                |

+     | CL_DEPTH                  | CL_R                    |

+     +---------------------------+-------------------------+

+

+     If mem_object is a Planar YUV image object the channel order specified in image

+     format must be one of the following:

+

+     +---------------------------+-------------------------+-------+------------------+

+     | image_channel_order       | image_channel_order of  | plane | channel mappings |

+     | specified in image_format | mem_object              |       |                  |

+     +---------------------------+-------------------------+-------+------------------+

+     | CL_R                      | CL_NV12_INTEL           | Y     | R = Y            |

+     | CL_RG                     | CL_NV12_INTEL           | UV    | R = U, G = V     |

+     +---------------------------+-------------------------+-------+------------------+

+

+     NOTE:

+     Concurrent reading from or writing to both a Planar YUV image object and an image

+     object created from the Planar YUV image object is undefined.

+

+     Reading from or writing to an image created from a Planar YUV image and then reading

+     from or writing to the Planar YUV image in a kernel even if appropriate

+     synchronization operations (such as a barrier) are performed between the reads or

+     writes is undefined. Similarly, reading from and writing to the Planar YUV image and

+     then reading from or writing to the image created from the Planar YUV image with

+     appropriate synchronization between the reads or writes is undefined."

+

+    Modify the section about the returned error values:

+

+    "clCreateImage returns a valid non-zero image object created and <errcode_ret> is

+     set to CL_SUCCESS if the image object is created successfully. Otherwise, it returns

+     a NULL value with one of the following error values returned in <errcode_ret>:

+     [...]

+     * CL_INVALID_VALUE if an image is being created from another memory object (buffer

+       or image) under one of the following circumstances: 1) <mem_object> was created

+       with CL_MEM_WRITE_ONLY and <flags> specifies CL_MEM_READ_WRITE or

+       CL_MEM_READ_ONLY, 2) <mem_object> was created with CL_MEM_READ_ONLY and <flags>

+       specifies CL_MEM_READ_WRITE or CL_MEM_WRITE_ONLY, 3) <mem_object> was created with

+       CL_MEM_NO_ACCESS_INTEL and <flags> specifies CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY

+       or CL_MEM_READ_WRITE, 4) <flags> specifies CL_MEM_USE_HOST_PTR or

+       CL_MEM_ALLOC_HOST_PTR or CL_MEM_COPY_HOST_PTR. However, restrictions 1), 2) and 3)

+       described above do not apply if <mem_object> was created with <flag>

+       CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL.

+     * CL_INVALID_VALUE if an image is being created from another memory object (buffer

+       or image) and <mem_object> was created with CL_MEM_HOST_WRITE_ONLY and <flags>

+       specifies CL_MEM_HOST_READ_ONLY, or if <mem_object> was created with

+       CL_MEM_HOST_READ_ONLY and <flags> specifies CL_MEM_HOST_WRITE_ONLY, or if

+       <mem_object> was created with CL_MEM_HOST_NO_ACCESS and <flags> specifies

+       CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_WRITE_ONLY. However, these restrictions do

+       not apply if <mem_object> was created with <flag>

+       CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL."

+

+    Modify the section about the memory layout of a 2D image:

+

+    "For a 2D RGB image, the image data specified by <host_ptr> is stored as a linear

+     sequence of adjacent scanlines. Each scanline is a linear sequence of image

+     elements. For 2D Planar YUV images see section 5.3.1.3 on the "Memory Layout for

+     Planar YUV Images"."

+

+    Extend table 5.6 "List of supported Image Channel Order Values" with:

+

+     +------------------------------------------------------------------+

+     | Enum values that can be specified in channel_order               |

+     +------------------------------------------------------------------+

+     | CL_NV12_INTEL. Images created with this <image_channel_order>    |

+     | value are Planar YUV images. CL_NV12_INTEL <image_channel_order> |

+     | can only be used if <image_channel_data_type> is CL_UNORM_INT8   |

+     | and <image_type> is CL_MEM_OBJECT_IMAGE2D. Number of channels in |

+     | a CL_NV12_INTEL image equals 3 and the element size refers to    |

+     | the Y plane and equals 1.                                        |

+     +------------------------------------------------------------------+

+

+    In section 5.3.1.2 "Image Descriptor", extend the description of <image_width>,

+    <image_height> and <image_depth>:

+

+    "<image_width> is the width of the image in pixels. [...] If this is a CL_NV12_INTEL

+     image, <image_width> must be a multiple of 4 and less than or equal to

+     CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL.

+

+     <image_height> is the height of the image in pixels. [...] If this is a

+     CL_NV12_INTEL image, <image_height> must be a multiple of 4 and less than or equal

+     to CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL.

+

+     <image_depth> is the depth of the image in pixels. [...] If this is a CL_NV12_INTEL

+     image, <image_depth> must be 1."

+

+    Add section 5.3.1.3 "Memory Layout for Planar YUV Images":

+

+    "In Planar YUV formats the Y, U and V components can all be stored as separate planes

+     or the U and V components can be stored combined as one plane. There are various

+     flavors of Planar YUV formats, differing in the number of planes, order, layout and

+     the subsampling methods used for the U and V components. See below for explanation

+     of the memory layout of CL_NV12_INTEL format. 

+

+     CL_NV12_INTEL image is laid out in memory in the form of two planes, Y (luma) plane

+     and an interleaved UV (chroma) plane:

+

+        <----    WIDTH   ---->

+        +------------------------+ ^

+        |YYYYYYYYYYYYYYYYYYYY^^^^| |

+        |YYYYYYYYYYYYYYYYYYYY^^^^| H

+        |YYYYYYYYYYYYYYYYYYYY^^^^| E

+        |YYYYYYYYYYYYYYYYYYYY^^^^| I  Luma plane (Y)

+        |YYYYYYYYYYYYYYYYYYYY^^^^| G

+        |YYYYYYYYYYYYYYYYYYYY^^^^| H

+        |YYYYYYYYYYYYYYYYYYYY^^^^| T

+        |YYYYYYYYYYYYYYYYYYYY^^^^| |

+        +------------------------+ v

+        |UVUVUVUVUVUVUVUVUVUV^^^^|

+        |UVUVUVUVUVUVUVUVUVUV^^^^|    Chroma plane (UV)

+        |UVUVUVUVUVUVUVUVUVUV^^^^|

+        |UVUVUVUVUVUVUVUVUVUV^^^^|

+        +------------------------+

+        <----    ROW PITCH    --->

+

+     The luma plane contains 8 bit Y samples in case of CL_NV12_INTEL format, one for

+     each pixel:

+

+               +-----+-----+-----+-----+--

+               | Y00 | Y01 | Y02 | Y03 |   ...

+               +-----+-----+-----+-----+--

+               | Y10 | Y11 | Y12 | Y13 |   ...

+               +-----+-----+-----+-----+--

+               | Y20 | Y21 | Y22 | Y23 |   ...

+               +-----+-----+-----+-----+--

+               | ... | ... | ... | ... |

+     Sample ->    0     1     2     3

+     Offset

+

+     The chroma plane contains interleaved 8 bit UV 2x2 samples in case of CL_NV12_INTEL

+     format. The chroma components are sampled only once for every other pixel and for

+     every other row of pixels:

+

+               +-----+-----+-----+-----+--

+               | U00 | V00 | U02 | V02 |   ...

+               +-----+-----+-----+-----+--

+               | U20 | V20 | U22 | V22 |   ...

+               +-----+-----+-----+-----+--

+               | ... | ... | ... | ... |

+     Sample ->    0     1     2     3

+     Offset

+

+     Using the above notation we can represent pixels like this:

+

+               +-----+-----+-----+-----+--

+               | P00 | P01 | P02 | P03 |   ...

+               +-----+-----+-----+-----+--

+               | P10 | P11 | P12 | P13 |   ...

+               +-----+-----+-----+-----+--

+               | P20 | P21 | P22 | P23 |   ...

+               +-----+-----+-----+-----+--

+               | ... | ... | ... | ... |

+

+     where:

+         P00 = Y00U00V00     P01 = Y01U00V00

+         P10 = Y10U00V00     P11 = Y11U00V00

+         ...

+         P20 = Y20U20V20     P21 = Y21U20V20

+         ...

+         P30 = Y30U20V20     P31 = Y31U20V20

+

+         etc.

+

+     The luma plane is followed immediately by the chroma plane.

+     Both the luma and the chroma planes have the same <image_row_pitch>.

+     The luma plane height is <image_height>.

+     The chroma plane height is (<image_height> / 2).

+     The luma plane width is <image_width>.

+     The chroma plane width is (<image_width> / 2)."

+

+    In section 5.3.2 "Querying List of Supported Image Formats", modify the description

+    of <flags>:

+

+    "<flags> is a bit-field that is used to specify allocation and usage information

+     about the image memory object being queried and is described in table 5.3. To get a

+     list of supported image formats that can be read from or written to by a kernel,

+     <flags> must be set to CL_MEM_READ_WRITE (get a list of images that can be read from

+     and written to by different kernel instances when correctly ordered by event

+     dependencies), CL_MEM_READ_ONLY (list of images that can be read from by a kernel)

+     or CL_MEM_WRITE_ONLY (list of images that can be written to by a kernel). To get a

+     list of supported image formats that can be both read from and written to by the

+     same kernel instance, <flags> must be set to CL_MEM_KERNEL_READ_AND_WRITE. To get a

+     list of images that cannot be read from nor written to by a kernel, <flags> must be

+     set to CL_MEM_NO_ACCESS_INTEL. Please see section 5.3.2.2 for clarification."

+

+    In section 5.3.2.1 "Minimum List of Supported Image Formats" add a description:

+

+    "For 2D image objects, the mandated minimum list of image formats that are not

+     required to be read from nor written to by a kernel and that must be supported by

+     all devices that support cl_intel_planar_yuv extension is described in table 5.8.c.

+

+     --------------------------------------------------

+     num_channels    channel_order    channel_data_type

+     ------------    -------------    -----------------

+     3               CL_NV12_INTEL    CL_UNORM_INT8

+     --------------------------------------------------

+     Table 5.8.c "Min. list of supported image formats - kernel no-access 2D Images""

+

+Additions to Chapter 6 of the OpenCL 2.0 Specification:

+

+    Extend the section 6.13.14.1.1 "Determining the border color or value":

+

+    "If the image channel order is CL_R, CL_RG, CL_RGB, CL_LUMINANCE, or a CL_NV12_INTEL

+     format's channel order, the border color is (0.0f, 0.0f, 0.0f, 1.0f)."

+

+    Extend the paragraph at the beginning of section 6.13.14.2 "Built-in Image Read

+    Functions":

+

+    "The following built-in function calls to read images with a sampler are supported.

+     Note that reading from a CL_NV12_INTEL image object is only supported by read_imagef

+     calls that take integer coordinates."

+

+    Add a note below the table in section 6.13.14.7 "Mapping image channels to color

+    values returned by read_image and color values passed to write_image to image

+    channels":

+

+    "NOTE: The mapping of CL_NV12_INTEL image channels to the appropriate components in

+     the float4 vector data type is ( V, Y, U, 1.0 )."

+

+    In section 6.13.14.5 "Built-in Image Query Functions", in table 6.26 "Built-in Image

+    Query Functions" extend the list of valid values returned by get_image_channel_order

+    built-in function with CLK_NV12_INTEL.

+

+Appendix A

+

+Sample host code:

+

+cl_image_format image_format;

+image_format.image_channel_order     = CL_NV12_INTEL;

+image_format.image_channel_data_type = CL_UNORM_INT8;

+

+cl_image_desc image_desc;

+image_desc.image_type                = CL_MEM_OBJECT_IMAGE2D;

+image_desc.image_width               = width;

+image_desc.image_height              = height;

+image_desc.image_array_size          = 0;

+image_desc.image_row_pitch           = 0;

+image_desc.image_slice_pitch         = 0;

+image_desc.num_mip_levels            = 0;

+image_desc.num_samples               = 0;

+image_desc.mem_object                = NULL;

+

+// create a CL_NV12_INTEL image

+cl_mem nv12Img = clCreateImage(context,

+                               CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS |   

+                               CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL,

+                               &image_format, &image_desc,

+                               host_ptr, errcode_ret);

+

+// image_width & image_height are ignored for plane extraction

+image_desc.image_width               = 0;

+image_desc.image_height              = 0;

+

+// set mem_object to the full NV12 image

+image_desc.mem_object                = nv12Img;

+

+// get access to the Y plane (CL_R)

+image_desc.image_depth               = 0;

+

+// set proper image_format for the Y plane

+image_format.image_channel_order     = CL_R;

+image_format.image_channel_data_type = CL_UNORM_INT8;

+

+cl_mem nv12YplaneImg = clCreateImage(context, CL_MEM_READ_WRITE,

+                                     &image_format, &image_desc,

+                                     NULL, errcode_ret);

+

+// get access to the UV plane (CL_RG)

+image_desc.image_depth               = 1;

+

+// set proper image_format for the UV plane

+image_format.image_channel_order     = CL_RG;

+image_format.image_channel_data_type = CL_UNORM_INT8;

+

+cl_mem nv12UVplaneImg = clCreateImage(context, CL_MEM_READ_WRITE,

+                                      &image_format, &image_desc,

+                                      NULL, errcode_ret);

+// NOT SUPPORTED: transfer the whole NV12 image to the device

+// status = clEnqueueWriteImage(queue, nv12Img, true, origin, region,

+//                              row_pitch, slice_pitch,

+//                              ptr, 0, NULL, NULL);

+

+// write Y plane of NV12 image

+status = clEnqueueWriteImage(queue, nv12YplaneImg, true,

+                             origin, region, row_pitch, slice_pitch,

+                             ptr, 0, NULL, NULL);

+

+// write UV plane of NV12 image

+status = clEnqueueWriteImage(queue, nv12UVplaneImg, true,

+                             origin, region, row_pitch, slice_pitch,

+                             ptr + uvPlaneOffset, 0, NULL, NULL);

+

+// NOT SUPPORTED: read the whole NV12 image back

+// status = clEnqueueReadImage(queue, nv12Img, true,

+//                             origin, region, row_pitch, slice_pitch,

+//                             ptr, 0, NULL, NULL);

+// read Y plane of NV12 image

+status = clEnqueueReadImage(queue, nv12YplaneImg, true,

+                            origin, region, row_pitch, slice_pitch,

+                            ptr, 0, NULL, NULL);

+

+// read UV plane of NV12 image

+status = clEnqueueReadImage(queue, nv12UVplaneImg, true,

+                            origin, region, row_pitch, slice_pitch,

+                            ptr + uvPlaneOffset, 0, NULL, NULL);

+

+Appendix B

+

+Sample kernel code:

+

+// do something with a whole NV12 image

+kernel void DoSomethingWithNV12

+(

+    ...

+    read_write image2d_t nv12Img,

+    ...

+)

+{

+    ...

+    // sample the CL_NV12_INTEL image - supported if CL_NV12_INTEL format is 

+    // available with CL_MEM_READ_ONLY or CL_MEM_READ_WRITE access flags 

+    // based on clGetSupportedImageFormats query.

+    float4 p = read_imagef(nv12Img, sampler, coord);

+    ...

+    // write to the CL_NV12_INTEL image - supported if CL_NV12_INTEL format is 

+    // available with CL_MEM_WRITE_ONLY or CL_MEM_READ_WRITE access flags 

+    // based on clGetSupportedImageFormats query.

+    write_imagef(nv12Img, coord, p);

+    ...

+}

+

+// do something with planes of an NV12 image

+kernel void DoSomethingWithNV12Planes

+(

+    ...

+    read_write image2d_t nv12ImgYPlane,

+    read_write image2d_t nv12ImgUVPlane,

+    ...

+)

+{

+    ...

+    // sample the Y & UV planes

+    float4 py = read_imagef(nv12ImgYPlane, sampler, coord);

+    float4 puv = read_imagef(nv12ImgUVPlane, sampler, coord);

+    ...

+    // write to Y & UV planes

+    write_imagef(nv12ImgYPlane, coord, py);

+    write_imagef(nv12ImgUVPlane, coord, puv);

+    ...

+}

+

+Revision History

+

+Version 1, 2016/11/22 (K.Laskowski)   - Initial version.