cl_intel_device_side_avc_motion_estimation
diff --git a/api/cl.xml b/api/cl.xml
index 39075e7..602a042 100644
--- a/api/cl.xml
+++ b/api/cl.xml
@@ -1062,7 +1062,9 @@
         <enum value="0x4108"      name="CL_DEVICE_SUB_GROUP_SIZES_INTEL"/>
         <enum value="0x4109"      name="CL_KERNEL_SPILL_MEM_SIZE_INTEL"/>
         <enum value="0x410A"      name="CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL"/>
-            <unused start="0x410B" end="0x410D"/>
+        <enum value="0x410B"      name="CL_DEVICE_AVC_ME_VERSION_INTEL"/>
+        <enum value="0x410C"      name="CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL"/>
+        <enum value="0x410D"      name="CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL"/>
         <enum value="0x410E"      name="CL_NV12_INTEL"/>
             <unused start="0x410F" end="0x410F"/>
     </enums>
@@ -1154,5 +1156,6 @@
     <extension number="47" name="cl_intel_driver_diagnostics"/>
     <extension number="48" name="cl_intel_subgroups_short"/>
     <extension number="49" name="cl_intel_planar_yuv"/>
+    <extension number="50" name="cl_intel_device_side_avc_motion_estimation"/>
         <!-- Next free extension number is assigned sequentially here -->
 </registry>
diff --git a/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt b/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
new file mode 100644
index 0000000..74bb7af
--- /dev/null
+++ b/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
@@ -0,0 +1,4356 @@
+Name String

+

+   cl_intel_device_side_avc_motion_estimation

+

+Contributors

+

+   Biju George, Intel

+   Raghukrishnan Embar, Intel

+   Ryan Lei, Intel

+   Ben Ashbaugh, Intel

+   Bartosz Sochacki, Intel

+   Krishna Madaparambil, Intel

+   Mateusz Tabaka, Intel

+

+Contact

+

+   Biju George, Intel (biju.george 'at' intel.com)

+

+Version

+

+   Version 1, December 2, 2016

+

+Number 

+

+   OpenCL Extension #50

+

+Status

+

+   First Draft

+

+Dependencies

+

+   OpenCL 1.2 is required.

+

+   The OpenCL Intel vendor extension cl_intel_subgroups is required. The

+   VME built-in functions are an extension of the subgroup functions

+   defined in cl_intel_subgroups.

+

+   The built-in functions which perform intra estimation depend on the 

+   OpenCL Intel vendor extension cl_intel_media_block_io in order to

+   read-in the neighboring macroblock edge pixels.

+

+   The built-in functions which perform chroma based intra estimation

+   operations depend on the OpenCL Intel vendor extension

+   cl_intel_planar_yuv in order to create NV12 source images.

+

+   This extension is written against revision 29 of the OpenCL 2.0 API

+   specification, against revision 33 of the OpenCL 2.0 OpenCL C

+   specification, and against revision 32 of the OpenCL 2.0 extension

+   specification.

+

+Overview

+

+   Video motion estimation (VME) is defined as of set motion estimation

+   operations that are used to determine the motion vectors, intra

+   estimation angles and macroblock partitioning combination that best

+   describe the transformation to the source macroblock, from blocks in

+   one or more previous reference pictures (inter-prediction), or from

+   other blocks in the same source picture (intra-prediction). It does

+   this by searching for spatial and temporal patterns on the current and

+   various forward and backward reference pictures.

+

+   The goal of this extension is to provide programmers with a fine-

+   grained interface to the AVC VME media sampler in Intel graphics

+   processors. It describes the specification of low-level built-in

+   functions, callable from OpenCL kernels, that facilitate the

+   programming of the VME media sampler to evaluate specific AVC motion

+   estimation operations. If only a coarser-level interface at the level

+   of built-in kernels suffices, then the Intel vendor extensions 

+   cl_intel_motion_estimation and cl_advanced_motion_estimation may be

+   considered.

+

+   Built-in functions are defined for all the major operations of the VME

+   media sampler. The major operations of the AVC VME media sampler in 

+   Intel Graphics Processors can be described as follows:

+    

+   1. Integer motion estimation (IME)

+

+      Perform motion estimation on a given source macroblock in

+      a source image over a single or dual reference window in a 

+      reference image, at full-pixel resolution, to determine the 

+      best integer motion vectors and their associated distortions, 

+      and the best macroblock shape partitioning combination.

+

+   2. Motion estimation refinement (REF)

+

+      Perform refinement operations on the results of IME. The two sub-

+      operations are:

+

+      Fractional motion estimation (FME)

+

+      Perform sub-pixel refinement on the results of an IME operation.

+      half-pixel (HPEL) or quarter-pixel (QPEL) refinements are performed

+      to determine the best sub-pixel motion vectors and their associated

+      distortions. 

+

+      Bidirectional motion estimation (BME)

+

+      Perform bidirectional refinement on the results of an IME

+      operation using two reference images to check if the bidirectional

+      mode using two references yields lesser distortions. An FME can

+      optionally be performed implicitly as part of a bidirectional

+      refinement.

+

+   3. Skip and Intra check (SIC)

+

+      Performs the following two sub-operations:

+

+      Skip check (SKC)

+

+      Compute the pixel distortion of a user-specified shape and motion

+      vector combination. The VME media sampler fetches necessary pixels,

+      performs fractional and bidirectional filtering (as necessary), and

+      then computes the distortion between the derived reference and

+      source. The skip decision can optionally be enhanced to include a

+      4x4 forward transform, the results of which are compared against a

+      user specified threshold to emulate the effects of the forward

+      quantization zeroing effect.

+

+      Intra prediction estimation (IPE)

+

+      Perform intra prediction on a given source macroblock to determine

+      the best intra prediction modes and the best shape partitioning

+      combination.

+

+New API Enums

+

+    Accepted as arguments to clGetDeviceInfo

+

+    CL_DEVICE_AVC_ME_VERSION_INTEL                      0x410B

+    CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C

+    CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL          0x410D

+

+    Additional valid constant values returned by clGetDeviceInfo:

+

+    CL_AVC_ME_VERSION_1_INTEL                           0x1

+

+Additions to Chapter 4 of the OpenCL Specification:

+

+    Modify the description of function clGetDeviceInfo(..)

+

+    Table 4.3 is extended to include the following enumeration constants.

+

+    +--------------------------+-----------+----------------------------+

+    |cl_device_info            |Return Type|Description                 |

+    +--------------------------+-----------+----------------------------+

+    |CL_DEVICE_AVC_ME_VERSION_ |cl_uint    |The AVC device-side motion  |

+    |INTEL                     |           |estimation API version      |

+    |                          |           |number supported by the     |

+    |                          |           |device and driver.          |

+    +--------------------------+-----------+----------------------------+

+    |CL_DEVICE_AVC_ME_SUPPORTS_|cl_bool    |Is CL_TRUE if built-in      |

+    |TEXTURE_SAMPLER_USE_INTEL |           |functions using the texture |

+    |                          |           |sampler may be used along   |

+    |                          |           |with AVC VME built-in       |

+    |                          |           |functions using the media   |

+    |                          |           |sampler on the device, and  |

+    |                          |           |CL_FALSE otherwise.         |

+    +--------------------------+-----------+----------------------------+

+    |CL_DEVICE_AVC_ME_SUPPORTS_|cl_bool    |Is CL_TRUE if an enqueue of |

+    |PREEMPTION_INTEL          |           |an OpenCL kernel that uses  |

+    |                          |           |AVC VME built-in functions  |

+    |                          |           |supports preemption on the  |

+    |                          |           |device, and CL_FALSE        |

+    |                          |           |otherwise. (This may be     |

+    |                          |           |useful on platforms that    |

+    |                          |           |have strict latency         |

+    |                          |           |requirements on the GPU).   |

+    +--------------------------+-----------+----------------------------+    

+    

+    This extension requires a value of CL_AVC_ME_VERSION_1_INTEL returned

+    by clGetDeviceInfo for CL_DEVICE_AVC_ME_VERSION_INTEL.

+

+Terms, Acronyms and Definitions

+

+   Adds a new section 6.13.14.X "VME built-in functions" to the OpenCL

+   2.0 C specification.

+

+   "

+   The following terms, acronyms and definitions are used in and

+   provide context for the VME built-in functions.

+

+   Macro-block (MB):

+   -----------------

+   An image is partitioned into macro-blocks of size 16x16 pixels. It is

+   the basic unit of processing for AVC video motion estimation

+   operations.

+

+   Shape:

+   ------

+   A MB may be partitioned into sub-blocks of one of the major shapes. A

+   sub-block with an 8x8 major shape may be further independently 

+   partitioned into sub-blocks of one of the minor shapes. It is 

+   represented by pre-defined shape enumeration values.

+

+   Major Shapes:

+   -------------

+   Shapes of 16x16, 16x8, 8x16, or 8x8 partitions of a MB. A 16x16 major

+   shape merely indicates that the MB was not further partitioned.

+

+   Minor Shapes:

+   -------------

+   Shapes of 8x8, 8x4, 4x8, or 4x4 sub-partitions of an 8x8 partition. A

+   8x8 minor shape merely indicates that the 8x8 major partition was not

+   further sub-partitioned.

+

+   Block:

+   ------

+   A sub-block of a MB with one of the major or minor shapes.

+

+   Reference Image:

+   ----------------

+   An image (typically from the previously decoded buffer in an encoder

+   pipeline) from which motion estimation predictions are made.

+

+   Source Image:

+   -------------

+   The current image for which motion estimation predictions are made.

+

+   Source Macro-block Offset:

+   --------------------------

+   The 2D offset of the top left corner of the source MB in pixel units.

+   It is represented by a pair of unsigned 16-bit integers.

+

+   Reference Window Offset:

+   ------------------------

+   The 2D offset of the top left corner reference search window w.r.t to

+   the top left corner of the source MB in pixel units. It is represented

+   by a pair of signed 16-bit integers in the range [-2048, 2047].

+

+   Reference identifier:

+   ---------------------

+   Reference identifiers are associated to pairs of forward(L0)/

+   backward(L1) reference image parameters. Up to 16 pairs of reference

+   pairs of reference image parameters are permitted, with the permitted

+   values of reference identifiers ranging from 0 to 15. The reference 

+   identifers are assigned in increasing order in which the reference

+   image parameter pairs are declared in the OpenCL kernel parameter

+   list. See new section 6.13.14.X "VME built-in functions" described 

+   below for more details.

+    

+   Motion Vector (MV):

+   -------------------

+   A 2D vector used for inter motion estimation that provides an offset

+   from the top left corner of a block in the source image to the top

+   left corner an identically sized block in the reference image.

+   Generally it is used to represent the best match of a block in the

+   reference image to a block in the source image. The best match is

+   determined as the block minimizing the distortion. MVs are specified

+   in QPEL resolution with the 2 LSB representing the fractional part

+   of the offset. It is represented by a pair of signed 16-bit signed

+   integers.

+

+   Packed Motion Vector:

+   --------------------------

+   A motion vector represented as a packed 32-bit unsigned integer. The

+   lower 16 bits contains the X coordinate and the upper 16 bits contains

+   the Y coordinate.

+

+   Bidirectional Motion Vector (BMV):

+   ----------------------------------

+   A pair of MVs for the forward(L0) and backward(L1) images. Depending

+   on how the VME operation is configured only the forward or the

+   backward MV or both may be valid.

+

+   Packed Bidirectional Motion Vector:

+   -----------------------------------

+   A bidirectional MV represented as a packed 64-bit unsigned integer. 

+   The lower 32-bits contain the forward packed MV, and the upper 32-bits

+   contain the backward packed MV.

+

+   Sum Of Absolute Difference (SAD):

+   ---------------------------------

+   The sum of absolute differences of every full/sub-pixel location in 

+   the source block w.r.t every corresponding full/sub pixel in the 

+   reference block as specified by a given MV. The sum of absolute 

+   differences may be optionally Haar transform adjusted. It is 

+   represented by an unsigned 16-bit integer value.

+

+   Haar Transform (HAAR):

+   ----------------------

+   A simple wavelet transform that is used to refine the distortion

+   measure of SAD. The per pixel difference goes through a 4x4 Haar 

+   transform. Then the SAD is replaced by the sum of the absolute 

+   values of the transform domain coefficients in the distortion. Haar

+   transform is used as a coarse estimation of the integer transform.

+

+   Motion Vector Cost Center (CC):

+   -------------------------------

+   A MV has an associated cost w.r.t a cost center coordinate. The 

+   further away from the cost center, the larger will be the cost

+   associated with the MV. Cost centers are specified in QPEL resolution

+   with the 2 LSB representing the fractional part of the offset. 

+

+   Motion Vector Cost Center Delta (CCD):

+   --------------------------------------

+   The 2D offset of the cost center relative to the top left corner of 

+   the source MB. Cost center deltas are specified in QPEL resolution

+   with the 2 LSB representing the fractional part of the offset. 

+   It is represented by a pair of signed 16-bit integers.

+

+   Packed Motion Vector Cost Center Delta:

+   ---------------------------------------

+   A motion vector cost center delta represented as a packed 32-bit

+   unsigned integer. The lower 16 bits contains the X coordinate and the

+   upper 16 bits contains the Y coordinate.

+

+   Bidirectional Motion Vector Cost Center Delta:

+   ----------------------------------------------

+   A pair of cost center deltas for the forward and backward images.

+

+   Packed Bidirectional Motion Vector Cost Center Delta:

+   -----------------------------------------------------

+   A packed bidirectional motion vector cost center delta represented as 

+   a 64-bit unsigned integer. The lower 32-bits contain the forward 

+   packed CCD, and the upper 32-bits contain the backward packed CCD.

+

+   Motion Vector Cost:

+   -------------------

+   The MV cost is determined using a cost function described by a cost

+   table that is indexed based on power-of-two distances from the user

+   specified cost center, with a user specified precision (or unit) of 

+   the distances from the cost center.

+

+   U4U4 Byte Format:

+   -----------------

+   Represents a value of (B<<S), where B, called base, is the 4 bit

+   LSB of the byte and S, called shift, is the 4 bit MSB of the byte.

+

+   Motion Vector Cost Table:

+   -------------------------

+   A table which specifies the cost penalties at 8 control points. The

+   first 7 control points represent the distances from cost center at

+   powers-of-two locations (2^0 to 2^6), and the last control point

+   represents the base penalty for distances that are out of range of the

+   cost function curve. It is represented by a packed array of 8 U4U4

+   unsigned integer values.

+

+   Motion Vector Cost Precision:

+   -----------------------------

+   The precision (or unit) of the control points in the MV cost table. 

+   It can be used to control the precision and range of the cost 

+   function. It is represented by pre-defined cost precision enumeration

+   values.

+

+   Shape Cost:

+   -----------

+   The cost associated with encoding a particular partition shape using

+   inter or intra prediction. It is represented by a packed array of 10

+   U4U4 unsigned integer values.

+

+   Distortion:

+   -----------

+   The distortion is the sum of SAD, MV cost, shape cost and multi-

+   reference cost for inter estimation, and the sum of SAD, mode cost,

+   shape cost and non-dc cost for intra estimation. It is a measure of

+   the cost of encoding a block and is represented by an unsigned 16-bit

+   integer value.

+

+   Intra Mode:

+   -----------

+   An intra-prediction angle which provides a prediction for the current

+   block from the edge pixels in its neighboring blocks. It is 

+   represented by pre-defined intra mode enumeration values.

+

+   Intra Mode Cost:

+   ----------------

+   The cost associated with a computed intra mode for a block w.r.t a

+   predicted intra mode based on the computed intra modes for its

+   neighboring blocks.

+

+   Mode:

+   -----

+   The decision whether the inter-prediction or intra-prediction 

+   minimizes distortion of a given MB.

+

+   Search unit (SU):

+   -----------------

+   The basic unit of searching. Possible reference search locations are

+   grouped in a predefined 4x4 pattern, and all locations within the same

+   group must be completely chosen or completely skipped. These 

+   predefined groups are called search units.

+

+   Search Path (SP):

+   -----------------

+   The path taken during searching in a reference window. The steps taken

+   in a search path are in units of SUs. The search path must lie within

+   the defined search window.

+

+   Luma:

+   -----

+   Luma refers to either the Y-plane of a NV12 image or a regular image

+   with the image_channel_order and image_data_type restricted as CL_R

+   and CL_UNORM_INT8.

+

+   Chroma:

+   ------

+   Chroma refer to the UV-plane of a NV12 image. 

+ 

+   Search Window (SW):

+   -------------------

+   The search area that will be covered during searching. The area of the

+   search window is limited to 2K luma pixels.

+ 

+   Search Window Configuration:

+   ----------------------------

+   The configuration of a search window which is a combination of the

+   search path and search window.

+                                                          

+   The pre-defined search window configurations are:

+

+   +--------------+-----------------------------------------------------+

+   | EXHAUSTIVE   | 48x40 SW with exhaustive single reference search (or|

+   |              | 32x32 dual SW for exhaustive dual-reference search);|

+   |              | an exhaustive search means that all SU within the   |

+   |              | search window are searched in a spiral pattern with |

+   |              | the search center being the middle of the search    |

+   |              | window                                              |

+   +--------------+-----------------------------------------------------+

+   | SMALL        | 28x28 SW with exhaustive search                     | 

+   +--------------+-----------------------------------------------------+

+   | TINY         | 24x24 SW with exhaustive search                     |

+   +--------------+-----------------------------------------------------+

+   | EXTRA TINY   | 20x20 SW with exhaustive search                     |

+   +--------------+-----------------------------------------------------+ 

+   | DIAMOND      | 48x40 SW with diamond single reference search (or   |

+   |              | 32x32 dual SW for diamond dual-reference search); a |

+   |              | diamond pattern search path is used for the first 16|

+   |              | (or 7 per reference for dual reference search) SUs, |

+   |              | and then gradient based searching is used for up to |

+   |              | a maximum of 57 search unit.                        |

+   +--------------+-----------------------------------------------------+

+   | LARGE DIAMOND| 48x40 SW with large diamond single reference search |

+   |              | (or 32x32 dual SW for large diamond dual-reference  |

+   |              | search); a diamond pattern search pattern is used   |

+   |              | for the first 32 (or 10 per reference for dual      |

+   |              | reference search) SUs, and then gradient based      |

+   |              | searching is used for up to a maximum of 57 search  |

+   |              | units.                                              |

+   +--------------+-----------------------------------------------------+

+

+   Inter Estimation:

+   -----------------

+   The process of determining motion vectors and shapes that best 

+   describe the transformation from 2D images from previously decoded      

+   images in a video sequence to the currently processed image.

+ 

+   Intra-Prediction Estimation (IPE):

+   ----------------------------------

+   The process of determining prediction angles and shapes that best

+   describe the transformation from neighboring MBs in an image to the

+   currently processed MB in the same image.

+ 

+   Luma Mode:

+   ----------

+   The prediction angle returned by IPE for the luma component for a

+   block. It is represented by an unsigned 8-bit integer with the upper 4

+   bits set to zero.

+ 

+   Integer Motion Estimation (IME):

+   -------------------------------

+   Inter-motion estimation in integer pixel resolution.

+ 

+   Fractional Motion Estimation (FME):

+   -----------------------------------

+   Inter-motion estimation in sub-pixel resolution. The result of integer

+   motion estimation on a reference image is used to perform fractional

+   refinement.

+ 

+   Bidirectional Motion Estimation (BME):

+   --------------------------------------

+   The process of determining if the bi-directional prediction minimizes

+   the distortion w.r.t to unidirectional prediction. The results of IME

+   on forward(L0) and backward(L1) reference images are used to perform

+   bi-directional refinement. BME can be performed in integer or sub-

+   pixel resolution. If performed in sub-pixel resolution an implicit FME

+   operation is done before performing the BME.

+ 

+   Refinement (REF):

+   -----------------

+   A FME and/or BME refinement operation.

+ 

+   Skip/Spot Check (SKC):

+   ----------------------

+   The operation determining the distortion associated with a given

+   (uni or bidirectional) MV in a reference image(s) w.r.t a source

+   image.

+ 

+   Skip and Intra Check (SIC):

+   ---------------------------

+   The process of performing both SKC and IPE in the same operation.

+ 

+   Motion Check or Estimation (MCE):

+   ---------------------------------

+   A generic IME, REF, or SIC operation.

+ 

+   Forward Transform (FT):

+   -----------------------

+   An 8x8 or 4x4 integer transform used to transform the residual to the

+   frequency domain.

+ "

+ 

+ New OpenCL C Enums

+ 

+   Adds the following enums to the OpenCL C language. The corresponding

+   API enums have the same name with the prefix "CLK_" replaced with

+   "CL_".

+

+   These enums are used as arguments and return values of some of the

+   VME built-in function parameters and return values described in the

+   new section 6.13.14.X "VME built-in functions".

+

+ 

+   Interlaced image field polarity values:

+   ---------------------------------------

+   #define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL    0x0

+   #define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1

+

+   Inter macro-block major shape values:

+   -------------------------------------	

+   #define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0

+   #define CLK_AVC_ME_MAJOR_16x8_INTEL  0x1

+   #define CLK_AVC_ME_MAJOR_8x16_INTEL  0x2

+   #define CLK_AVC_ME_MAJOR_8x8_INTEL   0x3

+

+   Inter macro-block minor shape values:

+   -------------------------------------

+   #define CLK_AVC_ME_MINOR_8x8_INTEL 0x0

+   #define CLK_AVC_ME_MINOR_8x4_INTEL 0x1

+   #define CLK_AVC_ME_MINOR_4x8_INTEL 0x2

+   #define CLK_AVC_ME_MINOR_4x4_INTEL 0x3

+

+   Inter macro-block major direction values:

+   -----------------------------------------

+   #define CLK_AVC_ME_MAJOR_FORWARD_INTEL       0x0

+   #define CLK_AVC_ME_MAJOR_BACKWARD_INTEL      0x1

+   #define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2

+

+   Inter (IME) partition mask values:

+   ----------------------------------

+   #define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL   0x0

+   #define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E

+   #define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL  0x7D

+   #define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL  0x7B

+   #define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL   0x77

+   #define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL   0x6F

+   #define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL   0x5F

+   #define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL   0x3F

+

+   Slice type values:

+   ------------------

+   #define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL  0x0

+   #define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1

+   #define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2

+

+   Search window configuration:

+   ----------------------------

+   #define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL    0x0

+   #define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL         0x1

+   #define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL          0x2

+   #define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL    0x3

+   #define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL       0x4

+   #define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5

+   #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL     0x6

+   #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL     0x7

+

+   SAD adjustment mode:

+   --------------------

+   #define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0

+   #define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2

+

+   Pixel resolution:

+   -----------------

+   #define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0

+   #define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL    0x1

+   #define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL    0x3

+

+   Cost precision values:

+   ----------------------

+   #define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0

+   #define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1

+   #define CLK_AVC_ME_COST_PRECISION_PEL_INTEL  0x2

+   #define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3

+

+   Inter bidirectional weights:

+   ----------------------------

+   #define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL       0x10

+   #define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL         0x15

+   #define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL          0x20

+   #define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL     0x2B

+   #define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30

+

+   Inter border reached values:

+   ----------------------------

+   #define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL   0x0

+   #define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL  0x2

+   #define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL    0x4

+   #define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8

+

+   Intra macro-block shape values:

+   -------------------------------

+   #define CLK_AVC_ME_INTRA_16x16_INTEL  0x0

+   #define CLK_AVC_ME_INTRA_8x8_INTEL    0x1

+   #define CLK_AVC_ME_INTRA_4x4_INTEL    0x2

+

+   Inter skip block partition type:

+   --------------------------------

+   #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0

+   #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL   0x04000

+

+   Inter skip motion vector mask:

+   ------------------------------

+   #define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL   (0x1<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ ENABLE_INTEL (0x2<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL      (0x3<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL     (0x55<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL    (0xAA<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL        (0xFF<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL   (0x1<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL  (0x2<<24)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL   (0x1<<26)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL  (0x2<<26)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL   (0x1<<28)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL  (0x2<<28)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL   (0x1<<30)

+   #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL  (0x2<<30)

+

+   Block based skip type values:

+   -----------------------------

+   #define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x0

+   #define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80

+

+   Luma intra partition mask values:

+   ---------------------------------	

+   #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL   0x0

+   #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6

+   #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL   0x5

+   #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL   0x3 

+

+   Intra neighbor availability mask values:

+   ----------------------------------------

+   #define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL        0x60

+   #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL       0x10

+   #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8

+   #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL  0x4

+

+   Luma intra modes:

+   -----------------

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL            0x0

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL          0x1

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL                  0x2

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL  0x3

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL               0x4

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL      0x5

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL     0x6

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL       0x7

+   #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL       0x8

+          

+   Chroma intra modes:

+   -------------------

+   #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL         0x0

+   #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1

+   #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL   0x2

+   #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL      0x3

+

+   Reference image select values:

+   ------------------------------

+   #define CLK_AVC_ME_FRAME_FORWARD_INTEL  0x1

+   #define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2

+   #define CLK_AVC_ME_FRAME_DUAL_INTEL     0x3

+

+   VME media sampler initialization value:

+   ----------------------------------------

+   #define CLK_AVC_ME_INITIALIZE_INTEL 0x0

+

+   Default IME payload initialization:

+   -----------------------------------

+   #define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL {0x0}

+

+   Default REF payload initialization:

+   -----------------------------------

+   #define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL {0x0}

+

+   Default SIC payload initialization:

+   -----------------------------------

+   #define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL {0x0}

+

+   Default IME result initialization:

+   ----------------------------------

+   #define CLK_AVC_IME_RESULT_INITIALIZE_INTEL  {0x0}

+

+   Default REF result initialization:

+   ----------------------------------

+   #define CLK_AVC_REF_RESULT_INITIALIZE_INTEL  {0x0}

+

+   Default SIC result initialization:

+   ----------------------------------

+   #define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL  {0x0}

+

+New OpenCL C Types

+

+   Adds the following built-in types to the OpenCL C language. Appends

+   the following built-in type definitions to Table 6.3 in section 6.1.3

+   "Other Built-in Data Types".

+

+   These built-in types are used to declare the types of some of the VME

+   built-in function parameters and return values in the new section

+   6.13.14.X "VME built-in functions".

+

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_mce_payload_t |This is a parameter and/or      |

+   |                                   |return type of a generic AVC MCE|

+   |                                   |operation. It is used in MCE    |

+   |                                   |built-in functions and          |

+   |                                   |represents the payload for a    |

+   |                                   |basic IME/REF/SIC operation.    |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ime_payload_t |This is a parameter and/or      |

+   |                                   |return type of a basic AVC IME  |

+   |                                   |operation. It is used in IME    |

+   |                                   |built-in functions and          |

+   |                                   |represents the payload for an   |

+   |                                   |IME operation.                  |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ref_payload_t |This is a parameter and/or      |

+   |                                   |return type of an AVC REF       |

+   |                                   |operation. It is used in REF    |

+   |                                   |built-in functions and          |

+   |                                   |represents the payload for a REF|

+   |                                   |operation.                      |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_sic_payload_t |This is a parameter and/or      |

+   |                                   |return type of an AVC SIC       |

+   |                                   |operation. It is used in SIC    |

+   |                                   |built-in functions and          |

+   |                                   |represents the payload for a SIC|

+   |                                   |operation.                      |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_mce_result_t  |This is a parameter and/or      |

+   |                                   |return type of a MCE operation. |

+   |                                   |It is used in MCE built-in      |

+   |                                   |functions and represents the    |

+   |                                   |evaluation result for a basic   |

+   |                                   |IME/REF/SIC operation.          |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ime_result_t  |This is a parameter and/or      |

+   |                                   |return type of a basic AVC IME  |

+   |                                   |operation not using the stream- |

+   |                                   |in/streamout functionality.It is|

+   |                                   |used in IME built-in functions  |

+   |                                   |and represents the result for a |

+   |                                   |basic IME evaluation operation. |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ime_result_   |This is a parameter and/or      |

+   | single_reference_streamout_t      |return type of an AVC single    |

+   |                                   |reference IME operation using   |

+   |                                   |the streamout functionality. It |

+   |                                   |is used in specific IME built-  |

+   |                                   |in functions and represents the |

+   |                                   |evaluation result of an IME     |

+   |                                   |streamout operation.            |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ime_result_   |This is a parameter and/or      |

+   | dual_reference_streamout_t        |return type of an AVC dual      |

+   |                                   |reference IME operation using   |

+   |                                   |the streamout functionality. It |

+   |                                   |is used in specific IME built-in|

+   |                                   |functions and represents the    |

+   |                                   |evaluation result for an IME    |

+   |                                   |streamout operation.            |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ime_          |This is a parameter and/or      |

+   | single_reference_streamin_t       |return type of an AVC single IME|

+   |                                   |operation using the stream-in   |

+   |                                   |reference functionality. It is  |

+   |                                   |used in specific IME built-in   |

+   |                                   |functions. It represents the    |

+   |                                   |additional results from the     |

+   |                                   |result of an IME evaluation     |

+   |                                   |using the streamout             |

+   |                                   |functionality that may be       |

+   |                                   |streamed-in in a subsequent IME |

+   |                                   |streamin call.                  |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ime_          |This is a parameter and/or      |

+   | dual_reference_streamin_t         |return type of an AVC dual      |

+   |                                   |reference IME operation using   |

+   |                                   |the streamin functionality. It  |

+   |                                   |is used in specific IME built-  | 

+   |                                   |in functions. It represents the |

+   |                                   |additional results from the     |

+   |                                   |result of an IME evaluation     |

+   |                                   |using the streamout             |

+   |                                   |functionality that may be       |

+   |                                   |streamed-in in a subsequent IME |

+   |                                   |call.                           |

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_ref_result_t  |This is a parameter and or/     |

+   |                                   |return type of an AVC REF       |

+   |                                   |operation. It is used in REF    |

+   |                                   |built-in functions and          | 

+   |                                   |represents the evaluation result|

+   |                                   |from a REF evaluation operation.|

+   +-----------------------------------+--------------------------------+

+   | intel_sub_group_avc_sic_result_t  |This is a parameter and/or      |

+   |                                   |return type of an AVC SKC, IPE, |

+   |                                   |or SIC operation. It is used in |

+   |                                   |SIC built-in functions and      |

+   |                                   |represents the evaluation result|

+   |                                   |from a SIC evaluation operation.|

+   +-----------------------------------+--------------------------------+

+

+   Amends the following built-in type definition in Table 6.3 in section

+   6.1.3 "Other Built-in Data Types".

+

+   +---------+----------------------------------------------------------+

+   |sampler_t|A texture or media sampler type. Refer to section 6.13.14 |

+   |         |for a detailed description the built-in functions that use|

+   |         |of this type.                                             |

+   |         |                                                          |

+   |         |In general, the use of the word "sampler" by itself refers|

+   |         |to a texture sampler. A media sampler will be explicitly  |

+   |         |refered to as "media sampler".                            |

+   |         |                                                          |

+   |         |In general, sampler_t parameters of built-in functions    |

+   |         |refer to texture sampler parameter. Media sampler         |

+   |         |parameters are exclusively used in the VME built-in       |

+   |         |functions (described in new section 6.13.14.X "VME        |

+   |         |built-in functions").                                     |

+   +---------+----------------------------------------------------------+

+ 

+ 

+   Amends the restrictions defined for sampler types in (b) section 6.9

+   "Restrictions".

+  

+   "...

+    The texture sampler type (sampler_t) can only be used as the type of

+    a function argument or a variable declared in the program scope or

+    the outermost scope of a kernel function. The behavior of a sampler

+    variable declared in a non-outermost scope of a kernel function is

+    implementation-defined. The media sampler type cannot be used as a 

+    function argument of a kernel function, but otherwise can be used as 

+    the type of a function argument or a variable declared in any scope.

+    ... "

+

+   Amends the following section 6.13.14.1 "Samplers".

+

+   Replace all strings "sampler(s)" with "texture sampler(s)".

+

+   Appends the following paragraphs in section 6.13.14.1 "Samplers".

+ 

+   "The VME evaluation phase built-in functions (described in new section

+   6.13.14.X "VME built-in functions" take a VME media sampler argument.

+   The media sampler can be declared as a constant variable in any scope,

+   and must be initialized with the 32-bit unsigned integer constant

+   CLK_AVC_ME_INITIALIZE_INTEL."

+

+   Note that VME media samplers declared using the constant qualifier are

+   not counted towards the maximum size of constant address space allowed

+   per device (i.e. CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE as described in

+   table 4.3)

+

+   Amends the following section 6.13.14.2 "Built-in Image Read

+   Functions".

+

+   "The following built-in function calls to read images with a texture

+    sampler are supported."

+

+   Amends the following section 6.13.14.3 "Built-in Image Sampler-less

+   Read Functions".

+

+   "The sampler-less read image functions behave exactly as the

+    corresponding read image functions described in section 6.13.14.2

+    that take integer coordinates and a texture sampler with filter mode

+    set to CLK_FILTER_NEAREST, normalized coordinates set to 

+    CLK_NORMALIZED_COORDS_FALSE and addressing mode to CLK_ADDRESS_NONE"

+

+

+New OpenCL C built-in functions

+ 

+   Appends the following to the new section 6.13.14.X "VME built-in

+   functions".

+

+   "

+   The OpenCL C programming language implements the following built-in

+   functions to evaluate VME operations as a block operation by all work-

+   items in a subgroup. Furthermore they are defined only for subgroup

+   size of 16, and thus using these built-in functions in a kernel will

+   force a subgroup size of 16. The use of kernel attribute 

+   intel_reqd_sub_group_size with a value other than 16 as described in 

+   the Intel vendor extension cl_intel_required_subgroup_size is not

+   allowed.

+

+   These built-in functions are defined to operate on a 16x16 MB and must

+   be encountered by all work items in a subgroup executing the kernel,

+   otherwise the behavior is undefined (i.e. they can only be used only

+   in convergent control flow where all the work-items in the subgroups

+   are enabled).

+  

+   ______________________________________________________________________

+

+

+   If VME built-in functions are used in an OpenCL kernel, then the 

+   following additional requirements must be satisfied.

+

+   1. The VME source and reference images must either be 8-bit luma

+      2D images, or planar YUV images with NV12 format. If chroma based

+      intra estimation operations are being performed, then NV12 source

+      images are required, otherwise either 8-bit luma or NV12 images may

+      be used. For 8-bit luma images, the image_channel_order and the 

+      image_data_type are restricted as CL_R and CL_UNORM_INT8 

+      respectively. NV12 images are described in the Intel vendor

+      extension cl_intel_planar_yuv. Images created from buffers are not

+      allowed as VME source and reference images.

+   2. OpenCL image kernel parameters that are used as VME source and

+      reference images in the VME evaluation built-in functions must

+      exclusively be used only by VME built-in functions. If the same

+      image needs to be used for other image operations within the same

+      kernel, then an additional image parameter may be used that is

+      bound to the same image object as for the VME image parameter in

+      the OpenCL host application for the kernel enqueue API call.

+   3. OpenCL image kernel parameters that are used as VME source and

+      reference images in the VME evaluation built-in functions must be

+      declared in a specific order in the kernel parameter list of a

+      kernel that calls any of the VME built-in functions as shown below;

+      otherwise it will result in a complier reported error.

+

+         +-----------------------------------------------+

+         |   __kernel void vme_custom(                   |

+         |      read_only image2d_t src_image,           |

+         |      read_only image2d_t fwd0_ref_image,      |

+         |      read_only image2d_t bwd0_ref_image,      |

+         |      read_only image2d_t fwd1_ref_image,      |

+         |      read_only image2d_t bwd1_ref_image,      |

+         |      ...                                      |

+         |      read_only image2d_t fwd15_ref_image,     |

+         |      read_only image2d_t bwd15_ref_image,     |

+         |      ...)                                     |

+         +-----------------------------------------------+

+

+      The first forward(L0) reference image parameter must immediately 

+      follow the source image parameter, and the first backward(L1) 

+      reference image parameter must immediately follow the first forward

+      reference image parameter. Up to 16 consecutive pairs of forward 

+      and backward reference image parameters can be provided, with the 

+      reference images closer to the source image in timing order 

+      appearing earlier in the parameter list.

+   ______________________________________________________________________

+

+

+   The VME built-in functions are organized into categories of functions

+   based on the major VME operations as follows:

+

+   1. MCE built-in functions 

+   --------------------------

+   A set of generic built-in functions which are common across IME, REF,

+   or SIC operations. Certain functions may not be applicable for some 

+   specific operation as indicated in their descriptions.

+

+   2. IME built-in functions 

+   ------------------------- 

+   A set of built-in functions to initialize, configure and evaluate an 

+   integer motion estimation result.

+

+   3. REF built-in functions 

+   -------------------------

+   A set of built-in functions to initialize, configure and evaluate a 

+   fractional and/or bidirectional refinement operation on the results 

+   of IME built-in functions.

+

+   4. SIC built-in functions 

+   -------------------------

+   A set of built-in functions to initialize, configure and evaluate a 

+   skip check or intra estimation operation result.

+   ______________________________________________________________________

+

+

+   A set of ordered phases of built-in functions need to be called to

+   evaluate each category of built-in functions. Some phases may not be

+   specified for certain function categories, but if present it must be 

+   called in the correct order, otherwise the behavior is undefined.

+

+   1. Initialization (required phase)

+   ---------------------------------

+   This creates and initializes the payload for the VME operation and

+   may perform some basic configuration that invariably must be set.

+

+   2. Operation configuration (required phase, if present)

+   ------------------------------------------------------

+   This configures important sets of parameters for the VME operation

+   that were not set in the initialization phase in the payload.

+

+   3. Inter reference frame cost configuration (optional phase)

+   -----------------------------------------------------------

+   This configures the cost assigned to matching blocks in reference

+   frames based on timing distance from the source frame.

+

+   4. Inter motion vector/Intra mode cost configuration (optional phase)

+   --------------------------------------------------------------------

+   This configures the inter motion vector cost function or the intra

+   mode cost function to be used for the VME operation in the payload.

+

+   5. Inter/Intra shape cost configuration (optional phase)

+   -------------------------------------------------------

+   This configures the inter/intra shape cost penalty to be used for the

+   VME operation in the payload.

+

+   6. Miscellaneous property configuration (optional phase)

+   -------------------------------------------------------

+   This configures miscellaneous operation sub-functions in the VME 

+   operation payload.

+

+   7. Evaluation (required phase)

+   -----------------------------

+   This phase performs the evaluation of the VME operation using the 

+   payload configured in the previous phases by issuing it to the VME 

+   unit for processing.

+

+   8. Result processing (required phase)

+   ------------------------------------

+   The phase performs the extraction of various motion estimation 

+   operation component results from the result of the evaluation phase. 

+   The result components may be distributed among multiple work-items. 

+   The results components of one VME operation may be used as input to 

+   another VME operation at phase (1) or phase (7).

+

+   A set of built-in functions are defined for each of the major VME

+   operations of IME, REF, and SIC (SKC and IPE). These are categorized

+   as IME, REF, and SIC built-in functions. Functions that are common

+   across all categories are classified as MCE (motion check and

+   estimation) functions. Several restrictions for function argument

+   values are stated in the descriptions of the built-in function 

+   descriptions. In general, if the restrictions are not satisfied the

+   behavior is undefined, unless otherwise stated.

+

+   ______________________________________________________________________

+

+

+   MCE built-in functions 

+   ----------------------

+

+   A set of generic MCE operations which may be called for IME, REF, or

+   SIC operations with the restrictions as stated in their descriptions.

+   They can be called only during specific phases of these operations as

+   indicated in the description of the built-in functions.

+

+   Multi-reference cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These built-in functions enable multi-reference image costing. They   

+   allow for the configuration of the payloads to favorably bias the

+   major partitions coming from reference images that are closer to the

+   source image, than the ones coming from reference images that are

+   further away. The distance of the reference images, in the timing

+   order, from the source image is implied based on the order in which

+   the reference images are declared in the kernel parameter list.

+

+   +-------------------------------------+-----------------------------+

+   |uchar                                |Get the default base         |

+   |intel_sub_group_avc_mce_get_default_ |multi-reference cost penalty |

+   |inter_base_multi_reference_penalty(  |in U4U4 format when HW       |

+   |   uchar slice_type,                 |assisted multi-reference     |

+   |   uchar qp )                        |search is used.              |

+   |                                     |                             |

+   |                                     |The value of slice_type must |

+   |                                     |be a valid slice type        |

+   |                                     |enumeration value.           |

+   |                                     |                             |

+   |                                     |The value of qp must be a    |

+   |                                     |valid quantization parameter |

+   |                                     |value between 0 and 51.      |

+   +-------------------------------------+-----------------------------+

+   |intel_sub_group_avc_mce_payload_t    |Set the multi-reference base |

+   |intel_sub_group_avc_mce_set_inter_   |penalty when HW assisted     |

+   |base_multi_reference_penalty(        |multi-reference search is    |

+   |   uchar reference_base_penalty,     |performed.                   |

+   |   intel_sub_group_avc_mce_payload_t |                             |

+   |      payload )                      |Reference major partitions   |

+   |                                     |get associated with a penalty|

+   |                                     |based on its distance from   |

+   |                                     |the source image. The        |

+   |                                     |reference_base_penalty is    |

+   |                                     |scaled using a scaling factor|

+   |                                     |based on the implied distance|

+   |                                     |of the reference image from  |

+   |                                     |the source image as shown    |

+   |                                     |below.                       |

+   |                                     |                             |

+   |                                     |   0       => 0x             |

+   |                                     |   1 to 2  => 1x             |

+   |                                     |   3 to 6  => 2x             |

+   |                                     |   7 to 15 => 3x             |

+   |                                     |                             |

+   |                                     |The value of                 |

+   |                                     |reference_base_penalty is    |

+   |                                     |specified in U4U4 format and |

+   |                                     |the integer value must fit   |

+   |                                     |within 12 bits.              |

+   +-------------------------------------+-----------------------------+

+   

+   Inter shape and direction cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These built-in functions enable shape costing for inter estimation.

+   They allow for the configuration of payloads for the biasing of

+   certain shapes over others based on the configured parameters.

+

+   +------------------------------------+-------------------------------+

+   |ulong intel_sub_group_avc_mce_get_  |Get the default packed shape   |

+   |default_inter_shape_penalty(        |cost for inter estimation in   |

+   |   uchar slice_type,                |U4U4 format.                   |

+   |   uchar qp )                       |                               |

+   |                                    |The value of slice_type must be|

+   |                                    |a valid slice type enumeration |

+   |                                    |value.                         |

+   |                                    |                               |

+   |                                    |The value of qp must be a valid|

+   |                                    |quantization parameter value   |

+   |                                    |between 0 and 51.              |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_mce_payload_t   |Set the shape penalty for inter|

+   |intel_sub_group_avc_mce_set_inter_  |motion estimation.             |

+   |shape_penalty(                      |                               |

+   |   ulong packed_shape_penalty,      |The value of packed_shape_cost |

+   |   intel_sub_group_avc_mce_payload_t|is an unsigned long integer    |

+   |      payload )                     |value with the following bits  |

+   |                                    |specifying the shape penalty in|

+   |                                    |U4U4 format as follows:        |

+   |                                    |                               |

+   |                                    |   7:0   => 16x8 and 8x16      |

+   |                                    |   15:8  => 8x8                |

+   |                                    |   23:16 => 8x4 and 4x8        |

+   |                                    |   31:24 => 4x4                |

+   |                                    |   39:32 => 16x16              |

+   |                                    |   63:40 => must be zero       |

+   |                                    |                               |

+   |                                    |The U4U4 decoded integer values|

+   |                                    |for byte 0 and byte 4 must bit |

+   |                                    |fit in 12 bits, while the U4U4 |

+   |                                    |decoded integer values for the |

+   |                                    |other bytes must fit within 10 |

+   |                                    |bits.                          |

+   +------------------------------------+-------------------------------+

+   |uchar intel_sub_group_avc_mce_get_  |Get the default direction      |

+   |default_inter_direction_penalty(    |penalty for inter estimation in|

+   |   uchar slice_type,                |U4U4 format.                   |

+   |   uchar qp )                       |                               |

+   |                                    |The value of slice_type must be|

+   |                                    |a valid slice type enumeration |

+   |                                    |value.                         |

+   |                                    |                               |

+   |                                    |The value of qp must be a valid|

+   |                                    |quantization parameter value   |

+   |                                    |between 0 and 51.              |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_mce_payload_t   |Set the direction penalty for  |

+   |intel_sub_group_avc_mce_set_        |backward images used in inter  |

+   |inter_direction_penalty(            |motion estimation.             |

+   |   uchar direction_cost,            |                               |

+   |   intel_sub_group_avc_mce_payload_t|The value of direction_cost    |

+   |      payload )                     |must be in U4U4 format and its |

+   |                                    |decoded integer value must bit |

+   |                                    |fit in 12 bits.                |

+   +------------------------------------+-------------------------------+

+   

+   Intra shape cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++

+

+   These built-in functions enable shape costing for intra estimation.

+   They allow for the configuration of payloads for biasing of certain

+   shapes over others based on the configured parameters. Only the built-

+   in function providing the default shape penalty is specified as an MCE

+   function. The function which actually configures the payload for the

+   intra estimation operation is specified as a SIC built-in function.

+

+   +----------------------------------+--------------------------------+

+   |uint intel_sub_group_avc_mce_get_ |Get the default packed luma     |

+   |default_intra_luma_shape_penalty( |intra shape penalty in U4U4     |

+   |   uchar slice_type,              |format.                         |

+   |   uchar qp )                     |                                |

+   +----------------------------------+--------------------------------+   

+

+   Inter motion vector cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These built-in functions enable motion vector costing for inter

+   estimation. The distortion measure is augmented to favor motion

+   vectors closer to the cost-center considered in conjunction with the

+   primary objective of minimizing the SAD between the source and

+   reference blocks.

+

+   +------------------------------------+-------------------------------+

+   |uint2 intel_sub_group_avc_mce_get_  |Get the default inter motion   |

+   |default_inter_motion_vector_        |vector cost table for the      |

+   |cost_table(                         |pre-defined control points in  |

+   |   uchar slice_type,                |U4U4 format for the input Qp   |

+   |   uchar qp )                       |and slice type.                |

+   |                                    |                               |

+   |                                    |The value of slice_type must be|

+   |                                    |a valid slice type enumeration |

+   |                                    |value.                         |

+   |                                    |                               |

+   |                                    |The value of qp must be a valid|

+   |                                    |quantization parameter value   |

+   |                                    |between 0 and 51.              |

+   +------------------------------------+-------------------------------+

+   |uint2 intel_sub_group_avc_mce_get_  |Get the default predefined     |

+   |default_high_penalty_cost_table(    |packed U4U4 format high cost   |

+   |   void)                            |table for high Qp. This may be |

+   |                                    |more appropriate for frame     |

+   |                                    |sequences with high motion.    |

+   +------------------------------------+-------------------------------+

+   |uint2 intel_sub_group_avc_mce_get_  |Get the default predefined     |

+   |default_medium_penalty_cost_table(  |packed U4U4 format medium cost |

+   |   void)                            |table for medium Qp. This may  |

+   |                                    |be more appropriate for frame  |

+   |                                    |sequences with normal motion.  |

+   +------------------------------------+-------------------------------+

+   |uint2 intel_sub_group_avc_mce_get_  |Get the default predefined     |

+   |default_low_penalty_cost_table(     |packed U4U4 format low cost    |

+   |   void)                            |table for low Qp. This may be  |

+   |                                    |more appropriate for frame     |

+   |                                    |sequences with low motion.     |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_mce_payload_t   |Update the input payload to set|

+   |intel_sub_group_avc_mce_set_        |the cost precision along with  |

+   |motion_vector_cost_function(        |the cost center and cost table |

+   |   ulong packed_cost_center_delta,  |and return it.                 |

+   |   uint2 packed_cost_table,         |                               |

+   |   uchar cost_precision,            |The value of                   |

+   |   intel_sub_group_avc_mce_payload_t|packed_cost_center_delta is the|

+   |      payload )                     |packed bidirectional cost      |

+   |                                    |center delta value relative to |

+   |                                    |the source macroblock, which   |

+   |                                    |specifies the 4 bidirectional  |

+   |                                    |cost centers of each of the 8x8|

+   |                                    |partitions of the reference    |

+   |                                    |image.  If only unidirectional |

+   |                                    |search is performed then the   |

+   |                                    |values of the backward         |

+   |                                    |reference cost centers must be |

+   |                                    |zero. Work-item 'n' provides   |

+   |                                    |the value of cost center       |

+   |                                    |'n'. It is specified in QPEL   |

+   |                                    |units. For 16x16 partitions    |

+   |                                    |work-item '0' provides the cost|

+   |                                    |center. For 8x16 partitions    |

+   |                                    |work-items '0' and '1' provide |

+   |                                    |the cost centers. For 16x8     |

+   |                                    |partitions work-items '0' and  |

+   |                                    |'2' provide the cost           |

+   |                                    |centers. The X and Y           |

+   |                                    |coordinates of each cost center|

+   |                                    |delta must be in the range     |

+   |                                    |[-2048, 2047] and [-512.00 to  |

+   |                                    |511.75] respectively, otherwise|

+   |                                    |the results are undefined.     |

+   |                                    |                               |

+   |                                    |The packed cost table specifies|

+   |                                    |the cost penalties for         |

+   |                                    |pre-defined control points in  |

+   |                                    |U4U4 format in the cost        |

+   |                                    |function curve. The first 7    |

+   |                                    |bytes specify 7 control points |

+   |                                    |representing consecutive       |

+   |                                    |powers-of-two delta units (2^0 |

+   |                                    |to 2^6).  Each delta unit, dx, |

+   |                                    |is the distance of a motion    |

+   |                                    |vector, mv, from the specified |

+   |                                    |cost center, cc                |

+   |                                    |(dx=|mv-cc|). The cost penalty |

+   |                                    |values at in-between control   |

+   |                                    |points are linearly            |

+   |                                    |interpolated. The range of the |

+   |                                    |cost function is defined to be |

+   |                                    |from 2^0 to 2^6 delta          |

+   |                                    |units. The 8th byte of the     |

+   |                                    |packed cost table specifies the|

+   |                                    |penalty base factor (over_cost)|

+   |                                    |for dx distances that are      |

+   |                                    |out-of-range. The penalty of   |

+   |                                    |out-of-range cost dx distances |

+   |                                    |is computed as min(over_cost + |

+   |                                    |int(dx) - 64, 255).            |

+   |                                    |                               |

+   |                                    |The value of the cost precision|

+   |                                    |parameter must be a valid cost |

+   |                                    |precision enumeration value,   |

+   |                                    |and specifies the precision of |

+   |                                    |the delta units from the cost  |

+   |                                    |center, dx. This effectively   |

+   |                                    |can be used to control the     |

+   |                                    |range of the cost function as  |

+   |                                    |follows:                       |

+   |                                    |                               |

+   |                                    |PRECISION DELTAS PIXEL RANGE   |

+   |                                    |--------- ------ -----------   |

+   |                                    |                               |

+   |                                    |PEL       pixel   0-64         |

+   |                                    |DPEL      dual    0-127        |

+   |                                    |          pixel                |

+   |                                    |HALF      half    0-31         |

+   |                                    |          pixel                |

+   |                                    |QUARTER   quarter 0-15         |

+   |                                    |          pixel                |

+   |                                    |                               |

+   |                                    |The inter distortion for a     |

+   |                                    |block can be described by the  |

+   |                                    |following formula:             |

+   |                                    |                               |

+   |                                    |Distortion =                   |

+   |                                    |   SAD (or HAAR) +             |

+   |                                    |   MV_Cost_Penalty +           |

+   |                                    |   Shape_Penalty +             |

+   |                                    |   Direction_Cost +            |

+   |                                    |   Multi_Reference_Penalty     |

+   +------------------------------------+-------------------------------+

+

+   Intra mode cost configuration phase functions

+   +++++++++++++++++++++++++++++++++++++++++++++

+

+   These built-in functions enable mode costing for intra estimation. 

+   They allow for the configuration of payloads to bias the computed

+   intra modes to be closer to their configured neighbor modes. This form

+   of costing is similar to the inter motion vector costing. Only the

+   built-in functions providing the defaults mode costs are specified as

+   MCE functions. The remaining functions which actually configure the

+   payload for the intra estimation operation is specified as SIC built-

+   in functions.

+

+   +----------------------------------+---------------------------------+

+   |uchar intel_sub_group_avc_mce_get_|Get the default intra mode cost  |

+   |default_intra_luma_mode_penalty(  |penalty for in U4U4 format when  |

+   |   uchar slice_type,              |the estimated mode differs from  |

+   |   uchar qp )                     |its predicted mode from its      |

+   |                                  |neighbors.                       |

+   |                                  |                                 |

+   |                                  |The value of slice_type must be a|

+   |                                  |valid slice type enumeration     |

+   |                                  |value.                           |

+   |                                  |                                 |

+   |                                  |The value of qp must be a valid  |

+   |                                  |quantization parameter value     |

+   |                                  |between 0 and 51.                |

+   +----------------------------------+---------------------------------+

+   |uint intel_sub_group_avc_mce_get_ |Get the default intra non-dc cost|

+   |default_non_dc_luma_intra_penalty(|penalty for intra luma estimation|

+   |   void )                         |in 8-bit integer format.         |

+   +----------------------------------+---------------------------------+

+   |uchar intel_sub_group_avc_mce_get_|Get the default chroma mode base |

+   |default_intra_chroma_mode_base_   |penalty in U4U4 format.          |

+   |penalty(                          |                                 |

+   |   void )                         |                                 |

+   +----------------------------------+---------------------------------+

+

+   Miscellaneous property configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These built-in functions enable miscellaneous MCE properties settings.

+   

+   +--------------------------------------+-----------------------------+

+   |intel_sub_group_avc_mce_payload_t     |Update the input payload to  |

+   |intel_sub_group_avc_mce_set_          |enable an AC only HAAR SAD   |

+   |ac_only_haar(                         |mode and return it. It       |

+   |   intel_sub_group_avc_mce_payload_t  |overrides any previous       |

+   |       payload )                      |setting for sad adjustment.  |

+   |                                      |                             |

+   |                                      |This feature is mainly       |

+   |                                      |intended for improved block  |

+   |                                      |matching in frame-rate       |

+   |                                      |conversion (FRC) kernels.    |

+   +--------------------------------------+-----------------------------+

+   |intel_sub_group_avc_mce_payload_t     |Update the input payload to  |

+   |intel_sub_group_avc_mce_set_source_   |specify the field polarities |

+   |interlaced_field_polarity(            |for interlaced source images |

+   |   uchar src_field_polarity,          |used for inter or intra      |

+   |   intel_sub_group_avc_mce_payload_t  |operations.                  |

+   |      payload )                       |                             |

+   |                                      |The value of                 |

+   |                                      |src_field_polarity must be a |

+   |                                      |valid field polarity         |

+   |                                      |enumeration value indicating |

+   |                                      |the field polarity for the   |

+   |                                      |source image.                |

+   +--------------------------------------+-----------------------------+

+   |intel_sub_group_avc_mce_payload_t     |Update the input payload to  |

+   |intel_sub_group_avc_mce_set_single_   |specify the field polarities |

+   |reference_interlaced_field_polarity(  |for interlaced reference     |

+   |   uchar ref_field_polarity,          |images used for single       |

+   |   intel_sub_group_avc_mce_payload_t  |reference inter search or    |

+   |      payload )                       |check operation.             |

+   |                                      |                             |

+   |                                      |The value of                 |

+   |                                      |ref_field_polarity must be a |

+   |                                      |valid field polarity         |

+   |                                      |enumeration value indicating |

+   |                                      |the field polarity for the   |

+   |                                      |reference image.             |

+   +--------------------------------------+-----------------------------+

+   |intel_sub_group_avc_mce_payload_t     |Update the input payload to  |

+   |intel_sub_group_avc_mce_set_dual_     |specify the field polarities |

+   |reference_interlaced_field_polarities(|for interlaced reference     |

+   |   uchar fwd_ref_field_polarity,      |images used for dual         |

+   |   uchar bwd_ref_field_polarity,      |reference inter search or    |

+   |   intel_sub_group_avc_mce_payload_t  |check operation.             |

+   |      payload )                       |                             |

+   |                                      |The value of                 |

+   |                                      |fwd_ref_field_polarity must  |

+   |                                      |be a valid field polarity    |

+   |                                      |enumeration value indicating |

+   |                                      |the field polarity for the   |

+   |                                      |forward image.               |

+   |                                      |                             |

+   |                                      |The value of                 |

+   |                                      |bwd_ref_field_polarity must  |

+   |                                      |be a valid field polarity    |

+   |                                      |enumeration value indicating |

+   |                                      |the field polarity for the   |

+   |                                      |backward image.              |

+   +--------------------------------------+-----------------------------+

+

+   Result processing phase functions

+   +++++++++++++++++++++++++++++++++

+

+   These built-in functions facilitate the extraction of components of

+   the result from VME unit.

+

+   +-----------------------------------+--------------------------------+

+   |ulong intel_sub_group_avc_mce_get_ |Get the MCE packed BMVs result. |

+   |motion_vectors(                    |                                |

+   |   intel_sub_group_avc_mce_result_t|Up to 16 packed BMVs are        |

+   |      result )                     |returned, one per work-item. If |

+   |                                   |the MCE search operation's      |

+   |                                   |payload was setup for           |

+   |                                   |unidirectional search then only |

+   |                                   |the forward packed MV will be   |

+   |                                   |valid in each BMV, otherwise    |

+   |                                   |both packed MVs will be         |

+   |                                   |valid. The BMVs have to be      |

+   |                                   |selected by their respective    |

+   |                                   |work-items based on the result  |

+   |                                   |block major and minor shapes.   |

+   |                                   |                                |

+   |                                   |If the major shape is:          |

+   |                                   |   - 16x16, then one BMV is     |

+   |                                   |     returned by work-item 0    |

+   |                                   |   - 16x8, or 8x16, then two    |

+   |                                   |     BMVs are returned by work- |

+   |                                   |     items 0 and 8              |

+   |                                   |   - 8x8, then four sets of BMVs|

+   |                                   |     corresponding to the four  |

+   |                                   |     partitions in traditional  |

+   |                                   |     Z-order are returned by    |

+   |                                   |     work-items in the ranges   |

+   |                                   |     [0, 3], [4, 7], [8, 11],   |

+   |                                   |     and [12, 15]; the minor    |

+   |                                   |     shape will determine       |

+   |                                   |     exactly which work-items   |

+   |                                   |     in the reserved inclusive  |

+   |                                   |     range for the partition    |

+   |                                   |     returns the BMVs for that  |

+   |                                   |     partition                  |

+   |                                   |                                |

+   |                                   |If the range of work-items for  |

+   |                                   |the 8x8 major partition is [n,  |

+   |                                   |n+3] and the minor shape is:    |

+   |                                   |   - 8x8, then work-item 'n'    |

+   |                                   |     returns the BMV for each   |

+   |                                   |     minor partition            |

+   |                                   |   - 8x4 or 4x8, then work-items|

+   |                                   |     'n' and 'n+2' returns the  |

+   |                                   |     BMVs for each minor        |

+   |                                   |     partition                  |

+   |                                   |   - 4x4, then all work-items in|

+   |                                   |     [n, n+3] return the BMVs   |

+   |                                   |     for each minor partition   |

+   |                                   |     in traditional Z-order     |

+   |                                   |                                |

+   |                                   |NOTES:                          |

+   |                                   |                                |

+   |                                   |(1) All sub-block BMVs get      |

+   |                                   |replicated for each partition.  |

+   |                                   |For example, for a 16x16        |

+   |                                   |partition, all smaller sub-block|

+   |                                   |BMVs are replicated to the same |

+   |                                   |BMV, and for 8x8 partition, each|

+   |                                   |8x8 must have its respective    |

+   |                                   |sub-block BMVs replicated. This |

+   |                                   |is not important to extract the |

+   |                                   |component BMVs itself, but is   |

+   |                                   |needed if the result of this    |

+   |                                   |function is used to initialize  |

+   |                                   |the input motion vectors of a   |

+   |                                   |REF initialization function.    |

+   |                                   |                                |

+   |                                   |(2) With interlaced images, the |

+   |                                   |MBs for the top field MBs are   |

+   |                                   |considered as logically         |

+   |                                   |overlapping with the bottom MBs.|

+   +-----------------------------------+--------------------------------+

+   |ushort intel_sub_group_avc_mce_get_|Get the MCE inter distortions   |

+   |inter_distortions(                 |result corresponding to the BMVs|

+   |   intel_sub_group_avc_mce_result_t|returned by intel_sub_group_avc_|

+   |      result                       |mce_get_motion_vectors(..). The |

+   |                                   |MCE inter directions result     |

+   |                                   |returned by intel_sub_group_avc_|

+   |                                   |mce_get_inter_directions(..)    |

+   |                                   |will specify if the distortion  |

+   |                                   |corresponds to the forward MV,  |

+   |                                   |backward MV, or the             |

+   |                                   |bidirectional MV in the BMV. Up |

+   |                                   |to 16 distortions are returned, |

+   |                                   |one per work-item.              |

+   |                                   |                                |

+   |                                   |The distortions have to be      |

+   |                                   |selected by their respective    |

+   |                                   |work-items based on the result  |

+   |                                   |block major and minor shapes    |

+   |                                   |just as for the result MVs as   |

+   |                                   |described above.                |

+   +-----------------------------------+--------------------------------+

+   |ushort intel_sub_group_avc_mce_get_|Get the best inter distortion   |

+   |best_inter_distortion(             |for the whole MB.               |

+   |   intel_sub_group_avc_mce_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+

+   |uchar intel_sub_group_avc_mce_get_ |Get the MCE inter MB major      |

+   |inter_major_shape(                 |partition shape.                |

+   |   intel_sub_group_avc_mce_result_t|                                |

+   |      result )                     |The returned values are as per  |

+   |                                   |the inter-MB major shapes       |

+   |                                   |enumeration values.             |

+   |                                   |                                |

+   |                                   |This can only be called as part |

+   |                                   |of an IME or REF operation      |

+   |                                   |evaluation.                     |

+   +-----------------------------------+--------------------------------+

+   |uchar intel_sub_group_avc_mce_get_ |Get the MCE inter MB minor      |

+   |inter_minor_shapes(                |partition shapes.               |

+   |   intel_sub_group_avc_mce_result_t|                                |

+   |      result )                     |It returns a bit field with the |

+   |                                   |minor shapes for the 4 8x8      |

+   |                                   |sub-partitions in traditional Z |

+   |                                   |order. Two bits are reserved for|

+   |                                   |each of the four sub-partitions |

+   |                                   |in row-major order.  The        |

+   |                                   |returned 2-bit values are as per|

+   |                                   |the inter-MB minor shapes       |

+   |                                   |enumeration values.             |

+   |                                   |                                |

+   |                                   |This function returns valid     |

+   |                                   |results only if the major shape |

+   |                                   |is 8x8, otherwise the results   |

+   |                                   |are undefined.                  |

+   |                                   |                                |

+   |                                   |This can only be called as part |

+   |                                   |of an IME or REF operation      |

+   |                                   |evaluation.                     |

+   +-----------------------------------+--------------------------------+

+   |uchar intel_sub_group_avc_mce_get_ |Get the MCE inter MB major      |

+   |inter_directions(                  |partition directions.           |

+   |   intel_sub_group_avc_mce_result_t|                                |

+   |      result )                     |It returns a bit field with the |

+   |                                   |direction for up to 4 major     |

+   |                                   |sub-partitions in traditional Z |

+   |                                   |order. Two bits are reserved for|

+   |                                   |each of the four                |

+   |                                   |sub-partitions. The returned    |

+   |                                   |2-bit values are as per the     |

+   |                                   |inter-MB major shape direction  |

+   |                                   |enumeration values.             |

+   |                                   |                                |

+   |                                   |If the major partition is:      |

+   |                                   |                                |

+   |                                   |- 16x16, then bits in the range |

+   |                                   |  [0, 1] contains the direction |

+   |                                   |- 16x8 or 8x16, then bits in the|

+   |                                   |  ranges [0, 1] and [4,5]       |

+   |                                   |  contains the two partitions   |

+   |                                   |  directions                    |

+   |                                   |- 8x8, then bits in the ranges  |

+   |                                   |  [0, 1], [2, 3], [4,5], and    |

+   |                                   |  [6, 7] contains the four      |

+   |                                   |  partitions directions         |

+   |                                   |                                |

+   |                                   |The returned values are as per  |

+   |                                   |the inter direction enumeration |

+   |                                   |values.                         |

+   |                                   |                                |

+   |                                   |This can only be called as part |

+   |                                   |of an IME or REF operation      |

+   |                                   |evaluation.                     |

+   +-----------------------------------+--------------------------------+

+   |uchar intel_sub_group_avc_mce_get_ |Get the count of motion vectors |

+   |inter_motion_vector_count(         |(based on the partitioning      |

+   |   intel_sub_group_avc_mce_result_t|decision) returned by the search|

+   |      result )                     |operation.                      |

+   |                                   |                                |

+   |                                   |This can only be called as part |

+   |                                   |of an IME or REF operation      |

+   |                                   |evaluation.                     |

+   +-----------------------------------+--------------------------------+

+   |uint intel_sub_group_avc_mce_get_  |Get the MCE inter MB reference  |

+   |inter_reference_ids(               |identifiers in a packed integer |

+   |   intel_sub_group_avc_mce_result_t|format, with the following bits |

+   |      result )                     |specifying the reference        |

+   |                                   |identifiers for the major       |

+   |                                   |partitions.                     |

+   |                                   |                                |

+   |                                   |3:0   => Fwd reference block 0  |

+   |                                   |7:4   => Bwd reference block 0  |

+   |                                   |11:8  => Fwd reference block 1  |

+   |                                   |15:12 => Bwd reference block 1  |

+   |                                   |19:16 => Fwd reference block 2  |

+   |                                   |23:20 => Bwd reference block 2  |

+   |                                   |27:24 => Fwd reference block 3  |

+   |                                   |31:28 => Bwd reference block 3  |

+   |                                   |                                |

+   |                                   |The values of each individual   |

+   |                                   |4-bit reference identifier range|

+   |                                   |from 0 to 15, with each value   |

+   |                                   |identifying the distance of     |

+   |                                   |ordered pair of forward/backward|

+   |                                   |reference images as declared in |

+   |                                   |the VME kernel parameter        |

+   |                                   |interface list.                 |

+   |                                   |                                |

+   |                                   |If the dual-reference evaluation|

+   |                                   |functions are not used, then the|

+   |                                   |values of the backward reference|

+   |                                   |identifiers are undefined.      |

+   |                                   |                                |

+   |                                   |The blocks are numbered using   |

+   |                                   |the traditional Z order. For    |

+   |                                   |larger block sizes, the         |

+   |                                   |sub-block reference identifier  |

+   |                                   |pairs are replicated. For       |

+   |                                   |example, for a 16x16 block all  |

+   |                                   |four pairs of reference         |

+   |                                   |identifiers are replicated to   |

+   |                                   |the value of the first pair for |

+   |                                   |block 0.                        |

+   |                                   |                                |

+   |                                   |NOTE: Unless HW assisted        |

+   |                                   |multi-reference search was      |

+   |                                   |performed using the IME         |

+   |                                   |streamin/streamout evaluation   |

+   |                                   |functions, the individual 4-bit |

+   |                                   |reference identifier pair values|

+   |                                   |will all be the same (pointing  |

+   |                                   |to the same pair for            |

+   |                                   |forward/backward reference      |

+   |                                   |images).                        |

+   +-----------------------------------+--------------------------------+

+   |uchar                              |Get the MCE inter MB reference  |

+   |intel_sub_group_avc_mce_get_inter_ |field polarities for the        |

+   |reference_interlaced_field_        |corresponding reference         |

+   |polarities(                        |identifiers returned by         |

+   |   uint packed_reference_ids,      |intel_sub_group_avc_            |

+   |   uint                            |mce_get_inter_reference_ids(..) |

+   |   packed_reference_parameter_     |in a packed integer format, with|

+   |   field_polarities,               |the following bits specifying   |

+   |   intel_sub_group_avc_mce_result_t|the reference field polarities  |

+   |      result )                     |for the major partitions.       |

+   |                                   |                                |

+   |                                   | 0 : Fwd reference block 0      |

+   |                                   | 1 : Fwd reference block 1      |

+   |                                   | 2 : Fwd reference block 2      |

+   |                                   | 3 : Fwd reference block 3      |

+   |                                   | 4 : Bwd reference block 0      |

+   |                                   | 5 : Bwd reference block 1      |

+   |                                   | 6 : Bwd reference block 2      |

+   |                                   | 7 : Bwd reference block 3      |

+   |                                   |                                |

+   |                                   |If the dual-reference evaluation|

+   |                                   |functions are not used, then the|

+   |                                   |values of the backward reference|

+   |                                   |field polarities are undefined. |

+   |                                   |                                |

+   |                                   |The blocks are numbered using   |

+   |                                   |the traditional Z order. For    |

+   |                                   |larger block sizes, the         |

+   |                                   |sub-block reference field       |

+   |                                   |polarities are replicated. For  |

+   |                                   |example, for a 16x16 block all  |

+   |                                   |four pairs of reference field   |

+   |                                   |polarities are replicated to the|

+   |                                   |value of the first pair for     |

+   |                                   |block 0.                        |

+   |                                   |                                |

+   |                                   |The value of                    |

+   |                                   |packed_reference_ids is as      |

+   |                                   |defined by the return value of  |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_reference_ids(..).        |

+   |                                   |                                |

+   |                                   |The value of                    |

+   |                                   |packed_reference_parameter_     |

+   |                                   |field_polarities specifies the  |

+   |                                   |packed bit field of field       |

+   |                                   |polarities for each of the (up  |

+   |                                   |to 16) forward/backward         |

+   |                                   |interleaved pairs of reference  |

+   |                                   |images in the same order as     |

+   |                                   |specified in the kernel         |

+   |                                   |parameter list, as used for the |

+   |                                   |inter search operation. If less |

+   |                                   |than 16 pairs are used then the |

+   |                                   |corresponding bit field values  |

+   |                                   |are ignored.                    |

+   |                                   |                                |

+   |                                   |NOTE: An important restriction  |

+   |                                   |is that when multiple IME       |

+   |                                   |operations are performed for a  |

+   |                                   |HW multi-assisted               |

+   |                                   |multi-reference search operation|

+   |                                   |using the streamin/streamout    |

+   |                                   |capabilities, the same reference|

+   |                                   |image parameter cannot be used  |

+   |                                   |with different polarities in the|

+   |                                   |sequence of IME operations used |

+   |                                   |for a HW-assisted search        |

+   |                                   |operation. In other words, the  |

+   |                                   |field polarities for reference  |

+   |                                   |image parameters must be used   |

+   |                                   |consistently across IME         |

+   |                                   |operations used in a HW assisted|

+   |                                   |multi-reference search          |

+   |                                   |operation.                      |

+   +-----------------------------------+--------------------------------+

+

+   ______________________________________________________________________

+                           

+

+   IME built-in functions 

+   ---------------------- 

+   A set of ordered phases of functions are required to be called to

+   evaluate an integer motion estimation result.

+

+   Initialization phase functions

+   ++++++++++++++++++++++++++++++

+

+   These built-in functions create a properly initialized payload that

+   can be used for further configured for evaluating IME operations. This

+   is a required initial phase.

+

+   +---------------------------------+----------------------------------+

+   |intel_sub_group_avc_ime_payload_t|Return an initialized payload for |

+   |intel_sub_group_avc_ime_         |a VME integer search (IME)        |

+   |initialize(                      |operation.                        |

+   |    ushort2 src_coord,           |                                  |

+   |    uchar partition_mask,        |The payload is initialized for    |

+   |    uchar sad_adjustment )       |progressive frame operations, and |

+   |                                 |the cost configuration values and |

+   |                                 |the miscellaneous property values |

+   |                                 |are all initialized to zero. The  |

+   |                                 |cost configuration and the        |

+   |                                 |miscellaneous property            |

+   |                                 |configuration phase functions must|

+   |                                 |be used to override the initial   |

+   |                                 |configurations in the payload.    |

+   |                                 |                                  |

+   |                                 |The src_coord value represents the|

+   |                                 |2D offset of the top left corner  |

+   |                                 |of the source MB in pixel units in|

+   |                                 |the source image. Source MBs at   |

+   |                                 |the image borders are allowed to  |

+   |                                 |be partial, but the top-left      |

+   |                                 |corner must be within the image.  |

+   |                                 |                                  |

+   |                                 |If the source image is an         |

+   |                                 |interlaced scan image, then the   |

+   |                                 |bottom field lines are considered |

+   |                                 |as logically overlapping with the |

+   |                                 |top field lines (i.e. the top     |

+   |                                 |field MBs are considered as       |

+   |                                 |logically overlapping with the    |

+   |                                 |bottom MBs) for the purposes for  |

+   |                                 |specifying the src_coord value.   |

+   |                                 |                                  |

+   |                                 |The legal values for              |

+   |                                 |partition_mask can be composed by |

+   |                                 |setting the appropriate bit fields|

+   |                                 |specified by partition mask       |

+   |                                 |enumeration values using the '&'  |

+   |                                 |operator.                         |

+   |                                 |                                  |

+   |                                 |The legal values for              |

+   |                                 |sad_adjustment is specified by its|

+   |                                 |respective enumeration values.    |

+   |                                 |                                  |

+   |                                 |If the sad_adjustment is set to   |

+   |                                 |CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_  |

+   |                                 |INTEL, a simple wavelet transform,|

+   |                                 |Haar transform, is used to refine |

+   |                                 |the distortion measure of         |

+   |                                 |SAD. Haar transform here is used  |

+   |                                 |as a coarse estimation of the     |

+   |                                 |integer transform.                |

+   +---------------------------------+----------------------------------+

+

+   Configuration phase functions

+   +++++++++++++++++++++++++++++

+

+   These built-in functions allow for configuration of the search window.

+   A call to either intel_sub_group_avc_ime_set_single_reference(..)  or  

+   intel_sub_group_avc_ime_set_dual_reference(..) is required. This is a

+   required phase immediately following the initialization phase

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Update the input payload for a |

+   |intel_sub_group_avc_ime_set_        |VME single-reference search    |

+   |single_reference(                   |with the configuration for the |

+   |   short2 ref_offset,               |reference window search region,|

+   |   uchar search_window_config,      |and return it.                 |

+   |   intel_sub_group_avc_ime_payload_t|                               |

+   |      payload )                     |The 2D ref_offset specifies the|

+   |                                    |reference window offset. The X |

+   |                                    |and Y coordinates must be in   |

+   |                                    |the range [-2048, 2047],       |

+   |                                    |otherwise the results are      |

+   |                                    |undefined. The reference window|

+   |                                    |is allowed to be partially     |

+   |                                    |outside the image. Pixel       |

+   |                                    |replication is applied to      |

+   |                                    |generate out-of-bound reference|

+   |                                    |pixels. It is specified in PEL |

+   |                                    |units. Results are undefined in|

+   |                                    |the reference region is        |

+   |                                    |completely outside the image.  |

+   |                                    |                               |

+   |                                    |If the reference image is an   |

+   |                                    |interlaced scan image, then the|

+   |                                    |top field lines are considered |

+   |                                    |as logically overlapping with  |

+   |                                    |the bottom field lines         |

+   |                                    |(i.e. the top field MBs are    |

+   |                                    |considered as logically        |

+   |                                    |overlapping with the bottom    |

+   |                                    |MBs) for the purposes for      |

+   |                                    |specifying the ref_offset      |

+   |                                    |value.                         |

+   |                                    |                               |

+   |                                    |The parameter                  |

+   |                                    |search_window_config must be a |

+   |                                    |compile-time constant.         |

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |search_window_config must be   |

+   |                                    |one of the unreserved search   |

+   |                                    |window configuration           |

+   |                                    |enumeration values.            |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Update the input payload for a |

+   |intel_sub_group_avc_ime_            |VME dual-reference search with |

+   |set_dual_reference(                 |the configurations for the     |

+   |   short2 fwd_ref_offset,           |reference window search        |

+   |   short2 bwd_ref_offset,           |regions, and return it.        |

+   |   uchar search_window_config,      |                               |

+   |   intel_sub_group_avc_ime_payload_t|The 2D                         |

+   |      payload )                     |fwd_ref_offset/bwd_ref_offset  |

+   |                                    |specifies the forward/backward |

+   |                                    |reference window offsets. The X|

+   |                                    |and Y coordinates must be in   |

+   |                                    |the range [-2048, 2047],       |

+   |                                    |otherwise the results are      |

+   |                                    |undefined. The reference       |

+   |                                    |windows are allowed to be      |

+   |                                    |partially outside the          |

+   |                                    |image. Pixel replication is    |

+   |                                    |applied to generate            |

+   |                                    |out-of-bound reference         |

+   |                                    |pixels. It is specified in PEL |

+   |                                    |units.                         |

+   |                                    |                               |

+   |                                    |If a reference image is an     |

+   |                                    |interlaced scan image, then the|

+   |                                    |top field lines are considered |

+   |                                    |as logically overlapping with  |

+   |                                    |the bottom field lines         |

+   |                                    |(i.e. the top field MBs are    |

+   |                                    |considered as logically        |

+   |                                    |overlapping with the bottom    |

+   |                                    |MBs) for the purposes for      |

+   |                                    |specifying the corresponding   |

+   |                                    |fwd_ref_offset and/or          |

+   |                                    |bwd_ref_offset values.         |

+   |                                    |                               |

+   |                                    |The parameter                  |

+   |                                    |search_window_config must be a |

+   |                                    |compile-time constant and must |

+   |                                    |be one of the unreserved search|

+   |                                    |window configuration           |

+   |                                    |enumeration values.            |

+   +------------------------------------+-------------------------------+

+   |ushort2 intel_sub_group_ime_        |Get the 2D size of the         |

+   |ref_window_size(                    |reference window in pixel      |

+   |  uchar search_window_config,       |units.                         |

+   |  char dual_ref )                   |                               |

+   |                                    |The value of                   |

+   |                                    |search_window_config must be   |

+   |                                    |one of the unreserved search   |

+   |                                    |window configuration           |

+   |                                    |enumeration values.            |

+   |                                    |                               |

+   |                                    |The value of dual_ref must be  |

+   |                                    |set to zero for a single       |

+   |                                    |reference search window and one|

+   |                                    |for a dual-reference search    |

+   |                                    |window.                        |

+   +------------------------------------+-------------------------------+

+   |short2                              |If the input 2D reference      |

+   |intel_sub_group_avc_ime_            |window offset, ref_offset,     |

+   |adjust_ref_offset(                  |causes the reference window to |

+   |   short2 ref_offset,               |be fully out-of-bound of the   |

+   |   ushort2 src_coord,               |reference image, adjust it such|

+   |   ushort2 ref_window_size,         |that the reference window is   |

+   |   ushort2 image_size )             |within bounds of the reference |

+   |                                    |image.                         |

+   |                                    |                               |

+   |                                    |The 2D ref_offset specifies the|

+   |                                    |reference window offset. The X |

+   |                                    |and Y coordinates must be in   |

+   |                                    |the range [-2048, 2047],       |

+   |                                    |otherwise the results are      |

+   |                                    |undefined.                     |

+   |                                    |                               |

+   |                                    |The src_coord value represents |

+   |                                    |the 2D offset of the top left  |

+   |                                    |corner of the source MB in     |

+   |                                    |pixel units in the source      |

+   |                                    |image. Source MBs at the image |

+   |                                    |borders are allowed to be      |

+   |                                    |partial. Results are undefined |

+   |                                    |if the source MB is completely |

+   |                                    |outside the image.             |

+   |                                    |                               |

+   |                                    |The ref_window_size specifies  |

+   |                                    |the 2D size of the reference   |

+   |                                    |window in pixel units.         |

+   |                                    |                               |

+   |                                    |The image_size specifies the 2D|

+   |                                    |size of the progressive scan,  |

+   |                                    |or top or bottom fields, of the|

+   |                                    |interlaced scan image in pixel |

+   |                                    |units.                         |

+   |                                    |                               |

+   |                                    |If the reference image is an   |

+   |                                    |interlaced scan image, then the|

+   |                                    |bottom field lines are         |

+   |                                    |considered as logically        |

+   |                                    |overlapping with the top field |

+   |                                    |lines the purposes for         |

+   |                                    |specifying the image_size      |

+   |                                    |value. Since, the actual layout|

+   |                                    |of the top and bottom fields in|

+   |                                    |the reference image is in an   |

+   |                                    |interleaved fashion, the height|

+   |                                    |of the top or bottom fields    |

+   |                                    |should be exactly half of the  |

+   |                                    |actual reference image height. |

+   |                                    |                               |

+   |                                    |A call to                      |

+   |                                    |intel_sub_group_avc_ime_       |

+   |                                    |adjust_ref_coord(..)  is       |

+   |                                    |optional. It is required only  |

+   |                                    |if the reference window offsets|

+   |                                    |inputs to                      |

+   |                                    |intel_sub_group_avc_ime_set_   |

+   |                                    |single_reference(..)  or       |

+   |                                    |intel_sub_group_avc_ime_set_   |

+   |                                    |dual_reference (..)  is        |

+   |                                    |potentially out-of-bounds and  |

+   |                                    |need to be adjusted.           |

+   +------------------------------------+-------------------------------+

+

+   Payload type conversion functions

+   +++++++++++++++++++++++++++++++++

+   

+   These are optional built-in functions that may be called following the

+   search configuration phase to convert IME payload to MCE payloads and

+   vice-versa.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_mce_payload_t   |Convert the IME payload to a   |

+   |intel_sub_group_avc_ime_convert_    |generic MCE payload.           |

+   |to_mce_payload(                     |                               |

+   |   intel_sub_group_avc_ime_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Convert the generic MCE payload|

+   |intel_sub_group_avc_                |to a IME payload.              |

+   |mce_convert_to_ime_payload(         |                               |

+   |   intel_sub_group_avc_mce_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+

+   Multi-reference cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   search configuration phase to enable multi-reference image costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ime_set_        |intel_sub_group_avc_mce_       |

+   |inter_base_multi_reference_penalty( |set_inter_base_multi_reference_|

+   |   uchar reference_base_penalty,    |penalty(..) the payload        |

+   |   intel_sub_group_avc_ime_payload_t|conversions with to/from MCE   |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+

+   Inter shape and direction cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   search configuration phase to enable shape costing.

+

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_payload_t    |This is a wrapper for         |

+   |intel_sub_group_avc_ime_set_         |intel_sub_group_avc_mce_set_  |

+   |inter_shape_penalty(                 |inter_shape_penalty(..)       |

+   |   ulong packed_shape_cost,          |with the payload conversions  |

+   |   intel_sub_group_avc_ime_payload_t |to/from MCE types. See MCE    |

+   |      payload )                      |version for description.      |

+   |                                     |                              |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_payload_t    |This is a wrapper for         |

+   |intel_sub_group_avc_ime_set_         |intel_sub_group_avc_mce_set_  |

+   |inter_direction_penalty(             |inter_direction_penalty(..)   |

+   |    uchar direction_cost,            |with the payload conversions  |

+   |    intel_sub_group_avc_ime_payload_t|to/from MCE types. See MCE    |

+   |       payload )                     |version for description.      |

+   +-------------------------------------+------------------------------+

+

+   Inter motion vector cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   search configuration phase to enable motion vector costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ime_set_        |intel_sub_group_avc_mce_set_   |

+   |motion_vector_cost_function(        |motion_vector_cost_function(..)|

+   |   ulong packed_cost_center_delta,  |with the payload conversions   |

+   |   uint2 packed_cost_table,         |to/from MCE types. See MCE     |

+   |   uchar cost_precision,            |version for description.       |

+   |   intel_sub_group_avc_ime_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+

+   Miscellaneous property configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   search configuration phase to enable miscellaneous properties setting

+   in the payload.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ime_set_        |intel_sub_group_avc_mce_set_   |

+   |source_interlaced_field_polarity(   |source_interlaced_field_       |

+   |   uchar src_field_polarity         |polarity(..)  with the result  |

+   |   intel_sub_group_avc_ime_payload_t|conversions to/from MCE        |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ime_set_        |intel_sub_group_avc_mce_       |

+   |single_reference_interlaced_field_  |set_single_reference_          |

+   |polarity(                           |interlaced_field_polarity(..)  |

+   |   uchar ref_field_polarity,        |with the result conversions    |

+   |   intel_sub_group_avc_ime_payload_t|to/from MCE types. See MCE     |

+   |      payload )                     |version for description.       |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ime_set_        |intel_sub_group_avc_mce_set_   |

+   |dual_reference_interlaced_field_    |dual_reference_interlaced_     |

+   |polarities(                         |field_polarities(..) with the  |

+   |   uchar fwd_ref_field_polarity,    |result conversions to/from MCE |

+   |   uchar bwd_ref_field_polarity,    |types. See MCE version for     |

+   |   intel_sub_group_avc_ime_payload_t|description.                   |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Specify the maximum number of  |

+   |intel_sub_group_avc_ime_set_        |motion vectors allowed for the |

+   |max_motion_vector_count(            |current MB. The default setting|

+   |   uchar max_motion_vector_count,   |is 32. Any other value may     |

+   |   intel_sub_group_avc_ime_payload_t|alter the MB partitioning      |

+   |      payload )                     |decision. The IME operation    |

+   |                                    |will compute the best allowed  |

+   |                                    |partitioning such that the     |

+   |                                    |number of sub-block motion     |

+   |                                    |vectors will not exceed        |

+   |                                    |max_motion_vector_count.       |

+   |                                    |                               |

+   |                                    |The value of max_motion_vector_|

+   |                                    |count(..) specifies the maximum|

+   |                                    |number of motion vectors       |

+   |                                    |allowed for the current MB. It |

+   |                                    |must be in the range [1, 32],  |

+   |                                    |otherwise the results are      |

+   |                                    |undefined.                     |

+   |                                    |                               |

+   |                                    |NOTE: This can be used to      |

+   |                                    |handle the restriction for     |

+   |                                    |certain profiles for AVC in    |

+   |                                    |that the maximum number of     |

+   |                                    |motion vectors allowed for two |

+   |                                    |consecutive MBs can only be 16.|

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Update the input payload to    |

+   |intel_sub_group_avc_ime_set_        |disable a mix of forward and   |

+   |unidirectional_mix_disable(         |backward MVs in the result.    |

+   |   intel_sub_group_avc_ime_payload_t|                               |

+   |      payload )                     |Default is to enable it.       |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Specifies the threshold value  |

+   |intel_sub_group_avc_ime_set_early_  |of a distortion compute of a   |

+   |search_termination_threshold(       |16x16 partition of a MB for a  |

+   |   uchar threshold,                 |single-reference search, below |

+   |   intel_sub_group_avc_ime_payload_t|which no more searching is     |

+   |      payload )                     |performed for the MB.          |

+   |                                    |                               |

+   |                                    |The value of threshold is      |

+   |                                    |specified in U4U4 format and   |

+   |                                    |the integer value must fit     |

+   |                                    |within 14 bits.                |

+   |                                    |                               |

+   |                                    |The input payload must have    |

+   |                                    |been configured for a single-  |

+   |                                    |reference search with the 16x16|

+   |                                    |partition enabled for this     |

+   |                                    |threshold to be set, or else   |

+   |                                    |the results are undefined.     |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |Set the (16) SAD weights for   |

+   |intel_sub_group_avc_ime_set_        |each 4x4 sub-block.            |

+   |weighted_sad(                       |                               |

+   |   uint packed_sad_weights,         |These values are used to       |

+   |   intel_sub_group_avc_ime_payload_t|decrease the SAD magnitude of  |

+   |      payload )                     |each 4x4 sub-block by dividing |

+   |                                    |the SAD of 4x4 sub-block of the|

+   |                                    |source MB by its mapped weight.|

+   |                                    |                               |

+   |                                    |It requires a partition_mask of|

+   |                                    |16x16 and forward search window|

+   |                                    |configuration.                 |

+   |                                    |                               |

+   |                                    |The weighting pattern used is  |

+   |                                    |the traditional Z order for    |

+   |                                    |each 4x4 block. Weighted-SAD   |

+   |                                    |Control Mapping:               |

+   |                                    |                               |

+   |                                    |   0 1 4 5                     |

+   |                                    |   2 3 6 7                     |

+   |                                    |   8 9 C D                     |

+   |                                    |   A B E F                     |

+   |                                    |                               |

+   |                                    |Each weight is of 2 bits       |

+   |                                    |represented in a packed format |

+   |                                    |in packed_sad_weights for each |

+   |                                    |of the 4x4 blocks in Z order.  |

+   |                                    |                               |

+   |                                    |A prior call to                |

+   |                                    |intel_sub_group_avc_ime_       |

+   |                                    |set_single_reference(..) set up|

+   |                                    |for the forward reference image|

+   |                                    |is required.                   |

+   |                                    |                               |

+   |                                    |This feature is mainly intended|

+   |                                    |for improved block matching in |

+   |                                    |image-rate conversion (FRC)    |

+   |                                    |kernels.                       |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ime_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ime_set_        |intel_sub_group_avc_mce_set_ac_|

+   |ac_only_haar(                       |only_haar(..) with the payload |

+   |   intel_sub_group_avc_ime_payload_t|conversions to/from MCE        |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+

+   Evaluation phase functions

+   ++++++++++++++++++++++++++

+

+   These built-in functions perform the evaluation of the IME operation

+   configured in the payload with a VME media sampler and return the

+   results.

+

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_t     |Evaluate the basic IME        |

+   |intel_sub_group_avc_ime_evaluate_    |operation with a single       |

+   |with_single_reference(               |reference and return its      |

+   |   read_only image2d_t src_image,    |results. The IME payload must |

+   |   read_only image2d_t ref_image,    |have been configured with     |

+   |   sampler_t vme_media_sampler,      |intel_sub_group_avc_ime_set_  |

+   |   intel_sub_group_avc_ime_payload_t |single_reference(..).         |

+   |      payload )                      |                              |

+   |                                     |The parameter ref_image must  |

+   |                                     |be a valid forward image      |

+   |                                     |kernel parameter per the      |

+   |                                     |ordering conventions for the  |

+   |                                     |kernel parameter list.        |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_t     |Evaluate the basic IME        |

+   |intel_sub_group_avc_ime_evaluate_    |operation with dual reference |

+   |with_dual_reference(                 |and return its results. The   |

+   |   read_only image2d_t src_image,    |IME payload must have been    |

+   |   read_only image2d_t fwd_ref_image,|configured with               |

+   |   read_only image2d_t bwd_ref_image,|intel_sub_group_avc_ime_set_  |

+   |   sampler_t vme_media_sampler,      |dual_reference(..).           |

+   |   intel_sub_group_avc_ime_payload_t |                              |

+   |      payload )                      |The parameter fwd_ref_image[  |

+   |                                     |bwd_ref_image] must be a valid|

+   |                                     |forward[backward] image kernel|

+   |                                     |parameter per the ordering    |

+   |                                     |conventions for the kernel    |

+   |                                     |parameter list.               |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_      |Evaluate the single reference |

+   |single_reference_streamout_t         |IME operation with streamout  |

+   |intel_sub_group_avc_ime_evaluate_    |and return its results. The   |

+   |with_single_reference_streamout(     |IME payload must have been    |

+   |   read_only image2d_t src_image,    |configured with               |

+   |   read_only image2d_t ref_image,    |intel_sub_group_avc_ime_set_  |

+   |   sampler_t vme_media_sampler,      |single_reference(..).         |

+   |   intel_sub_group_avc_ime_payload_t |                              |

+   |      payload )                      |The parameter ref_image must  |

+   |                                     |be a valid forward image      |

+   |                                     |kernel parameter per the      |

+   |                                     |ordering conventions for the  |

+   |                                     |kernel parameter list.        |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_      |Evaluate the dual reference   |

+   |dual_reference_streamout_t           |IME operation with streamout  |

+   |intel_sub_group_avc_ime_evaluate_    |and return its results. The   |

+   |with_dual_reference_streamout(       |IME payload must have been    |

+   |   read_only image2d_t src_image,    |configured with               |

+   |   read_only image2d_t fwd_ref_image,|intel_sub_group_avc_ime_set_  |

+   |   read_only image2d_t bwd_ref_image,|dual_reference(..).           |

+   |   sampler_t vme_media_sampler,      |                              |

+   |   intel_sub_group_avc_ime_payload_t |The parameter fwd_ref_image[  |

+   |      payload )                      |bwd_ref_image] must be a valid|

+   |                                     |forward[backward] image kernel|

+   |                                     |parameter per the ordering    |

+   |                                     |conventions for the kernel    |

+   |                                     |parameter list.               |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_t     |Evaluate the single reference |

+   |intel_sub_group_avc_ime_evaluate_    |IME operation with streamin   |

+   |with_single_reference_streamin(      |and return its results. The   |

+   |    read_only image2d_t src_image,   |IME payload must have been    |

+   |    read_only image2d_t ref_image,   |configured with               |

+   |    sampler_t vme_media_sampler,     |intel_sub_group_avc_ime_set_  |

+   |    intel_sub_group_avc_ime_payload_t|single_reference(..).         |

+   |       payload,                      |                              |

+   |    intel_sub_group_avc_ime_single_  |The parameter ref_image must  |

+   |    reference_streamin_t             |be a valid forward image      |

+   |       streamin_components )         |kernel parameter per the      |

+   |                                     |ordering conventions for the  |

+   |                                     |kernel parameter list.        |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_t     |Evaluate the dual reference   |

+   |intel_sub_group_avc_ime_evaluate_    |IME operation with streamin   |

+   |with_dual_reference_streamin(        |and return its results. The   |

+   |   read_only image2d_t src_image,    |IME payload must have been    |

+   |   read_only image2d_t fwd_ref_image,|configured with               |

+   |   read_only image2d_t bwd_ref_image,|intel_sub_group_avc_ime_set_  |

+   |   sampler_t vme_media_sampler,      |dual_reference(..).           |

+   |   intel_sub_group_avc_ime_payload_t |                              |

+   |      payload,                       |The parameter fwd_ref_image[  |

+   |   intel_sub_group_avc_ime_dual_     |bwd_ref_image] must be a valid|

+   |   reference_streamin_t              |forward[backward] image kernel|

+   |      streamin_components            |parameter per the ordering    |

+   |   )                                 |conventions for the kernel    |

+   |                                     |parameter list.               |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_      |Evaluate the single reference |

+   |single_reference_streamout_t         |IME operation with streamin   |

+   |intel_sub_group_avc_ime_evaluate_    |and streamout and return its  |

+   |with_single_reference_streaminout(   |results. The IME payload must |

+   |   read_only image2d_t src_image,    |have been configured with     |

+   |   read_only image2d_t ref_image,    |intel_sub_group_avc_ime_set_  |

+   |   sampler_t vme_media_sampler,      |single_reference(..).         |

+   |   intel_sub_group_avc_ime_payload_t |                              |

+   |      payload,                       |The parameter ref_image must  |

+   |   intel_sub_group_avc_ime_single_   |be a valid forward image      |

+   |   reference_streamin_t              |kernel parameter per the      |

+   |      streamin_components            |ordering conventions for the  |

+   | )                                   |kernel parameter list.        |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_      |Evaluate the dual reference   |

+   |dual_reference_streamout_t           |IME operation with streamin   |

+   |intel_sub_group_avc_ime_evaluate_    |and streamout and return its  |

+   |with_dual_reference_streaminout(     |results. The IME payload must |

+   |   read_only image2d_t src_image,    |have been configured with     |

+   |   read_only image2d_t fwd_ref_image,|intel_sub_                    |

+   |   read_only image2d_t bwd_ref_image,|group_avc_ime_set_dual_       |

+   |   sampler_t vme_media_sampler,      |reference(..).                |

+   |   intel_sub_group_avc_ime_payload_t |                              |

+   |      payload,                       |The parameter fwd_ref_image[  |

+   |   intel_sub_group_avc_ime_dual_     |bwd_ref_image] must be a valid|

+   |   reference_streamin_t              |forward[backward] image kernel|

+   |      streamin_components            |parameter per the ordering    |

+   | )                                   |conventions for the kernel    |

+   |                                     |parameter list.               |

+   +-------------------------------------+------------------------------+

+

+   Result type conversion functions

+   +++++++++++++++++++++++++++++++++

+   

+   These are optional built-in functions that may be called following the

+   evaluation phase to convert IME results to MCE results and vice-versa.

+

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_mce_result_t   |Convert the IME result into a   |

+   |intel_sub_group_avc_ime_           |MCE result.                     |

+   |convert_to_mce_result(             |                                |

+   |   intel_sub_group_avc_ime_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_ime_result_t   |Convert the MCE result into an  |

+   |intel_sub_group_avc_mce_           |IME result.                     |

+   |convert_to_ime_result(             |                                |

+   |   intel_sub_group_avc_mce_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+   

+

+   Result processing phase functions

+   +++++++++++++++++++++++++++++++++

+

+   These built-in functions are called following the evaluation phase to

+   extract the various result components from an IME evaluation result.

+

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_             |Return the streamed out BMVs  |

+   |single_reference_streamin_t          |and distortions from the input|

+   |intel_sub_group_avc_ime_             |result from a single reference|

+   |get_single_reference_streamin(       |streamout IME operation that  |

+   |   intel_sub_group_avc_ime_result_   |can be used as streamin input |

+   |   single_reference_streamout_t      |for a subsequent IME          |

+   |      result )                       |operation.                    |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_             |Return the streamed out BMVs  |

+   |dual_reference_streamin_t            |and distortions from the input|

+   |intel_sub_group_avc_ime_get_         |result from a dual reference  |

+   |dual_reference_streamin(             |streamout IME operation that  |

+   |   intel_sub_group_avc_ime_result_   |can be used as streamin input |

+   |   dual_reference_streamout_t        |for a subsequent IME          |

+   |      result )                       |operation.                    |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_t     |Strip out the single reference|

+   |intel_sub_group_avc_ime_strip_       |streamout BMVs and distortions|

+   |single_reference_streamout(          |from the streamout results and|

+   |   intel_sub_group_avc_ime_result_   |return the rest.              |

+   |   single_reference_streamout_t      |                              |

+   |      result )                       |                              |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ime_result_t     |Strip out the dual reference  |

+   |intel_sub_group_avc_ime_strip_       |streamout MVs and distortions |

+   |dual_reference_streamout(            |from the streamout results and|

+   |   intel_sub_group_avc_ime_result_   |return the rest.              |

+   |   dual_reference_streamout_t        |                              |

+   |      result )                       |                              |

+   +-------------------------------------+------------------------------+

+   |uint intel_sub_group_avc_ime_get_    |Get the packed motion vectors |

+   |streamout_major_shape_motion_vectors(|for the input major shape from|

+   |   intel_sub_group_avc_ime_result_   |the IME single reference      |

+   |   single_reference_streamout_t      |streamout results.            |

+   |      result,                        |                              |

+   |   uchar major_shape )               |The parameter major_shape must|

+   |                                     |be a valid inter macro-block  |

+   |                                     |major shape enumeration value |

+   |                                     |and a compile-time constant.  |

+   |                                     |                              |

+   |                                     |Up to 4 packed MVs are        |

+   |                                     |returned, one per             |

+   |                                     |work-item. If the major shape |

+   |                                     |is:                           |

+   |                                     |- 16x6, then one packed MV    |

+   |                                     |  is returned by work-item 0  |

+   |                                     |- 16x8, or 8x16, then two     |

+   |                                     |  packed MVs are returned by  |

+   |                                     |  work-items 0 and 1          |

+   |                                     |- 8x8, then four packed MVs   |

+   |                                     |  are returned by work-items  |

+   |                                     |  0 to 3.                     |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ime_get_  |Get the distortions for the   |

+   |streamout_major_shape_distortions(   |input major shape from the IME|

+   |   intel_sub_group_avc_ime_result_   |single reference streamout    |

+   |   single_reference_streamout_t      |results.                      |

+   |      result,                        |                              |

+   |   uchar major_shape )               |The parameter major_shape must|

+   |                                     |be a valid inter macro-block  |

+   |                                     |major shape enumeration value |

+   |                                     |and a compile-time constant.  |

+   |                                     |                              |

+   |                                     |Up to 4 distortions are       |

+   |                                     |returned, one per work-item in|

+   |                                     |the same format as for motion |

+   |                                     |vectors as described above.   |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |Get the reference identifiers |

+   |streamout_major_shape_reference_ids( |for the input major shape and |

+   |   intel_sub_group_avc_ime_result_   |direction from the IME dual   |

+   |   single_reference_streamout_t      |reference streamout results.  |

+   |      result,                        |                              |

+   |   uchar major_shape )               |The parameter major_shape must|

+   |                                     |be a valid inter macro-block  |

+   |                                     |major shape enumeration value |

+   |                                     |and a compile-time constant.  |

+   |                                     |                              |

+   |                                     |Up to 4 reference identifiers |

+   |                                     |are returned, one per         |

+   |                                     |work-item in the same format  |

+   |                                     |as for motion vectors as      |

+   |                                     |described above.              |

+   +-------------------------------------+------------------------------+

+   |uint intel_sub_group_avc_ime_get_    |Get the packed motion vectors |

+   |streamout_major_shape_motion_vectors(|for the input major shape and |

+   |   intel_sub_group_avc_ime_result_   |direction from the IME dual   |

+   |   dual_reference_streamout_t result,|reference streamout results.  |

+   |   uchar major_shape,                |                              |

+   |   uchar direction )                 |The parameter major_shape must|

+   |                                     |be a valid inter macro-block  |

+   |                                     |major shape enumeration value |

+   |                                     |and a compile-time constant.  |

+   |                                     |                              |

+   |                                     |The parameter direction must  |

+   |                                     |be a valid unidirectional     |

+   |                                     |inter macro-block major       |

+   |                                     |direction value and a         |

+   |                                     |compile-time constant.        |

+   |                                     |                              |

+   |                                     |Up to 4 packed MVs are        |

+   |                                     |returned, one per work-item in|

+   |                                     |the same format as for motion |

+   |                                     |vectors for single reference  |

+   |                                     |streamout as described above. |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ime_get_  |Get the distortions for the   |

+   |streamout_major_shape_distortions(   |input major shape and         |

+   |   intel_sub_group_avc_ime_result_   |direction from the IME dual   |

+   |   dual_reference_streamout_t result,|reference streamout results.  |

+   |   uchar major_shape,                |                              |

+   |   uchar direction )                 |The parameter major_shape must|

+   |                                     |be a valid inter macro-block  |

+   |                                     |major shape enumeration value |

+   |                                     |and a compile-time constant.  |

+   |                                     |                              |

+   |                                     |The parameter direction must  |

+   |                                     |be a valid unidirectional     |

+   |                                     |inter macro-block major       |

+   |                                     |direction value and a         |

+   |                                     |compile-time constant.        |

+   |                                     |                              |

+   |                                     |Up to 4 distortions are       |

+   |                                     |returned, one per work-item in|

+   |                                     |the same format as for motion |

+   |                                     |vectors as described above.   |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |Get the reference identifiers |

+   |streamout_major_shape_reference_ids( |for the input major shape and |

+   |   intel_sub_group_avc_ime_result_   |direction from the IME dual   |

+   |   dual_reference_streamout_t result,|reference streamout results.  |

+   |   uchar major_shape,                |                              |

+   |   uchar direction )                 |                              |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |Get the bitmask indicating    |

+   |border_reached(                      |whether any border of         |

+   |   uchar image_select,               |forward/backward reference    |

+   |   intel_sub_group_avc_ime_result_t  |image is reached by one or    |

+   |      result )                       |more MVs in the winning inter |

+   |                                     |shape. The bitmask values are |

+   |                                     |as per the inter border       |

+   |                                     |reached enumeration values.   |

+   |                                     |                              |

+   |                                     |The search window must have   |

+   |                                     |been configured for a forward |

+   |                                     |reference if image_select is  |

+   |                                     |set as                        |

+   |                                     |CLK_AVC_ME_FRAME_FORWARD_INTEL|

+   |                                     |and with a backward reference |

+   |                                     |if image_select is set as     |

+   |                                     |CLK_AVC_ME_FRAME_BACKWARD_    |

+   |                                     |INTEL.                        |

+   |                                     |                              |

+   |                                     |The value of image_select is  |

+   |                                     |either                        |

+   |                                     |CLK_AVC_ME_FRAME_FORWARD_INTEL|

+   |                                     |or CLK_AVC_ME_FRAME_BACKWARD_ |

+   |                                     |INTEL and must be a           |

+   |                                     |compile-time constant.        |   

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |Get the indication that the   |

+   |truncated_search_indication(         |search operation was prevented|

+   |   intel_sub_group_avc_ime_result_t  |from providing the lowest     |

+   |      result )                       |distortion solution due to the|

+   |                                     |tighter constraints on the    |

+   |                                     |maximum number of MB motion   |

+   |                                     |vectors configured for the    |

+   |                                     |search operation.             |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |Get the indication that       |

+   |unidirectional_early_search_         |unidirectional search         |

+   |termination(                         |operation terminated early    |

+   |   intel_sub_group_avc_ime_result_t  |because the configured        |

+   |      result )                       |distortion threshold was met. |

+   +-------------------------------------+------------------------------+

+   |uint intel_sub_group_avc_ime_get_    |Get the 16x16 motion vector   |

+   |weighting_pattern_minimum_           |corresponding to the minimum  |

+   |motion_vector(                       |16x16 distortion when applying|

+   |   intel_sub_group_avc_ime_result_t  |the traditional Z-order SAD   |

+   |      result )                       |weighting pattern.            |

+   |                                     |                              |

+   |                                     |This can only be called if a  |

+   |                                     |SAD weighting pattern was set |

+   |                                     |prior to evaluation using     |

+   |                                     |intel_sub_group_avc_ime_set_  |

+   |                                     |weighted_sad(..).             |

+   |                                     |                              |

+   |                                     |The argument for parameter    |

+   |                                     |"pattern_id" should be a      |

+   |                                     |compile-time constant.        |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ime_get_  |Get the minimum 16x16         |

+   |weighting_pattern_minimum_distortion(|distortion when applying the  |

+   |   intel_sub_group_avc_ime_result_t  |traditional Z-order SAD       |

+   |      result )                       |weighting pattern.            |

+   +-------------------------------------+------------------------------+

+   |ulong intel_sub_group_avc_ime_get_   |This is a wrapper for         |

+   |motion_vectors(                      |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |motion_vectors(..) with the   |

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ime_get_  |This is a wrapper for         |

+   |inter_distortions(                   |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |inter_distortions(..) with the|

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ime_get_  |This is a wrapper for         |

+   |best_inter_distortion(               |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |best_inter_distortion(..)with |

+   |      result )                       |the result conversions to/from|

+   |                                     |MCE types. See MCE version for|

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |This is a wrapper for         |

+   |inter_major_shape(                   |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |inter_major_shape(..) with the|

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |This is a wrapper for         |

+   |inter_minor_shapes(                  |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |inter_minor_shapes(..) with   |

+   |      result )                       |the result conversions to/from|

+   |                                     |MCE types. See MCE version for|

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |This is a wrapper for         |

+   |inter_directions(                    |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |inter_directions(..)  with the|

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |This is a wrapper for         |

+   |inter_motion_vector_count(           |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |inter_motion_vector_count(..) |

+   |      result )                       |with the result conversions   |

+   |                                     |to/from MCE types. See MCE    |

+   |                                     |version for description.      |

+   +-------------------------------------+------------------------------+

+   |uint intel_sub_group_avc_ime_get_    |This is a wrapper for         |

+   |inter_reference_ids(                 |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ime_result_t  |inter_reference_ids(..) with  |

+   |      result )                       |the result conversions to/from|

+   |                                     |MCE types. See MCE version for|

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ime_get_   |This is a wrapper for         |

+   |inter_reference_interlaced_          |intel_sub_group_avc_mce_get_  |

+   |field_polarities(                    |inter_reference_interlaced_   |

+   |   uint packed_reference_ids,        |field_polarities(..)  with the|

+   |   uint packed_reference_parameter_  |result conversions to/from MCE|

+   |   field_polarities,                 |types. See MCE version for    |

+   |   intel_sub_group_avc_ime_result_t  |description.                  |

+   |      result )                       |                              |

+   +-------------------------------------+------------------------------+

+   ______________________________________________________________________   

+

+   REF built-in functions 

+   ----------------------

+

+   A set of ordered phases of functions are required to be called to

+   evaluate a refinement operation result.

+

+   Initialization phase functions

+   ++++++++++++++++++++++++++++++

+

+   These built-in functions create a properly initialized payload that

+   can be used for further configured for evaluating REF operations. This

+   is a required initial phase. A call to either 

+   intel_sub_group_avc_fme_initialize or 

+   intel_sub_group_avc_bme_initialize is required.

+

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_ref_payload_t  |Return an initialized payload   |

+   |intel_sub_group_avc_fme_initialize(|for a VME fractional motion     |

+   |   ushort2 src_coord,              |estimation operation (FME).     |

+   |   ulong motion_vectors,           |                                |

+   |   uchar major_shapes,             |The payload is initialized for  |

+   |   uchar minor_shapes,             |progressive frame operations,   |

+   |   uchar directions,               |and the cost configuration      |

+   |   uchar pixel_resolution,         |values and the miscellaneous    |

+   |   uchar sad_adjustment )          |property values are all         |

+   |                                   |initialized to zero. The cost   |

+   |                                   |configuration and the           |

+   |                                   |miscellaneous property          |

+   |                                   |configuration phase functions   |

+   |                                   |must be used to override the    |

+   |                                   |initial configurations in the   |

+   |                                   |payload.                        |

+   |                                   |                                |

+   |                                   |The src_coord value represents  |

+   |                                   |the 2D offset of the top left   |

+   |                                   |corner of the source MB in pixel|

+   |                                   |units in the source image.      |

+   |                                   |                                |

+   |                                   |If the source image is an       |

+   |                                   |interlaced scan image, then the |

+   |                                   |bottom field lines are          |

+   |                                   |considered as logically         |

+   |                                   |overlapping with the top field  |

+   |                                   |lines (i.e. the top field MBs   |

+   |                                   |are considered as logically     |

+   |                                   |overlapping with the bottom MBs)|

+   |                                   |for the purposes for specifying |

+   |                                   |the src_coord value.            |

+   |                                   |                                |

+   |                                   |The parameter motion_vectors    |

+   |                                   |contains the BMVs returned by an|

+   |                                   |IME in the same format as       |

+   |                                   |returned by                     |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |motion_vectors(..). The MVs are |

+   |                                   |in QPEL units. The X and Y      |

+   |                                   |coordinates of each MV must be  |

+   |                                   |in the range [-2048.00,         |

+   |                                   |2047.75), otherwise the results |

+   |                                   |are undefined.                  |

+   |                                   |                                |

+   |                                   |(If this argument value is      |

+   |                                   |manually composed, all sub-block|

+   |                                   |MVs must be replicated per its  |

+   |                                   |format for each partition.  For |

+   |                                   |example for 16x16 partition, all|

+   |                                   |sub-block MVs must be replicated|

+   |                                   |to the same MV, and for 8x8     |

+   |                                   |partition, each 8x8 must have   |

+   |                                   |its respective sub-block MVs    |

+   |                                   |replicated.)                    |

+   |                                   |                                |

+   |                                   |Legal values and format for     |

+   |                                   |major_shapes are as per the     |

+   |                                   |return value of                 |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_major_shape(..).          |

+   |                                   |                                |

+   |                                   |Legal values and format for     |

+   |                                   |minor_shapes are as per the     |

+   |                                   |return value of                 |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_minor_shapes(..).         |

+   |                                   |                                |

+   |                                   |Legal values and format for     |

+   |                                   |directions are as per the return|

+   |                                   |value of                        |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_directions(..).           |

+   |                                   |                                |

+   |                                   |Legal values for                |

+   |                                   |pixel_resolution is either      |

+   |                                   |CLK_AVC_ME_SUBPIXEL_MODE_HPEL_  |

+   |                                   |INTEL or                        |

+   |                                   |CLK_AVC_ME_SUBPIXEL_MODE_QPEL_  |

+   |                                   |INTEL.                          |

+   |                                   |                                |

+   |                                   |The legal values for            |

+   |                                   |sad_adjustment is specified by  |

+   |                                   |its respective enumeration      |

+   |                                   |values.                         |

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_ref_payload_t  |Return an initialized payload   |

+   |intel_sub_group_avc_bme_initialize(|for a VME bidirectional motion  |

+   |   ushort2 src_coord,              |estimation (BME) operation.     |

+   |   ulong motion_vectors,           |                                |

+   |   uchar major_shapes,             |The payload is initialized for  |

+   |   uchar minor_shapes,             |progressive frame operations,   |

+   |   uchar directions,               |and the cost configuration      |

+   |   uchar pixel_resolution,         |values and the miscellaneous    |

+   |   uchar bidirectional_weight,     |property values are all         |

+   |   uchar sad_adjustment )          |initialized to zero. The cost   |

+   |                                   |configuration and the           |

+   |                                   |miscellaneous property          |

+   |                                   |configuration phase functions   |

+   |                                   |must be used to override the    |

+   |                                   |initial configurations in the   |

+   |                                   |payload.                        |

+   |                                   |                                |

+   |                                   |If the specified                |

+   |                                   |pixel_resolution is a sub-pixel |

+   |                                   |resolution then an implicit FME |

+   |                                   |operation is performed with the |

+   |                                   |BME operation.                  |

+   |                                   |                                |

+   |                                   |The src_coord value represents  |

+   |                                   |the 2D offset of the top left   |

+   |                                   |corner of the source MB in pixel|

+   |                                   |units in the source image.      |

+   |                                   |                                |

+   |                                   |If the source image is an       |

+   |                                   |interlaced scan image, then the |

+   |                                   |bottom field lines are          |

+   |                                   |considered as logically         |

+   |                                   |overlapping with the top field  |

+   |                                   |lines (i.e. the top field MBs   |

+   |                                   |are considered as logically     |

+   |                                   |overlapping with the bottom MBs)|

+   |                                   |for the purposes for specifying |

+   |                                   |the src_coord value.            |

+   |                                   |                                |

+   |                                   |The parameter motion_vectors    |

+   |                                   |contains the BMVs returned by an|

+   |                                   |IME in the same format as       |

+   |                                   |returned by                     |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |motion_vectors(..).             |

+   |                                   |                                |

+   |                                   |(If this argument value is      |

+   |                                   |manually composed, all sub-block|

+   |                                   |MVs must be replicated per its  |

+   |                                   |format for each partition.  For |

+   |                                   |example for 16x16 partition, all|

+   |                                   |sub-block MVs must be replicated|

+   |                                   |to the same MV, and for 8x8     |

+   |                                   |partition, each 8x8 must have   |

+   |                                   |its respective sub-block MVs    |

+   |                                   |replicated.)                    |

+   |                                   |                                |

+   |                                   |Legal values and format for     |

+   |                                   |major_shapes is as per the      |

+   |                                   |return value of                 |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_major_shape(..).          |

+   |                                   |                                |

+   |                                   |Legal values and format for     |

+   |                                   |minor_shapes is as per the      |

+   |                                   |return value of                 |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_minor_shapes(..).         |

+   |                                   |                                |

+   |                                   |Legal values and format for     |

+   |                                   |directions are as per the return|

+   |                                   |value of                        |

+   |                                   |intel_sub_group_avc_mce_get_    |

+   |                                   |inter_directions(..).           |

+   |                                   |                                |

+   |                                   |Legal values for                |

+   |                                   |pixel_resolution is either      |

+   |                                   |CLK_AVC_ME_SUBPIXEL_MODE_       |

+   |                                   |INTEGER_INTEL,                  |

+   |                                   |CLK_AVC_ME_SUBPIXEL_MODE_       |

+   |                                   |HPEL_INTEL or                   |

+   |                                   |CLK_AVC_ME_SUBPIXEL_MODE_       |

+   |                                   |QPEL_INTEL.                     |

+   |                                   |                                |

+   |                                   |Legal values for                |

+   |                                   |bidirectional_weight and sad    |

+   |                                   |adjustment is as per their      |

+   |                                   |respective enumeration values.  |

+   |                                   |                                |

+   |                                   |The legal values for            |

+   |                                   |sad_adjustment is specified by  |

+   |                                   |its respective enumeration      |

+   |                                   |values.                         |

+   +-----------------------------------+--------------------------------+

+

+   Payload type conversion functions

+   +++++++++++++++++++++++++++++++++

+   

+   These are optional built-in functions that may be called following the

+   initialization phase to convert REF payload to MCE payloads and

+   vice-versa.

+

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_mce_payload_t    |Convert the REF payload to a  |

+   |intel_sub_group_avc_ref_             |generic ME payload.           |

+   |convert_to_mce_payload(              |                              |

+   |   intel_sub_group_avc_ref_payload_t |                              |

+   |      payload )                      |                              |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ref_payload_t    |Convert the generic ME payload|

+   |intel_sub_group_avc_mce_             |to an FBR payload.            |

+   |convert_to_ref_payload(              |                              |

+   |   intel_sub_group_avc_mce_payload_t |                              |

+   |      payload )                      |                              |

+   +-------------------------------------+------------------------------+

+

+   Multi-reference cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   initialization phase to enable multi-reference image costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ref_set_        |intel_sub_group_avc_mce_       |

+   |inter_base_multi_reference_penalty( |set_inter_base_multi_reference_|

+   |   uchar reference_base_penalty,    |penalty(..) the payload        |

+   |   intel_sub_group_avc_ref_payload_t|conversions with to/from MCE   |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+

+   Inter shape and direction cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   initialization phase to enable shape costing.

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ref_payload_t    |This is a wrapper for         |

+   |intel_sub_group_avc_ref_set_         |intel_sub_group_avc_mce_set_  |

+   |inter_shape_penalty(                 |inter_shape_penalty(..) with  |

+   |   ulong packed_shape_cost,          |the payload conversions       |

+   |   intel_sub_group_avc_ref_payload_t |to/from MCE types. See MCE    |

+   |      payload )                      |version for description.      |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_ref_payload_t    |This is a wrapper for         |

+   |intel_sub_group_avc_ref_set_         |intel_sub_group_avc_mce_set_  |

+   |inter_direction_penalty(             |inter_direction_penalty(..)   |

+   |   uchar direction_cost,             |with the payload conversions  |

+   |   intel_sub_group_avc_ref_payload_t |to/from MCE types. See MCE    |

+   |       payload )                     |version for description.      |

+   +-------------------------------------+------------------------------+

+

+

+   Inter motion vector cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   initialization phase to enable motion vector costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ref_set_        |intel_sub_group_avc_mce_set_   |

+   |motion_vector_cost_function(        |motion_vector_cost_function    |

+   |   ulong packed_cost_center_delta,  |with the payload conversions   |

+   |   uint2 packed_cost_table,         |to/from MCE types. See MCE     |

+   |   uchar cost_precision,            |version for description.       |

+   |   intel_sub_group_avc_ref_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+

+   Miscellaneous property configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   initialization phase to enable miscellaneous properties setting in the

+   payload.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ref_set_        |intel_sub_group_avc_mce_set_   |

+   |source_interlaced_field_polarity(   |source_interlaced_field_       |

+   |   uchar src_field_polarity         |polarity(..)  with the result  |

+   |   intel_sub_group_avc_ref_payload_t|conversions to/from MCE        |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ref_set_        |intel_sub_group_avc_mce_set_   |

+   |single_reference_interlaced_        |single_reference_              |

+   |field_polarity(                     |interlaced_field_polarity(..)  |

+   |   uchar ref_field_polarity,        |with the result conversions    |

+   |   intel_sub_group_avc_ref_payload_t|to/from MCE types. See MCE     |

+   |      payload )                     |version for description.       |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ref_set_        |intel_sub_group_avc_mce_set_   |

+   |dual_reference_interlaced_image_    |dual_reference_interlaced_     |

+   |field_polarities(                   |field_polarities(..)  with the |

+   |   uchar fwd_ref_field_polarity,    |result conversions to/from MCE |

+   |   uchar bwd_ref_field_polarity,    |types. See MCE version for     |

+   |   intel_sub_group_avc_ref_payload_t|description.                   |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |Update the input payload to    |

+   |intel_sub_group_avc_ref_set_        |disable a mix of bidirectional |

+   |bidirectional_mix_disable(          |and unidirectional MVs in the  |

+   |   intel_sub_group_avc_ref_payload_t|result.                        |

+   |      payload )                     |                               |

+   |                                    |Default is to enable it.       |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Update the input payload to do |

+   |intel_sub_group_avc_ref_set_        |enable bilinear filter         |

+   |bilinear_filter_enable(             |interpolation instead of 4-tap |

+   |   intel_sub_group_avc_ref_payload_t|filter interpolation. Default  |

+   |      payload )                     |is 4-tap filter interpolation. |

+   |                                    |                               |

+   |                                    |This should not be called if   |

+   |                                    |the payload was initialized    |

+   |                                    |with integer pixel resolution. |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_ref_set_        |intel_sub_group_avc_mce_set_   |

+   |ac_only_haar(                       |ac_only_haar(..) with the      |

+   |   intel_sub_group_avc_ref_payload_t|payload conversions to/from MCE|

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+

+   Evaluation phase functions

+   ++++++++++++++++++++++++++

+

+   These built-in functions perform the evaluation of the REF operation

+   configured in the payload with a VME media sampler and return the

+   results.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_result_t    |Evaluate the basic REF         |

+   |intel_sub_group_avc_ref_            |operation with single reference|

+   |evaluate_with_single_reference(     |and return its results.        |

+   |   read_only image2d_t src_image,   |                               |

+   |   read_only image2d_t ref_image,   |The parameter ref_image must be|

+   |   sampler_t vme_media_sampler,     |a valid forward image kernel   |

+   |   intel_sub_group_avc_ref_payload_t|parameter per the ordering     |

+   |      payload )                     |conventions for the kernel     |

+   |                                    |parameter list.                |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_result_t    |Evaluate the basic REF         |

+   |intel_sub_group_avc_ref_            |operation with dual reference  |

+   |evaluate_with_dual_reference(       |and return its results.        |

+   |  read_only image2d_t src_image,    |                               |

+   |  read_only image2d_t fwd_ref_image,|The parameter                  |

+   |  image2d_t bwd_ref_image,          |fwd_ref_image[bwd_ref_image]   |

+   |  sampler_t vme_media_sampler,      |must be a valid                |

+   |  intel_sub_group_avc_ref_payload_t |forward[backward] image kernel |

+   |     payload )                      |parameter per the ordering     |

+   |                                    |conventions for the kernel     |

+   |                                    |parameter list.                |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_result_t    |Evaluate the basic REF         |

+   |intel_sub_group_avc_ref_            |operation with multi references|

+   |evaluate_with_multi_reference(      |and return its results.        |

+   |   read_only image2d_t src_image,   |                               |

+   |   uint packed_reference_ids,       |A unique pair of reference     |

+   |   sampler_t vme_media_sampler,     |identifiers (indicating unique |

+   |   intel_sub_group_avc_ref_payload_t|forward/backward reference     |

+   |      payload )                     |images) may be specified for   |

+   |                                    |each of the allowed major      |

+   |                                    |partitions using               |

+   |                                    |packed_dual_ref_ids.           |

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |packed_reference_ids is a      |

+   |                                    |integer with the following bits|

+   |                                    |specifying the values for the  |

+   |                                    |pair of reference images for   |

+   |                                    |each major partition.          |

+   |                                    |                               |

+   |                                    |3:0   => Fwd reference block 0 |

+   |                                    |7:4   => Bwd reference block 0 |

+   |                                    |11:8  => Fwd reference block 1 |

+   |                                    |15:12 => Bwd reference block 1 |

+   |                                    |19:16 => Fwd reference block 2 |

+   |                                    |23:20 => Bwd reference block 2 |

+   |                                    |27:24 => Fwd reference block 3 |

+   |                                    |31:28 => Bwd reference block 3 |

+   |                                    |                               |

+   |                                    |A forward[backward] reference  |

+   |                                    |idenitifer value of 'n'        |

+   |                                    |indicates the forward[backward]|

+   |                                    |image from the 'n'th pair of   |

+   |                                    |forward/backward reference     |

+   |                                    |images, with the value of 'n'  |

+   |                                    |ranging from 0 to 15.          |

+   |                                    |                               |

+   |                                    |If the REF operation is        |

+   |                                    |configured with only forward   |

+   |                                    |reference images then, the     |

+   |                                    |values of the backward         |

+   |                                    |reference identifiers are not  |

+   |                                    |used.                          |

+   |                                    |                               |

+   |                                    |The blocks are numbered using  |

+   |                                    |the traditional Z order. For   |

+   |                                    |larger block sizes, the        |

+   |                                    |sub-block reference identifier |

+   |                                    |pairs must be replicated. For  |

+   |                                    |example, for a 16x16 block, all|

+   |                                    |four pair of reference         |

+   |                                    |identifiers must be replicated |

+   |                                    |to the value of the first pair |

+   |                                    |for block 0.                   |

+   |                                    |                               |

+   |                                    |The value for the              |

+   |                                    |packed_reference_ids argument  |

+   |                                    |is obtained by calling         |

+   |                                    |intel_sub_group_avc_ime_get_   |

+   |                                    |inter_reference_ids(..)  for   |

+   |                                    |the preceding IME operation's  |

+   |                                    |result.                        |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_ref_result_t    |Evaluate the basic REF         |

+   |intel_sub_group_avc_ref_            |operation with multi references|

+   |evaluate_with_multi_reference(      |and return its results.  This  |

+   |   read_only image2d_t src_image,   |is used for interlaced source  |

+   |   uint packed_reference_ids,       |and reference images.          |

+   |   uchar packed_reference_          |                               |

+   |   field_polarities,                |A unique pair of reference     |

+   |   sampler_t vme_media_sampler,     |identifiers (indicating unique |

+   |   intel_sub_group_avc_ref_payload_t|forward/backward reference     |

+   |      payload )                     |images) may be specified for   |

+   |                                    |each of the allowed major      |

+   |                                    |partitions using               |

+   |                                    |packed_reference_ids.          |

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |packed_reference_ids is an     |

+   |                                    |integer with the following bits|

+   |                                    |specifying the values for the  |

+   |                                    |pair of reference images for   |

+   |                                    |each major partition.          |

+   |                                    |                               |

+   |                                    |3:0   => Fwd reference block 0 |

+   |                                    |7:4   => Bwd reference block 0 |

+   |                                    |11:8  => Fwd reference block 1 |

+   |                                    |15:12 => Bwd reference block 1 |

+   |                                    |19:16 => Fwd reference block 2 |

+   |                                    |23:20 => Bwd reference block 2 |

+   |                                    |27:24 => Fwd reference block 3 |

+   |                                    |31:28 => Bwd reference block 3 |

+   |                                    |                               |

+   |                                    |A forward[backward] reference  |

+   |                                    |idenitifer value of 'n'        |

+   |                                    |indicates the forward[backward]|

+   |                                    |image from the 'n'th pair of   |

+   |                                    |forward/backward reference     |

+   |                                    |images, with the value of 'n'  |

+   |                                    |ranging from 0 to 15.          |

+   |                                    |                               |

+   |                                    |If the REF operation is        |

+   |                                    |configured with only forward   |

+   |                                    |reference images then, the     |

+   |                                    |values of the backward         |

+   |                                    |reference identifiers are not  |

+   |                                    |used.                          |

+   |                                    |                               |

+   |                                    |The blocks are numbered using  |

+   |                                    |the traditional Z order. For   |

+   |                                    |larger block sizes, the        |

+   |                                    |sub-block reference identifier |

+   |                                    |pairs must be replicated. For  |

+   |                                    |example, for a 16x16 block, all|

+   |                                    |four pair of reference         |

+   |                                    |identifiers must be replicated |

+   |                                    |to the value of the first pair |

+   |                                    |for block 0.                   |

+   |                                    |                               |

+   |                                    |The value for the              |

+   |                                    |packed_reference_ids argument  |

+   |                                    |is obtained by calling         |

+   |                                    |intel_sub_group_avc_ime_get_   |

+   |                                    |inter_reference_ids(..)  for   |

+   |                                    |the preceding IME operation's  |

+   |                                    |result.                        |

+   |                                    |                               |

+   |                                    |Reference field polarities for |

+   |                                    |forward and backward reference |

+   |                                    |images are specified for each  |

+   |                                    |of the allowed major partitions|

+   |                                    |using packed_reference_field_  |

+   |                                    |polarities.                    |

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |packed_reference_field_        |

+   |                                    |polarities is an integer with  |

+   |                                    |the following bits specifying  |

+   |                                    |the reference field polarities |

+   |                                    |for the major partitions.      |

+   |                                    |                               |

+   |                                    |0 : Fwd reference block 0      |

+   |                                    |1 : Fwd reference block 1      |

+   |                                    |2 : Fwd reference block 2      |

+   |                                    |3 : Fwd reference block 3      |

+   |                                    |4 : Bwd reference block 0      |

+   |                                    |5 : Bwd reference block 1      |

+   |                                    |6 : Bwd reference block 2      |

+   |                                    |7 : Bwd reference block 3      |

+   |                                    |                               |

+   |                                    |If the dual-reference          |

+   |                                    |evaluation functions are not   |

+   |                                    |used, then the values of the   |

+   |                                    |backward reference field       |

+   |                                    |polarities are not used.       |

+   |                                    |                               |

+   |                                    |The blocks are numbered using  |

+   |                                    |the traditional Z order. For   |

+   |                                    |larger block sizes, the        |

+   |                                    |sub-block reference field      |

+   |                                    |polarities are replicated. For |

+   |                                    |example, for a 16x16 block all |

+   |                                    |four pairs of reference field  |

+   |                                    |polarities are replicated to   |

+   |                                    |the value of the first pair for|

+   |                                    |block 0.                       |

+   +------------------------------------+-------------------------------+

+

+   Result type conversion functions

+   +++++++++++++++++++++++++++++++++

+   

+   These are optional built-in functions that may be called following the

+   evaluation phase to convert REF results to MCE results and vice-versa.

+

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_mce_result_t   |Convert the REF result into a   |

+   |intel_sub_group_avc_ref_           |MCE result.                     |

+   |convert_to_mce_result(             |                                |

+   |   intel_sub_group_avc_ref_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_ref_result_t   |Convert the MCE result into an  |

+   |intel_sub_group_avc_mce_           |REF result.                     |

+   |convert_to_ref_result(             |                                |

+   |   intel_sub_group_avc_ref_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+

+

+   Result processing phase functions

+   +++++++++++++++++++++++++++++++++

+

+   These built-in functions are called following the evaluation phase to

+   extract the various result components from an REF evaluation result.

+

+   +-------------------------------------+------------------------------+

+   |ulong intel_sub_group_avc_ref_get_   |This is a wrapper for         |

+   |motion_vectors(                      |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |motion_vectors(..) with the   |

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ref_get_  |This is a wrapper for         |

+   |inter_distortions(                   |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |inter_distortions(..) with the|

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |ushort intel_sub_group_avc_ref_get_  |This is a wrapper for         |

+   |best_inter_distortion(               |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |best_inter_distortion(..)with |

+   |      result )                       |the result conversions to/from|

+   |                                     |MCE types. See MCE version for|

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ref_get_   |This is a wrapper for         |

+   |inter_major_shape(                   |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |inter_major_shape(..) with the|

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ref_get_   |This is a wrapper for         |

+   |inter_minor_shapes(                  |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |inter_minor_shapes(..) with   |

+   |      result )                       |the result conversions to/from|

+   |                                     |MCE types. See MCE version for|

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ref_get_   |This is a wrapper for         |

+   |inter_directions(                    |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |inter_directions(..)  with the|

+   |      result )                       |result conversions to/from MCE|

+   |                                     |types. See MCE version for    |

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ref_get_   |This is a wrapper for         |

+   |inter_motion_vector_count(           |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |inter_motion_vector_count(..) |

+   |      result )                       |with the result conversions   |

+   |                                     |to/from MCE types. See MCE    |

+   |                                     |version for description.      |

+   +-------------------------------------+------------------------------+

+   |uint intel_sub_group_avc_ref_get_    |This is a wrapper for         |

+   |inter_reference_ids(                 |intel_sub_group_avc_mce_get_  |

+   |   intel_sub_group_avc_ref_result_t  |inter_reference_ids(..) with  |

+   |      result )                       |the result conversions to/from|

+   |                                     |MCE types. See MCE version for|

+   |                                     |description.                  |

+   +-------------------------------------+------------------------------+

+   |uchar intel_sub_group_avc_ref_get_   |This is a wrapper for         |

+   |inter_reference_interlaced_          |intel_sub_group_avc_mce_get_  |

+   |field_polarities(                    |inter_reference_interlaced_   |

+   |   uint packed_reference_ids,        |field_polarities(..)  with the|

+   |   uint packed_reference_parameter_  |result conversions to/from MCE|

+   |   field_polarities,                 |types. See MCE version for    |

+   |   intel_sub_group_avc_ref_result_t  |description.                  |

+   |      result )                       |                              |

+   +-------------------------------------+------------------------------+

+

+   ______________________________________________________________________   

+

+

+   SIC built-in functions 

+   ----------------------

+

+   A set of ordered phases of functions are required to be called to

+   evaluate a skip check or intra estimation operation result.

+

+   Initialization phase functions

+   ++++++++++++++++++++++++++++++

+

+   These built-in functions create a properly initialized payload that

+   can be used for further configured for evaluating SIC operations. This

+   is a required initial phase.

+

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_sic_payload_t  |Return an initialized payload   |

+   |intel_sub_group_avc_sic_initialize(|for a VME SIC operation.        |

+   |   ushort2 src_coord )             |                                |

+   |                                   |                                |

+   |                                   |If the source image is an       |

+   |                                   |interlaced scan image, then the |

+   |                                   |bottom field lines are          |

+   |                                   |considered as logically         |

+   |                                   |overlapping with the top field  |

+   |                                   |lines (i.e. the top field MBs   |

+   |                                   |are considered as logically     |

+   |                                   |overlapping with the bottom MBs)|

+   |                                   |for the purposes for specifying |

+   |                                   |the src_coord value.            |

+   |                                   |                                |

+   |                                   |If the SIC operation is being   |

+   |                                   |configured for chroma based     |

+   |                                   |intra estimation, then the x and|

+   |                                   |y coordinates of src_coord must |

+   |                                   |be multiples of 2.              |

+   +-----------------------------------+--------------------------------+

+

+   Configuration phase functions

+   +++++++++++++++++++++++++++++

+

+   These built-in functions allow for configuration of a skip check 

+   and/or intra estimation operation. This is a required phase following

+   the initialization phase to configure the skip check or intra 

+   estimation operation. A call to intel_sub_group_avc_sic_configure_

+   skc(..) or intel_sub_group_avc_sic_configure_ipe(..) is required. Both   

+   intel_sub_group_avc_sic_configure_skc(..) and intel_sub_group_avc_

+   sic_configure_ipe(..) may be called to initialize the payload to

+   perform both skip checks and intra estimation as part for the same SIC

+   evaluation, but if called together intel_sub_group_avc_sic_configure_

+   skc(..) must be called first in the call sequence.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Configure the SIC payload for  |

+   |intel_sub_group_avc_sic_            |(uni or bi-directional) skip   |

+   |configure_skc(                      |checks.                        |

+   |   uint skip_block_partition_type,  |                               |

+   |   uint skip_motion_vector_mask,    |The legal values for           |

+   |   ulong motion_vectors,            |skip_block_partition_type must |

+   |   char bidirectional_weight,       |be one of the specified        |

+   |   uchar skip_sad_adjustment ,      |partition mask enumeration     |

+   |   intel_sub_group_avc_sic_payload_t|values.                        |

+   |     payload )                      |                               |

+   |                                    |Legal values for               |

+   |                                    |skip_motion_vector_mask can be |

+   |                                    |composed using the '|' operator|

+   |                                    |from the enumeration values    |

+   |                                    |defined for it; both           |

+   |                                    |unidirectional and             |

+   |                                    |bidirectional skip vectors can |

+   |                                    |be specified uniquely for each |

+   |                                    |major partition (16x16 or 8x8) |

+   |                                    |by an appropriate selection of |

+   |                                    |the skip motion vector mask    |

+   |                                    |enumeration values. If the     |

+   |                                    |16x16 skip_block_partition_type|

+   |                                    |is specified, then only the    |

+   |                                    |16x16 enumeration values may be|

+   |                                    |used, else only the 8x8        |

+   |                                    |enumeration values may be used.|

+   |                                    |                               |

+   |                                    |For convenience, one of the    |

+   |                                    |following two mechanisms can be|

+   |                                    |used to obtain the value for   |

+   |                                    |the skip_motion_vector_mask    |

+   |                                    |argument.                      |

+   |                                    |                               |

+   |                                    |If the shape and directions are|

+   |                                    |all compile-time constants,    |

+   |                                    |then either one of the two     |

+   |                                    |following macros may be used to|

+   |                                    |set this argument's value for  |

+   |                                    |16x16 or 8x8 major partitions  |

+   |                                    |respectively:                  |

+   |                                    |- CLK_AVC_ME_SKIP_BLOCK_16x16_ |

+   |                                    |  INTEL(DIRECTION)             |

+   |                                    |- CLK_AVC_ME_SKIP_BLOCK_8x8_   |

+   |                                    |  INTEL(DIRECTION0, DIRECTION1,|

+   |                                    |  DIRECTION2, DIRECTION3)      |

+   |                                    |                               |

+   |                                    |The DIRECTION{0-3} argument    |

+   |                                    |values must be one of the inter|

+   |                                    |macro-block major direction    |

+   |                                    |values. The directions are     |

+   |                                    |specified for each major       |

+   |                                    |partition in traditional       |

+   |                                    |Z-order.                       |

+   |                                    |                               |

+   |                                    |If the shape or directions are |

+   |                                    |not compile-time constants,    |

+   |                                    |then the helper function       |

+   |                                    |intel_sub_group_avc_sic_get_   |

+   |                                    |motion_vector_mask(..)  may be |

+   |                                    |used to set this argument's    |

+   |                                    |value.                         |

+   |                                    |                               |

+   |                                    |The parameter motion_vectors   |

+   |                                    |specifies the input packed     |

+   |                                    |BMVs. Either the forward or    |

+   |                                    |backward is ignored if the     |

+   |                                    |setting in                     |

+   |                                    |skip_motion_vector_mask is     |

+   |                                    |backward or forward            |

+   |                                    |respectively. If the setting is|

+   |                                    |bidirectional, then both the   |

+   |                                    |forward and backward motion    |

+   |                                    |vectors will be used.  If the  |

+   |                                    |skip_block_partition_type is   |

+   |                                    |16x16, work-item 0 in the      |

+   |                                    |subgroup provides the BMV, and |

+   |                                    |if the                         |

+   |                                    |skip_block_partition_type is   |

+   |                                    |8x8, work-items 0 to 4 in the  |

+   |                                    |subgroup provide the four      |

+   |                                    |BMVs. The MVs are in QPEL      |

+   |                                    |units. The X and Y coordinates |

+   |                                    |of each MV must be in the range|

+   |                                    |[-2048.00, 2047.75] and        |

+   |                                    |[-512.00 to 511.75]            |

+   |                                    |respectively, otherwise the    |

+   |                                    |results are undefined.         |

+   |                                    |                               |

+   |                                    |Legal values for               |

+   |                                    |bidirectional_weight and       |

+   |                                    |skip_sad_adjustment are as per |

+   |                                    |their respective enumeration   |

+   |                                    |values. If the setting is      |

+   |                                    |unidirectional, then the       |

+   |                                    |bidirectional_weight parameter |

+   |                                    |value is ignored and can be set|

+   |                                    |to the value '0 '.             |

+  +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Return an initialized payload  |

+   |intel_sub_group_avc_sic_            |for a VME luma intra prediction|

+   |configure_ipe(                      |estimation (IPE) operation.    |

+   |   uchar luma_intra_partition_mask, |                               |

+   |   uchar                            |The luma_intra_partition_mask  |

+   |   intra_neighbour_availabilty,     |can be composed from their     |

+   |   uchar left_edge_luma_pixels,     |respective enumeration values  |

+   |   uchar                            |using the '&' operator.        |

+   |   upper_left_corner_luma_pixel,    |                               |

+   |   uchar upper_edge_luma_pixels,    |Legal values for               |

+   |   uchar                            |intra_neighbour_availabilty and|

+   |   upper_right_edge_luma_pixels,    |intra_sad_adjustment are as per|

+   |   uchar intra_sad_adjustment ,     |their respective enumeration   |

+   |   intel_sub_group_avc_sic_payload_t|values, based on the intra     |

+   |      payload )                     |neighboring macroblock's to    |

+   |                                    |consider in intra mode         |

+   |                                    |estimation.                    |

+   |                                    |                               |

+   |                                    |The other parameters specify   |

+   |                                    |the neighbor edge pixels for   |

+   |                                    |the left, top-left corner, top |

+   |                                    |and top right edges with each  |

+   |                                    |work-item providing each pixel |

+   |                                    |value. These pixels values are |

+   |                                    |used to perform the intra mode |

+   |                                    |estimation.                    |

+   |                                    |                               |

+   |                                    |For the left and top edge      |

+   |                                    |pixels, successive subgroup    |

+   |                                    |work-items 0 to 15 provide the |

+   |                                    |successive edge pixels. For the|

+   |                                    |top-right edge, successive     |

+   |                                    |work-items 0 to 7 provide the  |

+   |                                    |successive edge pixels; the    |

+   |                                    |pixel values in work-items 8 to|

+   |                                    |15 are ignored. The top-left   |

+   |                                    |corner pixel is a uniform pixel|

+   |                                    |value with each work-item      |

+   |                                    |providing the same corner      |

+   |                                    |pixel.                         |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Return an initialized payload  |

+   |intel_sub_group_avc_sic_            |for a VME luma and chroma intra|

+   |configure_ipe(                      |prediction estimation (IPE)    |

+   |   uchar luma_intra_partition_mask, |operation.                     |

+   |   uchar intra_neighbour_           |                               |

+   |   availabilty,                     |The luma_intra_partition_mask  |

+   |   uchar left_edge_luma_pixels,     |can be composed from their     |

+   |   uchar upper_left_corner_luma_    |respective enumeration values  |

+   |   pixel,                           |using the '&' operator. Chroma |

+   |   uchar upper_edge_luma_           |intra prediction estimation is |

+   |   pixels,                          |performed on the 8x8 CbCr      |

+   |   uchar upper_right_edge_luma_     |macroblock.                    |

+   |   pixels,                          |                               |

+   |   ushort left_edge_chroma_pixels,  |Legal values for               |

+   |   ushort upper_left_corner_chroma_ |intra_neighbour_availabilty and|

+   |   pixel,                           |intra_sad_adjustment are as per|

+   |   ushort upper_edge_chroma_pixels, |their respective enumeration   |

+   |   uchar intra_sad_adjustment,      |values, based on the intra     |

+   |   intel_sub_group_avc_sic_payload_t|neighboring macroblock's to    |

+   |      payload )                     |consider in intra mode         |

+   |                                    |estimation.                    |

+   |                                    |                               |

+   |                                    |The other parameters specify   |

+   |                                    |the neighbor luma and chroma   |

+   |                                    |edge pixels for the left,      |

+   |                                    |top-left corner, top and       |

+   |                                    |top-right (luma only) edges    |

+   |                                    |with each work-item providing  |

+   |                                    |each pixel value. These pixels |

+   |                                    |values are used to perform the |

+   |                                    |intra mode estimation.         |

+   |                                    |                               |

+   |                                    |For the left and top luma edge |

+   |                                    |pixels successive, subgroup    |

+   |                                    |work-items 0 to 15 provide the |

+   |                                    |successive edge pixels. For the|

+   |                                    |top-right luma edge, successive|

+   |                                    |work-items 0 to 7 provide the  |

+   |                                    |successive edge pixels; the    |

+   |                                    |pixel values in work-items 8 to|

+   |                                    |15 are ignored. The top-left   |

+   |                                    |corner pixel is a uniform pixel|

+   |                                    |value with each work-item      |

+   |                                    |providing the same corner      |

+   |                                    |pixel.                         |

+   |                                    |                               |

+   |                                    |For the left and top chroma    |

+   |                                    |CbCr pixels, successive        |

+   |                                    |subgroup work-items 0 to 7     |

+   |                                    |provide the successive CbCr    |

+   |                                    |pixels; the pixel values in    |

+   |                                    |work-items 8 to 15 are         |

+   |                                    |ignored. The top-left corner   |

+   |                                    |pixel is a uniform CbCr pixel  |

+   |                                    |value with each work-item      |

+   |                                    |providing the same corner CbCr |

+   |                                    |pixel.                         |

+   +------------------------------------+-------------------------------+

+   |uint intel_sub_group_avc_sic_get_   |Compose the value for the input|

+   |motion_vector_mask(                 |argument                       |

+   |   uint skip_block_partition_type,  |skip_motion_vector_mask for the|

+   |   uchar direction )                |input skip_block_partition_type|

+   |                                    |and direction.                 |

+   |                                    |                               |

+   |                                    |The legal values for           |

+   |                                    |skip_block_partition_type must |

+   |                                    |be one of the specified        |

+   |                                    |partition mask enumeration     |

+   |                                    |values (16x16 or 8x8).         |

+   |                                    |                               |

+   |                                    |The direction parameter is a   |

+   |                                    |bit field with the directions  |

+   |                                    |for the 4 8x8 sub-partitions in|

+   |                                    |traditional Z order, or for    |

+   |                                    |only the 16x16 partition. Two  |

+   |                                    |bits are reserved for each of  |

+   |                                    |the four sub-partitions in     |

+   |                                    |row-major order.  The 2-bit    |

+   |                                    |values are as per the inter    |

+   |                                    |macro-block major direction    |

+   |                                    |values. If the                 |

+   |                                    |skip_block_partition_type      |

+   |                                    |indicates a 16x16 shape, then  |

+   |                                    |only the 1st 2 bits contains   |

+   |                                    |the direction, and other bits  |

+   |                                    |must be zeroed.                |

+   +------------------------------------+-------------------------------+

+

+   Payload type conversion functions

+   +++++++++++++++++++++++++++++++++

+   

+   These are optional built-in functions that may be called following the

+   configuration phase to convert IME payload to MCE payloads and vice-

+   versa.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_mce_payload_t   |Convert the SIC payload to a   |

+   |intel_sub_group_avc_sic_convert_    |generic MCE payload.           |

+   |to_mce_payload(                     |                               |

+   |   intel_sub_group_avc_sic_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Convert the generic MCE payload|

+   |intel_sub_group_avc_                |to a SIC payload.              |

+   |mce_convert_to_sic_payload(         |                               |

+   |   intel_sub_group_avc_mce_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+

+   Multi-reference cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   configuration phase to enable multi-reference image costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_sic_set_        |intel_sub_group_avc_mce_       |

+   |inter_base_multi_reference_penalty( |set_inter_base_multi_reference_|

+   |   uchar reference_base_penalty,    |penalty(..) the payload        |

+   |   intel_sub_group_avc_sic_payload_t|conversions with to/from MCE   |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+

+   Inter shape and direction cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   configuration phase to enable shape costing.

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_sic_payload_t    |This is a wrapper for         |

+   |intel_sub_group_avc_sic_set_         |intel_sub_group_avc_mce_set_  |

+   |inter_shape_penalty(                 |inter_shape_penalty(..) with  |

+   |   ulong packed_shape_cost,          |the payload conversions       |

+   |   intel_sub_group_avc_sic_payload_t |to/from MCE types. See MCE    |

+   |      payload )                      |version for description.      |

+   +-------------------------------------+------------------------------+

+   |intel_sub_group_avc_sic_payload_t    |This is a wrapper for         |

+   |intel_sub_group_avc_sic_set_         |intel_sub_group_avc_mce_set_  |

+   |inter_direction_penalty(             |inter_direction_penalty(..)   |

+   |    uchar direction_cost,            |with the payload conversions  |

+   |    intel_sub_group_avc_sic_payload_t|to/from MCE types. See MCE    |

+   |       payload )                     |version for description.      |

+   +-------------------------------------+------------------------------+

+

+   Inter motion vector cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   configuration phase to enable motion vector costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_sic_set_        |intel_sub_group_avc_mce_set_   |

+   |motion_vector_cost_function(        |motion_vector_cost_function(..)|

+   |   ulong packed_cost_center_delta,  |with the payload conversions   |

+   |   uint2 packed_cost_table,         |to/from MCE types. See MCE     |

+   |   uchar cost_precision,            |version for description.       |

+   |   intel_sub_group_avc_sic_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+

+   Intra shape cost configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   configuration phase to enable intra shape costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Set the luma shape cost penalty|

+   |intel_sub_group_avc_sic_set_        |for intra motion estimation.   |

+   |intra_luma_shape_penalty(           |                               |

+   |   uint packed_shape_cost,          |The value of packed_shape_cost |

+   |   intel_sub_group_avc_sic_payload_t|is an integer value with the   |

+   |      payload )                     |following bits specifying the  |

+   |                                    |shape cost in U4U4 format as   |

+   |                                    |follows:                       |

+   |                                    |                               |

+   |                                    |7:0   : must be zero           |

+   |                                    |15:8  : 16x16 cost             |

+   |                                    |23:16 : 8x8 cost               |

+   |                                    |31:24 : 4x4 cost               |

+   |                                    |                               |

+   |                                    |The U4U4 decoded integer values|

+   |                                    |must bit fit within 12 bits.   |

+   +------------------------------------+-------------------------------+

+

+   Intra mode cost configuration phase functions

+   +++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   configuration phase to enable intra mode costing.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |The value of luma_mode_penalty |

+   |intel_sub_group_avc_sic_set_        |specifies the penalty to be    |

+   |intra_luma_mode_cost_function(      |applied to the estimated luma  |

+   |   uchar luma_mode_penalty,         |mode if it differs from its    |

+   |   uint luma_packed_neighbor_modes, |predicted luma mode (based on  |

+   |   uint luma_packed_non_dc_penalty, |its neighbor intra modes). It  |

+   |   intel_sub_group_avc_sic_payload_t|is specified in U4U4 format and|

+   |      payload )                     |must bit in 10 bits.           |

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |luma_packed_neighbor_modes     |

+   |                                    |specifies the values of the    |

+   |                                    |already computed top and left  |

+   |                                    |neighbor modes for the         |

+   |                                    |bordering 4x4 blocks, with the |

+   |                                    |4x4 blocks numbered in the     |

+   |                                    |traditional Z-order as shown   |

+   |                                    |below.                         |

+   |                                    |                               |

+   |                                    |    0 1 4 5                    |

+   |                                    |    2 3 6 7                    |

+   |                                    |    8 9 C D                    |

+   |                                    |    A B E F                    |

+   |                                    |                               |

+   |                                    |the following bits specify the |

+   |                                    |neighbor modes.                |

+   |                                    |                               |

+   |                                    |3:0   => Left neighbor block 5 |

+   |                                    |7:4   => Left neighbor block 7 |

+   |                                    |11:8  => Left neighbor block D |

+   |                                    |15:12 => Left neighbor block F |

+   |                                    |19:16 => Top neighbor block A  |

+   |                                    |23:20 => Top neighbor block B  |

+   |                                    |27:24 => Top neighbor block E  |

+   |                                    |31:28 => Top neighbor block F  |

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |luma_packed_non_dc_penalty     |

+   |                                    |specifies the penalty to be    |

+   |                                    |applied for any computed non-DC|

+   |                                    |luma mode for each of the      |

+   |                                    |16x16, 8x8, and 4x4 shapes,    |

+   |                                    |with the following bits        |

+   |                                    |specifying the penalties.      |

+   |                                    |                               |

+   |                                    |7:0   => Intra16x16 non-dc     |

+   |                                    |         penalty               |

+   |                                    |15:8  => Intra8x8 non-dc       |

+   |                                    |         penalty               |

+   |                                    |23:16 => Intra4x4 non-dc       |

+   |                                    |         penalty               |

+   |                                    |31:24 => Must be zero          |

+   |                                    |                               |

+   |                                    |The component values are       |

+   |                                    |specified in 8-bit integer     |

+   |                                    |format.                        |

+   |                                    |                               |

+   |                                    |The intra distortion for each  |

+   |                                    |intra luma block can be        |

+   |                                    |described by the following     |

+   |                                    |formulas:                      |

+   |                                    |                               |

+   |                                    |Intra_4x4 SAD (or Haar) +      |

+   |                                    |luma_shape_penalty_4x4 +       |

+   |                                    |luma_non_dc_4x4_penalty (if not|

+   |                                    |DC) +                          |

+   |                                    |luma_mode_penalty (if computed |

+   |                                    |mode is not the same predicted |

+   |                                    |mode from neighbor modes)      |

+   |                                    |                               |

+   |                                    |Intra_8x8 SAD (or Haar) +      |

+   |                                    |luma_shape_penalty_8x8 +       |

+   |                                    |luma_non_dc_8x8_penalty (if not|

+   |                                    |DC) +                          |

+   |                                    |luma_mode_penalty (if computed |

+   |                                    |mode is not the same predicted |

+   |                                    |mode from neighbor modes )     |

+   |                                    |                               |

+   |                                    |Intra_16x16 SAD (or Haar) +    |

+   |                                    |luma_shape_penalty_16x16 +     |

+   |                                    |luma_non_dc_4x4_penalty (if not|

+   |                                    |DC)                            |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Set the intra chroma mode cost |

+   |intel_sub_group_avc_sic_set_        |function by specifying the     |

+   |intra_chroma_mode_cost_function(    |penalty to be applied to the   |

+   |   uchar chroma_mode_penalty,       |computed chroma mode.          |

+   |   intel_sub_group_avc_sic_payload_t|                               |

+   |      payload )                     |The chroma_mode_base_penalty is|

+   |                                    |the base penalty to be applied |

+   |                                    |to the computed intra chroma   |

+   |                                    |modes. This penalty is in U4U4 |

+   |                                    |format.                        |

+   |                                    |                               |

+   |                                    |The U4U4 decoded integer value |

+   |                                    |must fit in 12 bits.           |

+   |                                    |                               |

+   |                                    |The base penalty is scaled     |

+   |                                    |based on the computed mode as  |

+   |                                    |defined below.                 |

+   |                                    |                               |

+   |                                    |   DC   : 0x                   |

+   |                                    |   HORZ : 1x                   |

+   |                                    |   VERT : 1x                   |

+   |                                    |   PLANE: 2x                   |

+   |                                    |                               |

+   |                                    |The intra distortion for each  |

+   |                                    |intra 8x8 chroma block can be  |

+   |                                    |described by the following     |

+   |                                    |formulas:                      |

+   |                                    |                               |

+   |                                    |distortion =                   |

+   |                                    |SAD (or Haar) +                |

+   |                                    |chroma_mode_penalty (scaled    |

+   |                                    |based on computed mode)        |

+   +------------------------------------+-------------------------------+   

+

+   Miscellaneous property configuration phase functions

+   ++++++++++++++++++++++++++++++++++++++++++++++++++++

+

+   These are optional built-in functions that may be called following the

+   configuration phase to enable miscellaneous properties setting in the

+   payload.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_sic_set_        |intel_sub_group_avc_mce_set_   |

+   |source_interlaced_field_polarity(   |source_interlaced_field_       |

+   |   uchar src_field_polarity         |polarity(..)  with the result  |

+   |   intel_sub_group_avc_sic_payload_t|conversions to/from MCE        |

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_sic_set_        |intel_sub_group_avc_mce_set_   |

+   |single_reference_interlaced_        |single_reference_              |

+   |field_polarity(                     |interlaced_field_polarity(..)  |

+   |   uchar ref_field_polarity,        |with the result conversions    |

+   |   intel_sub_group_avc_sic_payload_t|to/from MCE types. See MCE     |

+   |      payload )                     |version for description.       |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_sic_set_        |intel_sub_group_avc_mce_set_   |

+   |dual_reference_interlaced_image_    |dual_reference_interlaced_     |

+   |field_polarities(                   |field_polarities(..)  with the |

+   |   uchar fwd_ref_field_polarity,    |result conversions to/from MCE |

+   |   uchar bwd_ref_field_polarity,    |types. See MCE version for     |

+   |   intel_sub_group_avc_sic_payload_t|description.                   |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Update the input payload to do |

+   |intel_sub_group_avc_sic_set_        |enable bilinear filter         |

+   |bilinear_filter_enable(             |interpolation instead of 4-tap |

+   |   intel_sub_group_avc_sic_payload_t|filter interpolation.  Default |

+   |      payload )                     |is 4-tap filter interpolation. |

+   |                                    |                               |

+   |                                    |This should not be called if   |

+   |                                    |the payload was initialized    |

+   |                                    |with integer pixel resolution. |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |Enable skip check forward      |

+   |intel_sub_group_avc_sic_set_        |transform with the specified   |

+   |skc_forward_transform_enable(       |SAD coefficients thresholds in |

+   |   ulong packed_sad_coefficients,   |the frequency domain to        |

+   |   intel_sub_group_avc_sic_payload_t|approximate the effects of     |

+   |      payload )                     |forward quantization.          |

+   |                                    |                               |

+   |                                    |The skip decision will be      |

+   |                                    |enhanced to include an accurate|

+   |                                    |AVC forward transform for skip |

+   |                                    |estimation. This feature is in |

+   |                                    |addition to the previous SAD or|

+   |                                    |HAAR skip estimation. The      |

+   |                                    |results of the forward         |

+   |                                    |transform are compared one     |

+   |                                    |coefficient at a time against a|

+   |                                    |user-specified threshold, in   |

+   |                                    |the input argument             |

+   |                                    |packed_sad_coefficients, to    |

+   |                                    |emulate quantization's zeroing |

+   |                                    |effect. The user is returned   |

+   |                                    |the count of coefficients that |

+   |                                    |exceeded their threshold along |

+   |                                    |with a sum of the amount       |

+   |                                    |exceeded, both grouped at the  |

+   |                                    |8x8 block level (i.e. for each |

+   |                                    |8x8 block).                    |

+   |                                    |                               |

+   |                                    |The SAD coefficient threshold  |

+   |                                    |matrix for a 4x4 transform as  |

+   |                                    |shown in the table below is    |

+   |                                    |packed into a 64-bit           |

+   |                                    |integer. The low 16 bits       |

+   |                                    |contains the larger DC         |

+   |                                    |threshold. The coefficient     |

+   |                                    |thresholds for the remaining 6 |

+   |                                    |AC thresholds in the order of  |

+   |                                    |increasing frequency are       |

+   |                                    |provided by the successive     |

+   |                                    |8-bit bit ranges.              |

+   |                                    |                               |

+   |                                    |0 (DC) 1 (AC) 2 (AC) 3 (AC)    |

+   |                                    |1 (AC) 2 (AC) 3 (AC) 4 (AC)    |

+   |                                    |2 (AC) 3 (AC) 4 (AC) 5 (AC)    |

+   |                                    |3 (AC) 4 (AC) 5 (AC) 6 (AC)    |

+   |                                    |                               |

+   |                                    |This is valid only for SKC     |

+   |                                    |operations.                    |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |The raw skip SAD computed      |

+   |intel_sub_group_avc_sic_set_        |during the evaluation phase    |

+   |block_based_raw_skip_sad(           |will be the maximal SAD of     |

+   |   uchar block_based_skip_type,     |individual 4x4 (or 8x8) blocks,|

+   |   intel_sub_group_avc_ime_payload_t|instead of the sum of the      |

+   |      payload )                     |entire individual 4x4 block    |

+   |                                    |SADs of the MB.                |

+   |                                    |                               |

+   |                                    |The legal values for           |

+   |                                    |block_based_skip_type must be  |

+   |                                    |one of the specified block     |

+   |                                    |based skip type enumeration    |

+   |                                    |values.                        |

+   |                                    |                               | 

+   |                                    |It is valid to call this       |

+   |                                    |function only if the payload is|

+   |                                    |configured for a skip check    | 

+   |                                    |operation by a prior call to   |

+   |                                    |intel_sub_group_avc_sic_       |

+   |                                    |configure_skc(..).             |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_payload_t   |This is a wrapper for          |

+   |intel_sub_group_avc_sic_set_        |intel_sub_group_avc_mce_set_   |

+   |ac_only_haar(                       |ac_only_haar(..) with the      |

+   |   intel_sub_group_avc_sic_payload_t|payload conversions to/from MCE|

+   |      payload )                     |types. See MCE version for     |

+   |                                    |description.                   |

+   +------------------------------------+-------------------------------+

+

+   Evaluation phase functions

+   ++++++++++++++++++++++++++

+

+   These built-in functions perform the evaluation of the SIC operation

+   configured in the payload with a VME media sampler and return the

+   results.

+

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_result_t    |Evaluate the SIC IPE operation |

+   |intel_sub_group_avc_sic_evaluate_   |and return its results.        |

+   |ipe(                                |                               |

+   |   read_only image2d_t src_image,   |                               |

+   |   sampler_t vme_media_sampler,     |                               |

+   |   intel_sub_group_avc_sic_payload_t|                               |

+   |      payload )                     |                               |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_result_t    |Evaluate the SIC operation with|

+   |intel_sub_group_avc_sic_evaluate_   |single reference and return its|

+   |with_single_reference(              |results.                       |

+   |   read_only image2d_t src_image,   |                               |

+   |   read_only image2d_t ref_image,   |The parameter ref_image must be|

+   |   sampler_t vme_media_sampler,     |a valid forward image kernel   |

+   |   intel_sub_group_avc_sic_payload_t|parameter per the ordering     |

+   |      payload )                     |conventions for the kernel     |

+   |                                    |parameter list.                |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_result_t    |Evaluate the SIC operation with|

+   |intel_sub_group_avc_sic_evaluate_   |dual and return its results.   |

+   |with_dual_reference (               |                               |

+   |  read_only image2d_t src_image,    |The parameter                  |

+   |  read_only image2d_t fwd_ref_image,|fwd_ref_image[bwd_ref_image]   |

+   |  read_only image2d_t bwd_ref_image,|must be a valid                |

+   |  sampler_t vme_media_sampler,      |forward[backward] image kernel |

+   |  intel_sub_group_avc_sic_payload_t |parameter per the ordering     |

+   |     payload )                      |conventions for the kernel     |

+   |                                    |parameter list.                |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_result_t    |Evaluate the SIC operation with|

+   |intel_sub_group_avc_sic_evaluate_   |multi references and return its|

+   |with_multi_reference(               |results.                       |

+   |   read_only image2d_t src_image,   |                               |

+   |   uint packed_reference_ids,       |A pair of unique reference     |

+   |   sampler_t vme_media_sampler,     |identifier (indicating unique  |

+   |   intel_sub_group_avc_sic_payload_t|forward/backward reference     |

+   |      payload )                     |images) may be specified for   |

+   |                                    |each of the allowed major      |

+   |                                    |partitions (one 16x16 or four  |

+   |                                    |8x8) using packed_dual_ref_ids.|

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |packed_reference_ids is a      |

+   |                                    |integer with the following bits|

+   |                                    |specifying the values for the  |

+   |                                    |pair of reference images for   |

+   |                                    |each major partition.          |

+   |                                    |                               |

+   |                                    |3:0   => Fwd reference block 0 |

+   |                                    |7:4   => Bwd reference block 0 |

+   |                                    |11:8  => Fwd reference block 1 |

+   |                                    |15:12 => Bwd reference block 1 |

+   |                                    |19:16 => Fwd reference block 2 |

+   |                                    |23:20 => Bwd reference block 2 |

+   |                                    |27:24 => Fwd reference block 3 |

+   |                                    |31:28 => Bwd reference block 3 |

+   |                                    |                               |

+   |                                    |A forward[backward] reference  |

+   |                                    |idenitifer value of 'n'        |

+   |                                    |indicates the forward[backward]|

+   |                                    |image from the 'n'th pair of   |

+   |                                    |forward/backward reference     |

+   |                                    |images, with the value of 'n'  |

+   |                                    |ranging from 0 to 15.          |

+   |                                    |                               |

+   |                                    |If the REF operation is        |

+   |                                    |configured with only forward   |

+   |                                    |reference images then, the     |

+   |                                    |values of the backward         |

+   |                                    |reference identifiers are not  |

+   |                                    |used.                          |

+   |                                    |                               |

+   |                                    |The blocks are numbered using  |

+   |                                    |the traditional Z order. For   |

+   |                                    |larger block sizes, the        |

+   |                                    |sub-block reference identifier |

+   |                                    |pairs must be replicated. For  |

+   |                                    |example, for a 16x16 block, all|

+   |                                    |four pair of reference         |

+   |                                    |identifiers must be replicated |

+   |                                    |to the value of the first pair |

+   |                                    |for block 0.                   |

+   +------------------------------------+-------------------------------+

+   |intel_sub_group_avc_sic_result_t    |Evaluate the SIC operation with|

+   |intel_sub_group_avc_sic_evaluate_   |multi references and return its|

+   |with_multi_reference(               |results. This is used for      |

+   |   read_only image2d_t src_image,   |interlaced source and reference|

+   |   uint packed_reference_ids,       |images.                        |

+   |   uchar packed_reference_field_    |                               |

+   |   polarities,                      |A pair of unique reference     |

+   |   sampler_t vme_media_sampler,     |identifier (indicating unique  |

+   |   intel_sub_group_avc_sic_payload_t|forward/backward reference     |

+   |      payload )                     |images) may be specified for   |

+   |                                    |each of the allowed major      |

+   |                                    |partitions (one 16x16 or four  |

+   |                                    |8x8) using packed_dual_ref_ids.|

+   |                                    |                               |

+   |                                    |The value of                   |

+   |                                    |packed_reference_ids is a      |

+   |                                    |integer with the following bits|

+   |                                    |specifying the values for the  |

+   |                                    |pair of reference images for   |

+   |                                    |each major partition.          |

+   |                                    |                               |

+   |                                    |3:0   => Fwd reference block 0 |

+   |                                    |7:4   => Bwd reference block 0 |

+   |                                    |11:8  => Fwd reference block 1 |

+   |                                    |15:12 => Bwd reference block 1 |

+   |                                    |19:16 => Fwd reference block 2 |

+   |                                    |23:20 => Bwd reference block 2 |

+   |                                    |27:24 => Fwd reference block 3 |

+   |                                    |31:28 => Bwd reference block 3 |

+   |                                    |                               |

+   |                                    |A forward[backward] reference  |

+   |                                    |idenitifer value of 'n'        |

+   |                                    |indicates the forward[backward]|

+   |                                    |image from the ' nth' pair of  |

+   |                                    |forward/backward reference     |

+   |                                    |images, with the value of ' n' |

+   |                                    |ranging from 0 to 15.          |

+   |                                    |                               |

+   |                                    |If the REF operation is        |

+   |                                    |configured with only forward   |

+   |                                    |reference images then, the     |

+   |                                    |values of the backward         |

+   |                                    |reference identifiers are not  |

+   |                                    |used.                          |

+   |                                    |                               |

+   |                                    |The blocks are numbered using  |

+   |                                    |the traditional Z order. For   |

+   |                                    |larger block sizes, the        |

+   |                                    |sub-block reference identifier |

+   |                                    |pairs must be replicated. For  |

+   |                                    |example, for a 16x16 block, all|

+   |                                    |four pair of reference         |

+   |                                    |identifiers must be replicated |

+   |                                    |to the value of the first pair |

+   |                                    |for block 0.                   |

+   |                                    |                               |

+   |                                    |Reference field polarities for |

+   |                                    |forward and backward reference |

+   |                                    |images are specified for each  |

+   |                                    |of the allowed major partitions|

+   |                                    |using packed_reference_field_  |

+   |                                    |polarities.                    |

+   |                                    |                               |

+   |                                    |The value of packed_reference_ |

+   |                                    |field_polarities is an integer |

+   |                                    |with the following bits        |

+   |                                    |specifying the reference field |

+   |                                    |polarities for the major       |

+   |                                    |partitions.                    |

+   |                                    |                               |

+   |                                    |0 : Fwd reference block 0      |

+   |                                    |1 : Fwd reference block 1      |

+   |                                    |2 : Fwd reference block 2      |

+   |                                    |3 : Fwd reference block 3      |

+   |                                    |4 : Bwd reference block 0      |

+   |                                    |5 : Bwd reference block 1      |

+   |                                    |6 : Bwd reference block 2      |

+   |                                    |7 : Bwd reference block 3      |

+   |                                    |                               |

+   |                                    |If the dual-reference          |

+   |                                    |evaluation functions are not   |

+   |                                    |used, then the values of the   |

+   |                                    |backward reference field       |

+   |                                    |polarities are not used.       |

+   |                                    |                               |

+   |                                    |The blocks are numbered using  |

+   |                                    |the traditional Z order. For   |

+   |                                    |larger block sizes, the        |

+   |                                    |sub-block reference field      |

+   |                                    |polarities are replicated. For |

+   |                                    |example, for a 16x16 block all |

+   |                                    |four pairs of reference field  |

+   |                                    |polarities are replicated to   |

+   |                                    |the value of the first pair for|

+   |                                    |block 0.                       |

+   |                                    |                               |

+   |                                    |The value for the packed_      |

+   |                                    |interlaced_image_reference_    |

+   |                                    |field_polarities argument is   |

+   |                                    |obtained by calling            |

+   |                                    |intel_sub_group_avc_ime_get_   |

+   |                                    |inter_reference_interlaced_    |

+   |                                    |field_polarities(..)  for the  |

+   |                                    |preceding IME operation's      |

+   |                                    |result.                        |

+   +------------------------------------+-------------------------------+

+

+   Result type conversion functions

+   +++++++++++++++++++++++++++++++++

+   

+   These are optional built-in functions that may be called following the

+   evaluation phase to convert REF results to MCE results and vice-versa.

+

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_mce_result_t   |Convert the SIC result into a   |

+   |intel_sub_group_avc_sic_           |MCE result.                     |

+   |convert_to_mce_result(             |                                |

+   |   intel_sub_group_avc_sic_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+

+   |intel_sub_group_avc_sic_result_t   |Convert the MCE result into an  |

+   |intel_sub_group_avc_mce_           |SIC result.                     |

+   |convert_to_sic_result(             |                                |

+   |   intel_sub_group_avc_sic_result_t|                                |

+   |      result )                     |                                |

+   +-----------------------------------+--------------------------------+

+

+   Result processing phase functions

+   +++++++++++++++++++++++++++++++++

+

+   These built-in functions are called following the evaluation phase to

+   extract the various result components from an SIC evaluation result.

+

+   +-----------------------------------+--------------------------------+

+   |uchar intel_sub_group_avc_sic_get_ |Get the best intra shape from   |

+   |ipe_luma_shape(                    |the SIC result.                 |

+   |   intel_sub_group_avc_sic_result_t|                                |

+   |      result)                      |The returned values are as per  |

+   |                                   |the intra-MB shapes enumeration |

+   |                                   |values.                         |

+   +-----------------------------------+--------------------------------+

+   |ushort intel_sub_group_avc_sic_get_|Get the best intra luma         |

+   |best_ipe_luma_distortion(          |distortion from the SIC result  |

+   |   intel_sub_group_avc_sic_result_t|for the shape returned by       |

+   |      result)                      |intel_sub_group_avc_sic_get_    |

+   |                                   |ipe_luma_shape(..).             |

+   +-----------------------------------+--------------------------------+

+   |ushort intel_sub_group_avc_sic_get_|Get the best intra chroma       |

+   |best_ipe_chroma_distortion(        |distortion for the 8x8 shape    |

+   |   intel_sub_group_avc_sic_result_t|from the SIC result.            |

+   |      result)                      |                                |

+   +-----------------------------------+--------------------------------+

+   |ulong intel_sub_group_avc_sic_get_ |Get the packed intra luma modes |

+   |packed_ipe_luma_modes(             |for all blocks from the SIC     |

+   |   intel_sub_group_avc_sic_result_t|result. There are four bits per |

+   |      result)                      |luma mode for a block and legal |

+   |                                   |values for luma modes are as per|

+   |                                   |its defined enumeration         |

+   |                                   |values. The number of blocks is |

+   |                                   |based on the result of          |

+   |                                   |intel_sub_group_avc_sic_get_    |

+   |                                   |ipe_luma_shape(..).             |

+   |                                   |                                |

+   |                                   |If the luma shape is:           |

+   |                                   |- 16x16, then one mode is       |

+   |                                   |  returned in bits [0, 3]       |

+   |                                   |- 8x8, then four modes          |

+   |                                   |  corresponding to the four     |

+   |                                   |  partitions are returned by    |

+   |                                   |  bits in the ranges [0, 3],    |

+   |                                   |  [16,19], [32, 35], and        |

+   |                                   |  [48, 51]; the order of the    |

+   |                                   |  four partitions are in the    |

+   |                                   |  traditional Z-order           |

+   |                                   |- 4x4, then 16 modes (4 bits per|

+   |                                   |  mode) are returned of all 16  |

+   |                                   |  partitions by all the bits;   |

+   |                                   |  the order of the 16 partitions|

+   |                                   |  are in the traditional Z-order|

+   |                                   |  as shown below:               |

+   |                                   |                                |

+   |                                   |   0 1 4 5                      |

+   |                                   |   2 3 6 7                      |

+   |                                   |   8 9 C D                      |

+   |                                   |   A B E F                      |

+   +-----------------------------------+--------------------------------+

+   |uchar intel_sub_group_avc_sic_get_ |Get the intra chroma mode for   |

+   |ipe_chroma_mode(                   |the 8x8 block from the SIC      |

+   |   intel_sub_group_avc_sic_result_t|result. The legal values for    |

+   |      result)                      |chroma modes are as per its     |

+   |                                   |defined enumeration values.     |

+   +-----------------------------------+--------------------------------+

+   |uint intel_sub_group_avc_sic_get_  |Get the packed count of luma    |

+   |packed_skc_luma_count_threshold(   |coefficient components that     |

+   |   intel_sub_group_avc_sic_result_t|exceeded their transform        |

+   |      result)                      |thresholds from the SIC result  |

+   |                                   |for each 8x8 partition in       |

+   |                                   |traditional Z-order.            |

+   |                                   |                                |

+   |                                   |The format of the results is as |

+   |                                   |follows:                        |

+   |                                   |                                |

+   |                                   |- count of coefficients that    |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_0 is  |

+   |                                   |  returned in bit [0, 7]        |

+   |                                   |- count of coefficients that    |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_1 is  |

+   |                                   |  returned in bit [8, 15]       |

+   |                                   |- count of coefficients that    |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_2 is  |

+   |                                   |  returned in bit [16, 23]      |

+   |                                   |- count of coefficients that    |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_0 is  |

+   |                                   |  returned in bit [24, 31]      |

+   |                                   |                                |

+   |                                   |The results are only valid if   |

+   |                                   |the SIC operation was configured|

+   |                                   |with frequency domain SAD       |

+   |                                   |transform coefficients using    |

+   |                                   |intel_sub_group_avc_sic_set_skc_|

+   |                                   |forward_transform_enable(..).   |

+   +-----------------------------------+--------------------------------+

+   |ulong intel_sub_group_avc_sic_get_ |Get the packed sum of luma      |

+   |packed_skc_luma_sum_threshold(     |coefficient components that     |

+   |   intel_sub_group_avc_sic_result_t|exceeded their transform        |

+   |      result)                      |thresholds from the SIC result  |

+   |                                   |for each 8x8 partition in       |

+   |                                   |traditional Z-order.            |

+   |                                   |                                |

+   |                                   |The format of the results is as |

+   |                                   |follows:                        |

+   |                                   |- sum of coefficients that      |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_0 is  |

+   |                                   |  returned in bit [0, 15]       |

+   |                                   |- sum of coefficients that      |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_1 is  |

+   |                                   |  returned in bit [16,31]       |

+   |                                   |- sum of coefficients that      |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_2 is  |

+   |                                   |  returned in bit [32,47]       |

+   |                                   |- sum of coefficients that      |

+   |                                   |  exceeded their respective     |

+   |                                   |  threshold for block 8x8_0 is  |

+   |                                   |  returned in bit [48, 63]      |

+   |                                   |                                |

+   |                                   |The results are only valid if   |

+   |                                   |the SIC operation was configured|

+   |                                   |with frequency domain SAD       |

+   |                                   |transform coefficients using    |

+   |                                   |intel_sub_group_avc_sic_set_skc_|

+   |                                   |forward_transform_enable(..).   |

+   +-----------------------------------+--------------------------------+

+   |ushort intel_sub_group_avc_sic_get_|Get the skip check raw SAD      |

+   |inter_raw_sads(                    |(i.e. without any mode or shape |

+   |   intel_sub_group_avc_sic_result_t|costs included) for the entire  |

+   |      result )                     |MB if the input payload was note|

+   |                                   |configured for block based skip |

+   |                                   |checks, otherwise return the    |

+   |                                   |maximal SAD of individual 4x4   |

+   |                                   |(or 8x8, if the block size for  |

+   |                                   |block based skip checking was   |

+   |                                   |configured as 8x8) blocks with  |

+   |                                   |the MB.                         |

+   +-----------------------------------+--------------------------------+

+   |ushort intel_sub_group_avc_sic_get_|This is a wrapper for           |

+   |inter_distortions(                 |intel_sub_group_avc_mce_get_    |

+   |   intel_sub_group_avc_sic_result_t|inter_distortions(..) with the  |

+   |      result )                     |result conversions to/from MCE  |

+   |                                   |types. See MCE version for      |

+   |                                   |description.                    |

+   +-----------------------------------+--------------------------------+

+   

+   ______________________________________________________________________

+

+

+   A Complete Example 

+   ------------------

+

+   +--------------------------------------------------------------------+

+   |__kernel __attribute__((reqd_work_group_size(16,1,1))) void         |

+   |block_motion_estimate_intel(                                        |

+   |    __read_only image2d_t src_img,                                  |

+   |    __read_only image2d_t ref_img,                                  |

+   |    __global short2* prediction_motion_vector_buffer,               |

+   |    __global short2* motion_vector_buffer,                          |

+   |    __global ushort* residuals_buffer,                              |

+   |    __global uchar2* shapes_buffer,                                 |

+   |    int iterations )                                                |

+   |{                                                                   |

+   |  // Each 16x1 workgroup processes a column of MBs in a image.      |

+   |  // The number of workgroups is equal to the number of MB in       |

+   |  // a image row. All column MBs are processed in parallel without  |

+   |  // handling any MB dependencies.                                  |

+   |  int gid_0 = get_group_id(0);                                      |

+   |  int gid_1 = 0;                                                    |

+   |                                                                    |

+   |  // Initialize the inline VME sampler.                             |

+   |  const sampler_t vme_sampler = CLK_AVC_ME_INITIALIZE_INTEL;        |

+   |                                                                    |

+   |  // Process all columns MBs in a loop.                             |

+   |  for( int i = 0; i < iterations ; i++, gid_1++ ) {                 |

+   |    ushort2 srcCoord = 0;                                           |

+   |    short2 refCoord = 0;                                            |

+   |    short2 predMV = 0;                                              |

+   |                                                                    |

+   |    // Compute the source MB coordinates.                           |

+   |                                                                    |

+   |    srcCoord.x = gid_0 * 16;                                        |

+   |    srcCoord.y = gid_1 * 16;                                        |

+   |                                                                    |

+   |    // Obtain the predictor of the source MB from input predictor   |

+   |    // buffer.                                                      |

+   |    if( prediction_motion_vector_buffer != NULL ) {                 |

+   |      predMV =                                                      |

+   |        prediction_motion_vector_buffer[                            |

+   |          gid_0 + gid_1 * get_num_groups(0) ];                      |

+   |      refCoord.x = predMV.x / 4;                                    |

+   |      refCoord.y = predMV.y / 4;                                    |

+   |      refCoord.y = refCoord.y & 0xFFFE;                             |

+   |    }                                                               |

+   |                                                                    |

+   |    // Enable the partition mask to allow all shapes and let the    |

+   |    // VME unit decide the partitioning for the source MB.          |

+   |    uchar partition_mask = CLK_AVC_ME_PARTITION_MASK_ALL_INTEL;     |

+   |    uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;   |

+   |    uchar pixel_mode = CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL;         |

+   |                                                                    |

+   |    // Create and initialize the IME payload.                       |

+   |    intel_sub_group_avc_ime_payload_t payload =                     |

+   |      intel_sub_group_avc_ime_initialize(                           |

+   |          srcCoord, partition_mask, sad_adjustment );               |

+   |                                                                    |

+   |    // Setup the IME for a single reference search.                 |

+   |    // The search window is 48x40 with exhaustive search, with it   |

+   |    // location specified by the input predictor (refCooord).       |

+   |    payload =                                                       |

+   |      intel_sub_group_avc_ime_set_single_reference(                 |

+   |          refCoord, CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL,      |

+   |          payload );                                                |

+   |                                                                    |

+   |    // Set a cost function for the IME operation.                   |

+   |    ulong cost_center = 0;                                          |

+   |    uint2 packed_cost_table =                                       |

+   |      intel_sub_group_avc_mce_get_default_medium_penalty_cost_\     |

+   |        table();                                                    |

+   |    uchar search_cost_precision =                                   |

+   |      CLK_AVC_ME_COST_PRECISION_HPEL_INTEL;                         |

+   |    payload =                                                       |

+   |      intel_sub_group_avc_ime_set_motion_vector_cost_function(      |

+   |        cost_center, packed_cost_table, search_cost_precision,      |

+   |        payload );                                                  |

+   |                                                                    |

+   |    // Evaluate the IME operation with its configured payload.      |

+   |    intel_sub_group_avc_ime_result_t result =                       |

+   |      intel_sub_group_avc_ime_evaluate_with_single_reference(       |

+   |          src_img, ref_img, vme_sampler, payload );                 |

+   |                                                                    |

+   |    // Extract IME results.                                         |

+   |    long mvs = intel_sub_group_avc_ime_get_motion_vectors( result );|

+   |    ushort sads =                                                   |

+   |      intel_sub_group_avc_ime_get_inter_distortions( result );      |

+   |    uchar major_shape =                                             |

+   |      intel_sub_group_avc_ime_get_inter_major_shape( result );      |

+   |    uchar minor_shapes =                                            |

+   |      intel_sub_group_avc_ime_get_inter_minor_shapes( result );     |

+   |    uchar2 shapes = { major_shape, minor_shapes };                  |

+   |    uchar directions =                                              |

+   |      intel_sub_group_avc_ime_get_inter_directions( result );       |

+   |                                                                    |

+   |    // Perform FME if sub-pixel mode is specified.                  |

+   |    // FME is performed on the results of the IME operation.        |

+   |    if( pixel_mode != CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL ) {    |

+   |      intel_sub_group_avc_ref_payload_t payload =                   |

+   |        intel_sub_group_avc_fme_initialize(                         |

+   |            srcCoord, mvs, major_shape, minor_shapes, directions,   |

+   |            pixel_mode, sad_adjustment);                            |

+   |      intel_sub_group_avc_ref_result_t result =                     |

+   |        intel_sub_group_avc_ref_evaluate_with_single_reference(     |

+   |            src_img, ref_img, vme_sampler, payload );               |

+   |      payload =                                                     |

+   |        intel_sub_group_avc_ref_set_motion_vector_cost_function(    |

+   |          cost_center, packed_cost_table, search_cost_precision,    |

+   |          payload );                                                |

+   |      mvs = intel_sub_group_avc_ref_get_motion_vectors( result );   |

+   |      sads =                                                        |

+   |        intel_sub_group_avc_ref_get_inter_distortions( result );    |

+   |    }                                                               |

+   |                                                                    |

+   |    // Write out the results.                                       |

+   |    int index =                                                     |

+   |      ( gid_0 * 16 + get_local_id(0) ) +                            |

+   |      ( gid_1 * 16 * get_num_groups(0) );                           |

+   |    int2 bi_mvs = as_int2( mvs );                                   |

+   |    motion_vector_buffer [ index ] = as_short2( bi_mvs.s0 );        |

+   |    if( residuals_buffer != NULL ) {                                |

+   |      residuals_buffer [ index ] = sads;                            |

+   |    }                                                               |

+   |    shapes_buffer [gid_0 + gid_1 * get_num_groups(0)] = shapes;     |

+   |  }                                                                 |

+   |}                                                                   |

+   +--------------------------------------------------------------------+

+   

+   _____________________________________________________________________

+   "

+

+Revision History

+

+    Version 1 (12/02/2016): First public revision.