cl_intel_device_side_avc_motion_estimation
diff --git a/api/cl.xml b/api/cl.xml
index 39075e7..602a042 100644
--- a/api/cl.xml
+++ b/api/cl.xml
@@ -1062,7 +1062,9 @@
<enum value="0x4108" name="CL_DEVICE_SUB_GROUP_SIZES_INTEL"/>
<enum value="0x4109" name="CL_KERNEL_SPILL_MEM_SIZE_INTEL"/>
<enum value="0x410A" name="CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL"/>
- <unused start="0x410B" end="0x410D"/>
+ <enum value="0x410B" name="CL_DEVICE_AVC_ME_VERSION_INTEL"/>
+ <enum value="0x410C" name="CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL"/>
+ <enum value="0x410D" name="CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL"/>
<enum value="0x410E" name="CL_NV12_INTEL"/>
<unused start="0x410F" end="0x410F"/>
</enums>
@@ -1154,5 +1156,6 @@
<extension number="47" name="cl_intel_driver_diagnostics"/>
<extension number="48" name="cl_intel_subgroups_short"/>
<extension number="49" name="cl_intel_planar_yuv"/>
+ <extension number="50" name="cl_intel_device_side_avc_motion_estimation"/>
<!-- Next free extension number is assigned sequentially here -->
</registry>
diff --git a/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt b/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
new file mode 100644
index 0000000..74bb7af
--- /dev/null
+++ b/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
@@ -0,0 +1,4356 @@
+Name String
+
+ cl_intel_device_side_avc_motion_estimation
+
+Contributors
+
+ Biju George, Intel
+ Raghukrishnan Embar, Intel
+ Ryan Lei, Intel
+ Ben Ashbaugh, Intel
+ Bartosz Sochacki, Intel
+ Krishna Madaparambil, Intel
+ Mateusz Tabaka, Intel
+
+Contact
+
+ Biju George, Intel (biju.george 'at' intel.com)
+
+Version
+
+ Version 1, December 2, 2016
+
+Number
+
+ OpenCL Extension #50
+
+Status
+
+ First Draft
+
+Dependencies
+
+ OpenCL 1.2 is required.
+
+ The OpenCL Intel vendor extension cl_intel_subgroups is required. The
+ VME built-in functions are an extension of the subgroup functions
+ defined in cl_intel_subgroups.
+
+ The built-in functions which perform intra estimation depend on the
+ OpenCL Intel vendor extension cl_intel_media_block_io in order to
+ read-in the neighboring macroblock edge pixels.
+
+ The built-in functions which perform chroma based intra estimation
+ operations depend on the OpenCL Intel vendor extension
+ cl_intel_planar_yuv in order to create NV12 source images.
+
+ This extension is written against revision 29 of the OpenCL 2.0 API
+ specification, against revision 33 of the OpenCL 2.0 OpenCL C
+ specification, and against revision 32 of the OpenCL 2.0 extension
+ specification.
+
+Overview
+
+ Video motion estimation (VME) is defined as of set motion estimation
+ operations that are used to determine the motion vectors, intra
+ estimation angles and macroblock partitioning combination that best
+ describe the transformation to the source macroblock, from blocks in
+ one or more previous reference pictures (inter-prediction), or from
+ other blocks in the same source picture (intra-prediction). It does
+ this by searching for spatial and temporal patterns on the current and
+ various forward and backward reference pictures.
+
+ The goal of this extension is to provide programmers with a fine-
+ grained interface to the AVC VME media sampler in Intel graphics
+ processors. It describes the specification of low-level built-in
+ functions, callable from OpenCL kernels, that facilitate the
+ programming of the VME media sampler to evaluate specific AVC motion
+ estimation operations. If only a coarser-level interface at the level
+ of built-in kernels suffices, then the Intel vendor extensions
+ cl_intel_motion_estimation and cl_advanced_motion_estimation may be
+ considered.
+
+ Built-in functions are defined for all the major operations of the VME
+ media sampler. The major operations of the AVC VME media sampler in
+ Intel Graphics Processors can be described as follows:
+
+ 1. Integer motion estimation (IME)
+
+ Perform motion estimation on a given source macroblock in
+ a source image over a single or dual reference window in a
+ reference image, at full-pixel resolution, to determine the
+ best integer motion vectors and their associated distortions,
+ and the best macroblock shape partitioning combination.
+
+ 2. Motion estimation refinement (REF)
+
+ Perform refinement operations on the results of IME. The two sub-
+ operations are:
+
+ Fractional motion estimation (FME)
+
+ Perform sub-pixel refinement on the results of an IME operation.
+ half-pixel (HPEL) or quarter-pixel (QPEL) refinements are performed
+ to determine the best sub-pixel motion vectors and their associated
+ distortions.
+
+ Bidirectional motion estimation (BME)
+
+ Perform bidirectional refinement on the results of an IME
+ operation using two reference images to check if the bidirectional
+ mode using two references yields lesser distortions. An FME can
+ optionally be performed implicitly as part of a bidirectional
+ refinement.
+
+ 3. Skip and Intra check (SIC)
+
+ Performs the following two sub-operations:
+
+ Skip check (SKC)
+
+ Compute the pixel distortion of a user-specified shape and motion
+ vector combination. The VME media sampler fetches necessary pixels,
+ performs fractional and bidirectional filtering (as necessary), and
+ then computes the distortion between the derived reference and
+ source. The skip decision can optionally be enhanced to include a
+ 4x4 forward transform, the results of which are compared against a
+ user specified threshold to emulate the effects of the forward
+ quantization zeroing effect.
+
+ Intra prediction estimation (IPE)
+
+ Perform intra prediction on a given source macroblock to determine
+ the best intra prediction modes and the best shape partitioning
+ combination.
+
+New API Enums
+
+ Accepted as arguments to clGetDeviceInfo
+
+ CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
+ CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
+ CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
+
+ Additional valid constant values returned by clGetDeviceInfo:
+
+ CL_AVC_ME_VERSION_1_INTEL 0x1
+
+Additions to Chapter 4 of the OpenCL Specification:
+
+ Modify the description of function clGetDeviceInfo(..)
+
+ Table 4.3 is extended to include the following enumeration constants.
+
+ +--------------------------+-----------+----------------------------+
+ |cl_device_info |Return Type|Description |
+ +--------------------------+-----------+----------------------------+
+ |CL_DEVICE_AVC_ME_VERSION_ |cl_uint |The AVC device-side motion |
+ |INTEL | |estimation API version |
+ | | |number supported by the |
+ | | |device and driver. |
+ +--------------------------+-----------+----------------------------+
+ |CL_DEVICE_AVC_ME_SUPPORTS_|cl_bool |Is CL_TRUE if built-in |
+ |TEXTURE_SAMPLER_USE_INTEL | |functions using the texture |
+ | | |sampler may be used along |
+ | | |with AVC VME built-in |
+ | | |functions using the media |
+ | | |sampler on the device, and |
+ | | |CL_FALSE otherwise. |
+ +--------------------------+-----------+----------------------------+
+ |CL_DEVICE_AVC_ME_SUPPORTS_|cl_bool |Is CL_TRUE if an enqueue of |
+ |PREEMPTION_INTEL | |an OpenCL kernel that uses |
+ | | |AVC VME built-in functions |
+ | | |supports preemption on the |
+ | | |device, and CL_FALSE |
+ | | |otherwise. (This may be |
+ | | |useful on platforms that |
+ | | |have strict latency |
+ | | |requirements on the GPU). |
+ +--------------------------+-----------+----------------------------+
+
+ This extension requires a value of CL_AVC_ME_VERSION_1_INTEL returned
+ by clGetDeviceInfo for CL_DEVICE_AVC_ME_VERSION_INTEL.
+
+Terms, Acronyms and Definitions
+
+ Adds a new section 6.13.14.X "VME built-in functions" to the OpenCL
+ 2.0 C specification.
+
+ "
+ The following terms, acronyms and definitions are used in and
+ provide context for the VME built-in functions.
+
+ Macro-block (MB):
+ -----------------
+ An image is partitioned into macro-blocks of size 16x16 pixels. It is
+ the basic unit of processing for AVC video motion estimation
+ operations.
+
+ Shape:
+ ------
+ A MB may be partitioned into sub-blocks of one of the major shapes. A
+ sub-block with an 8x8 major shape may be further independently
+ partitioned into sub-blocks of one of the minor shapes. It is
+ represented by pre-defined shape enumeration values.
+
+ Major Shapes:
+ -------------
+ Shapes of 16x16, 16x8, 8x16, or 8x8 partitions of a MB. A 16x16 major
+ shape merely indicates that the MB was not further partitioned.
+
+ Minor Shapes:
+ -------------
+ Shapes of 8x8, 8x4, 4x8, or 4x4 sub-partitions of an 8x8 partition. A
+ 8x8 minor shape merely indicates that the 8x8 major partition was not
+ further sub-partitioned.
+
+ Block:
+ ------
+ A sub-block of a MB with one of the major or minor shapes.
+
+ Reference Image:
+ ----------------
+ An image (typically from the previously decoded buffer in an encoder
+ pipeline) from which motion estimation predictions are made.
+
+ Source Image:
+ -------------
+ The current image for which motion estimation predictions are made.
+
+ Source Macro-block Offset:
+ --------------------------
+ The 2D offset of the top left corner of the source MB in pixel units.
+ It is represented by a pair of unsigned 16-bit integers.
+
+ Reference Window Offset:
+ ------------------------
+ The 2D offset of the top left corner reference search window w.r.t to
+ the top left corner of the source MB in pixel units. It is represented
+ by a pair of signed 16-bit integers in the range [-2048, 2047].
+
+ Reference identifier:
+ ---------------------
+ Reference identifiers are associated to pairs of forward(L0)/
+ backward(L1) reference image parameters. Up to 16 pairs of reference
+ pairs of reference image parameters are permitted, with the permitted
+ values of reference identifiers ranging from 0 to 15. The reference
+ identifers are assigned in increasing order in which the reference
+ image parameter pairs are declared in the OpenCL kernel parameter
+ list. See new section 6.13.14.X "VME built-in functions" described
+ below for more details.
+
+ Motion Vector (MV):
+ -------------------
+ A 2D vector used for inter motion estimation that provides an offset
+ from the top left corner of a block in the source image to the top
+ left corner an identically sized block in the reference image.
+ Generally it is used to represent the best match of a block in the
+ reference image to a block in the source image. The best match is
+ determined as the block minimizing the distortion. MVs are specified
+ in QPEL resolution with the 2 LSB representing the fractional part
+ of the offset. It is represented by a pair of signed 16-bit signed
+ integers.
+
+ Packed Motion Vector:
+ --------------------------
+ A motion vector represented as a packed 32-bit unsigned integer. The
+ lower 16 bits contains the X coordinate and the upper 16 bits contains
+ the Y coordinate.
+
+ Bidirectional Motion Vector (BMV):
+ ----------------------------------
+ A pair of MVs for the forward(L0) and backward(L1) images. Depending
+ on how the VME operation is configured only the forward or the
+ backward MV or both may be valid.
+
+ Packed Bidirectional Motion Vector:
+ -----------------------------------
+ A bidirectional MV represented as a packed 64-bit unsigned integer.
+ The lower 32-bits contain the forward packed MV, and the upper 32-bits
+ contain the backward packed MV.
+
+ Sum Of Absolute Difference (SAD):
+ ---------------------------------
+ The sum of absolute differences of every full/sub-pixel location in
+ the source block w.r.t every corresponding full/sub pixel in the
+ reference block as specified by a given MV. The sum of absolute
+ differences may be optionally Haar transform adjusted. It is
+ represented by an unsigned 16-bit integer value.
+
+ Haar Transform (HAAR):
+ ----------------------
+ A simple wavelet transform that is used to refine the distortion
+ measure of SAD. The per pixel difference goes through a 4x4 Haar
+ transform. Then the SAD is replaced by the sum of the absolute
+ values of the transform domain coefficients in the distortion. Haar
+ transform is used as a coarse estimation of the integer transform.
+
+ Motion Vector Cost Center (CC):
+ -------------------------------
+ A MV has an associated cost w.r.t a cost center coordinate. The
+ further away from the cost center, the larger will be the cost
+ associated with the MV. Cost centers are specified in QPEL resolution
+ with the 2 LSB representing the fractional part of the offset.
+
+ Motion Vector Cost Center Delta (CCD):
+ --------------------------------------
+ The 2D offset of the cost center relative to the top left corner of
+ the source MB. Cost center deltas are specified in QPEL resolution
+ with the 2 LSB representing the fractional part of the offset.
+ It is represented by a pair of signed 16-bit integers.
+
+ Packed Motion Vector Cost Center Delta:
+ ---------------------------------------
+ A motion vector cost center delta represented as a packed 32-bit
+ unsigned integer. The lower 16 bits contains the X coordinate and the
+ upper 16 bits contains the Y coordinate.
+
+ Bidirectional Motion Vector Cost Center Delta:
+ ----------------------------------------------
+ A pair of cost center deltas for the forward and backward images.
+
+ Packed Bidirectional Motion Vector Cost Center Delta:
+ -----------------------------------------------------
+ A packed bidirectional motion vector cost center delta represented as
+ a 64-bit unsigned integer. The lower 32-bits contain the forward
+ packed CCD, and the upper 32-bits contain the backward packed CCD.
+
+ Motion Vector Cost:
+ -------------------
+ The MV cost is determined using a cost function described by a cost
+ table that is indexed based on power-of-two distances from the user
+ specified cost center, with a user specified precision (or unit) of
+ the distances from the cost center.
+
+ U4U4 Byte Format:
+ -----------------
+ Represents a value of (B<<S), where B, called base, is the 4 bit
+ LSB of the byte and S, called shift, is the 4 bit MSB of the byte.
+
+ Motion Vector Cost Table:
+ -------------------------
+ A table which specifies the cost penalties at 8 control points. The
+ first 7 control points represent the distances from cost center at
+ powers-of-two locations (2^0 to 2^6), and the last control point
+ represents the base penalty for distances that are out of range of the
+ cost function curve. It is represented by a packed array of 8 U4U4
+ unsigned integer values.
+
+ Motion Vector Cost Precision:
+ -----------------------------
+ The precision (or unit) of the control points in the MV cost table.
+ It can be used to control the precision and range of the cost
+ function. It is represented by pre-defined cost precision enumeration
+ values.
+
+ Shape Cost:
+ -----------
+ The cost associated with encoding a particular partition shape using
+ inter or intra prediction. It is represented by a packed array of 10
+ U4U4 unsigned integer values.
+
+ Distortion:
+ -----------
+ The distortion is the sum of SAD, MV cost, shape cost and multi-
+ reference cost for inter estimation, and the sum of SAD, mode cost,
+ shape cost and non-dc cost for intra estimation. It is a measure of
+ the cost of encoding a block and is represented by an unsigned 16-bit
+ integer value.
+
+ Intra Mode:
+ -----------
+ An intra-prediction angle which provides a prediction for the current
+ block from the edge pixels in its neighboring blocks. It is
+ represented by pre-defined intra mode enumeration values.
+
+ Intra Mode Cost:
+ ----------------
+ The cost associated with a computed intra mode for a block w.r.t a
+ predicted intra mode based on the computed intra modes for its
+ neighboring blocks.
+
+ Mode:
+ -----
+ The decision whether the inter-prediction or intra-prediction
+ minimizes distortion of a given MB.
+
+ Search unit (SU):
+ -----------------
+ The basic unit of searching. Possible reference search locations are
+ grouped in a predefined 4x4 pattern, and all locations within the same
+ group must be completely chosen or completely skipped. These
+ predefined groups are called search units.
+
+ Search Path (SP):
+ -----------------
+ The path taken during searching in a reference window. The steps taken
+ in a search path are in units of SUs. The search path must lie within
+ the defined search window.
+
+ Luma:
+ -----
+ Luma refers to either the Y-plane of a NV12 image or a regular image
+ with the image_channel_order and image_data_type restricted as CL_R
+ and CL_UNORM_INT8.
+
+ Chroma:
+ ------
+ Chroma refer to the UV-plane of a NV12 image.
+
+ Search Window (SW):
+ -------------------
+ The search area that will be covered during searching. The area of the
+ search window is limited to 2K luma pixels.
+
+ Search Window Configuration:
+ ----------------------------
+ The configuration of a search window which is a combination of the
+ search path and search window.
+
+ The pre-defined search window configurations are:
+
+ +--------------+-----------------------------------------------------+
+ | EXHAUSTIVE | 48x40 SW with exhaustive single reference search (or|
+ | | 32x32 dual SW for exhaustive dual-reference search);|
+ | | an exhaustive search means that all SU within the |
+ | | search window are searched in a spiral pattern with |
+ | | the search center being the middle of the search |
+ | | window |
+ +--------------+-----------------------------------------------------+
+ | SMALL | 28x28 SW with exhaustive search |
+ +--------------+-----------------------------------------------------+
+ | TINY | 24x24 SW with exhaustive search |
+ +--------------+-----------------------------------------------------+
+ | EXTRA TINY | 20x20 SW with exhaustive search |
+ +--------------+-----------------------------------------------------+
+ | DIAMOND | 48x40 SW with diamond single reference search (or |
+ | | 32x32 dual SW for diamond dual-reference search); a |
+ | | diamond pattern search path is used for the first 16|
+ | | (or 7 per reference for dual reference search) SUs, |
+ | | and then gradient based searching is used for up to |
+ | | a maximum of 57 search unit. |
+ +--------------+-----------------------------------------------------+
+ | LARGE DIAMOND| 48x40 SW with large diamond single reference search |
+ | | (or 32x32 dual SW for large diamond dual-reference |
+ | | search); a diamond pattern search pattern is used |
+ | | for the first 32 (or 10 per reference for dual |
+ | | reference search) SUs, and then gradient based |
+ | | searching is used for up to a maximum of 57 search |
+ | | units. |
+ +--------------+-----------------------------------------------------+
+
+ Inter Estimation:
+ -----------------
+ The process of determining motion vectors and shapes that best
+ describe the transformation from 2D images from previously decoded
+ images in a video sequence to the currently processed image.
+
+ Intra-Prediction Estimation (IPE):
+ ----------------------------------
+ The process of determining prediction angles and shapes that best
+ describe the transformation from neighboring MBs in an image to the
+ currently processed MB in the same image.
+
+ Luma Mode:
+ ----------
+ The prediction angle returned by IPE for the luma component for a
+ block. It is represented by an unsigned 8-bit integer with the upper 4
+ bits set to zero.
+
+ Integer Motion Estimation (IME):
+ -------------------------------
+ Inter-motion estimation in integer pixel resolution.
+
+ Fractional Motion Estimation (FME):
+ -----------------------------------
+ Inter-motion estimation in sub-pixel resolution. The result of integer
+ motion estimation on a reference image is used to perform fractional
+ refinement.
+
+ Bidirectional Motion Estimation (BME):
+ --------------------------------------
+ The process of determining if the bi-directional prediction minimizes
+ the distortion w.r.t to unidirectional prediction. The results of IME
+ on forward(L0) and backward(L1) reference images are used to perform
+ bi-directional refinement. BME can be performed in integer or sub-
+ pixel resolution. If performed in sub-pixel resolution an implicit FME
+ operation is done before performing the BME.
+
+ Refinement (REF):
+ -----------------
+ A FME and/or BME refinement operation.
+
+ Skip/Spot Check (SKC):
+ ----------------------
+ The operation determining the distortion associated with a given
+ (uni or bidirectional) MV in a reference image(s) w.r.t a source
+ image.
+
+ Skip and Intra Check (SIC):
+ ---------------------------
+ The process of performing both SKC and IPE in the same operation.
+
+ Motion Check or Estimation (MCE):
+ ---------------------------------
+ A generic IME, REF, or SIC operation.
+
+ Forward Transform (FT):
+ -----------------------
+ An 8x8 or 4x4 integer transform used to transform the residual to the
+ frequency domain.
+ "
+
+ New OpenCL C Enums
+
+ Adds the following enums to the OpenCL C language. The corresponding
+ API enums have the same name with the prefix "CLK_" replaced with
+ "CL_".
+
+ These enums are used as arguments and return values of some of the
+ VME built-in function parameters and return values described in the
+ new section 6.13.14.X "VME built-in functions".
+
+
+ Interlaced image field polarity values:
+ ---------------------------------------
+ #define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
+ #define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
+
+ Inter macro-block major shape values:
+ -------------------------------------
+ #define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
+ #define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
+ #define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
+ #define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3
+
+ Inter macro-block minor shape values:
+ -------------------------------------
+ #define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
+ #define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
+ #define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
+ #define CLK_AVC_ME_MINOR_4x4_INTEL 0x3
+
+ Inter macro-block major direction values:
+ -----------------------------------------
+ #define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
+ #define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
+ #define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
+
+ Inter (IME) partition mask values:
+ ----------------------------------
+ #define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
+ #define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
+ #define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
+ #define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
+ #define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
+ #define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
+ #define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
+ #define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
+
+ Slice type values:
+ ------------------
+ #define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
+ #define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
+ #define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
+
+ Search window configuration:
+ ----------------------------
+ #define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
+ #define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
+ #define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
+ #define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
+ #define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
+ #define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
+ #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
+ #define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
+
+ SAD adjustment mode:
+ --------------------
+ #define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
+ #define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
+
+ Pixel resolution:
+ -----------------
+ #define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
+ #define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
+ #define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
+
+ Cost precision values:
+ ----------------------
+ #define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
+ #define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
+ #define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
+ #define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
+
+ Inter bidirectional weights:
+ ----------------------------
+ #define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
+ #define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
+ #define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
+ #define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
+ #define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
+
+ Inter border reached values:
+ ----------------------------
+ #define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
+ #define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
+ #define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
+ #define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
+
+ Intra macro-block shape values:
+ -------------------------------
+ #define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
+ #define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
+ #define CLK_AVC_ME_INTRA_4x4_INTEL 0x2
+
+ Inter skip block partition type:
+ --------------------------------
+ #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
+ #define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x04000
+
+ Inter skip motion vector mask:
+ ------------------------------
+ #define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ ENABLE_INTEL (0x2<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2<<24)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1<<26)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2<<26)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1<<28)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2<<28)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1<<30)
+ #define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2<<30)
+
+ Block based skip type values:
+ -----------------------------
+ #define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x0
+ #define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
+
+ Luma intra partition mask values:
+ ---------------------------------
+ #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
+ #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
+ #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
+ #define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
+
+ Intra neighbor availability mask values:
+ ----------------------------------------
+ #define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
+ #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
+ #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
+ #define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
+
+ Luma intra modes:
+ -----------------
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
+ #define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
+
+ Chroma intra modes:
+ -------------------
+ #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
+ #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
+ #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
+ #define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
+
+ Reference image select values:
+ ------------------------------
+ #define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
+ #define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
+ #define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3
+
+ VME media sampler initialization value:
+ ----------------------------------------
+ #define CLK_AVC_ME_INITIALIZE_INTEL 0x0
+
+ Default IME payload initialization:
+ -----------------------------------
+ #define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL {0x0}
+
+ Default REF payload initialization:
+ -----------------------------------
+ #define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL {0x0}
+
+ Default SIC payload initialization:
+ -----------------------------------
+ #define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL {0x0}
+
+ Default IME result initialization:
+ ----------------------------------
+ #define CLK_AVC_IME_RESULT_INITIALIZE_INTEL {0x0}
+
+ Default REF result initialization:
+ ----------------------------------
+ #define CLK_AVC_REF_RESULT_INITIALIZE_INTEL {0x0}
+
+ Default SIC result initialization:
+ ----------------------------------
+ #define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL {0x0}
+
+New OpenCL C Types
+
+ Adds the following built-in types to the OpenCL C language. Appends
+ the following built-in type definitions to Table 6.3 in section 6.1.3
+ "Other Built-in Data Types".
+
+ These built-in types are used to declare the types of some of the VME
+ built-in function parameters and return values in the new section
+ 6.13.14.X "VME built-in functions".
+
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_mce_payload_t |This is a parameter and/or |
+ | |return type of a generic AVC MCE|
+ | |operation. It is used in MCE |
+ | |built-in functions and |
+ | |represents the payload for a |
+ | |basic IME/REF/SIC operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ime_payload_t |This is a parameter and/or |
+ | |return type of a basic AVC IME |
+ | |operation. It is used in IME |
+ | |built-in functions and |
+ | |represents the payload for an |
+ | |IME operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ref_payload_t |This is a parameter and/or |
+ | |return type of an AVC REF |
+ | |operation. It is used in REF |
+ | |built-in functions and |
+ | |represents the payload for a REF|
+ | |operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_sic_payload_t |This is a parameter and/or |
+ | |return type of an AVC SIC |
+ | |operation. It is used in SIC |
+ | |built-in functions and |
+ | |represents the payload for a SIC|
+ | |operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_mce_result_t |This is a parameter and/or |
+ | |return type of a MCE operation. |
+ | |It is used in MCE built-in |
+ | |functions and represents the |
+ | |evaluation result for a basic |
+ | |IME/REF/SIC operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ime_result_t |This is a parameter and/or |
+ | |return type of a basic AVC IME |
+ | |operation not using the stream- |
+ | |in/streamout functionality.It is|
+ | |used in IME built-in functions |
+ | |and represents the result for a |
+ | |basic IME evaluation operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ime_result_ |This is a parameter and/or |
+ | single_reference_streamout_t |return type of an AVC single |
+ | |reference IME operation using |
+ | |the streamout functionality. It |
+ | |is used in specific IME built- |
+ | |in functions and represents the |
+ | |evaluation result of an IME |
+ | |streamout operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ime_result_ |This is a parameter and/or |
+ | dual_reference_streamout_t |return type of an AVC dual |
+ | |reference IME operation using |
+ | |the streamout functionality. It |
+ | |is used in specific IME built-in|
+ | |functions and represents the |
+ | |evaluation result for an IME |
+ | |streamout operation. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ime_ |This is a parameter and/or |
+ | single_reference_streamin_t |return type of an AVC single IME|
+ | |operation using the stream-in |
+ | |reference functionality. It is |
+ | |used in specific IME built-in |
+ | |functions. It represents the |
+ | |additional results from the |
+ | |result of an IME evaluation |
+ | |using the streamout |
+ | |functionality that may be |
+ | |streamed-in in a subsequent IME |
+ | |streamin call. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ime_ |This is a parameter and/or |
+ | dual_reference_streamin_t |return type of an AVC dual |
+ | |reference IME operation using |
+ | |the streamin functionality. It |
+ | |is used in specific IME built- |
+ | |in functions. It represents the |
+ | |additional results from the |
+ | |result of an IME evaluation |
+ | |using the streamout |
+ | |functionality that may be |
+ | |streamed-in in a subsequent IME |
+ | |call. |
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_ref_result_t |This is a parameter and or/ |
+ | |return type of an AVC REF |
+ | |operation. It is used in REF |
+ | |built-in functions and |
+ | |represents the evaluation result|
+ | |from a REF evaluation operation.|
+ +-----------------------------------+--------------------------------+
+ | intel_sub_group_avc_sic_result_t |This is a parameter and/or |
+ | |return type of an AVC SKC, IPE, |
+ | |or SIC operation. It is used in |
+ | |SIC built-in functions and |
+ | |represents the evaluation result|
+ | |from a SIC evaluation operation.|
+ +-----------------------------------+--------------------------------+
+
+ Amends the following built-in type definition in Table 6.3 in section
+ 6.1.3 "Other Built-in Data Types".
+
+ +---------+----------------------------------------------------------+
+ |sampler_t|A texture or media sampler type. Refer to section 6.13.14 |
+ | |for a detailed description the built-in functions that use|
+ | |of this type. |
+ | | |
+ | |In general, the use of the word "sampler" by itself refers|
+ | |to a texture sampler. A media sampler will be explicitly |
+ | |refered to as "media sampler". |
+ | | |
+ | |In general, sampler_t parameters of built-in functions |
+ | |refer to texture sampler parameter. Media sampler |
+ | |parameters are exclusively used in the VME built-in |
+ | |functions (described in new section 6.13.14.X "VME |
+ | |built-in functions"). |
+ +---------+----------------------------------------------------------+
+
+
+ Amends the restrictions defined for sampler types in (b) section 6.9
+ "Restrictions".
+
+ "...
+ The texture sampler type (sampler_t) can only be used as the type of
+ a function argument or a variable declared in the program scope or
+ the outermost scope of a kernel function. The behavior of a sampler
+ variable declared in a non-outermost scope of a kernel function is
+ implementation-defined. The media sampler type cannot be used as a
+ function argument of a kernel function, but otherwise can be used as
+ the type of a function argument or a variable declared in any scope.
+ ... "
+
+ Amends the following section 6.13.14.1 "Samplers".
+
+ Replace all strings "sampler(s)" with "texture sampler(s)".
+
+ Appends the following paragraphs in section 6.13.14.1 "Samplers".
+
+ "The VME evaluation phase built-in functions (described in new section
+ 6.13.14.X "VME built-in functions" take a VME media sampler argument.
+ The media sampler can be declared as a constant variable in any scope,
+ and must be initialized with the 32-bit unsigned integer constant
+ CLK_AVC_ME_INITIALIZE_INTEL."
+
+ Note that VME media samplers declared using the constant qualifier are
+ not counted towards the maximum size of constant address space allowed
+ per device (i.e. CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE as described in
+ table 4.3)
+
+ Amends the following section 6.13.14.2 "Built-in Image Read
+ Functions".
+
+ "The following built-in function calls to read images with a texture
+ sampler are supported."
+
+ Amends the following section 6.13.14.3 "Built-in Image Sampler-less
+ Read Functions".
+
+ "The sampler-less read image functions behave exactly as the
+ corresponding read image functions described in section 6.13.14.2
+ that take integer coordinates and a texture sampler with filter mode
+ set to CLK_FILTER_NEAREST, normalized coordinates set to
+ CLK_NORMALIZED_COORDS_FALSE and addressing mode to CLK_ADDRESS_NONE"
+
+
+New OpenCL C built-in functions
+
+ Appends the following to the new section 6.13.14.X "VME built-in
+ functions".
+
+ "
+ The OpenCL C programming language implements the following built-in
+ functions to evaluate VME operations as a block operation by all work-
+ items in a subgroup. Furthermore they are defined only for subgroup
+ size of 16, and thus using these built-in functions in a kernel will
+ force a subgroup size of 16. The use of kernel attribute
+ intel_reqd_sub_group_size with a value other than 16 as described in
+ the Intel vendor extension cl_intel_required_subgroup_size is not
+ allowed.
+
+ These built-in functions are defined to operate on a 16x16 MB and must
+ be encountered by all work items in a subgroup executing the kernel,
+ otherwise the behavior is undefined (i.e. they can only be used only
+ in convergent control flow where all the work-items in the subgroups
+ are enabled).
+
+ ______________________________________________________________________
+
+
+ If VME built-in functions are used in an OpenCL kernel, then the
+ following additional requirements must be satisfied.
+
+ 1. The VME source and reference images must either be 8-bit luma
+ 2D images, or planar YUV images with NV12 format. If chroma based
+ intra estimation operations are being performed, then NV12 source
+ images are required, otherwise either 8-bit luma or NV12 images may
+ be used. For 8-bit luma images, the image_channel_order and the
+ image_data_type are restricted as CL_R and CL_UNORM_INT8
+ respectively. NV12 images are described in the Intel vendor
+ extension cl_intel_planar_yuv. Images created from buffers are not
+ allowed as VME source and reference images.
+ 2. OpenCL image kernel parameters that are used as VME source and
+ reference images in the VME evaluation built-in functions must
+ exclusively be used only by VME built-in functions. If the same
+ image needs to be used for other image operations within the same
+ kernel, then an additional image parameter may be used that is
+ bound to the same image object as for the VME image parameter in
+ the OpenCL host application for the kernel enqueue API call.
+ 3. OpenCL image kernel parameters that are used as VME source and
+ reference images in the VME evaluation built-in functions must be
+ declared in a specific order in the kernel parameter list of a
+ kernel that calls any of the VME built-in functions as shown below;
+ otherwise it will result in a complier reported error.
+
+ +-----------------------------------------------+
+ | __kernel void vme_custom( |
+ | read_only image2d_t src_image, |
+ | read_only image2d_t fwd0_ref_image, |
+ | read_only image2d_t bwd0_ref_image, |
+ | read_only image2d_t fwd1_ref_image, |
+ | read_only image2d_t bwd1_ref_image, |
+ | ... |
+ | read_only image2d_t fwd15_ref_image, |
+ | read_only image2d_t bwd15_ref_image, |
+ | ...) |
+ +-----------------------------------------------+
+
+ The first forward(L0) reference image parameter must immediately
+ follow the source image parameter, and the first backward(L1)
+ reference image parameter must immediately follow the first forward
+ reference image parameter. Up to 16 consecutive pairs of forward
+ and backward reference image parameters can be provided, with the
+ reference images closer to the source image in timing order
+ appearing earlier in the parameter list.
+ ______________________________________________________________________
+
+
+ The VME built-in functions are organized into categories of functions
+ based on the major VME operations as follows:
+
+ 1. MCE built-in functions
+ --------------------------
+ A set of generic built-in functions which are common across IME, REF,
+ or SIC operations. Certain functions may not be applicable for some
+ specific operation as indicated in their descriptions.
+
+ 2. IME built-in functions
+ -------------------------
+ A set of built-in functions to initialize, configure and evaluate an
+ integer motion estimation result.
+
+ 3. REF built-in functions
+ -------------------------
+ A set of built-in functions to initialize, configure and evaluate a
+ fractional and/or bidirectional refinement operation on the results
+ of IME built-in functions.
+
+ 4. SIC built-in functions
+ -------------------------
+ A set of built-in functions to initialize, configure and evaluate a
+ skip check or intra estimation operation result.
+ ______________________________________________________________________
+
+
+ A set of ordered phases of built-in functions need to be called to
+ evaluate each category of built-in functions. Some phases may not be
+ specified for certain function categories, but if present it must be
+ called in the correct order, otherwise the behavior is undefined.
+
+ 1. Initialization (required phase)
+ ---------------------------------
+ This creates and initializes the payload for the VME operation and
+ may perform some basic configuration that invariably must be set.
+
+ 2. Operation configuration (required phase, if present)
+ ------------------------------------------------------
+ This configures important sets of parameters for the VME operation
+ that were not set in the initialization phase in the payload.
+
+ 3. Inter reference frame cost configuration (optional phase)
+ -----------------------------------------------------------
+ This configures the cost assigned to matching blocks in reference
+ frames based on timing distance from the source frame.
+
+ 4. Inter motion vector/Intra mode cost configuration (optional phase)
+ --------------------------------------------------------------------
+ This configures the inter motion vector cost function or the intra
+ mode cost function to be used for the VME operation in the payload.
+
+ 5. Inter/Intra shape cost configuration (optional phase)
+ -------------------------------------------------------
+ This configures the inter/intra shape cost penalty to be used for the
+ VME operation in the payload.
+
+ 6. Miscellaneous property configuration (optional phase)
+ -------------------------------------------------------
+ This configures miscellaneous operation sub-functions in the VME
+ operation payload.
+
+ 7. Evaluation (required phase)
+ -----------------------------
+ This phase performs the evaluation of the VME operation using the
+ payload configured in the previous phases by issuing it to the VME
+ unit for processing.
+
+ 8. Result processing (required phase)
+ ------------------------------------
+ The phase performs the extraction of various motion estimation
+ operation component results from the result of the evaluation phase.
+ The result components may be distributed among multiple work-items.
+ The results components of one VME operation may be used as input to
+ another VME operation at phase (1) or phase (7).
+
+ A set of built-in functions are defined for each of the major VME
+ operations of IME, REF, and SIC (SKC and IPE). These are categorized
+ as IME, REF, and SIC built-in functions. Functions that are common
+ across all categories are classified as MCE (motion check and
+ estimation) functions. Several restrictions for function argument
+ values are stated in the descriptions of the built-in function
+ descriptions. In general, if the restrictions are not satisfied the
+ behavior is undefined, unless otherwise stated.
+
+ ______________________________________________________________________
+
+
+ MCE built-in functions
+ ----------------------
+
+ A set of generic MCE operations which may be called for IME, REF, or
+ SIC operations with the restrictions as stated in their descriptions.
+ They can be called only during specific phases of these operations as
+ indicated in the description of the built-in functions.
+
+ Multi-reference cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These built-in functions enable multi-reference image costing. They
+ allow for the configuration of the payloads to favorably bias the
+ major partitions coming from reference images that are closer to the
+ source image, than the ones coming from reference images that are
+ further away. The distance of the reference images, in the timing
+ order, from the source image is implied based on the order in which
+ the reference images are declared in the kernel parameter list.
+
+ +-------------------------------------+-----------------------------+
+ |uchar |Get the default base |
+ |intel_sub_group_avc_mce_get_default_ |multi-reference cost penalty |
+ |inter_base_multi_reference_penalty( |in U4U4 format when HW |
+ | uchar slice_type, |assisted multi-reference |
+ | uchar qp ) |search is used. |
+ | | |
+ | |The value of slice_type must |
+ | |be a valid slice type |
+ | |enumeration value. |
+ | | |
+ | |The value of qp must be a |
+ | |valid quantization parameter |
+ | |value between 0 and 51. |
+ +-------------------------------------+-----------------------------+
+ |intel_sub_group_avc_mce_payload_t |Set the multi-reference base |
+ |intel_sub_group_avc_mce_set_inter_ |penalty when HW assisted |
+ |base_multi_reference_penalty( |multi-reference search is |
+ | uchar reference_base_penalty, |performed. |
+ | intel_sub_group_avc_mce_payload_t | |
+ | payload ) |Reference major partitions |
+ | |get associated with a penalty|
+ | |based on its distance from |
+ | |the source image. The |
+ | |reference_base_penalty is |
+ | |scaled using a scaling factor|
+ | |based on the implied distance|
+ | |of the reference image from |
+ | |the source image as shown |
+ | |below. |
+ | | |
+ | | 0 => 0x |
+ | | 1 to 2 => 1x |
+ | | 3 to 6 => 2x |
+ | | 7 to 15 => 3x |
+ | | |
+ | |The value of |
+ | |reference_base_penalty is |
+ | |specified in U4U4 format and |
+ | |the integer value must fit |
+ | |within 12 bits. |
+ +-------------------------------------+-----------------------------+
+
+ Inter shape and direction cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These built-in functions enable shape costing for inter estimation.
+ They allow for the configuration of payloads for the biasing of
+ certain shapes over others based on the configured parameters.
+
+ +------------------------------------+-------------------------------+
+ |ulong intel_sub_group_avc_mce_get_ |Get the default packed shape |
+ |default_inter_shape_penalty( |cost for inter estimation in |
+ | uchar slice_type, |U4U4 format. |
+ | uchar qp ) | |
+ | |The value of slice_type must be|
+ | |a valid slice type enumeration |
+ | |value. |
+ | | |
+ | |The value of qp must be a valid|
+ | |quantization parameter value |
+ | |between 0 and 51. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_mce_payload_t |Set the shape penalty for inter|
+ |intel_sub_group_avc_mce_set_inter_ |motion estimation. |
+ |shape_penalty( | |
+ | ulong packed_shape_penalty, |The value of packed_shape_cost |
+ | intel_sub_group_avc_mce_payload_t|is an unsigned long integer |
+ | payload ) |value with the following bits |
+ | |specifying the shape penalty in|
+ | |U4U4 format as follows: |
+ | | |
+ | | 7:0 => 16x8 and 8x16 |
+ | | 15:8 => 8x8 |
+ | | 23:16 => 8x4 and 4x8 |
+ | | 31:24 => 4x4 |
+ | | 39:32 => 16x16 |
+ | | 63:40 => must be zero |
+ | | |
+ | |The U4U4 decoded integer values|
+ | |for byte 0 and byte 4 must bit |
+ | |fit in 12 bits, while the U4U4 |
+ | |decoded integer values for the |
+ | |other bytes must fit within 10 |
+ | |bits. |
+ +------------------------------------+-------------------------------+
+ |uchar intel_sub_group_avc_mce_get_ |Get the default direction |
+ |default_inter_direction_penalty( |penalty for inter estimation in|
+ | uchar slice_type, |U4U4 format. |
+ | uchar qp ) | |
+ | |The value of slice_type must be|
+ | |a valid slice type enumeration |
+ | |value. |
+ | | |
+ | |The value of qp must be a valid|
+ | |quantization parameter value |
+ | |between 0 and 51. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_mce_payload_t |Set the direction penalty for |
+ |intel_sub_group_avc_mce_set_ |backward images used in inter |
+ |inter_direction_penalty( |motion estimation. |
+ | uchar direction_cost, | |
+ | intel_sub_group_avc_mce_payload_t|The value of direction_cost |
+ | payload ) |must be in U4U4 format and its |
+ | |decoded integer value must bit |
+ | |fit in 12 bits. |
+ +------------------------------------+-------------------------------+
+
+ Intra shape cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++
+
+ These built-in functions enable shape costing for intra estimation.
+ They allow for the configuration of payloads for biasing of certain
+ shapes over others based on the configured parameters. Only the built-
+ in function providing the default shape penalty is specified as an MCE
+ function. The function which actually configures the payload for the
+ intra estimation operation is specified as a SIC built-in function.
+
+ +----------------------------------+--------------------------------+
+ |uint intel_sub_group_avc_mce_get_ |Get the default packed luma |
+ |default_intra_luma_shape_penalty( |intra shape penalty in U4U4 |
+ | uchar slice_type, |format. |
+ | uchar qp ) | |
+ +----------------------------------+--------------------------------+
+
+ Inter motion vector cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These built-in functions enable motion vector costing for inter
+ estimation. The distortion measure is augmented to favor motion
+ vectors closer to the cost-center considered in conjunction with the
+ primary objective of minimizing the SAD between the source and
+ reference blocks.
+
+ +------------------------------------+-------------------------------+
+ |uint2 intel_sub_group_avc_mce_get_ |Get the default inter motion |
+ |default_inter_motion_vector_ |vector cost table for the |
+ |cost_table( |pre-defined control points in |
+ | uchar slice_type, |U4U4 format for the input Qp |
+ | uchar qp ) |and slice type. |
+ | | |
+ | |The value of slice_type must be|
+ | |a valid slice type enumeration |
+ | |value. |
+ | | |
+ | |The value of qp must be a valid|
+ | |quantization parameter value |
+ | |between 0 and 51. |
+ +------------------------------------+-------------------------------+
+ |uint2 intel_sub_group_avc_mce_get_ |Get the default predefined |
+ |default_high_penalty_cost_table( |packed U4U4 format high cost |
+ | void) |table for high Qp. This may be |
+ | |more appropriate for frame |
+ | |sequences with high motion. |
+ +------------------------------------+-------------------------------+
+ |uint2 intel_sub_group_avc_mce_get_ |Get the default predefined |
+ |default_medium_penalty_cost_table( |packed U4U4 format medium cost |
+ | void) |table for medium Qp. This may |
+ | |be more appropriate for frame |
+ | |sequences with normal motion. |
+ +------------------------------------+-------------------------------+
+ |uint2 intel_sub_group_avc_mce_get_ |Get the default predefined |
+ |default_low_penalty_cost_table( |packed U4U4 format low cost |
+ | void) |table for low Qp. This may be |
+ | |more appropriate for frame |
+ | |sequences with low motion. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_mce_payload_t |Update the input payload to set|
+ |intel_sub_group_avc_mce_set_ |the cost precision along with |
+ |motion_vector_cost_function( |the cost center and cost table |
+ | ulong packed_cost_center_delta, |and return it. |
+ | uint2 packed_cost_table, | |
+ | uchar cost_precision, |The value of |
+ | intel_sub_group_avc_mce_payload_t|packed_cost_center_delta is the|
+ | payload ) |packed bidirectional cost |
+ | |center delta value relative to |
+ | |the source macroblock, which |
+ | |specifies the 4 bidirectional |
+ | |cost centers of each of the 8x8|
+ | |partitions of the reference |
+ | |image. If only unidirectional |
+ | |search is performed then the |
+ | |values of the backward |
+ | |reference cost centers must be |
+ | |zero. Work-item 'n' provides |
+ | |the value of cost center |
+ | |'n'. It is specified in QPEL |
+ | |units. For 16x16 partitions |
+ | |work-item '0' provides the cost|
+ | |center. For 8x16 partitions |
+ | |work-items '0' and '1' provide |
+ | |the cost centers. For 16x8 |
+ | |partitions work-items '0' and |
+ | |'2' provide the cost |
+ | |centers. The X and Y |
+ | |coordinates of each cost center|
+ | |delta must be in the range |
+ | |[-2048, 2047] and [-512.00 to |
+ | |511.75] respectively, otherwise|
+ | |the results are undefined. |
+ | | |
+ | |The packed cost table specifies|
+ | |the cost penalties for |
+ | |pre-defined control points in |
+ | |U4U4 format in the cost |
+ | |function curve. The first 7 |
+ | |bytes specify 7 control points |
+ | |representing consecutive |
+ | |powers-of-two delta units (2^0 |
+ | |to 2^6). Each delta unit, dx, |
+ | |is the distance of a motion |
+ | |vector, mv, from the specified |
+ | |cost center, cc |
+ | |(dx=|mv-cc|). The cost penalty |
+ | |values at in-between control |
+ | |points are linearly |
+ | |interpolated. The range of the |
+ | |cost function is defined to be |
+ | |from 2^0 to 2^6 delta |
+ | |units. The 8th byte of the |
+ | |packed cost table specifies the|
+ | |penalty base factor (over_cost)|
+ | |for dx distances that are |
+ | |out-of-range. The penalty of |
+ | |out-of-range cost dx distances |
+ | |is computed as min(over_cost + |
+ | |int(dx) - 64, 255). |
+ | | |
+ | |The value of the cost precision|
+ | |parameter must be a valid cost |
+ | |precision enumeration value, |
+ | |and specifies the precision of |
+ | |the delta units from the cost |
+ | |center, dx. This effectively |
+ | |can be used to control the |
+ | |range of the cost function as |
+ | |follows: |
+ | | |
+ | |PRECISION DELTAS PIXEL RANGE |
+ | |--------- ------ ----------- |
+ | | |
+ | |PEL pixel 0-64 |
+ | |DPEL dual 0-127 |
+ | | pixel |
+ | |HALF half 0-31 |
+ | | pixel |
+ | |QUARTER quarter 0-15 |
+ | | pixel |
+ | | |
+ | |The inter distortion for a |
+ | |block can be described by the |
+ | |following formula: |
+ | | |
+ | |Distortion = |
+ | | SAD (or HAAR) + |
+ | | MV_Cost_Penalty + |
+ | | Shape_Penalty + |
+ | | Direction_Cost + |
+ | | Multi_Reference_Penalty |
+ +------------------------------------+-------------------------------+
+
+ Intra mode cost configuration phase functions
+ +++++++++++++++++++++++++++++++++++++++++++++
+
+ These built-in functions enable mode costing for intra estimation.
+ They allow for the configuration of payloads to bias the computed
+ intra modes to be closer to their configured neighbor modes. This form
+ of costing is similar to the inter motion vector costing. Only the
+ built-in functions providing the defaults mode costs are specified as
+ MCE functions. The remaining functions which actually configure the
+ payload for the intra estimation operation is specified as SIC built-
+ in functions.
+
+ +----------------------------------+---------------------------------+
+ |uchar intel_sub_group_avc_mce_get_|Get the default intra mode cost |
+ |default_intra_luma_mode_penalty( |penalty for in U4U4 format when |
+ | uchar slice_type, |the estimated mode differs from |
+ | uchar qp ) |its predicted mode from its |
+ | |neighbors. |
+ | | |
+ | |The value of slice_type must be a|
+ | |valid slice type enumeration |
+ | |value. |
+ | | |
+ | |The value of qp must be a valid |
+ | |quantization parameter value |
+ | |between 0 and 51. |
+ +----------------------------------+---------------------------------+
+ |uint intel_sub_group_avc_mce_get_ |Get the default intra non-dc cost|
+ |default_non_dc_luma_intra_penalty(|penalty for intra luma estimation|
+ | void ) |in 8-bit integer format. |
+ +----------------------------------+---------------------------------+
+ |uchar intel_sub_group_avc_mce_get_|Get the default chroma mode base |
+ |default_intra_chroma_mode_base_ |penalty in U4U4 format. |
+ |penalty( | |
+ | void ) | |
+ +----------------------------------+---------------------------------+
+
+ Miscellaneous property configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These built-in functions enable miscellaneous MCE properties settings.
+
+ +--------------------------------------+-----------------------------+
+ |intel_sub_group_avc_mce_payload_t |Update the input payload to |
+ |intel_sub_group_avc_mce_set_ |enable an AC only HAAR SAD |
+ |ac_only_haar( |mode and return it. It |
+ | intel_sub_group_avc_mce_payload_t |overrides any previous |
+ | payload ) |setting for sad adjustment. |
+ | | |
+ | |This feature is mainly |
+ | |intended for improved block |
+ | |matching in frame-rate |
+ | |conversion (FRC) kernels. |
+ +--------------------------------------+-----------------------------+
+ |intel_sub_group_avc_mce_payload_t |Update the input payload to |
+ |intel_sub_group_avc_mce_set_source_ |specify the field polarities |
+ |interlaced_field_polarity( |for interlaced source images |
+ | uchar src_field_polarity, |used for inter or intra |
+ | intel_sub_group_avc_mce_payload_t |operations. |
+ | payload ) | |
+ | |The value of |
+ | |src_field_polarity must be a |
+ | |valid field polarity |
+ | |enumeration value indicating |
+ | |the field polarity for the |
+ | |source image. |
+ +--------------------------------------+-----------------------------+
+ |intel_sub_group_avc_mce_payload_t |Update the input payload to |
+ |intel_sub_group_avc_mce_set_single_ |specify the field polarities |
+ |reference_interlaced_field_polarity( |for interlaced reference |
+ | uchar ref_field_polarity, |images used for single |
+ | intel_sub_group_avc_mce_payload_t |reference inter search or |
+ | payload ) |check operation. |
+ | | |
+ | |The value of |
+ | |ref_field_polarity must be a |
+ | |valid field polarity |
+ | |enumeration value indicating |
+ | |the field polarity for the |
+ | |reference image. |
+ +--------------------------------------+-----------------------------+
+ |intel_sub_group_avc_mce_payload_t |Update the input payload to |
+ |intel_sub_group_avc_mce_set_dual_ |specify the field polarities |
+ |reference_interlaced_field_polarities(|for interlaced reference |
+ | uchar fwd_ref_field_polarity, |images used for dual |
+ | uchar bwd_ref_field_polarity, |reference inter search or |
+ | intel_sub_group_avc_mce_payload_t |check operation. |
+ | payload ) | |
+ | |The value of |
+ | |fwd_ref_field_polarity must |
+ | |be a valid field polarity |
+ | |enumeration value indicating |
+ | |the field polarity for the |
+ | |forward image. |
+ | | |
+ | |The value of |
+ | |bwd_ref_field_polarity must |
+ | |be a valid field polarity |
+ | |enumeration value indicating |
+ | |the field polarity for the |
+ | |backward image. |
+ +--------------------------------------+-----------------------------+
+
+ Result processing phase functions
+ +++++++++++++++++++++++++++++++++
+
+ These built-in functions facilitate the extraction of components of
+ the result from VME unit.
+
+ +-----------------------------------+--------------------------------+
+ |ulong intel_sub_group_avc_mce_get_ |Get the MCE packed BMVs result. |
+ |motion_vectors( | |
+ | intel_sub_group_avc_mce_result_t|Up to 16 packed BMVs are |
+ | result ) |returned, one per work-item. If |
+ | |the MCE search operation's |
+ | |payload was setup for |
+ | |unidirectional search then only |
+ | |the forward packed MV will be |
+ | |valid in each BMV, otherwise |
+ | |both packed MVs will be |
+ | |valid. The BMVs have to be |
+ | |selected by their respective |
+ | |work-items based on the result |
+ | |block major and minor shapes. |
+ | | |
+ | |If the major shape is: |
+ | | - 16x16, then one BMV is |
+ | | returned by work-item 0 |
+ | | - 16x8, or 8x16, then two |
+ | | BMVs are returned by work- |
+ | | items 0 and 8 |
+ | | - 8x8, then four sets of BMVs|
+ | | corresponding to the four |
+ | | partitions in traditional |
+ | | Z-order are returned by |
+ | | work-items in the ranges |
+ | | [0, 3], [4, 7], [8, 11], |
+ | | and [12, 15]; the minor |
+ | | shape will determine |
+ | | exactly which work-items |
+ | | in the reserved inclusive |
+ | | range for the partition |
+ | | returns the BMVs for that |
+ | | partition |
+ | | |
+ | |If the range of work-items for |
+ | |the 8x8 major partition is [n, |
+ | |n+3] and the minor shape is: |
+ | | - 8x8, then work-item 'n' |
+ | | returns the BMV for each |
+ | | minor partition |
+ | | - 8x4 or 4x8, then work-items|
+ | | 'n' and 'n+2' returns the |
+ | | BMVs for each minor |
+ | | partition |
+ | | - 4x4, then all work-items in|
+ | | [n, n+3] return the BMVs |
+ | | for each minor partition |
+ | | in traditional Z-order |
+ | | |
+ | |NOTES: |
+ | | |
+ | |(1) All sub-block BMVs get |
+ | |replicated for each partition. |
+ | |For example, for a 16x16 |
+ | |partition, all smaller sub-block|
+ | |BMVs are replicated to the same |
+ | |BMV, and for 8x8 partition, each|
+ | |8x8 must have its respective |
+ | |sub-block BMVs replicated. This |
+ | |is not important to extract the |
+ | |component BMVs itself, but is |
+ | |needed if the result of this |
+ | |function is used to initialize |
+ | |the input motion vectors of a |
+ | |REF initialization function. |
+ | | |
+ | |(2) With interlaced images, the |
+ | |MBs for the top field MBs are |
+ | |considered as logically |
+ | |overlapping with the bottom MBs.|
+ +-----------------------------------+--------------------------------+
+ |ushort intel_sub_group_avc_mce_get_|Get the MCE inter distortions |
+ |inter_distortions( |result corresponding to the BMVs|
+ | intel_sub_group_avc_mce_result_t|returned by intel_sub_group_avc_|
+ | result |mce_get_motion_vectors(..). The |
+ | |MCE inter directions result |
+ | |returned by intel_sub_group_avc_|
+ | |mce_get_inter_directions(..) |
+ | |will specify if the distortion |
+ | |corresponds to the forward MV, |
+ | |backward MV, or the |
+ | |bidirectional MV in the BMV. Up |
+ | |to 16 distortions are returned, |
+ | |one per work-item. |
+ | | |
+ | |The distortions have to be |
+ | |selected by their respective |
+ | |work-items based on the result |
+ | |block major and minor shapes |
+ | |just as for the result MVs as |
+ | |described above. |
+ +-----------------------------------+--------------------------------+
+ |ushort intel_sub_group_avc_mce_get_|Get the best inter distortion |
+ |best_inter_distortion( |for the whole MB. |
+ | intel_sub_group_avc_mce_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+ |uchar intel_sub_group_avc_mce_get_ |Get the MCE inter MB major |
+ |inter_major_shape( |partition shape. |
+ | intel_sub_group_avc_mce_result_t| |
+ | result ) |The returned values are as per |
+ | |the inter-MB major shapes |
+ | |enumeration values. |
+ | | |
+ | |This can only be called as part |
+ | |of an IME or REF operation |
+ | |evaluation. |
+ +-----------------------------------+--------------------------------+
+ |uchar intel_sub_group_avc_mce_get_ |Get the MCE inter MB minor |
+ |inter_minor_shapes( |partition shapes. |
+ | intel_sub_group_avc_mce_result_t| |
+ | result ) |It returns a bit field with the |
+ | |minor shapes for the 4 8x8 |
+ | |sub-partitions in traditional Z |
+ | |order. Two bits are reserved for|
+ | |each of the four sub-partitions |
+ | |in row-major order. The |
+ | |returned 2-bit values are as per|
+ | |the inter-MB minor shapes |
+ | |enumeration values. |
+ | | |
+ | |This function returns valid |
+ | |results only if the major shape |
+ | |is 8x8, otherwise the results |
+ | |are undefined. |
+ | | |
+ | |This can only be called as part |
+ | |of an IME or REF operation |
+ | |evaluation. |
+ +-----------------------------------+--------------------------------+
+ |uchar intel_sub_group_avc_mce_get_ |Get the MCE inter MB major |
+ |inter_directions( |partition directions. |
+ | intel_sub_group_avc_mce_result_t| |
+ | result ) |It returns a bit field with the |
+ | |direction for up to 4 major |
+ | |sub-partitions in traditional Z |
+ | |order. Two bits are reserved for|
+ | |each of the four |
+ | |sub-partitions. The returned |
+ | |2-bit values are as per the |
+ | |inter-MB major shape direction |
+ | |enumeration values. |
+ | | |
+ | |If the major partition is: |
+ | | |
+ | |- 16x16, then bits in the range |
+ | | [0, 1] contains the direction |
+ | |- 16x8 or 8x16, then bits in the|
+ | | ranges [0, 1] and [4,5] |
+ | | contains the two partitions |
+ | | directions |
+ | |- 8x8, then bits in the ranges |
+ | | [0, 1], [2, 3], [4,5], and |
+ | | [6, 7] contains the four |
+ | | partitions directions |
+ | | |
+ | |The returned values are as per |
+ | |the inter direction enumeration |
+ | |values. |
+ | | |
+ | |This can only be called as part |
+ | |of an IME or REF operation |
+ | |evaluation. |
+ +-----------------------------------+--------------------------------+
+ |uchar intel_sub_group_avc_mce_get_ |Get the count of motion vectors |
+ |inter_motion_vector_count( |(based on the partitioning |
+ | intel_sub_group_avc_mce_result_t|decision) returned by the search|
+ | result ) |operation. |
+ | | |
+ | |This can only be called as part |
+ | |of an IME or REF operation |
+ | |evaluation. |
+ +-----------------------------------+--------------------------------+
+ |uint intel_sub_group_avc_mce_get_ |Get the MCE inter MB reference |
+ |inter_reference_ids( |identifiers in a packed integer |
+ | intel_sub_group_avc_mce_result_t|format, with the following bits |
+ | result ) |specifying the reference |
+ | |identifiers for the major |
+ | |partitions. |
+ | | |
+ | |3:0 => Fwd reference block 0 |
+ | |7:4 => Bwd reference block 0 |
+ | |11:8 => Fwd reference block 1 |
+ | |15:12 => Bwd reference block 1 |
+ | |19:16 => Fwd reference block 2 |
+ | |23:20 => Bwd reference block 2 |
+ | |27:24 => Fwd reference block 3 |
+ | |31:28 => Bwd reference block 3 |
+ | | |
+ | |The values of each individual |
+ | |4-bit reference identifier range|
+ | |from 0 to 15, with each value |
+ | |identifying the distance of |
+ | |ordered pair of forward/backward|
+ | |reference images as declared in |
+ | |the VME kernel parameter |
+ | |interface list. |
+ | | |
+ | |If the dual-reference evaluation|
+ | |functions are not used, then the|
+ | |values of the backward reference|
+ | |identifiers are undefined. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference identifier |
+ | |pairs are replicated. For |
+ | |example, for a 16x16 block all |
+ | |four pairs of reference |
+ | |identifiers are replicated to |
+ | |the value of the first pair for |
+ | |block 0. |
+ | | |
+ | |NOTE: Unless HW assisted |
+ | |multi-reference search was |
+ | |performed using the IME |
+ | |streamin/streamout evaluation |
+ | |functions, the individual 4-bit |
+ | |reference identifier pair values|
+ | |will all be the same (pointing |
+ | |to the same pair for |
+ | |forward/backward reference |
+ | |images). |
+ +-----------------------------------+--------------------------------+
+ |uchar |Get the MCE inter MB reference |
+ |intel_sub_group_avc_mce_get_inter_ |field polarities for the |
+ |reference_interlaced_field_ |corresponding reference |
+ |polarities( |identifiers returned by |
+ | uint packed_reference_ids, |intel_sub_group_avc_ |
+ | uint |mce_get_inter_reference_ids(..) |
+ | packed_reference_parameter_ |in a packed integer format, with|
+ | field_polarities, |the following bits specifying |
+ | intel_sub_group_avc_mce_result_t|the reference field polarities |
+ | result ) |for the major partitions. |
+ | | |
+ | | 0 : Fwd reference block 0 |
+ | | 1 : Fwd reference block 1 |
+ | | 2 : Fwd reference block 2 |
+ | | 3 : Fwd reference block 3 |
+ | | 4 : Bwd reference block 0 |
+ | | 5 : Bwd reference block 1 |
+ | | 6 : Bwd reference block 2 |
+ | | 7 : Bwd reference block 3 |
+ | | |
+ | |If the dual-reference evaluation|
+ | |functions are not used, then the|
+ | |values of the backward reference|
+ | |field polarities are undefined. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference field |
+ | |polarities are replicated. For |
+ | |example, for a 16x16 block all |
+ | |four pairs of reference field |
+ | |polarities are replicated to the|
+ | |value of the first pair for |
+ | |block 0. |
+ | | |
+ | |The value of |
+ | |packed_reference_ids is as |
+ | |defined by the return value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_reference_ids(..). |
+ | | |
+ | |The value of |
+ | |packed_reference_parameter_ |
+ | |field_polarities specifies the |
+ | |packed bit field of field |
+ | |polarities for each of the (up |
+ | |to 16) forward/backward |
+ | |interleaved pairs of reference |
+ | |images in the same order as |
+ | |specified in the kernel |
+ | |parameter list, as used for the |
+ | |inter search operation. If less |
+ | |than 16 pairs are used then the |
+ | |corresponding bit field values |
+ | |are ignored. |
+ | | |
+ | |NOTE: An important restriction |
+ | |is that when multiple IME |
+ | |operations are performed for a |
+ | |HW multi-assisted |
+ | |multi-reference search operation|
+ | |using the streamin/streamout |
+ | |capabilities, the same reference|
+ | |image parameter cannot be used |
+ | |with different polarities in the|
+ | |sequence of IME operations used |
+ | |for a HW-assisted search |
+ | |operation. In other words, the |
+ | |field polarities for reference |
+ | |image parameters must be used |
+ | |consistently across IME |
+ | |operations used in a HW assisted|
+ | |multi-reference search |
+ | |operation. |
+ +-----------------------------------+--------------------------------+
+
+ ______________________________________________________________________
+
+
+ IME built-in functions
+ ----------------------
+ A set of ordered phases of functions are required to be called to
+ evaluate an integer motion estimation result.
+
+ Initialization phase functions
+ ++++++++++++++++++++++++++++++
+
+ These built-in functions create a properly initialized payload that
+ can be used for further configured for evaluating IME operations. This
+ is a required initial phase.
+
+ +---------------------------------+----------------------------------+
+ |intel_sub_group_avc_ime_payload_t|Return an initialized payload for |
+ |intel_sub_group_avc_ime_ |a VME integer search (IME) |
+ |initialize( |operation. |
+ | ushort2 src_coord, | |
+ | uchar partition_mask, |The payload is initialized for |
+ | uchar sad_adjustment ) |progressive frame operations, and |
+ | |the cost configuration values and |
+ | |the miscellaneous property values |
+ | |are all initialized to zero. The |
+ | |cost configuration and the |
+ | |miscellaneous property |
+ | |configuration phase functions must|
+ | |be used to override the initial |
+ | |configurations in the payload. |
+ | | |
+ | |The src_coord value represents the|
+ | |2D offset of the top left corner |
+ | |of the source MB in pixel units in|
+ | |the source image. Source MBs at |
+ | |the image borders are allowed to |
+ | |be partial, but the top-left |
+ | |corner must be within the image. |
+ | | |
+ | |If the source image is an |
+ | |interlaced scan image, then the |
+ | |bottom field lines are considered |
+ | |as logically overlapping with the |
+ | |top field lines (i.e. the top |
+ | |field MBs are considered as |
+ | |logically overlapping with the |
+ | |bottom MBs) for the purposes for |
+ | |specifying the src_coord value. |
+ | | |
+ | |The legal values for |
+ | |partition_mask can be composed by |
+ | |setting the appropriate bit fields|
+ | |specified by partition mask |
+ | |enumeration values using the '&' |
+ | |operator. |
+ | | |
+ | |The legal values for |
+ | |sad_adjustment is specified by its|
+ | |respective enumeration values. |
+ | | |
+ | |If the sad_adjustment is set to |
+ | |CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_ |
+ | |INTEL, a simple wavelet transform,|
+ | |Haar transform, is used to refine |
+ | |the distortion measure of |
+ | |SAD. Haar transform here is used |
+ | |as a coarse estimation of the |
+ | |integer transform. |
+ +---------------------------------+----------------------------------+
+
+ Configuration phase functions
+ +++++++++++++++++++++++++++++
+
+ These built-in functions allow for configuration of the search window.
+ A call to either intel_sub_group_avc_ime_set_single_reference(..) or
+ intel_sub_group_avc_ime_set_dual_reference(..) is required. This is a
+ required phase immediately following the initialization phase
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Update the input payload for a |
+ |intel_sub_group_avc_ime_set_ |VME single-reference search |
+ |single_reference( |with the configuration for the |
+ | short2 ref_offset, |reference window search region,|
+ | uchar search_window_config, |and return it. |
+ | intel_sub_group_avc_ime_payload_t| |
+ | payload ) |The 2D ref_offset specifies the|
+ | |reference window offset. The X |
+ | |and Y coordinates must be in |
+ | |the range [-2048, 2047], |
+ | |otherwise the results are |
+ | |undefined. The reference window|
+ | |is allowed to be partially |
+ | |outside the image. Pixel |
+ | |replication is applied to |
+ | |generate out-of-bound reference|
+ | |pixels. It is specified in PEL |
+ | |units. Results are undefined in|
+ | |the reference region is |
+ | |completely outside the image. |
+ | | |
+ | |If the reference image is an |
+ | |interlaced scan image, then the|
+ | |top field lines are considered |
+ | |as logically overlapping with |
+ | |the bottom field lines |
+ | |(i.e. the top field MBs are |
+ | |considered as logically |
+ | |overlapping with the bottom |
+ | |MBs) for the purposes for |
+ | |specifying the ref_offset |
+ | |value. |
+ | | |
+ | |The parameter |
+ | |search_window_config must be a |
+ | |compile-time constant. |
+ | | |
+ | |The value of |
+ | |search_window_config must be |
+ | |one of the unreserved search |
+ | |window configuration |
+ | |enumeration values. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Update the input payload for a |
+ |intel_sub_group_avc_ime_ |VME dual-reference search with |
+ |set_dual_reference( |the configurations for the |
+ | short2 fwd_ref_offset, |reference window search |
+ | short2 bwd_ref_offset, |regions, and return it. |
+ | uchar search_window_config, | |
+ | intel_sub_group_avc_ime_payload_t|The 2D |
+ | payload ) |fwd_ref_offset/bwd_ref_offset |
+ | |specifies the forward/backward |
+ | |reference window offsets. The X|
+ | |and Y coordinates must be in |
+ | |the range [-2048, 2047], |
+ | |otherwise the results are |
+ | |undefined. The reference |
+ | |windows are allowed to be |
+ | |partially outside the |
+ | |image. Pixel replication is |
+ | |applied to generate |
+ | |out-of-bound reference |
+ | |pixels. It is specified in PEL |
+ | |units. |
+ | | |
+ | |If a reference image is an |
+ | |interlaced scan image, then the|
+ | |top field lines are considered |
+ | |as logically overlapping with |
+ | |the bottom field lines |
+ | |(i.e. the top field MBs are |
+ | |considered as logically |
+ | |overlapping with the bottom |
+ | |MBs) for the purposes for |
+ | |specifying the corresponding |
+ | |fwd_ref_offset and/or |
+ | |bwd_ref_offset values. |
+ | | |
+ | |The parameter |
+ | |search_window_config must be a |
+ | |compile-time constant and must |
+ | |be one of the unreserved search|
+ | |window configuration |
+ | |enumeration values. |
+ +------------------------------------+-------------------------------+
+ |ushort2 intel_sub_group_ime_ |Get the 2D size of the |
+ |ref_window_size( |reference window in pixel |
+ | uchar search_window_config, |units. |
+ | char dual_ref ) | |
+ | |The value of |
+ | |search_window_config must be |
+ | |one of the unreserved search |
+ | |window configuration |
+ | |enumeration values. |
+ | | |
+ | |The value of dual_ref must be |
+ | |set to zero for a single |
+ | |reference search window and one|
+ | |for a dual-reference search |
+ | |window. |
+ +------------------------------------+-------------------------------+
+ |short2 |If the input 2D reference |
+ |intel_sub_group_avc_ime_ |window offset, ref_offset, |
+ |adjust_ref_offset( |causes the reference window to |
+ | short2 ref_offset, |be fully out-of-bound of the |
+ | ushort2 src_coord, |reference image, adjust it such|
+ | ushort2 ref_window_size, |that the reference window is |
+ | ushort2 image_size ) |within bounds of the reference |
+ | |image. |
+ | | |
+ | |The 2D ref_offset specifies the|
+ | |reference window offset. The X |
+ | |and Y coordinates must be in |
+ | |the range [-2048, 2047], |
+ | |otherwise the results are |
+ | |undefined. |
+ | | |
+ | |The src_coord value represents |
+ | |the 2D offset of the top left |
+ | |corner of the source MB in |
+ | |pixel units in the source |
+ | |image. Source MBs at the image |
+ | |borders are allowed to be |
+ | |partial. Results are undefined |
+ | |if the source MB is completely |
+ | |outside the image. |
+ | | |
+ | |The ref_window_size specifies |
+ | |the 2D size of the reference |
+ | |window in pixel units. |
+ | | |
+ | |The image_size specifies the 2D|
+ | |size of the progressive scan, |
+ | |or top or bottom fields, of the|
+ | |interlaced scan image in pixel |
+ | |units. |
+ | | |
+ | |If the reference image is an |
+ | |interlaced scan image, then the|
+ | |bottom field lines are |
+ | |considered as logically |
+ | |overlapping with the top field |
+ | |lines the purposes for |
+ | |specifying the image_size |
+ | |value. Since, the actual layout|
+ | |of the top and bottom fields in|
+ | |the reference image is in an |
+ | |interleaved fashion, the height|
+ | |of the top or bottom fields |
+ | |should be exactly half of the |
+ | |actual reference image height. |
+ | | |
+ | |A call to |
+ | |intel_sub_group_avc_ime_ |
+ | |adjust_ref_coord(..) is |
+ | |optional. It is required only |
+ | |if the reference window offsets|
+ | |inputs to |
+ | |intel_sub_group_avc_ime_set_ |
+ | |single_reference(..) or |
+ | |intel_sub_group_avc_ime_set_ |
+ | |dual_reference (..) is |
+ | |potentially out-of-bounds and |
+ | |need to be adjusted. |
+ +------------------------------------+-------------------------------+
+
+ Payload type conversion functions
+ +++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ search configuration phase to convert IME payload to MCE payloads and
+ vice-versa.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_mce_payload_t |Convert the IME payload to a |
+ |intel_sub_group_avc_ime_convert_ |generic MCE payload. |
+ |to_mce_payload( | |
+ | intel_sub_group_avc_ime_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Convert the generic MCE payload|
+ |intel_sub_group_avc_ |to a IME payload. |
+ |mce_convert_to_ime_payload( | |
+ | intel_sub_group_avc_mce_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+
+ Multi-reference cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ search configuration phase to enable multi-reference image costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_ |
+ |inter_base_multi_reference_penalty( |set_inter_base_multi_reference_|
+ | uchar reference_base_penalty, |penalty(..) the payload |
+ | intel_sub_group_avc_ime_payload_t|conversions with to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+
+ Inter shape and direction cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ search configuration phase to enable shape costing.
+
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ |
+ |inter_shape_penalty( |inter_shape_penalty(..) |
+ | ulong packed_shape_cost, |with the payload conversions |
+ | intel_sub_group_avc_ime_payload_t |to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ | | |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ |
+ |inter_direction_penalty( |inter_direction_penalty(..) |
+ | uchar direction_cost, |with the payload conversions |
+ | intel_sub_group_avc_ime_payload_t|to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +-------------------------------------+------------------------------+
+
+ Inter motion vector cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ search configuration phase to enable motion vector costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ |
+ |motion_vector_cost_function( |motion_vector_cost_function(..)|
+ | ulong packed_cost_center_delta, |with the payload conversions |
+ | uint2 packed_cost_table, |to/from MCE types. See MCE |
+ | uchar cost_precision, |version for description. |
+ | intel_sub_group_avc_ime_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+
+ Miscellaneous property configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ search configuration phase to enable miscellaneous properties setting
+ in the payload.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ |
+ |source_interlaced_field_polarity( |source_interlaced_field_ |
+ | uchar src_field_polarity |polarity(..) with the result |
+ | intel_sub_group_avc_ime_payload_t|conversions to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_ |
+ |single_reference_interlaced_field_ |set_single_reference_ |
+ |polarity( |interlaced_field_polarity(..) |
+ | uchar ref_field_polarity, |with the result conversions |
+ | intel_sub_group_avc_ime_payload_t|to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ |
+ |dual_reference_interlaced_field_ |dual_reference_interlaced_ |
+ |polarities( |field_polarities(..) with the |
+ | uchar fwd_ref_field_polarity, |result conversions to/from MCE |
+ | uchar bwd_ref_field_polarity, |types. See MCE version for |
+ | intel_sub_group_avc_ime_payload_t|description. |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Specify the maximum number of |
+ |intel_sub_group_avc_ime_set_ |motion vectors allowed for the |
+ |max_motion_vector_count( |current MB. The default setting|
+ | uchar max_motion_vector_count, |is 32. Any other value may |
+ | intel_sub_group_avc_ime_payload_t|alter the MB partitioning |
+ | payload ) |decision. The IME operation |
+ | |will compute the best allowed |
+ | |partitioning such that the |
+ | |number of sub-block motion |
+ | |vectors will not exceed |
+ | |max_motion_vector_count. |
+ | | |
+ | |The value of max_motion_vector_|
+ | |count(..) specifies the maximum|
+ | |number of motion vectors |
+ | |allowed for the current MB. It |
+ | |must be in the range [1, 32], |
+ | |otherwise the results are |
+ | |undefined. |
+ | | |
+ | |NOTE: This can be used to |
+ | |handle the restriction for |
+ | |certain profiles for AVC in |
+ | |that the maximum number of |
+ | |motion vectors allowed for two |
+ | |consecutive MBs can only be 16.|
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Update the input payload to |
+ |intel_sub_group_avc_ime_set_ |disable a mix of forward and |
+ |unidirectional_mix_disable( |backward MVs in the result. |
+ | intel_sub_group_avc_ime_payload_t| |
+ | payload ) |Default is to enable it. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Specifies the threshold value |
+ |intel_sub_group_avc_ime_set_early_ |of a distortion compute of a |
+ |search_termination_threshold( |16x16 partition of a MB for a |
+ | uchar threshold, |single-reference search, below |
+ | intel_sub_group_avc_ime_payload_t|which no more searching is |
+ | payload ) |performed for the MB. |
+ | | |
+ | |The value of threshold is |
+ | |specified in U4U4 format and |
+ | |the integer value must fit |
+ | |within 14 bits. |
+ | | |
+ | |The input payload must have |
+ | |been configured for a single- |
+ | |reference search with the 16x16|
+ | |partition enabled for this |
+ | |threshold to be set, or else |
+ | |the results are undefined. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |Set the (16) SAD weights for |
+ |intel_sub_group_avc_ime_set_ |each 4x4 sub-block. |
+ |weighted_sad( | |
+ | uint packed_sad_weights, |These values are used to |
+ | intel_sub_group_avc_ime_payload_t|decrease the SAD magnitude of |
+ | payload ) |each 4x4 sub-block by dividing |
+ | |the SAD of 4x4 sub-block of the|
+ | |source MB by its mapped weight.|
+ | | |
+ | |It requires a partition_mask of|
+ | |16x16 and forward search window|
+ | |configuration. |
+ | | |
+ | |The weighting pattern used is |
+ | |the traditional Z order for |
+ | |each 4x4 block. Weighted-SAD |
+ | |Control Mapping: |
+ | | |
+ | | 0 1 4 5 |
+ | | 2 3 6 7 |
+ | | 8 9 C D |
+ | | A B E F |
+ | | |
+ | |Each weight is of 2 bits |
+ | |represented in a packed format |
+ | |in packed_sad_weights for each |
+ | |of the 4x4 blocks in Z order. |
+ | | |
+ | |A prior call to |
+ | |intel_sub_group_avc_ime_ |
+ | |set_single_reference(..) set up|
+ | |for the forward reference image|
+ | |is required. |
+ | | |
+ | |This feature is mainly intended|
+ | |for improved block matching in |
+ | |image-rate conversion (FRC) |
+ | |kernels. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ime_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ac_|
+ |ac_only_haar( |only_haar(..) with the payload |
+ | intel_sub_group_avc_ime_payload_t|conversions to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+
+ Evaluation phase functions
+ ++++++++++++++++++++++++++
+
+ These built-in functions perform the evaluation of the IME operation
+ configured in the payload with a VME media sampler and return the
+ results.
+
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_t |Evaluate the basic IME |
+ |intel_sub_group_avc_ime_evaluate_ |operation with a single |
+ |with_single_reference( |reference and return its |
+ | read_only image2d_t src_image, |results. The IME payload must |
+ | read_only image2d_t ref_image, |have been configured with |
+ | sampler_t vme_media_sampler, |intel_sub_group_avc_ime_set_ |
+ | intel_sub_group_avc_ime_payload_t |single_reference(..). |
+ | payload ) | |
+ | |The parameter ref_image must |
+ | |be a valid forward image |
+ | |kernel parameter per the |
+ | |ordering conventions for the |
+ | |kernel parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_t |Evaluate the basic IME |
+ |intel_sub_group_avc_ime_evaluate_ |operation with dual reference |
+ |with_dual_reference( |and return its results. The |
+ | read_only image2d_t src_image, |IME payload must have been |
+ | read_only image2d_t fwd_ref_image,|configured with |
+ | read_only image2d_t bwd_ref_image,|intel_sub_group_avc_ime_set_ |
+ | sampler_t vme_media_sampler, |dual_reference(..). |
+ | intel_sub_group_avc_ime_payload_t | |
+ | payload ) |The parameter fwd_ref_image[ |
+ | |bwd_ref_image] must be a valid|
+ | |forward[backward] image kernel|
+ | |parameter per the ordering |
+ | |conventions for the kernel |
+ | |parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_ |Evaluate the single reference |
+ |single_reference_streamout_t |IME operation with streamout |
+ |intel_sub_group_avc_ime_evaluate_ |and return its results. The |
+ |with_single_reference_streamout( |IME payload must have been |
+ | read_only image2d_t src_image, |configured with |
+ | read_only image2d_t ref_image, |intel_sub_group_avc_ime_set_ |
+ | sampler_t vme_media_sampler, |single_reference(..). |
+ | intel_sub_group_avc_ime_payload_t | |
+ | payload ) |The parameter ref_image must |
+ | |be a valid forward image |
+ | |kernel parameter per the |
+ | |ordering conventions for the |
+ | |kernel parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_ |Evaluate the dual reference |
+ |dual_reference_streamout_t |IME operation with streamout |
+ |intel_sub_group_avc_ime_evaluate_ |and return its results. The |
+ |with_dual_reference_streamout( |IME payload must have been |
+ | read_only image2d_t src_image, |configured with |
+ | read_only image2d_t fwd_ref_image,|intel_sub_group_avc_ime_set_ |
+ | read_only image2d_t bwd_ref_image,|dual_reference(..). |
+ | sampler_t vme_media_sampler, | |
+ | intel_sub_group_avc_ime_payload_t |The parameter fwd_ref_image[ |
+ | payload ) |bwd_ref_image] must be a valid|
+ | |forward[backward] image kernel|
+ | |parameter per the ordering |
+ | |conventions for the kernel |
+ | |parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_t |Evaluate the single reference |
+ |intel_sub_group_avc_ime_evaluate_ |IME operation with streamin |
+ |with_single_reference_streamin( |and return its results. The |
+ | read_only image2d_t src_image, |IME payload must have been |
+ | read_only image2d_t ref_image, |configured with |
+ | sampler_t vme_media_sampler, |intel_sub_group_avc_ime_set_ |
+ | intel_sub_group_avc_ime_payload_t|single_reference(..). |
+ | payload, | |
+ | intel_sub_group_avc_ime_single_ |The parameter ref_image must |
+ | reference_streamin_t |be a valid forward image |
+ | streamin_components ) |kernel parameter per the |
+ | |ordering conventions for the |
+ | |kernel parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_t |Evaluate the dual reference |
+ |intel_sub_group_avc_ime_evaluate_ |IME operation with streamin |
+ |with_dual_reference_streamin( |and return its results. The |
+ | read_only image2d_t src_image, |IME payload must have been |
+ | read_only image2d_t fwd_ref_image,|configured with |
+ | read_only image2d_t bwd_ref_image,|intel_sub_group_avc_ime_set_ |
+ | sampler_t vme_media_sampler, |dual_reference(..). |
+ | intel_sub_group_avc_ime_payload_t | |
+ | payload, |The parameter fwd_ref_image[ |
+ | intel_sub_group_avc_ime_dual_ |bwd_ref_image] must be a valid|
+ | reference_streamin_t |forward[backward] image kernel|
+ | streamin_components |parameter per the ordering |
+ | ) |conventions for the kernel |
+ | |parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_ |Evaluate the single reference |
+ |single_reference_streamout_t |IME operation with streamin |
+ |intel_sub_group_avc_ime_evaluate_ |and streamout and return its |
+ |with_single_reference_streaminout( |results. The IME payload must |
+ | read_only image2d_t src_image, |have been configured with |
+ | read_only image2d_t ref_image, |intel_sub_group_avc_ime_set_ |
+ | sampler_t vme_media_sampler, |single_reference(..). |
+ | intel_sub_group_avc_ime_payload_t | |
+ | payload, |The parameter ref_image must |
+ | intel_sub_group_avc_ime_single_ |be a valid forward image |
+ | reference_streamin_t |kernel parameter per the |
+ | streamin_components |ordering conventions for the |
+ | ) |kernel parameter list. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_ |Evaluate the dual reference |
+ |dual_reference_streamout_t |IME operation with streamin |
+ |intel_sub_group_avc_ime_evaluate_ |and streamout and return its |
+ |with_dual_reference_streaminout( |results. The IME payload must |
+ | read_only image2d_t src_image, |have been configured with |
+ | read_only image2d_t fwd_ref_image,|intel_sub_ |
+ | read_only image2d_t bwd_ref_image,|group_avc_ime_set_dual_ |
+ | sampler_t vme_media_sampler, |reference(..). |
+ | intel_sub_group_avc_ime_payload_t | |
+ | payload, |The parameter fwd_ref_image[ |
+ | intel_sub_group_avc_ime_dual_ |bwd_ref_image] must be a valid|
+ | reference_streamin_t |forward[backward] image kernel|
+ | streamin_components |parameter per the ordering |
+ | ) |conventions for the kernel |
+ | |parameter list. |
+ +-------------------------------------+------------------------------+
+
+ Result type conversion functions
+ +++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ evaluation phase to convert IME results to MCE results and vice-versa.
+
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_mce_result_t |Convert the IME result into a |
+ |intel_sub_group_avc_ime_ |MCE result. |
+ |convert_to_mce_result( | |
+ | intel_sub_group_avc_ime_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_ime_result_t |Convert the MCE result into an |
+ |intel_sub_group_avc_mce_ |IME result. |
+ |convert_to_ime_result( | |
+ | intel_sub_group_avc_mce_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+
+ Result processing phase functions
+ +++++++++++++++++++++++++++++++++
+
+ These built-in functions are called following the evaluation phase to
+ extract the various result components from an IME evaluation result.
+
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_ |Return the streamed out BMVs |
+ |single_reference_streamin_t |and distortions from the input|
+ |intel_sub_group_avc_ime_ |result from a single reference|
+ |get_single_reference_streamin( |streamout IME operation that |
+ | intel_sub_group_avc_ime_result_ |can be used as streamin input |
+ | single_reference_streamout_t |for a subsequent IME |
+ | result ) |operation. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_ |Return the streamed out BMVs |
+ |dual_reference_streamin_t |and distortions from the input|
+ |intel_sub_group_avc_ime_get_ |result from a dual reference |
+ |dual_reference_streamin( |streamout IME operation that |
+ | intel_sub_group_avc_ime_result_ |can be used as streamin input |
+ | dual_reference_streamout_t |for a subsequent IME |
+ | result ) |operation. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_t |Strip out the single reference|
+ |intel_sub_group_avc_ime_strip_ |streamout BMVs and distortions|
+ |single_reference_streamout( |from the streamout results and|
+ | intel_sub_group_avc_ime_result_ |return the rest. |
+ | single_reference_streamout_t | |
+ | result ) | |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ime_result_t |Strip out the dual reference |
+ |intel_sub_group_avc_ime_strip_ |streamout MVs and distortions |
+ |dual_reference_streamout( |from the streamout results and|
+ | intel_sub_group_avc_ime_result_ |return the rest. |
+ | dual_reference_streamout_t | |
+ | result ) | |
+ +-------------------------------------+------------------------------+
+ |uint intel_sub_group_avc_ime_get_ |Get the packed motion vectors |
+ |streamout_major_shape_motion_vectors(|for the input major shape from|
+ | intel_sub_group_avc_ime_result_ |the IME single reference |
+ | single_reference_streamout_t |streamout results. |
+ | result, | |
+ | uchar major_shape ) |The parameter major_shape must|
+ | |be a valid inter macro-block |
+ | |major shape enumeration value |
+ | |and a compile-time constant. |
+ | | |
+ | |Up to 4 packed MVs are |
+ | |returned, one per |
+ | |work-item. If the major shape |
+ | |is: |
+ | |- 16x6, then one packed MV |
+ | | is returned by work-item 0 |
+ | |- 16x8, or 8x16, then two |
+ | | packed MVs are returned by |
+ | | work-items 0 and 1 |
+ | |- 8x8, then four packed MVs |
+ | | are returned by work-items |
+ | | 0 to 3. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ime_get_ |Get the distortions for the |
+ |streamout_major_shape_distortions( |input major shape from the IME|
+ | intel_sub_group_avc_ime_result_ |single reference streamout |
+ | single_reference_streamout_t |results. |
+ | result, | |
+ | uchar major_shape ) |The parameter major_shape must|
+ | |be a valid inter macro-block |
+ | |major shape enumeration value |
+ | |and a compile-time constant. |
+ | | |
+ | |Up to 4 distortions are |
+ | |returned, one per work-item in|
+ | |the same format as for motion |
+ | |vectors as described above. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |Get the reference identifiers |
+ |streamout_major_shape_reference_ids( |for the input major shape and |
+ | intel_sub_group_avc_ime_result_ |direction from the IME dual |
+ | single_reference_streamout_t |reference streamout results. |
+ | result, | |
+ | uchar major_shape ) |The parameter major_shape must|
+ | |be a valid inter macro-block |
+ | |major shape enumeration value |
+ | |and a compile-time constant. |
+ | | |
+ | |Up to 4 reference identifiers |
+ | |are returned, one per |
+ | |work-item in the same format |
+ | |as for motion vectors as |
+ | |described above. |
+ +-------------------------------------+------------------------------+
+ |uint intel_sub_group_avc_ime_get_ |Get the packed motion vectors |
+ |streamout_major_shape_motion_vectors(|for the input major shape and |
+ | intel_sub_group_avc_ime_result_ |direction from the IME dual |
+ | dual_reference_streamout_t result,|reference streamout results. |
+ | uchar major_shape, | |
+ | uchar direction ) |The parameter major_shape must|
+ | |be a valid inter macro-block |
+ | |major shape enumeration value |
+ | |and a compile-time constant. |
+ | | |
+ | |The parameter direction must |
+ | |be a valid unidirectional |
+ | |inter macro-block major |
+ | |direction value and a |
+ | |compile-time constant. |
+ | | |
+ | |Up to 4 packed MVs are |
+ | |returned, one per work-item in|
+ | |the same format as for motion |
+ | |vectors for single reference |
+ | |streamout as described above. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ime_get_ |Get the distortions for the |
+ |streamout_major_shape_distortions( |input major shape and |
+ | intel_sub_group_avc_ime_result_ |direction from the IME dual |
+ | dual_reference_streamout_t result,|reference streamout results. |
+ | uchar major_shape, | |
+ | uchar direction ) |The parameter major_shape must|
+ | |be a valid inter macro-block |
+ | |major shape enumeration value |
+ | |and a compile-time constant. |
+ | | |
+ | |The parameter direction must |
+ | |be a valid unidirectional |
+ | |inter macro-block major |
+ | |direction value and a |
+ | |compile-time constant. |
+ | | |
+ | |Up to 4 distortions are |
+ | |returned, one per work-item in|
+ | |the same format as for motion |
+ | |vectors as described above. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |Get the reference identifiers |
+ |streamout_major_shape_reference_ids( |for the input major shape and |
+ | intel_sub_group_avc_ime_result_ |direction from the IME dual |
+ | dual_reference_streamout_t result,|reference streamout results. |
+ | uchar major_shape, | |
+ | uchar direction ) | |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |Get the bitmask indicating |
+ |border_reached( |whether any border of |
+ | uchar image_select, |forward/backward reference |
+ | intel_sub_group_avc_ime_result_t |image is reached by one or |
+ | result ) |more MVs in the winning inter |
+ | |shape. The bitmask values are |
+ | |as per the inter border |
+ | |reached enumeration values. |
+ | | |
+ | |The search window must have |
+ | |been configured for a forward |
+ | |reference if image_select is |
+ | |set as |
+ | |CLK_AVC_ME_FRAME_FORWARD_INTEL|
+ | |and with a backward reference |
+ | |if image_select is set as |
+ | |CLK_AVC_ME_FRAME_BACKWARD_ |
+ | |INTEL. |
+ | | |
+ | |The value of image_select is |
+ | |either |
+ | |CLK_AVC_ME_FRAME_FORWARD_INTEL|
+ | |or CLK_AVC_ME_FRAME_BACKWARD_ |
+ | |INTEL and must be a |
+ | |compile-time constant. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |Get the indication that the |
+ |truncated_search_indication( |search operation was prevented|
+ | intel_sub_group_avc_ime_result_t |from providing the lowest |
+ | result ) |distortion solution due to the|
+ | |tighter constraints on the |
+ | |maximum number of MB motion |
+ | |vectors configured for the |
+ | |search operation. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |Get the indication that |
+ |unidirectional_early_search_ |unidirectional search |
+ |termination( |operation terminated early |
+ | intel_sub_group_avc_ime_result_t |because the configured |
+ | result ) |distortion threshold was met. |
+ +-------------------------------------+------------------------------+
+ |uint intel_sub_group_avc_ime_get_ |Get the 16x16 motion vector |
+ |weighting_pattern_minimum_ |corresponding to the minimum |
+ |motion_vector( |16x16 distortion when applying|
+ | intel_sub_group_avc_ime_result_t |the traditional Z-order SAD |
+ | result ) |weighting pattern. |
+ | | |
+ | |This can only be called if a |
+ | |SAD weighting pattern was set |
+ | |prior to evaluation using |
+ | |intel_sub_group_avc_ime_set_ |
+ | |weighted_sad(..). |
+ | | |
+ | |The argument for parameter |
+ | |"pattern_id" should be a |
+ | |compile-time constant. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ime_get_ |Get the minimum 16x16 |
+ |weighting_pattern_minimum_distortion(|distortion when applying the |
+ | intel_sub_group_avc_ime_result_t |traditional Z-order SAD |
+ | result ) |weighting pattern. |
+ +-------------------------------------+------------------------------+
+ |ulong intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |motion_vectors( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |motion_vectors(..) with the |
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_distortions( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |inter_distortions(..) with the|
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |best_inter_distortion( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |best_inter_distortion(..)with |
+ | result ) |the result conversions to/from|
+ | |MCE types. See MCE version for|
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_major_shape( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |inter_major_shape(..) with the|
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_minor_shapes( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |inter_minor_shapes(..) with |
+ | result ) |the result conversions to/from|
+ | |MCE types. See MCE version for|
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_directions( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |inter_directions(..) with the|
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_motion_vector_count( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |inter_motion_vector_count(..) |
+ | result ) |with the result conversions |
+ | |to/from MCE types. See MCE |
+ | |version for description. |
+ +-------------------------------------+------------------------------+
+ |uint intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_reference_ids( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ime_result_t |inter_reference_ids(..) with |
+ | result ) |the result conversions to/from|
+ | |MCE types. See MCE version for|
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ime_get_ |This is a wrapper for |
+ |inter_reference_interlaced_ |intel_sub_group_avc_mce_get_ |
+ |field_polarities( |inter_reference_interlaced_ |
+ | uint packed_reference_ids, |field_polarities(..) with the|
+ | uint packed_reference_parameter_ |result conversions to/from MCE|
+ | field_polarities, |types. See MCE version for |
+ | intel_sub_group_avc_ime_result_t |description. |
+ | result ) | |
+ +-------------------------------------+------------------------------+
+ ______________________________________________________________________
+
+ REF built-in functions
+ ----------------------
+
+ A set of ordered phases of functions are required to be called to
+ evaluate a refinement operation result.
+
+ Initialization phase functions
+ ++++++++++++++++++++++++++++++
+
+ These built-in functions create a properly initialized payload that
+ can be used for further configured for evaluating REF operations. This
+ is a required initial phase. A call to either
+ intel_sub_group_avc_fme_initialize or
+ intel_sub_group_avc_bme_initialize is required.
+
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_ref_payload_t |Return an initialized payload |
+ |intel_sub_group_avc_fme_initialize(|for a VME fractional motion |
+ | ushort2 src_coord, |estimation operation (FME). |
+ | ulong motion_vectors, | |
+ | uchar major_shapes, |The payload is initialized for |
+ | uchar minor_shapes, |progressive frame operations, |
+ | uchar directions, |and the cost configuration |
+ | uchar pixel_resolution, |values and the miscellaneous |
+ | uchar sad_adjustment ) |property values are all |
+ | |initialized to zero. The cost |
+ | |configuration and the |
+ | |miscellaneous property |
+ | |configuration phase functions |
+ | |must be used to override the |
+ | |initial configurations in the |
+ | |payload. |
+ | | |
+ | |The src_coord value represents |
+ | |the 2D offset of the top left |
+ | |corner of the source MB in pixel|
+ | |units in the source image. |
+ | | |
+ | |If the source image is an |
+ | |interlaced scan image, then the |
+ | |bottom field lines are |
+ | |considered as logically |
+ | |overlapping with the top field |
+ | |lines (i.e. the top field MBs |
+ | |are considered as logically |
+ | |overlapping with the bottom MBs)|
+ | |for the purposes for specifying |
+ | |the src_coord value. |
+ | | |
+ | |The parameter motion_vectors |
+ | |contains the BMVs returned by an|
+ | |IME in the same format as |
+ | |returned by |
+ | |intel_sub_group_avc_mce_get_ |
+ | |motion_vectors(..). The MVs are |
+ | |in QPEL units. The X and Y |
+ | |coordinates of each MV must be |
+ | |in the range [-2048.00, |
+ | |2047.75), otherwise the results |
+ | |are undefined. |
+ | | |
+ | |(If this argument value is |
+ | |manually composed, all sub-block|
+ | |MVs must be replicated per its |
+ | |format for each partition. For |
+ | |example for 16x16 partition, all|
+ | |sub-block MVs must be replicated|
+ | |to the same MV, and for 8x8 |
+ | |partition, each 8x8 must have |
+ | |its respective sub-block MVs |
+ | |replicated.) |
+ | | |
+ | |Legal values and format for |
+ | |major_shapes are as per the |
+ | |return value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_major_shape(..). |
+ | | |
+ | |Legal values and format for |
+ | |minor_shapes are as per the |
+ | |return value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_minor_shapes(..). |
+ | | |
+ | |Legal values and format for |
+ | |directions are as per the return|
+ | |value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_directions(..). |
+ | | |
+ | |Legal values for |
+ | |pixel_resolution is either |
+ | |CLK_AVC_ME_SUBPIXEL_MODE_HPEL_ |
+ | |INTEL or |
+ | |CLK_AVC_ME_SUBPIXEL_MODE_QPEL_ |
+ | |INTEL. |
+ | | |
+ | |The legal values for |
+ | |sad_adjustment is specified by |
+ | |its respective enumeration |
+ | |values. |
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_ref_payload_t |Return an initialized payload |
+ |intel_sub_group_avc_bme_initialize(|for a VME bidirectional motion |
+ | ushort2 src_coord, |estimation (BME) operation. |
+ | ulong motion_vectors, | |
+ | uchar major_shapes, |The payload is initialized for |
+ | uchar minor_shapes, |progressive frame operations, |
+ | uchar directions, |and the cost configuration |
+ | uchar pixel_resolution, |values and the miscellaneous |
+ | uchar bidirectional_weight, |property values are all |
+ | uchar sad_adjustment ) |initialized to zero. The cost |
+ | |configuration and the |
+ | |miscellaneous property |
+ | |configuration phase functions |
+ | |must be used to override the |
+ | |initial configurations in the |
+ | |payload. |
+ | | |
+ | |If the specified |
+ | |pixel_resolution is a sub-pixel |
+ | |resolution then an implicit FME |
+ | |operation is performed with the |
+ | |BME operation. |
+ | | |
+ | |The src_coord value represents |
+ | |the 2D offset of the top left |
+ | |corner of the source MB in pixel|
+ | |units in the source image. |
+ | | |
+ | |If the source image is an |
+ | |interlaced scan image, then the |
+ | |bottom field lines are |
+ | |considered as logically |
+ | |overlapping with the top field |
+ | |lines (i.e. the top field MBs |
+ | |are considered as logically |
+ | |overlapping with the bottom MBs)|
+ | |for the purposes for specifying |
+ | |the src_coord value. |
+ | | |
+ | |The parameter motion_vectors |
+ | |contains the BMVs returned by an|
+ | |IME in the same format as |
+ | |returned by |
+ | |intel_sub_group_avc_mce_get_ |
+ | |motion_vectors(..). |
+ | | |
+ | |(If this argument value is |
+ | |manually composed, all sub-block|
+ | |MVs must be replicated per its |
+ | |format for each partition. For |
+ | |example for 16x16 partition, all|
+ | |sub-block MVs must be replicated|
+ | |to the same MV, and for 8x8 |
+ | |partition, each 8x8 must have |
+ | |its respective sub-block MVs |
+ | |replicated.) |
+ | | |
+ | |Legal values and format for |
+ | |major_shapes is as per the |
+ | |return value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_major_shape(..). |
+ | | |
+ | |Legal values and format for |
+ | |minor_shapes is as per the |
+ | |return value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_minor_shapes(..). |
+ | | |
+ | |Legal values and format for |
+ | |directions are as per the return|
+ | |value of |
+ | |intel_sub_group_avc_mce_get_ |
+ | |inter_directions(..). |
+ | | |
+ | |Legal values for |
+ | |pixel_resolution is either |
+ | |CLK_AVC_ME_SUBPIXEL_MODE_ |
+ | |INTEGER_INTEL, |
+ | |CLK_AVC_ME_SUBPIXEL_MODE_ |
+ | |HPEL_INTEL or |
+ | |CLK_AVC_ME_SUBPIXEL_MODE_ |
+ | |QPEL_INTEL. |
+ | | |
+ | |Legal values for |
+ | |bidirectional_weight and sad |
+ | |adjustment is as per their |
+ | |respective enumeration values. |
+ | | |
+ | |The legal values for |
+ | |sad_adjustment is specified by |
+ | |its respective enumeration |
+ | |values. |
+ +-----------------------------------+--------------------------------+
+
+ Payload type conversion functions
+ +++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ initialization phase to convert REF payload to MCE payloads and
+ vice-versa.
+
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_mce_payload_t |Convert the REF payload to a |
+ |intel_sub_group_avc_ref_ |generic ME payload. |
+ |convert_to_mce_payload( | |
+ | intel_sub_group_avc_ref_payload_t | |
+ | payload ) | |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ref_payload_t |Convert the generic ME payload|
+ |intel_sub_group_avc_mce_ |to an FBR payload. |
+ |convert_to_ref_payload( | |
+ | intel_sub_group_avc_mce_payload_t | |
+ | payload ) | |
+ +-------------------------------------+------------------------------+
+
+ Multi-reference cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ initialization phase to enable multi-reference image costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_ |
+ |inter_base_multi_reference_penalty( |set_inter_base_multi_reference_|
+ | uchar reference_base_penalty, |penalty(..) the payload |
+ | intel_sub_group_avc_ref_payload_t|conversions with to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+
+ Inter shape and direction cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ initialization phase to enable shape costing.
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |inter_shape_penalty( |inter_shape_penalty(..) with |
+ | ulong packed_shape_cost, |the payload conversions |
+ | intel_sub_group_avc_ref_payload_t |to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |inter_direction_penalty( |inter_direction_penalty(..) |
+ | uchar direction_cost, |with the payload conversions |
+ | intel_sub_group_avc_ref_payload_t |to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +-------------------------------------+------------------------------+
+
+
+ Inter motion vector cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ initialization phase to enable motion vector costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |motion_vector_cost_function( |motion_vector_cost_function |
+ | ulong packed_cost_center_delta, |with the payload conversions |
+ | uint2 packed_cost_table, |to/from MCE types. See MCE |
+ | uchar cost_precision, |version for description. |
+ | intel_sub_group_avc_ref_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+
+ Miscellaneous property configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ initialization phase to enable miscellaneous properties setting in the
+ payload.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |source_interlaced_field_polarity( |source_interlaced_field_ |
+ | uchar src_field_polarity |polarity(..) with the result |
+ | intel_sub_group_avc_ref_payload_t|conversions to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |single_reference_interlaced_ |single_reference_ |
+ |field_polarity( |interlaced_field_polarity(..) |
+ | uchar ref_field_polarity, |with the result conversions |
+ | intel_sub_group_avc_ref_payload_t|to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |dual_reference_interlaced_image_ |dual_reference_interlaced_ |
+ |field_polarities( |field_polarities(..) with the |
+ | uchar fwd_ref_field_polarity, |result conversions to/from MCE |
+ | uchar bwd_ref_field_polarity, |types. See MCE version for |
+ | intel_sub_group_avc_ref_payload_t|description. |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |Update the input payload to |
+ |intel_sub_group_avc_ref_set_ |disable a mix of bidirectional |
+ |bidirectional_mix_disable( |and unidirectional MVs in the |
+ | intel_sub_group_avc_ref_payload_t|result. |
+ | payload ) | |
+ | |Default is to enable it. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Update the input payload to do |
+ |intel_sub_group_avc_ref_set_ |enable bilinear filter |
+ |bilinear_filter_enable( |interpolation instead of 4-tap |
+ | intel_sub_group_avc_ref_payload_t|filter interpolation. Default |
+ | payload ) |is 4-tap filter interpolation. |
+ | | |
+ | |This should not be called if |
+ | |the payload was initialized |
+ | |with integer pixel resolution. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
+ |ac_only_haar( |ac_only_haar(..) with the |
+ | intel_sub_group_avc_ref_payload_t|payload conversions to/from MCE|
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+
+ Evaluation phase functions
+ ++++++++++++++++++++++++++
+
+ These built-in functions perform the evaluation of the REF operation
+ configured in the payload with a VME media sampler and return the
+ results.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_result_t |Evaluate the basic REF |
+ |intel_sub_group_avc_ref_ |operation with single reference|
+ |evaluate_with_single_reference( |and return its results. |
+ | read_only image2d_t src_image, | |
+ | read_only image2d_t ref_image, |The parameter ref_image must be|
+ | sampler_t vme_media_sampler, |a valid forward image kernel |
+ | intel_sub_group_avc_ref_payload_t|parameter per the ordering |
+ | payload ) |conventions for the kernel |
+ | |parameter list. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_result_t |Evaluate the basic REF |
+ |intel_sub_group_avc_ref_ |operation with dual reference |
+ |evaluate_with_dual_reference( |and return its results. |
+ | read_only image2d_t src_image, | |
+ | read_only image2d_t fwd_ref_image,|The parameter |
+ | image2d_t bwd_ref_image, |fwd_ref_image[bwd_ref_image] |
+ | sampler_t vme_media_sampler, |must be a valid |
+ | intel_sub_group_avc_ref_payload_t |forward[backward] image kernel |
+ | payload ) |parameter per the ordering |
+ | |conventions for the kernel |
+ | |parameter list. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_result_t |Evaluate the basic REF |
+ |intel_sub_group_avc_ref_ |operation with multi references|
+ |evaluate_with_multi_reference( |and return its results. |
+ | read_only image2d_t src_image, | |
+ | uint packed_reference_ids, |A unique pair of reference |
+ | sampler_t vme_media_sampler, |identifiers (indicating unique |
+ | intel_sub_group_avc_ref_payload_t|forward/backward reference |
+ | payload ) |images) may be specified for |
+ | |each of the allowed major |
+ | |partitions using |
+ | |packed_dual_ref_ids. |
+ | | |
+ | |The value of |
+ | |packed_reference_ids is a |
+ | |integer with the following bits|
+ | |specifying the values for the |
+ | |pair of reference images for |
+ | |each major partition. |
+ | | |
+ | |3:0 => Fwd reference block 0 |
+ | |7:4 => Bwd reference block 0 |
+ | |11:8 => Fwd reference block 1 |
+ | |15:12 => Bwd reference block 1 |
+ | |19:16 => Fwd reference block 2 |
+ | |23:20 => Bwd reference block 2 |
+ | |27:24 => Fwd reference block 3 |
+ | |31:28 => Bwd reference block 3 |
+ | | |
+ | |A forward[backward] reference |
+ | |idenitifer value of 'n' |
+ | |indicates the forward[backward]|
+ | |image from the 'n'th pair of |
+ | |forward/backward reference |
+ | |images, with the value of 'n' |
+ | |ranging from 0 to 15. |
+ | | |
+ | |If the REF operation is |
+ | |configured with only forward |
+ | |reference images then, the |
+ | |values of the backward |
+ | |reference identifiers are not |
+ | |used. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference identifier |
+ | |pairs must be replicated. For |
+ | |example, for a 16x16 block, all|
+ | |four pair of reference |
+ | |identifiers must be replicated |
+ | |to the value of the first pair |
+ | |for block 0. |
+ | | |
+ | |The value for the |
+ | |packed_reference_ids argument |
+ | |is obtained by calling |
+ | |intel_sub_group_avc_ime_get_ |
+ | |inter_reference_ids(..) for |
+ | |the preceding IME operation's |
+ | |result. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_ref_result_t |Evaluate the basic REF |
+ |intel_sub_group_avc_ref_ |operation with multi references|
+ |evaluate_with_multi_reference( |and return its results. This |
+ | read_only image2d_t src_image, |is used for interlaced source |
+ | uint packed_reference_ids, |and reference images. |
+ | uchar packed_reference_ | |
+ | field_polarities, |A unique pair of reference |
+ | sampler_t vme_media_sampler, |identifiers (indicating unique |
+ | intel_sub_group_avc_ref_payload_t|forward/backward reference |
+ | payload ) |images) may be specified for |
+ | |each of the allowed major |
+ | |partitions using |
+ | |packed_reference_ids. |
+ | | |
+ | |The value of |
+ | |packed_reference_ids is an |
+ | |integer with the following bits|
+ | |specifying the values for the |
+ | |pair of reference images for |
+ | |each major partition. |
+ | | |
+ | |3:0 => Fwd reference block 0 |
+ | |7:4 => Bwd reference block 0 |
+ | |11:8 => Fwd reference block 1 |
+ | |15:12 => Bwd reference block 1 |
+ | |19:16 => Fwd reference block 2 |
+ | |23:20 => Bwd reference block 2 |
+ | |27:24 => Fwd reference block 3 |
+ | |31:28 => Bwd reference block 3 |
+ | | |
+ | |A forward[backward] reference |
+ | |idenitifer value of 'n' |
+ | |indicates the forward[backward]|
+ | |image from the 'n'th pair of |
+ | |forward/backward reference |
+ | |images, with the value of 'n' |
+ | |ranging from 0 to 15. |
+ | | |
+ | |If the REF operation is |
+ | |configured with only forward |
+ | |reference images then, the |
+ | |values of the backward |
+ | |reference identifiers are not |
+ | |used. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference identifier |
+ | |pairs must be replicated. For |
+ | |example, for a 16x16 block, all|
+ | |four pair of reference |
+ | |identifiers must be replicated |
+ | |to the value of the first pair |
+ | |for block 0. |
+ | | |
+ | |The value for the |
+ | |packed_reference_ids argument |
+ | |is obtained by calling |
+ | |intel_sub_group_avc_ime_get_ |
+ | |inter_reference_ids(..) for |
+ | |the preceding IME operation's |
+ | |result. |
+ | | |
+ | |Reference field polarities for |
+ | |forward and backward reference |
+ | |images are specified for each |
+ | |of the allowed major partitions|
+ | |using packed_reference_field_ |
+ | |polarities. |
+ | | |
+ | |The value of |
+ | |packed_reference_field_ |
+ | |polarities is an integer with |
+ | |the following bits specifying |
+ | |the reference field polarities |
+ | |for the major partitions. |
+ | | |
+ | |0 : Fwd reference block 0 |
+ | |1 : Fwd reference block 1 |
+ | |2 : Fwd reference block 2 |
+ | |3 : Fwd reference block 3 |
+ | |4 : Bwd reference block 0 |
+ | |5 : Bwd reference block 1 |
+ | |6 : Bwd reference block 2 |
+ | |7 : Bwd reference block 3 |
+ | | |
+ | |If the dual-reference |
+ | |evaluation functions are not |
+ | |used, then the values of the |
+ | |backward reference field |
+ | |polarities are not used. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference field |
+ | |polarities are replicated. For |
+ | |example, for a 16x16 block all |
+ | |four pairs of reference field |
+ | |polarities are replicated to |
+ | |the value of the first pair for|
+ | |block 0. |
+ +------------------------------------+-------------------------------+
+
+ Result type conversion functions
+ +++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ evaluation phase to convert REF results to MCE results and vice-versa.
+
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_mce_result_t |Convert the REF result into a |
+ |intel_sub_group_avc_ref_ |MCE result. |
+ |convert_to_mce_result( | |
+ | intel_sub_group_avc_ref_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_ref_result_t |Convert the MCE result into an |
+ |intel_sub_group_avc_mce_ |REF result. |
+ |convert_to_ref_result( | |
+ | intel_sub_group_avc_ref_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+
+ Result processing phase functions
+ +++++++++++++++++++++++++++++++++
+
+ These built-in functions are called following the evaluation phase to
+ extract the various result components from an REF evaluation result.
+
+ +-------------------------------------+------------------------------+
+ |ulong intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |motion_vectors( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |motion_vectors(..) with the |
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_distortions( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |inter_distortions(..) with the|
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |ushort intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |best_inter_distortion( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |best_inter_distortion(..)with |
+ | result ) |the result conversions to/from|
+ | |MCE types. See MCE version for|
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_major_shape( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |inter_major_shape(..) with the|
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_minor_shapes( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |inter_minor_shapes(..) with |
+ | result ) |the result conversions to/from|
+ | |MCE types. See MCE version for|
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_directions( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |inter_directions(..) with the|
+ | result ) |result conversions to/from MCE|
+ | |types. See MCE version for |
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_motion_vector_count( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |inter_motion_vector_count(..) |
+ | result ) |with the result conversions |
+ | |to/from MCE types. See MCE |
+ | |version for description. |
+ +-------------------------------------+------------------------------+
+ |uint intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_reference_ids( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_ref_result_t |inter_reference_ids(..) with |
+ | result ) |the result conversions to/from|
+ | |MCE types. See MCE version for|
+ | |description. |
+ +-------------------------------------+------------------------------+
+ |uchar intel_sub_group_avc_ref_get_ |This is a wrapper for |
+ |inter_reference_interlaced_ |intel_sub_group_avc_mce_get_ |
+ |field_polarities( |inter_reference_interlaced_ |
+ | uint packed_reference_ids, |field_polarities(..) with the|
+ | uint packed_reference_parameter_ |result conversions to/from MCE|
+ | field_polarities, |types. See MCE version for |
+ | intel_sub_group_avc_ref_result_t |description. |
+ | result ) | |
+ +-------------------------------------+------------------------------+
+
+ ______________________________________________________________________
+
+
+ SIC built-in functions
+ ----------------------
+
+ A set of ordered phases of functions are required to be called to
+ evaluate a skip check or intra estimation operation result.
+
+ Initialization phase functions
+ ++++++++++++++++++++++++++++++
+
+ These built-in functions create a properly initialized payload that
+ can be used for further configured for evaluating SIC operations. This
+ is a required initial phase.
+
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Return an initialized payload |
+ |intel_sub_group_avc_sic_initialize(|for a VME SIC operation. |
+ | ushort2 src_coord ) | |
+ | | |
+ | |If the source image is an |
+ | |interlaced scan image, then the |
+ | |bottom field lines are |
+ | |considered as logically |
+ | |overlapping with the top field |
+ | |lines (i.e. the top field MBs |
+ | |are considered as logically |
+ | |overlapping with the bottom MBs)|
+ | |for the purposes for specifying |
+ | |the src_coord value. |
+ | | |
+ | |If the SIC operation is being |
+ | |configured for chroma based |
+ | |intra estimation, then the x and|
+ | |y coordinates of src_coord must |
+ | |be multiples of 2. |
+ +-----------------------------------+--------------------------------+
+
+ Configuration phase functions
+ +++++++++++++++++++++++++++++
+
+ These built-in functions allow for configuration of a skip check
+ and/or intra estimation operation. This is a required phase following
+ the initialization phase to configure the skip check or intra
+ estimation operation. A call to intel_sub_group_avc_sic_configure_
+ skc(..) or intel_sub_group_avc_sic_configure_ipe(..) is required. Both
+ intel_sub_group_avc_sic_configure_skc(..) and intel_sub_group_avc_
+ sic_configure_ipe(..) may be called to initialize the payload to
+ perform both skip checks and intra estimation as part for the same SIC
+ evaluation, but if called together intel_sub_group_avc_sic_configure_
+ skc(..) must be called first in the call sequence.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Configure the SIC payload for |
+ |intel_sub_group_avc_sic_ |(uni or bi-directional) skip |
+ |configure_skc( |checks. |
+ | uint skip_block_partition_type, | |
+ | uint skip_motion_vector_mask, |The legal values for |
+ | ulong motion_vectors, |skip_block_partition_type must |
+ | char bidirectional_weight, |be one of the specified |
+ | uchar skip_sad_adjustment , |partition mask enumeration |
+ | intel_sub_group_avc_sic_payload_t|values. |
+ | payload ) | |
+ | |Legal values for |
+ | |skip_motion_vector_mask can be |
+ | |composed using the '|' operator|
+ | |from the enumeration values |
+ | |defined for it; both |
+ | |unidirectional and |
+ | |bidirectional skip vectors can |
+ | |be specified uniquely for each |
+ | |major partition (16x16 or 8x8) |
+ | |by an appropriate selection of |
+ | |the skip motion vector mask |
+ | |enumeration values. If the |
+ | |16x16 skip_block_partition_type|
+ | |is specified, then only the |
+ | |16x16 enumeration values may be|
+ | |used, else only the 8x8 |
+ | |enumeration values may be used.|
+ | | |
+ | |For convenience, one of the |
+ | |following two mechanisms can be|
+ | |used to obtain the value for |
+ | |the skip_motion_vector_mask |
+ | |argument. |
+ | | |
+ | |If the shape and directions are|
+ | |all compile-time constants, |
+ | |then either one of the two |
+ | |following macros may be used to|
+ | |set this argument's value for |
+ | |16x16 or 8x8 major partitions |
+ | |respectively: |
+ | |- CLK_AVC_ME_SKIP_BLOCK_16x16_ |
+ | | INTEL(DIRECTION) |
+ | |- CLK_AVC_ME_SKIP_BLOCK_8x8_ |
+ | | INTEL(DIRECTION0, DIRECTION1,|
+ | | DIRECTION2, DIRECTION3) |
+ | | |
+ | |The DIRECTION{0-3} argument |
+ | |values must be one of the inter|
+ | |macro-block major direction |
+ | |values. The directions are |
+ | |specified for each major |
+ | |partition in traditional |
+ | |Z-order. |
+ | | |
+ | |If the shape or directions are |
+ | |not compile-time constants, |
+ | |then the helper function |
+ | |intel_sub_group_avc_sic_get_ |
+ | |motion_vector_mask(..) may be |
+ | |used to set this argument's |
+ | |value. |
+ | | |
+ | |The parameter motion_vectors |
+ | |specifies the input packed |
+ | |BMVs. Either the forward or |
+ | |backward is ignored if the |
+ | |setting in |
+ | |skip_motion_vector_mask is |
+ | |backward or forward |
+ | |respectively. If the setting is|
+ | |bidirectional, then both the |
+ | |forward and backward motion |
+ | |vectors will be used. If the |
+ | |skip_block_partition_type is |
+ | |16x16, work-item 0 in the |
+ | |subgroup provides the BMV, and |
+ | |if the |
+ | |skip_block_partition_type is |
+ | |8x8, work-items 0 to 4 in the |
+ | |subgroup provide the four |
+ | |BMVs. The MVs are in QPEL |
+ | |units. The X and Y coordinates |
+ | |of each MV must be in the range|
+ | |[-2048.00, 2047.75] and |
+ | |[-512.00 to 511.75] |
+ | |respectively, otherwise the |
+ | |results are undefined. |
+ | | |
+ | |Legal values for |
+ | |bidirectional_weight and |
+ | |skip_sad_adjustment are as per |
+ | |their respective enumeration |
+ | |values. If the setting is |
+ | |unidirectional, then the |
+ | |bidirectional_weight parameter |
+ | |value is ignored and can be set|
+ | |to the value '0 '. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Return an initialized payload |
+ |intel_sub_group_avc_sic_ |for a VME luma intra prediction|
+ |configure_ipe( |estimation (IPE) operation. |
+ | uchar luma_intra_partition_mask, | |
+ | uchar |The luma_intra_partition_mask |
+ | intra_neighbour_availabilty, |can be composed from their |
+ | uchar left_edge_luma_pixels, |respective enumeration values |
+ | uchar |using the '&' operator. |
+ | upper_left_corner_luma_pixel, | |
+ | uchar upper_edge_luma_pixels, |Legal values for |
+ | uchar |intra_neighbour_availabilty and|
+ | upper_right_edge_luma_pixels, |intra_sad_adjustment are as per|
+ | uchar intra_sad_adjustment , |their respective enumeration |
+ | intel_sub_group_avc_sic_payload_t|values, based on the intra |
+ | payload ) |neighboring macroblock's to |
+ | |consider in intra mode |
+ | |estimation. |
+ | | |
+ | |The other parameters specify |
+ | |the neighbor edge pixels for |
+ | |the left, top-left corner, top |
+ | |and top right edges with each |
+ | |work-item providing each pixel |
+ | |value. These pixels values are |
+ | |used to perform the intra mode |
+ | |estimation. |
+ | | |
+ | |For the left and top edge |
+ | |pixels, successive subgroup |
+ | |work-items 0 to 15 provide the |
+ | |successive edge pixels. For the|
+ | |top-right edge, successive |
+ | |work-items 0 to 7 provide the |
+ | |successive edge pixels; the |
+ | |pixel values in work-items 8 to|
+ | |15 are ignored. The top-left |
+ | |corner pixel is a uniform pixel|
+ | |value with each work-item |
+ | |providing the same corner |
+ | |pixel. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Return an initialized payload |
+ |intel_sub_group_avc_sic_ |for a VME luma and chroma intra|
+ |configure_ipe( |prediction estimation (IPE) |
+ | uchar luma_intra_partition_mask, |operation. |
+ | uchar intra_neighbour_ | |
+ | availabilty, |The luma_intra_partition_mask |
+ | uchar left_edge_luma_pixels, |can be composed from their |
+ | uchar upper_left_corner_luma_ |respective enumeration values |
+ | pixel, |using the '&' operator. Chroma |
+ | uchar upper_edge_luma_ |intra prediction estimation is |
+ | pixels, |performed on the 8x8 CbCr |
+ | uchar upper_right_edge_luma_ |macroblock. |
+ | pixels, | |
+ | ushort left_edge_chroma_pixels, |Legal values for |
+ | ushort upper_left_corner_chroma_ |intra_neighbour_availabilty and|
+ | pixel, |intra_sad_adjustment are as per|
+ | ushort upper_edge_chroma_pixels, |their respective enumeration |
+ | uchar intra_sad_adjustment, |values, based on the intra |
+ | intel_sub_group_avc_sic_payload_t|neighboring macroblock's to |
+ | payload ) |consider in intra mode |
+ | |estimation. |
+ | | |
+ | |The other parameters specify |
+ | |the neighbor luma and chroma |
+ | |edge pixels for the left, |
+ | |top-left corner, top and |
+ | |top-right (luma only) edges |
+ | |with each work-item providing |
+ | |each pixel value. These pixels |
+ | |values are used to perform the |
+ | |intra mode estimation. |
+ | | |
+ | |For the left and top luma edge |
+ | |pixels successive, subgroup |
+ | |work-items 0 to 15 provide the |
+ | |successive edge pixels. For the|
+ | |top-right luma edge, successive|
+ | |work-items 0 to 7 provide the |
+ | |successive edge pixels; the |
+ | |pixel values in work-items 8 to|
+ | |15 are ignored. The top-left |
+ | |corner pixel is a uniform pixel|
+ | |value with each work-item |
+ | |providing the same corner |
+ | |pixel. |
+ | | |
+ | |For the left and top chroma |
+ | |CbCr pixels, successive |
+ | |subgroup work-items 0 to 7 |
+ | |provide the successive CbCr |
+ | |pixels; the pixel values in |
+ | |work-items 8 to 15 are |
+ | |ignored. The top-left corner |
+ | |pixel is a uniform CbCr pixel |
+ | |value with each work-item |
+ | |providing the same corner CbCr |
+ | |pixel. |
+ +------------------------------------+-------------------------------+
+ |uint intel_sub_group_avc_sic_get_ |Compose the value for the input|
+ |motion_vector_mask( |argument |
+ | uint skip_block_partition_type, |skip_motion_vector_mask for the|
+ | uchar direction ) |input skip_block_partition_type|
+ | |and direction. |
+ | | |
+ | |The legal values for |
+ | |skip_block_partition_type must |
+ | |be one of the specified |
+ | |partition mask enumeration |
+ | |values (16x16 or 8x8). |
+ | | |
+ | |The direction parameter is a |
+ | |bit field with the directions |
+ | |for the 4 8x8 sub-partitions in|
+ | |traditional Z order, or for |
+ | |only the 16x16 partition. Two |
+ | |bits are reserved for each of |
+ | |the four sub-partitions in |
+ | |row-major order. The 2-bit |
+ | |values are as per the inter |
+ | |macro-block major direction |
+ | |values. If the |
+ | |skip_block_partition_type |
+ | |indicates a 16x16 shape, then |
+ | |only the 1st 2 bits contains |
+ | |the direction, and other bits |
+ | |must be zeroed. |
+ +------------------------------------+-------------------------------+
+
+ Payload type conversion functions
+ +++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to convert IME payload to MCE payloads and vice-
+ versa.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_mce_payload_t |Convert the SIC payload to a |
+ |intel_sub_group_avc_sic_convert_ |generic MCE payload. |
+ |to_mce_payload( | |
+ | intel_sub_group_avc_sic_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Convert the generic MCE payload|
+ |intel_sub_group_avc_ |to a SIC payload. |
+ |mce_convert_to_sic_payload( | |
+ | intel_sub_group_avc_mce_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+
+ Multi-reference cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to enable multi-reference image costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_ |
+ |inter_base_multi_reference_penalty( |set_inter_base_multi_reference_|
+ | uchar reference_base_penalty, |penalty(..) the payload |
+ | intel_sub_group_avc_sic_payload_t|conversions with to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+
+ Inter shape and direction cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to enable shape costing.
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |inter_shape_penalty( |inter_shape_penalty(..) with |
+ | ulong packed_shape_cost, |the payload conversions |
+ | intel_sub_group_avc_sic_payload_t |to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +-------------------------------------+------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |inter_direction_penalty( |inter_direction_penalty(..) |
+ | uchar direction_cost, |with the payload conversions |
+ | intel_sub_group_avc_sic_payload_t|to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +-------------------------------------+------------------------------+
+
+ Inter motion vector cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to enable motion vector costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |motion_vector_cost_function( |motion_vector_cost_function(..)|
+ | ulong packed_cost_center_delta, |with the payload conversions |
+ | uint2 packed_cost_table, |to/from MCE types. See MCE |
+ | uchar cost_precision, |version for description. |
+ | intel_sub_group_avc_sic_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+
+ Intra shape cost configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to enable intra shape costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Set the luma shape cost penalty|
+ |intel_sub_group_avc_sic_set_ |for intra motion estimation. |
+ |intra_luma_shape_penalty( | |
+ | uint packed_shape_cost, |The value of packed_shape_cost |
+ | intel_sub_group_avc_sic_payload_t|is an integer value with the |
+ | payload ) |following bits specifying the |
+ | |shape cost in U4U4 format as |
+ | |follows: |
+ | | |
+ | |7:0 : must be zero |
+ | |15:8 : 16x16 cost |
+ | |23:16 : 8x8 cost |
+ | |31:24 : 4x4 cost |
+ | | |
+ | |The U4U4 decoded integer values|
+ | |must bit fit within 12 bits. |
+ +------------------------------------+-------------------------------+
+
+ Intra mode cost configuration phase functions
+ +++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to enable intra mode costing.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |The value of luma_mode_penalty |
+ |intel_sub_group_avc_sic_set_ |specifies the penalty to be |
+ |intra_luma_mode_cost_function( |applied to the estimated luma |
+ | uchar luma_mode_penalty, |mode if it differs from its |
+ | uint luma_packed_neighbor_modes, |predicted luma mode (based on |
+ | uint luma_packed_non_dc_penalty, |its neighbor intra modes). It |
+ | intel_sub_group_avc_sic_payload_t|is specified in U4U4 format and|
+ | payload ) |must bit in 10 bits. |
+ | | |
+ | |The value of |
+ | |luma_packed_neighbor_modes |
+ | |specifies the values of the |
+ | |already computed top and left |
+ | |neighbor modes for the |
+ | |bordering 4x4 blocks, with the |
+ | |4x4 blocks numbered in the |
+ | |traditional Z-order as shown |
+ | |below. |
+ | | |
+ | | 0 1 4 5 |
+ | | 2 3 6 7 |
+ | | 8 9 C D |
+ | | A B E F |
+ | | |
+ | |the following bits specify the |
+ | |neighbor modes. |
+ | | |
+ | |3:0 => Left neighbor block 5 |
+ | |7:4 => Left neighbor block 7 |
+ | |11:8 => Left neighbor block D |
+ | |15:12 => Left neighbor block F |
+ | |19:16 => Top neighbor block A |
+ | |23:20 => Top neighbor block B |
+ | |27:24 => Top neighbor block E |
+ | |31:28 => Top neighbor block F |
+ | | |
+ | |The value of |
+ | |luma_packed_non_dc_penalty |
+ | |specifies the penalty to be |
+ | |applied for any computed non-DC|
+ | |luma mode for each of the |
+ | |16x16, 8x8, and 4x4 shapes, |
+ | |with the following bits |
+ | |specifying the penalties. |
+ | | |
+ | |7:0 => Intra16x16 non-dc |
+ | | penalty |
+ | |15:8 => Intra8x8 non-dc |
+ | | penalty |
+ | |23:16 => Intra4x4 non-dc |
+ | | penalty |
+ | |31:24 => Must be zero |
+ | | |
+ | |The component values are |
+ | |specified in 8-bit integer |
+ | |format. |
+ | | |
+ | |The intra distortion for each |
+ | |intra luma block can be |
+ | |described by the following |
+ | |formulas: |
+ | | |
+ | |Intra_4x4 SAD (or Haar) + |
+ | |luma_shape_penalty_4x4 + |
+ | |luma_non_dc_4x4_penalty (if not|
+ | |DC) + |
+ | |luma_mode_penalty (if computed |
+ | |mode is not the same predicted |
+ | |mode from neighbor modes) |
+ | | |
+ | |Intra_8x8 SAD (or Haar) + |
+ | |luma_shape_penalty_8x8 + |
+ | |luma_non_dc_8x8_penalty (if not|
+ | |DC) + |
+ | |luma_mode_penalty (if computed |
+ | |mode is not the same predicted |
+ | |mode from neighbor modes ) |
+ | | |
+ | |Intra_16x16 SAD (or Haar) + |
+ | |luma_shape_penalty_16x16 + |
+ | |luma_non_dc_4x4_penalty (if not|
+ | |DC) |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Set the intra chroma mode cost |
+ |intel_sub_group_avc_sic_set_ |function by specifying the |
+ |intra_chroma_mode_cost_function( |penalty to be applied to the |
+ | uchar chroma_mode_penalty, |computed chroma mode. |
+ | intel_sub_group_avc_sic_payload_t| |
+ | payload ) |The chroma_mode_base_penalty is|
+ | |the base penalty to be applied |
+ | |to the computed intra chroma |
+ | |modes. This penalty is in U4U4 |
+ | |format. |
+ | | |
+ | |The U4U4 decoded integer value |
+ | |must fit in 12 bits. |
+ | | |
+ | |The base penalty is scaled |
+ | |based on the computed mode as |
+ | |defined below. |
+ | | |
+ | | DC : 0x |
+ | | HORZ : 1x |
+ | | VERT : 1x |
+ | | PLANE: 2x |
+ | | |
+ | |The intra distortion for each |
+ | |intra 8x8 chroma block can be |
+ | |described by the following |
+ | |formulas: |
+ | | |
+ | |distortion = |
+ | |SAD (or Haar) + |
+ | |chroma_mode_penalty (scaled |
+ | |based on computed mode) |
+ +------------------------------------+-------------------------------+
+
+ Miscellaneous property configuration phase functions
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ configuration phase to enable miscellaneous properties setting in the
+ payload.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |source_interlaced_field_polarity( |source_interlaced_field_ |
+ | uchar src_field_polarity |polarity(..) with the result |
+ | intel_sub_group_avc_sic_payload_t|conversions to/from MCE |
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |single_reference_interlaced_ |single_reference_ |
+ |field_polarity( |interlaced_field_polarity(..) |
+ | uchar ref_field_polarity, |with the result conversions |
+ | intel_sub_group_avc_sic_payload_t|to/from MCE types. See MCE |
+ | payload ) |version for description. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |dual_reference_interlaced_image_ |dual_reference_interlaced_ |
+ |field_polarities( |field_polarities(..) with the |
+ | uchar fwd_ref_field_polarity, |result conversions to/from MCE |
+ | uchar bwd_ref_field_polarity, |types. See MCE version for |
+ | intel_sub_group_avc_sic_payload_t|description. |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Update the input payload to do |
+ |intel_sub_group_avc_sic_set_ |enable bilinear filter |
+ |bilinear_filter_enable( |interpolation instead of 4-tap |
+ | intel_sub_group_avc_sic_payload_t|filter interpolation. Default |
+ | payload ) |is 4-tap filter interpolation. |
+ | | |
+ | |This should not be called if |
+ | |the payload was initialized |
+ | |with integer pixel resolution. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |Enable skip check forward |
+ |intel_sub_group_avc_sic_set_ |transform with the specified |
+ |skc_forward_transform_enable( |SAD coefficients thresholds in |
+ | ulong packed_sad_coefficients, |the frequency domain to |
+ | intel_sub_group_avc_sic_payload_t|approximate the effects of |
+ | payload ) |forward quantization. |
+ | | |
+ | |The skip decision will be |
+ | |enhanced to include an accurate|
+ | |AVC forward transform for skip |
+ | |estimation. This feature is in |
+ | |addition to the previous SAD or|
+ | |HAAR skip estimation. The |
+ | |results of the forward |
+ | |transform are compared one |
+ | |coefficient at a time against a|
+ | |user-specified threshold, in |
+ | |the input argument |
+ | |packed_sad_coefficients, to |
+ | |emulate quantization's zeroing |
+ | |effect. The user is returned |
+ | |the count of coefficients that |
+ | |exceeded their threshold along |
+ | |with a sum of the amount |
+ | |exceeded, both grouped at the |
+ | |8x8 block level (i.e. for each |
+ | |8x8 block). |
+ | | |
+ | |The SAD coefficient threshold |
+ | |matrix for a 4x4 transform as |
+ | |shown in the table below is |
+ | |packed into a 64-bit |
+ | |integer. The low 16 bits |
+ | |contains the larger DC |
+ | |threshold. The coefficient |
+ | |thresholds for the remaining 6 |
+ | |AC thresholds in the order of |
+ | |increasing frequency are |
+ | |provided by the successive |
+ | |8-bit bit ranges. |
+ | | |
+ | |0 (DC) 1 (AC) 2 (AC) 3 (AC) |
+ | |1 (AC) 2 (AC) 3 (AC) 4 (AC) |
+ | |2 (AC) 3 (AC) 4 (AC) 5 (AC) |
+ | |3 (AC) 4 (AC) 5 (AC) 6 (AC) |
+ | | |
+ | |This is valid only for SKC |
+ | |operations. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |The raw skip SAD computed |
+ |intel_sub_group_avc_sic_set_ |during the evaluation phase |
+ |block_based_raw_skip_sad( |will be the maximal SAD of |
+ | uchar block_based_skip_type, |individual 4x4 (or 8x8) blocks,|
+ | intel_sub_group_avc_ime_payload_t|instead of the sum of the |
+ | payload ) |entire individual 4x4 block |
+ | |SADs of the MB. |
+ | | |
+ | |The legal values for |
+ | |block_based_skip_type must be |
+ | |one of the specified block |
+ | |based skip type enumeration |
+ | |values. |
+ | | |
+ | |It is valid to call this |
+ | |function only if the payload is|
+ | |configured for a skip check |
+ | |operation by a prior call to |
+ | |intel_sub_group_avc_sic_ |
+ | |configure_skc(..). |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_payload_t |This is a wrapper for |
+ |intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
+ |ac_only_haar( |ac_only_haar(..) with the |
+ | intel_sub_group_avc_sic_payload_t|payload conversions to/from MCE|
+ | payload ) |types. See MCE version for |
+ | |description. |
+ +------------------------------------+-------------------------------+
+
+ Evaluation phase functions
+ ++++++++++++++++++++++++++
+
+ These built-in functions perform the evaluation of the SIC operation
+ configured in the payload with a VME media sampler and return the
+ results.
+
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_result_t |Evaluate the SIC IPE operation |
+ |intel_sub_group_avc_sic_evaluate_ |and return its results. |
+ |ipe( | |
+ | read_only image2d_t src_image, | |
+ | sampler_t vme_media_sampler, | |
+ | intel_sub_group_avc_sic_payload_t| |
+ | payload ) | |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_result_t |Evaluate the SIC operation with|
+ |intel_sub_group_avc_sic_evaluate_ |single reference and return its|
+ |with_single_reference( |results. |
+ | read_only image2d_t src_image, | |
+ | read_only image2d_t ref_image, |The parameter ref_image must be|
+ | sampler_t vme_media_sampler, |a valid forward image kernel |
+ | intel_sub_group_avc_sic_payload_t|parameter per the ordering |
+ | payload ) |conventions for the kernel |
+ | |parameter list. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_result_t |Evaluate the SIC operation with|
+ |intel_sub_group_avc_sic_evaluate_ |dual and return its results. |
+ |with_dual_reference ( | |
+ | read_only image2d_t src_image, |The parameter |
+ | read_only image2d_t fwd_ref_image,|fwd_ref_image[bwd_ref_image] |
+ | read_only image2d_t bwd_ref_image,|must be a valid |
+ | sampler_t vme_media_sampler, |forward[backward] image kernel |
+ | intel_sub_group_avc_sic_payload_t |parameter per the ordering |
+ | payload ) |conventions for the kernel |
+ | |parameter list. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_result_t |Evaluate the SIC operation with|
+ |intel_sub_group_avc_sic_evaluate_ |multi references and return its|
+ |with_multi_reference( |results. |
+ | read_only image2d_t src_image, | |
+ | uint packed_reference_ids, |A pair of unique reference |
+ | sampler_t vme_media_sampler, |identifier (indicating unique |
+ | intel_sub_group_avc_sic_payload_t|forward/backward reference |
+ | payload ) |images) may be specified for |
+ | |each of the allowed major |
+ | |partitions (one 16x16 or four |
+ | |8x8) using packed_dual_ref_ids.|
+ | | |
+ | |The value of |
+ | |packed_reference_ids is a |
+ | |integer with the following bits|
+ | |specifying the values for the |
+ | |pair of reference images for |
+ | |each major partition. |
+ | | |
+ | |3:0 => Fwd reference block 0 |
+ | |7:4 => Bwd reference block 0 |
+ | |11:8 => Fwd reference block 1 |
+ | |15:12 => Bwd reference block 1 |
+ | |19:16 => Fwd reference block 2 |
+ | |23:20 => Bwd reference block 2 |
+ | |27:24 => Fwd reference block 3 |
+ | |31:28 => Bwd reference block 3 |
+ | | |
+ | |A forward[backward] reference |
+ | |idenitifer value of 'n' |
+ | |indicates the forward[backward]|
+ | |image from the 'n'th pair of |
+ | |forward/backward reference |
+ | |images, with the value of 'n' |
+ | |ranging from 0 to 15. |
+ | | |
+ | |If the REF operation is |
+ | |configured with only forward |
+ | |reference images then, the |
+ | |values of the backward |
+ | |reference identifiers are not |
+ | |used. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference identifier |
+ | |pairs must be replicated. For |
+ | |example, for a 16x16 block, all|
+ | |four pair of reference |
+ | |identifiers must be replicated |
+ | |to the value of the first pair |
+ | |for block 0. |
+ +------------------------------------+-------------------------------+
+ |intel_sub_group_avc_sic_result_t |Evaluate the SIC operation with|
+ |intel_sub_group_avc_sic_evaluate_ |multi references and return its|
+ |with_multi_reference( |results. This is used for |
+ | read_only image2d_t src_image, |interlaced source and reference|
+ | uint packed_reference_ids, |images. |
+ | uchar packed_reference_field_ | |
+ | polarities, |A pair of unique reference |
+ | sampler_t vme_media_sampler, |identifier (indicating unique |
+ | intel_sub_group_avc_sic_payload_t|forward/backward reference |
+ | payload ) |images) may be specified for |
+ | |each of the allowed major |
+ | |partitions (one 16x16 or four |
+ | |8x8) using packed_dual_ref_ids.|
+ | | |
+ | |The value of |
+ | |packed_reference_ids is a |
+ | |integer with the following bits|
+ | |specifying the values for the |
+ | |pair of reference images for |
+ | |each major partition. |
+ | | |
+ | |3:0 => Fwd reference block 0 |
+ | |7:4 => Bwd reference block 0 |
+ | |11:8 => Fwd reference block 1 |
+ | |15:12 => Bwd reference block 1 |
+ | |19:16 => Fwd reference block 2 |
+ | |23:20 => Bwd reference block 2 |
+ | |27:24 => Fwd reference block 3 |
+ | |31:28 => Bwd reference block 3 |
+ | | |
+ | |A forward[backward] reference |
+ | |idenitifer value of 'n' |
+ | |indicates the forward[backward]|
+ | |image from the ' nth' pair of |
+ | |forward/backward reference |
+ | |images, with the value of ' n' |
+ | |ranging from 0 to 15. |
+ | | |
+ | |If the REF operation is |
+ | |configured with only forward |
+ | |reference images then, the |
+ | |values of the backward |
+ | |reference identifiers are not |
+ | |used. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference identifier |
+ | |pairs must be replicated. For |
+ | |example, for a 16x16 block, all|
+ | |four pair of reference |
+ | |identifiers must be replicated |
+ | |to the value of the first pair |
+ | |for block 0. |
+ | | |
+ | |Reference field polarities for |
+ | |forward and backward reference |
+ | |images are specified for each |
+ | |of the allowed major partitions|
+ | |using packed_reference_field_ |
+ | |polarities. |
+ | | |
+ | |The value of packed_reference_ |
+ | |field_polarities is an integer |
+ | |with the following bits |
+ | |specifying the reference field |
+ | |polarities for the major |
+ | |partitions. |
+ | | |
+ | |0 : Fwd reference block 0 |
+ | |1 : Fwd reference block 1 |
+ | |2 : Fwd reference block 2 |
+ | |3 : Fwd reference block 3 |
+ | |4 : Bwd reference block 0 |
+ | |5 : Bwd reference block 1 |
+ | |6 : Bwd reference block 2 |
+ | |7 : Bwd reference block 3 |
+ | | |
+ | |If the dual-reference |
+ | |evaluation functions are not |
+ | |used, then the values of the |
+ | |backward reference field |
+ | |polarities are not used. |
+ | | |
+ | |The blocks are numbered using |
+ | |the traditional Z order. For |
+ | |larger block sizes, the |
+ | |sub-block reference field |
+ | |polarities are replicated. For |
+ | |example, for a 16x16 block all |
+ | |four pairs of reference field |
+ | |polarities are replicated to |
+ | |the value of the first pair for|
+ | |block 0. |
+ | | |
+ | |The value for the packed_ |
+ | |interlaced_image_reference_ |
+ | |field_polarities argument is |
+ | |obtained by calling |
+ | |intel_sub_group_avc_ime_get_ |
+ | |inter_reference_interlaced_ |
+ | |field_polarities(..) for the |
+ | |preceding IME operation's |
+ | |result. |
+ +------------------------------------+-------------------------------+
+
+ Result type conversion functions
+ +++++++++++++++++++++++++++++++++
+
+ These are optional built-in functions that may be called following the
+ evaluation phase to convert REF results to MCE results and vice-versa.
+
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_mce_result_t |Convert the SIC result into a |
+ |intel_sub_group_avc_sic_ |MCE result. |
+ |convert_to_mce_result( | |
+ | intel_sub_group_avc_sic_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+ |intel_sub_group_avc_sic_result_t |Convert the MCE result into an |
+ |intel_sub_group_avc_mce_ |SIC result. |
+ |convert_to_sic_result( | |
+ | intel_sub_group_avc_sic_result_t| |
+ | result ) | |
+ +-----------------------------------+--------------------------------+
+
+ Result processing phase functions
+ +++++++++++++++++++++++++++++++++
+
+ These built-in functions are called following the evaluation phase to
+ extract the various result components from an SIC evaluation result.
+
+ +-----------------------------------+--------------------------------+
+ |uchar intel_sub_group_avc_sic_get_ |Get the best intra shape from |
+ |ipe_luma_shape( |the SIC result. |
+ | intel_sub_group_avc_sic_result_t| |
+ | result) |The returned values are as per |
+ | |the intra-MB shapes enumeration |
+ | |values. |
+ +-----------------------------------+--------------------------------+
+ |ushort intel_sub_group_avc_sic_get_|Get the best intra luma |
+ |best_ipe_luma_distortion( |distortion from the SIC result |
+ | intel_sub_group_avc_sic_result_t|for the shape returned by |
+ | result) |intel_sub_group_avc_sic_get_ |
+ | |ipe_luma_shape(..). |
+ +-----------------------------------+--------------------------------+
+ |ushort intel_sub_group_avc_sic_get_|Get the best intra chroma |
+ |best_ipe_chroma_distortion( |distortion for the 8x8 shape |
+ | intel_sub_group_avc_sic_result_t|from the SIC result. |
+ | result) | |
+ +-----------------------------------+--------------------------------+
+ |ulong intel_sub_group_avc_sic_get_ |Get the packed intra luma modes |
+ |packed_ipe_luma_modes( |for all blocks from the SIC |
+ | intel_sub_group_avc_sic_result_t|result. There are four bits per |
+ | result) |luma mode for a block and legal |
+ | |values for luma modes are as per|
+ | |its defined enumeration |
+ | |values. The number of blocks is |
+ | |based on the result of |
+ | |intel_sub_group_avc_sic_get_ |
+ | |ipe_luma_shape(..). |
+ | | |
+ | |If the luma shape is: |
+ | |- 16x16, then one mode is |
+ | | returned in bits [0, 3] |
+ | |- 8x8, then four modes |
+ | | corresponding to the four |
+ | | partitions are returned by |
+ | | bits in the ranges [0, 3], |
+ | | [16,19], [32, 35], and |
+ | | [48, 51]; the order of the |
+ | | four partitions are in the |
+ | | traditional Z-order |
+ | |- 4x4, then 16 modes (4 bits per|
+ | | mode) are returned of all 16 |
+ | | partitions by all the bits; |
+ | | the order of the 16 partitions|
+ | | are in the traditional Z-order|
+ | | as shown below: |
+ | | |
+ | | 0 1 4 5 |
+ | | 2 3 6 7 |
+ | | 8 9 C D |
+ | | A B E F |
+ +-----------------------------------+--------------------------------+
+ |uchar intel_sub_group_avc_sic_get_ |Get the intra chroma mode for |
+ |ipe_chroma_mode( |the 8x8 block from the SIC |
+ | intel_sub_group_avc_sic_result_t|result. The legal values for |
+ | result) |chroma modes are as per its |
+ | |defined enumeration values. |
+ +-----------------------------------+--------------------------------+
+ |uint intel_sub_group_avc_sic_get_ |Get the packed count of luma |
+ |packed_skc_luma_count_threshold( |coefficient components that |
+ | intel_sub_group_avc_sic_result_t|exceeded their transform |
+ | result) |thresholds from the SIC result |
+ | |for each 8x8 partition in |
+ | |traditional Z-order. |
+ | | |
+ | |The format of the results is as |
+ | |follows: |
+ | | |
+ | |- count of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_0 is |
+ | | returned in bit [0, 7] |
+ | |- count of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_1 is |
+ | | returned in bit [8, 15] |
+ | |- count of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_2 is |
+ | | returned in bit [16, 23] |
+ | |- count of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_0 is |
+ | | returned in bit [24, 31] |
+ | | |
+ | |The results are only valid if |
+ | |the SIC operation was configured|
+ | |with frequency domain SAD |
+ | |transform coefficients using |
+ | |intel_sub_group_avc_sic_set_skc_|
+ | |forward_transform_enable(..). |
+ +-----------------------------------+--------------------------------+
+ |ulong intel_sub_group_avc_sic_get_ |Get the packed sum of luma |
+ |packed_skc_luma_sum_threshold( |coefficient components that |
+ | intel_sub_group_avc_sic_result_t|exceeded their transform |
+ | result) |thresholds from the SIC result |
+ | |for each 8x8 partition in |
+ | |traditional Z-order. |
+ | | |
+ | |The format of the results is as |
+ | |follows: |
+ | |- sum of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_0 is |
+ | | returned in bit [0, 15] |
+ | |- sum of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_1 is |
+ | | returned in bit [16,31] |
+ | |- sum of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_2 is |
+ | | returned in bit [32,47] |
+ | |- sum of coefficients that |
+ | | exceeded their respective |
+ | | threshold for block 8x8_0 is |
+ | | returned in bit [48, 63] |
+ | | |
+ | |The results are only valid if |
+ | |the SIC operation was configured|
+ | |with frequency domain SAD |
+ | |transform coefficients using |
+ | |intel_sub_group_avc_sic_set_skc_|
+ | |forward_transform_enable(..). |
+ +-----------------------------------+--------------------------------+
+ |ushort intel_sub_group_avc_sic_get_|Get the skip check raw SAD |
+ |inter_raw_sads( |(i.e. without any mode or shape |
+ | intel_sub_group_avc_sic_result_t|costs included) for the entire |
+ | result ) |MB if the input payload was note|
+ | |configured for block based skip |
+ | |checks, otherwise return the |
+ | |maximal SAD of individual 4x4 |
+ | |(or 8x8, if the block size for |
+ | |block based skip checking was |
+ | |configured as 8x8) blocks with |
+ | |the MB. |
+ +-----------------------------------+--------------------------------+
+ |ushort intel_sub_group_avc_sic_get_|This is a wrapper for |
+ |inter_distortions( |intel_sub_group_avc_mce_get_ |
+ | intel_sub_group_avc_sic_result_t|inter_distortions(..) with the |
+ | result ) |result conversions to/from MCE |
+ | |types. See MCE version for |
+ | |description. |
+ +-----------------------------------+--------------------------------+
+
+ ______________________________________________________________________
+
+
+ A Complete Example
+ ------------------
+
+ +--------------------------------------------------------------------+
+ |__kernel __attribute__((reqd_work_group_size(16,1,1))) void |
+ |block_motion_estimate_intel( |
+ | __read_only image2d_t src_img, |
+ | __read_only image2d_t ref_img, |
+ | __global short2* prediction_motion_vector_buffer, |
+ | __global short2* motion_vector_buffer, |
+ | __global ushort* residuals_buffer, |
+ | __global uchar2* shapes_buffer, |
+ | int iterations ) |
+ |{ |
+ | // Each 16x1 workgroup processes a column of MBs in a image. |
+ | // The number of workgroups is equal to the number of MB in |
+ | // a image row. All column MBs are processed in parallel without |
+ | // handling any MB dependencies. |
+ | int gid_0 = get_group_id(0); |
+ | int gid_1 = 0; |
+ | |
+ | // Initialize the inline VME sampler. |
+ | const sampler_t vme_sampler = CLK_AVC_ME_INITIALIZE_INTEL; |
+ | |
+ | // Process all columns MBs in a loop. |
+ | for( int i = 0; i < iterations ; i++, gid_1++ ) { |
+ | ushort2 srcCoord = 0; |
+ | short2 refCoord = 0; |
+ | short2 predMV = 0; |
+ | |
+ | // Compute the source MB coordinates. |
+ | |
+ | srcCoord.x = gid_0 * 16; |
+ | srcCoord.y = gid_1 * 16; |
+ | |
+ | // Obtain the predictor of the source MB from input predictor |
+ | // buffer. |
+ | if( prediction_motion_vector_buffer != NULL ) { |
+ | predMV = |
+ | prediction_motion_vector_buffer[ |
+ | gid_0 + gid_1 * get_num_groups(0) ]; |
+ | refCoord.x = predMV.x / 4; |
+ | refCoord.y = predMV.y / 4; |
+ | refCoord.y = refCoord.y & 0xFFFE; |
+ | } |
+ | |
+ | // Enable the partition mask to allow all shapes and let the |
+ | // VME unit decide the partitioning for the source MB. |
+ | uchar partition_mask = CLK_AVC_ME_PARTITION_MASK_ALL_INTEL; |
+ | uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL; |
+ | uchar pixel_mode = CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL; |
+ | |
+ | // Create and initialize the IME payload. |
+ | intel_sub_group_avc_ime_payload_t payload = |
+ | intel_sub_group_avc_ime_initialize( |
+ | srcCoord, partition_mask, sad_adjustment ); |
+ | |
+ | // Setup the IME for a single reference search. |
+ | // The search window is 48x40 with exhaustive search, with it |
+ | // location specified by the input predictor (refCooord). |
+ | payload = |
+ | intel_sub_group_avc_ime_set_single_reference( |
+ | refCoord, CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL, |
+ | payload ); |
+ | |
+ | // Set a cost function for the IME operation. |
+ | ulong cost_center = 0; |
+ | uint2 packed_cost_table = |
+ | intel_sub_group_avc_mce_get_default_medium_penalty_cost_\ |
+ | table(); |
+ | uchar search_cost_precision = |
+ | CLK_AVC_ME_COST_PRECISION_HPEL_INTEL; |
+ | payload = |
+ | intel_sub_group_avc_ime_set_motion_vector_cost_function( |
+ | cost_center, packed_cost_table, search_cost_precision, |
+ | payload ); |
+ | |
+ | // Evaluate the IME operation with its configured payload. |
+ | intel_sub_group_avc_ime_result_t result = |
+ | intel_sub_group_avc_ime_evaluate_with_single_reference( |
+ | src_img, ref_img, vme_sampler, payload ); |
+ | |
+ | // Extract IME results. |
+ | long mvs = intel_sub_group_avc_ime_get_motion_vectors( result );|
+ | ushort sads = |
+ | intel_sub_group_avc_ime_get_inter_distortions( result ); |
+ | uchar major_shape = |
+ | intel_sub_group_avc_ime_get_inter_major_shape( result ); |
+ | uchar minor_shapes = |
+ | intel_sub_group_avc_ime_get_inter_minor_shapes( result ); |
+ | uchar2 shapes = { major_shape, minor_shapes }; |
+ | uchar directions = |
+ | intel_sub_group_avc_ime_get_inter_directions( result ); |
+ | |
+ | // Perform FME if sub-pixel mode is specified. |
+ | // FME is performed on the results of the IME operation. |
+ | if( pixel_mode != CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL ) { |
+ | intel_sub_group_avc_ref_payload_t payload = |
+ | intel_sub_group_avc_fme_initialize( |
+ | srcCoord, mvs, major_shape, minor_shapes, directions, |
+ | pixel_mode, sad_adjustment); |
+ | intel_sub_group_avc_ref_result_t result = |
+ | intel_sub_group_avc_ref_evaluate_with_single_reference( |
+ | src_img, ref_img, vme_sampler, payload ); |
+ | payload = |
+ | intel_sub_group_avc_ref_set_motion_vector_cost_function( |
+ | cost_center, packed_cost_table, search_cost_precision, |
+ | payload ); |
+ | mvs = intel_sub_group_avc_ref_get_motion_vectors( result ); |
+ | sads = |
+ | intel_sub_group_avc_ref_get_inter_distortions( result ); |
+ | } |
+ | |
+ | // Write out the results. |
+ | int index = |
+ | ( gid_0 * 16 + get_local_id(0) ) + |
+ | ( gid_1 * 16 * get_num_groups(0) ); |
+ | int2 bi_mvs = as_int2( mvs ); |
+ | motion_vector_buffer [ index ] = as_short2( bi_mvs.s0 ); |
+ | if( residuals_buffer != NULL ) { |
+ | residuals_buffer [ index ] = sads; |
+ | } |
+ | shapes_buffer [gid_0 + gid_1 * get_num_groups(0)] = shapes; |
+ | } |
+ |} |
+ +--------------------------------------------------------------------+
+
+ _____________________________________________________________________
+ "
+
+Revision History
+
+ Version 1 (12/02/2016): First public revision.