Publish Intel SPIR-V Extensions (#57)
* update cl_intel_device_side_avc_motion_estimation
* switch to HTML extension specs for Intel extensions
cl_intel_required_subgroup_size
cl_intel_subgroups
cl_intel_subgroups_short
* add Intel SPIR-V extensions
cl_intel_spirv_subgroups
cl_intel_spirv_media_block_io
cl_intel_spirv_device_side_avc_motion_estimation
diff --git a/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt b/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
index 7911918..f9ae8c7 100644
--- a/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
+++ b/extensions/intel/cl_intel_device_side_avc_motion_estimation.txt
@@ -18,7 +18,7 @@
Version
- Version 3, October 2, 2018
+ Version 5, November 9, 2018
Number
@@ -1518,7 +1518,7 @@
|ushort intel_sub_group_avc_mce_get_|Get the MCE inter distortions |
|inter_distortions( |result corresponding to the BMVs|
| intel_sub_group_avc_mce_result_t|returned by intel_sub_group_avc_|
- | result |mce_get_motion_vectors(..). The |
+ | result ) |mce_get_motion_vectors(..). The |
| |MCE inter directions result |
| |returned by intel_sub_group_avc_|
| |mce_get_inter_directions(..) |
@@ -1910,16 +1910,16 @@
| |window configuration |
| |enumeration values. |
+------------------------------------+-------------------------------+
- |ushort2 intel_sub_group_ime_ |Get the 2D size of the |
+ |ushort2 intel_sub_group_avc_ime_ |Get the 2D size of the |
|ref_window_size( |reference window in pixel |
| uchar search_window_config, |units. |
| char dual_ref ) | |
| |The value of |
- | |search_window_config must be |
- | |one of the unreserved search |
- | |window configuration |
- | |enumeration values. |
- | | |
+ |// deprecated |search_window_config must be |
+ |ushort2 intel_sub_group_ime_ |one of the unreserved search |
+ |ref_window_size( |window configuration |
+ | uchar search_window_config, |enumeration values. |
+ | char dual_ref ) | |
| |The value of dual_ref must be |
| |set to zero for a single |
| |reference search window and one|
@@ -2079,7 +2079,7 @@
|intel_sub_group_avc_ime_payload_t |This is a wrapper for |
|intel_sub_group_avc_ime_set_ |intel_sub_group_avc_mce_set_ |
|source_interlaced_field_polarity( |source_interlaced_field_ |
- | uchar src_field_polarity |polarity(..) with the result |
+ | uchar src_field_polarity, |polarity(..) with the result |
| intel_sub_group_avc_ime_payload_t|conversions to/from MCE |
| payload ) |types. See MCE version for |
| |description. |
@@ -2900,7 +2900,7 @@
|intel_sub_group_avc_ref_payload_t |This is a wrapper for |
|intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
|source_interlaced_field_polarity( |source_interlaced_field_ |
- | uchar src_field_polarity |polarity(..) with the result |
+ | uchar src_field_polarity, |polarity(..) with the result |
| intel_sub_group_avc_ref_payload_t|conversions to/from MCE |
| payload ) |types. See MCE version for |
| |description. |
@@ -2915,7 +2915,7 @@
+------------------------------------+-------------------------------+
|intel_sub_group_avc_ref_payload_t |This is a wrapper for |
|intel_sub_group_avc_ref_set_ |intel_sub_group_avc_mce_set_ |
- |dual_reference_interlaced_image_ |dual_reference_interlaced_ |
+ |dual_reference_interlaced_ |dual_reference_interlaced_ |
|field_polarities( |field_polarities(..) with the |
| uchar fwd_ref_field_polarity, |result conversions to/from MCE |
| uchar bwd_ref_field_polarity, |types. See MCE version for |
@@ -2929,7 +2929,7 @@
| payload ) | |
| |Default is to enable it. |
+------------------------------------+-------------------------------+
- |intel_sub_group_avc_sic_payload_t |Update the input payload to do |
+ |intel_sub_group_avc_ref_payload_t |Update the input payload to do |
|intel_sub_group_avc_ref_set_ |enable bilinear filter |
|bilinear_filter_enable( |interpolation instead of 4-tap |
| intel_sub_group_avc_ref_payload_t|filter interpolation. Default |
@@ -2970,7 +2970,7 @@
|evaluate_with_dual_reference( |and return its results. |
| read_only image2d_t src_image, | |
| read_only image2d_t fwd_ref_image,|The parameter |
- | image2d_t bwd_ref_image, |fwd_ref_image[bwd_ref_image] |
+ | read_only image2d_t bwd_ref_image,|fwd_ref_image[bwd_ref_image] |
| sampler_t vme_media_sampler, |must be a valid |
| intel_sub_group_avc_ref_payload_t |forward[backward] image kernel |
| payload ) |parameter per the ordering |
@@ -3160,7 +3160,7 @@
|intel_sub_group_avc_ref_result_t |Convert the MCE result into an |
|intel_sub_group_avc_mce_ |REF result. |
|convert_to_ref_result( | |
- | intel_sub_group_avc_ref_result_t| |
+ | intel_sub_group_avc_mce_result_t| |
| result ) | |
+-----------------------------------+--------------------------------+
@@ -3297,8 +3297,8 @@
| uint skip_block_partition_type, | |
| uint skip_motion_vector_mask, |The legal values for |
| ulong motion_vectors, |skip_block_partition_type must |
- | char bidirectional_weight, |be one of the specified |
- | uchar skip_sad_adjustment , |partition mask enumeration |
+ | uchar bidirectional_weight, |be one of the specified |
+ | uchar skip_sad_adjustment, |partition mask enumeration |
| intel_sub_group_avc_sic_payload_t|values. |
| payload ) | |
| |Legal values for |
@@ -3754,7 +3754,7 @@
|intel_sub_group_avc_sic_payload_t |This is a wrapper for |
|intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
|source_interlaced_field_polarity( |source_interlaced_field_ |
- | uchar src_field_polarity |polarity(..) with the result |
+ | uchar src_field_polarity, |polarity(..) with the result |
| intel_sub_group_avc_sic_payload_t|conversions to/from MCE |
| payload ) |types. See MCE version for |
| |description. |
@@ -3769,7 +3769,7 @@
+------------------------------------+-------------------------------+
|intel_sub_group_avc_sic_payload_t |This is a wrapper for |
|intel_sub_group_avc_sic_set_ |intel_sub_group_avc_mce_set_ |
- |dual_reference_interlaced_image_ |dual_reference_interlaced_ |
+ |dual_reference_interlaced_ |dual_reference_interlaced_ |
|field_polarities( |field_polarities(..) with the |
| uchar fwd_ref_field_polarity, |result conversions to/from MCE |
| uchar bwd_ref_field_polarity, |types. See MCE version for |
@@ -3778,7 +3778,7 @@
+------------------------------------+-------------------------------+
|intel_sub_group_avc_sic_payload_t |Update the input payload to do |
|intel_sub_group_avc_sic_set_ |enable bilinear filter |
- |bilinear_filter_enable( |interpolation instead of 4-tap |
+ |skc_bilinear_filter_enable( |interpolation instead of 4-tap |
| intel_sub_group_avc_sic_payload_t|filter interpolation. Default |
| payload ) |is 4-tap filter interpolation. |
| | |
@@ -3839,7 +3839,7 @@
|intel_sub_group_avc_sic_set_ |during the evaluation phase |
|block_based_raw_skip_sad( |will be the maximal SAD of |
| uchar block_based_skip_type, |individual 4x4 (or 8x8) blocks,|
- | intel_sub_group_avc_ime_payload_t|instead of the sum of the |
+ | intel_sub_group_avc_sic_payload_t|instead of the sum of the |
| payload ) |entire individual 4x4 block |
| |SADs of the MB. |
| | |
@@ -4078,7 +4078,7 @@
|intel_sub_group_avc_sic_result_t |Convert the MCE result into an |
|intel_sub_group_avc_mce_ |SIC result. |
|convert_to_sic_result( | |
- | intel_sub_group_avc_sic_result_t| |
+ | intel_sub_group_avc_mce_result_t| |
| result ) | |
+-----------------------------------+--------------------------------+
@@ -4374,3 +4374,6 @@
Version 3 (10/02/2018): Modified definitions of default initialization
literals to be pre-defined enumeration literals.
Fixed typo in intra cost configuration function.
+ Version 4 (10/31/2018): Minor typo fixes.
+ Version 5 (11/09/2018): Marked intel_sub_group_ime_ref_window_size
+ function as deprecated.
diff --git a/extensions/intel/cl_intel_required_subgroup_size.html b/extensions/intel/cl_intel_required_subgroup_size.html
new file mode 100644
index 0000000..593b7f0
--- /dev/null
+++ b/extensions/intel/cl_intel_required_subgroup_size.html
@@ -0,0 +1,1026 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc 8.6.9">
+<title>cl_intel_required_subgroup_size</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+@media screen {
+ body {
+ max-width: 50em; /* approximately 80 characters wide */
+ margin-left: 16em;
+ }
+
+ #toc {
+ position: fixed;
+ top: 0;
+ left: 0;
+ bottom: 0;
+ width: 13em;
+ padding: 0.5em;
+ padding-bottom: 1.5em;
+ margin: 0;
+ overflow: auto;
+ border-right: 3px solid #f8f8f8;
+ background-color: white;
+ }
+
+ #toc .toclevel1 {
+ margin-top: 0.5em;
+ }
+
+ #toc .toclevel2 {
+ margin-top: 0.25em;
+ display: list-item;
+ color: #aaaaaa;
+ }
+
+ #toctitle {
+ margin-top: 0.5em;
+ }
+}
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// Builds the TOC inside #toc from the headings found under #content; toclevels = 1..4.
+toc: function (toclevels) {
+
+ function getText(el) { // Concatenates the data of every text node below el.
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) { // Plain record: heading element, its text, its TOC level.
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) { // Returns a TocEntry for each matching heading below el.
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])'); // Group 1 captures the heading digit, 1..toclevels+1.
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1); // hN becomes toclevel N-1.
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) {
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "") // Headings without an id get a generated one so the link has a target.
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0) // No headings found: remove the empty TOC container.
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () { // Numbers the footnote spans under #content and lists their text in #footnotes.
+ // Delete existing footnote entries in case we're reloading the footnotes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {};
+ var n = 0;
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note"); // Set below the first time this span is processed.
+ if (!note) {
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note);
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n; // Lets footnoterefs that link to this id reuse its number.
+ }
+ }
+ if (n == 0) // No footnotes found: remove the empty container.
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE returns the full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) { // Schedules footnote/TOC generation; toclevels is forwarded to asciidoc.toc.
+ var timerId;
+
+ function reinstall() { // Regenerates footnotes, and the TOC when toclevels is truthy.
+ asciidoc.footnotes();
+ if (toclevels) {
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() { // Stops the periodic refresh, then runs one last regeneration.
+ clearInterval(timerId);
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500); // Refresh every 500 ms until the load handler below fires.
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else // Fallback when addEventListener is unavailable.
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install(1);
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+<h1>cl_intel_required_subgroup_size</h1>
+<div id="toc">
+ <div id="toctitle">Table of Contents</div>
+ <noscript><p><b>JavaScript must be enabled in your browser to display the table of contents.</b></p></noscript>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_name_strings">Name Strings</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><span class="monospaced">cl_intel_required_subgroup_size</span></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contact">Contact</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel (ben <em>dot</em> ashbaugh <em>at</em> intel <em>dot</em> com)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contributors">Contributors</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_notice">Notice</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) 2018 Intel Corporation. All rights reserved.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_status">Status</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Final Draft</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_version">Version</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Built On: 2018-11-16<br>
+Revision: 2</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_dependencies">Dependencies</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Support for OpenCL 2.1, <span class="monospaced">cl_khr_subgroups</span>, or <span class="monospaced">cl_intel_subgroups</span> is required.
+This extension is written against revision 23 of the OpenCL 2.1 API specification, against revision 30 of the OpenCL 2.0 OpenCL C specification, against version 31 of the OpenCL 2.0 Extensions specification, and against version 3 of the <span class="monospaced">cl_intel_subgroups</span> specification.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_overview">Overview</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The goal of this extension is to allow programmers to optionally specify the required subgroup size for a kernel function.
+This information is important for the correctness of many subgroup algorithms, and in some cases may be used by the compiler to generate more optimal code.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_functions">New API Functions</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_enums">New API Enums</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Accepted as the <em>param_name</em> parameter of <strong>clGetDeviceInfo</strong>:</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt>CL_DEVICE_SUB_GROUP_SIZES_INTEL <span style="color: #993399">0x4108</span></tt></pre></div></div>
+<div class="paragraph"><p>Accepted as the <em>param_name</em> parameter of <strong>clGetKernelWorkGroupInfo</strong>:</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt>CL_KERNEL_SPILL_MEM_SIZE_INTEL <span style="color: #993399">0x4109</span></tt></pre></div></div>
+<div class="paragraph"><p>Accepted as the <em>param_name</em> parameter of <strong>clGetKernelSubGroupInfo</strong> and/or
+<strong>clGetKernelSubGroupInfoKHR</strong>:</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt>CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL <span style="color: #993399">0x410A</span></tt></pre></div></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_opencl_c_optional_attribute_qualifiers">New OpenCL C Optional Attribute Qualifiers</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Optional __kernel qualifier:</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="font-weight: bold"><span style="color: #000000">__attribute__</span></span><span style="color: #990000">((</span><span style="font-weight: bold"><span style="color: #000000">intel_reqd_sub_group_size</span></span><span style="color: #990000">(&lt;</span><span style="color: #009900">int</span><span style="color: #990000">&gt;)))</span></tt></pre></div></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_api_specification">Modifications to the OpenCL API Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_additions_to_table_4_3_opencl_device_queries">Additions to Table 4.3 - "OpenCL Device Queries"</h3>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:30%;">
+<col style="width:20%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>cl_device_info</strong> </th>
+<th class="tableblock halign-left valign-top" > Return Type </th>
+<th class="tableblock halign-left valign-top" > Description</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>CL_DEVICE_SUB_GROUP_SIZES_INTEL</strong></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><span class="monospaced">size_t[]</span></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the set of subgroup sizes supported by the device.</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_table_5_21_clgetkernelworkgroupinfo_parameter_queries">Additions to Table 5.21 - "clGetKernelWorkGroupInfo parameter queries":</h3>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:34%;">
+<col style="width:33%;">
+<col style="width:33%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>cl_kernel_work_group_info</strong> </th>
+<th class="tableblock halign-left valign-top" > Return Type </th>
+<th class="tableblock halign-left valign-top" > Info. returned in <em>param_value</em></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>CL_KERNEL_SPILL_MEM_SIZE_INTEL</strong></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><span class="monospaced">cl_ulong</span></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the amount of spill memory used by a kernel.
+The meaning of this value will vary from implementation to implementation; however, a return value of 0 will always indicate that the compiler was able to compile the kernel to fit into the device’s register file without spilling registers to memory.</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_clgetkernelsubgroupinfo_parameter_queries">Additions to "clGetKernelSubGroupInfo parameter queries":</h3>
+<div class="paragraph"><p>This is Table 5.22 - "<strong>clGetKernelSubGroupInfo</strong> parameter queries" in the OpenCL 2.1 API spec, in Section 9.17.2.1 for <strong>clGetKernelSubGroupInfoKHR</strong> in the OpenCL 2.0 Extensions spec, and in the section describing the changes to Section 5.9.3 for <strong>clGetKernelSubGroupInfoKHR</strong> in the <span class="monospaced">cl_intel_subgroups</span> spec:</p></div>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:25%;">
+<col style="width:25%;">
+<col style="width:25%;">
+<col style="width:25%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>cl_kernel_sub_group_info</strong> </th>
+<th class="tableblock halign-left valign-top" > Input Type </th>
+<th class="tableblock halign-left valign-top" > Return Type </th>
+<th class="tableblock halign-left valign-top" > Info. returned in <em>param_value</em></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>CL_KERNEL_COMPILE_<br>
+SUB_GROUP_SIZE_INTEL</strong></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><span class="monospaced">ignored</span></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><span class="monospaced">size_t</span></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the subgroup size specified by the <span class="monospaced">__attribute__(( intel_reqd_sub_group_size(&lt;int&gt;) ))</span> qualifier.
+Refer to section 6.7.2.</p>
+<p class="tableblock">If the subgroup size is not specified using the above attribute qualifier then 0 is returned.</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_c_specification">Modifications to the OpenCL C Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_additions_to_section_6_7_2_optional_attribute_qualifiers">Additions to Section 6.7.2 - "Optional Attribute Qualifiers"</h3>
+<div class="paragraph"><p>The optional <span class="monospaced">__attribute__((intel_reqd_sub_group_size(&lt;int&gt;)))</span> can be used to indicate that the kernel must be compiled and executed with the specified subgroup size.
+When this attribute is present, <strong>get_max_sub_group_size</strong>() is guaranteed to return the specified integer value.
+This is important for the correctness of many subgroup algorithms, and in some cases may be used by the compiler to generate more optimal code.</p></div>
+<div class="paragraph"><p>Note that there is no guarantee for the value of <strong>get_sub_group_size</strong>() even when this attribute is present, particularly when the work-group size is not evenly divisible by the required subgroup size.</p></div>
+<div class="paragraph"><p>Note as well that some devices may support a limited number of subgroup sizes, and that some devices may not support all language constructs with all subgroup sizes.
+This means that some kernels may fail compilation with one required subgroup size and succeed with another required subgroup size, even if both subgroup sizes are supported by the device.</p></div>
+<div class="paragraph"><p>Finally, note that requiring one subgroup size (particularly, a larger subgroup size) may require more spill memory than another subgroup size, and may negatively impact application performance.</p></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_issues">Issues</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_revision_history">Revision History</h2>
+<div class="sectionbody">
+<table class="tableblock frame-all grid-rows"
+style="
+width:100%;
+">
+<col style="width:4%;">
+<col style="width:14%;">
+<col style="width:14%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" >Rev</th>
+<th class="tableblock halign-left valign-top" >Date</th>
+<th class="tableblock halign-left valign-top" >Author</th>
+<th class="tableblock halign-left valign-top" >Changes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2016-07-14</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>First public revision.</strong></p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2018-11-15</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Conversion to asciidoc.</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2018-11-16 10:11:35 PST
+</div>
+</div>
+</body>
+</html>
diff --git a/extensions/intel/cl_intel_required_subgroup_size.txt b/extensions/intel/cl_intel_required_subgroup_size.txt
index f8e0c78..6736a71 100644
--- a/extensions/intel/cl_intel_required_subgroup_size.txt
+++ b/extensions/intel/cl_intel_required_subgroup_size.txt
@@ -1,129 +1,11 @@
-Name String
+The cl_intel_required_subgroup_size extension is no longer authored in plain text.
- cl_intel_required_subgroup_size
+Please refer to the HTML extension specification instead,
+which may be found here:
-Contributors
+https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.html
- Ben Ashbaugh, Intel
+The asciidoc source for this extension specification may
+be found here:
-Contact
-
- Ben Ashbaugh, Intel (ben.ashbaugh 'at' intel.com)
-
-Version
-
- Version 1, July 14, 2016
-
-Number
-
- OpenCL Extension #43
-
-Status
-
- Final Draft
-
-Dependencies
-
- Support for OpenCL 2.1, cl_khr_subgroups, or cl_intel_subgroups is required.
-
- This extension is written against revision 23 of the OpenCL 2.1 API
- specification, against revision 30 of the OpenCL 2.0 OpenCL C specification,
- against version 31 of the OpenCL 2.0 Extensions specification, and against
- version 3 of the cl_intel_subgroups specification.
-
-Overview
-
- The goal of this extension is to allow programmers to optionally specify
- the required subgroup size for a kernel function. This information is
- important for the correctness of many subgroup algorithms, and in some cases
- may be used by the compiler to generate more optimal code.
-
-New API Enums
-
- Accepted as the <param_name> parameter of clGetDeviceInfo:
-
- CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
-
- Accepted as the <param_name> parameter of clGetKernelWorkGroupInfo:
-
- CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
-
- Accepted as the <param_name> parameter of clGetKernelSubGroupInfo and/or
- clGetKernelSubGroupInfoKHR:
-
- CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
-
-New OpenCL C Optional Attribute Qualifiers
-
- Optional __kernel qualifier:
-
- __attribute__((intel_reqd_sub_group_size(<int>)))
-
-Add to Table 4.3 - "OpenCL Device Queries":
-
- "--------------------------------------------------------------------------------------
- cl_device_info Return Type Description
- ------------------------------- ----------- ----------------------------------------
- CL_DEVICE_SUB_GROUP_SIZES_INTEL size_t[] Returns the set of subgroup sizes
- supported by the device.
- --------------------------------------------------------------------------------------"
-
-Add to Table 5.21 - "clGetKernelWorkGroupInfo parameter queries":
-
- "--------------------------------------------------------------------------------------
- cl_kernel_work_group_info Return Type Description
- ------------------------- ----------- ----------------------------------------------
- CL_KERNEL_SPILL_MEM_ cl_ulong Returns the amount of spill memory used by
- SIZE_INTEL a kernel. The meaning of this value will
- vary from implementation-to-implementation,
- however a return value of 0 will always
- indicate that compiler was able to compile
- the kernel to fit into the device's register
- file without spilling registers to memory.
- --------------------------------------------------------------------------------------"
-
-Add to Table 5.22 - "clGetKernelSubGroupInfo parameter queries" in the OpenCL 2.1 API
-spec, to the table in Section 9.17.2.1 for clGetKernelSubGroupInfoKHR in the OpenCL
-2.0 Extensions spec, and to the table describing the changes to Section 5.9.3 for
-clGetKernelSubGroupInfoKHR in the cl_intel_subgroups spec:
-
- "--------------------------------------------------------------------------------------
- cl_kernel_sub_group_info Input Type Return Type Description
- ------------------------ ---------- ----------- -----------------------------------
- CL_KERNEL_COMPILE_ ignored size_t Returns the subgroup size specified
- SUB_GROUP_SIZE_INTEL by the __attribute__((
- intel_reqd_sub_group_size(<int>)))
- qualifier. Refer to section 6.7.2.
-
- If the subgroup size is not
- specified using the above attribute
- qualifier then 0 is returned.
- --------------------------------------------------------------------------------------"
-
-Add to Section 6.7.2 - "Optional Attribute Qualifiers"
-
- "The optional __attribute__((intel_reqd_sub_group_size(<int>))) can be used to indicate
- that the kernel must be compiled and executed with the specified subgroup size. When
- this attribute is present, get_max_sub_group_size() is guaranteed to return the
- specified integer value. This is important for the correctness of many subgroup
- algorithms, and in some cases may be used by the compiler to generate more optimal
- code.
-
- Note that there is no guarantee for the value of get_sub_group_size() even when this
- attribute is present, particularly when the work-group size is not evenly divisible by
- the required subgroup size.
-
- Note as well that some devices may support a limited number of subgroup sizes, and
- that some devices may not support all language constructs with all subgroup sizes.
- This means that some kernels may fail compilation with one required subgroup size and
- succeed with another required subgroup size, even if both subgroup sizes are supported
- by the device.
-
- Finally, note that requiring one subgroup size (particularly, a larger subgroup size)
- may require more spill memory than another subgroup size, and may negatively impact
- application performance."
-
-Revision History
-
- Version 1 - Initial Revision
-
+https://github.com/KhronosGroup/OpenCL-Docs/blob/master/extensions/cl_intel_required_subgroup_size.asciidoc
diff --git a/extensions/intel/cl_intel_spirv_device_side_avc_motion_estimation.html b/extensions/intel/cl_intel_spirv_device_side_avc_motion_estimation.html
new file mode 100644
index 0000000..acdc0cb
--- /dev/null
+++ b/extensions/intel/cl_intel_spirv_device_side_avc_motion_estimation.html
@@ -0,0 +1,912 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc 8.6.9">
+<title>cl_intel_spirv_device_side_avc_motion_estimation</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+@media screen {
+ body {
+ max-width: 50em; /* approximately 80 characters wide */
+ margin-left: 16em;
+ }
+
+ #toc {
+ position: fixed;
+ top: 0;
+ left: 0;
+ bottom: 0;
+ width: 13em;
+ padding: 0.5em;
+ padding-bottom: 1.5em;
+ margin: 0;
+ overflow: auto;
+ border-right: 3px solid #f8f8f8;
+ background-color: white;
+ }
+
+ #toc .toclevel1 {
+ margin-top: 0.5em;
+ }
+
+ #toc .toclevel2 {
+ margin-top: 0.25em;
+ display: list-item;
+ color: #aaaaaa;
+ }
+
+ #toctitle {
+ margin-top: 0.5em;
+ }
+}
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// Build the table of contents in #toc from the headings under #content. toclevels = 1..4.
+toc: function (toclevels) {
+ // Return the concatenated data of every text node beneath el (recursive).
+ function getText(el) {
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+ // Plain record for one heading: its element, its text and its TOC level.
+ function TocEntry(el, text, toclevel) {
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+ // Collect, in document order, a TocEntry for each element under el whose tag name matches h1..h(toclevels+1), except class "float".
+ function tocEntries(el, toclevels) {
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1); // TOC level is the heading number minus one (h2 -> 1).
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) {
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries: one <div class="toclevelN"> holding a link to each heading.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "")
+ entry.element.id = "_toc_" + i; // Headings without an id get a generated one so the link has a target.
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0)
+ toc.parentNode.removeChild(toc); // No headings found: remove the empty #toc container.
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+ // Turn span.footnote elements under #content into numbered entries in #footnotes. First delete existing footnote entries in case we're reloading the footnotes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {}; // Maps "#<span id>" to the footnote number, for resolving footnoterefs below.
+ var n = 0; // Running footnote number.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note");
+ if (!note) {
+ // First pass over this span: take the note text from between the brackets.
+ // Use [\s\S] in place of . so multi-line matches work, because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note); // Keep the text so a later reload can reuse it.
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n;
+ }
+ }
+ if (n == 0)
+ noteholder.parentNode.removeChild(noteholder); // No footnotes on the page: remove the empty container.
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Keep only the fragment, because IE returns the full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) {
+ var timerId;
+ // Regenerate the footnotes and, when toclevels is truthy, the table of contents.
+ function reinstall() {
+ asciidoc.footnotes();
+ if (toclevels) {
+ asciidoc.toc(toclevels);
+ }
+ }
+ // Stop the polling timer, then regenerate one final time.
+ function reinstallAndRemoveTimer() {
+ clearInterval(timerId);
+ reinstall();
+ }
+ // Regenerate every 500 ms; the timer is cleared on DOMContentLoaded, or on window load where addEventListener is unavailable.
+ timerId = setInterval(reinstall, 500);
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install(1);
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+<h1>cl_intel_spirv_device_side_avc_motion_estimation</h1>
+<div id="toc">
+ <div id="toctitle">Table of Contents</div>
+ <noscript><p><b>JavaScript must be enabled in your browser to display the table of contents.</b></p></noscript>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_name_strings">Name Strings</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><span class="monospaced">cl_intel_spirv_device_side_avc_motion_estimation</span></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contact">Contact</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel (ben <em>dot</em> ashbaugh <em>at</em> intel <em>dot</em> com)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contributors">Contributors</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel<br>
+Biju George, Intel</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_notice">Notice</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) 2018 Intel Corporation. All rights reserved.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_status">Status</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Final Draft</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_version">Version</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Built On: 2018-10-29<br>
+Revision: 1</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_dependencies">Dependencies</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This extension is written against the OpenCL SPIR-V Environment Specification Version 2.2, Revision v2.2-3.</p></div>
+<div class="paragraph"><p>This extension requires OpenCL support for SPIR-V, either via OpenCL 2.1 or via the <span class="monospaced">cl_khr_il_program</span> extension, and support for the <span class="monospaced">cl_intel_device_side_avc_motion_estimation</span> extension.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_overview">Overview</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This extension defines how modules using the SPIR-V extension <span class="monospaced">SPV_INTEL_device_side_avc_motion_estimation</span> may behave in an OpenCL environment.</p></div>
+<div class="paragraph"><p>This extension is a companion to the <span class="monospaced">cl_intel_device_side_avc_motion_estimation</span> OpenCL extension, and the functionality described in this extension and <span class="monospaced">SPV_INTEL_device_side_avc_motion_estimation</span> is sufficient to implement the built-in functions defined in the <span class="monospaced">cl_intel_device_side_avc_motion_estimation</span> extension.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_functions">New API Functions</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_enums">New API Enums</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_spir_v_environment_specification">Modifications to the OpenCL SPIR-V Environment Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_span_class_monospaced_cl_intel_spirv_device_side_avc_motion_estimation_span">Add a new Section 7.1.X - <span class="monospaced">cl_intel_spirv_device_side_avc_motion_estimation</span></h3>
+<div class="paragraph"><p>If the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_device_side_avc_motion_estimation</span>, then the environment must accept SPIR-V modules that declare use of the <span class="monospaced">SPV_INTEL_device_side_avc_motion_estimation</span> extension via <strong>OpExtension</strong>.</p></div>
+<div class="paragraph"><p>If the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_device_side_avc_motion_estimation</span> and use of the <span class="monospaced">SPV_INTEL_device_side_avc_motion_estimation</span> extension is declared in the module via <strong>OpExtension</strong>, then the environment must accept modules that declare the following SPIR-V capabilities:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>SubgroupAvcMotionEstimationINTEL</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupAvcMotionEstimationIntraINTEL</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupAvcMotionEstimationChromaINTEL</strong>
+</p>
+</li>
+</ul></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_issues">Issues</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_revision_history">Revision History</h2>
+<div class="sectionbody">
+<table class="tableblock frame-all grid-rows"
+style="
+width:100%;
+">
+<col style="width:4%;">
+<col style="width:14%;">
+<col style="width:14%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" >Rev</th>
+<th class="tableblock halign-left valign-top" >Date</th>
+<th class="tableblock halign-left valign-top" >Author</th>
+<th class="tableblock halign-left valign-top" >Changes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2018-10-29</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>Initial revision</strong></p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2018-10-29 11:37:27 PDT
+</div>
+</div>
+</body>
+</html>
diff --git a/extensions/intel/cl_intel_spirv_media_block_io.html b/extensions/intel/cl_intel_spirv_media_block_io.html
new file mode 100644
index 0000000..de43807
--- /dev/null
+++ b/extensions/intel/cl_intel_spirv_media_block_io.html
@@ -0,0 +1,1080 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc 8.6.9">
+<title>cl_intel_spirv_media_block_io</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+@media screen {
+ body {
+ max-width: 50em; /* approximately 80 characters wide */
+ margin-left: 16em;
+ }
+
+ #toc {
+ position: fixed;
+ top: 0;
+ left: 0;
+ bottom: 0;
+ width: 13em;
+ padding: 0.5em;
+ padding-bottom: 1.5em;
+ margin: 0;
+ overflow: auto;
+ border-right: 3px solid #f8f8f8;
+ background-color: white;
+ }
+
+ #toc .toclevel1 {
+ margin-top: 0.5em;
+ }
+
+ #toc .toclevel2 {
+ margin-top: 0.25em;
+ display: list-item;
+ color: #aaaaaa;
+ }
+
+ #toctitle {
+ margin-top: 0.5em;
+ }
+}
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) { // Fills the #toc element with links to the headings found under #content.
+
+ function getText(el) { // Concatenates the text nodes below el, recursing into child elements.
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) { // Record for one heading: its element, its text and its TOC level.
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) { // Returns an array with one TocEntry per matching heading below el.
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])'); // 'h' or 'H' followed by a digit from 1 to toclevels+1.
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") { // Headings whose class is "float" are left out.
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1); // TOC level is the heading digit minus one.
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) { // No TOC container in the page: nothing to do.
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry); // Collected first and removed afterwards, so childNodes is not modified while it is being walked.
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "") // Headings without an id get a generated one so the link below has a target.
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel; // Same class prefix that the cleanup loop above matches.
+ toc.appendChild(div);
+ }
+ if (entries.length == 0) // No headings found: remove the TOC container from the page.
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () { // Rebuilds the footnote list inside #footnotes from the footnote spans under #content.
+ // Delete existing footnote entries in case we're reloading the footnotes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) { // No footnote container in the page: nothing to do.
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {}; // Maps "#" + span id to the footnote number, for the footnoteref pass below.
+ var n = 0; // Running footnote number.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note"); // Set by an earlier run of this function, if any.
+ if (!note) {
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Note text is what sits between the square brackets.
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note); // Keep the text, since innerHTML now holds only the link.
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n;
+ }
+ }
+ if (n == 0) // No footnotes found: remove the footnote container from the page.
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE returns the full URL.
+ n = refs[href]; // Number of the footnote whose span id matches this reference.
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) { // Schedules footnote and TOC generation; toclevels is passed through to asciidoc.toc.
+ var timerId;
+
+ function reinstall() { // One generation pass: footnotes first, then the TOC.
+ asciidoc.footnotes();
+ if (toclevels) { // A falsy toclevels skips TOC generation.
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() { // Load handler: stops the periodic pass and runs one last pass.
+ clearInterval(timerId);
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500); // Re-run every 500 ms until the load handler clears the timer.
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else // Fallback when addEventListener is not available.
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install(1);
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+<h1>cl_intel_spirv_media_block_io</h1>
+<div id="toc">
+ <div id="toctitle">Table of Contents</div>
+ <noscript><p><b>JavaScript must be enabled in your browser to display the table of contents.</b></p></noscript>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_name_strings">Name Strings</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><span class="monospaced">cl_intel_spirv_media_block_io</span></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contact">Contact</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel (ben <em>dot</em> ashbaugh <em>at</em> intel <em>dot</em> com)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contributors">Contributors</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel<br>
+Biju George, Intel<br>
+Pawel Jurek, Intel</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_notice">Notice</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) 2018 Intel Corporation. All rights reserved.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_status">Status</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Final Draft</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_version">Version</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Built On: 2018-10-29<br>
+Revision: 1</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_dependencies">Dependencies</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This extension is written against the OpenCL SPIR-V Environment Specification Version 2.2, Revision v2.2-3.</p></div>
+<div class="paragraph"><p>This extension requires OpenCL support for SPIR-V, either via OpenCL 2.1 or via the <span class="monospaced">cl_khr_il_program</span> extension, and support for the <span class="monospaced">cl_intel_media_block_io</span> extension.</p></div>
+<div class="paragraph"><p>This extension interacts with the <span class="monospaced">cl_intel_packed_yuv</span> extension.</p></div>
+<div class="paragraph"><p>This extension interacts with the <span class="monospaced">cl_intel_planar_yuv</span> extension.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_overview">Overview</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This extension defines how modules using the SPIR-V extension <span class="monospaced">SPV_INTEL_media_block_io</span> may behave in an OpenCL environment.</p></div>
+<div class="paragraph"><p>This extension is a companion to the <span class="monospaced">cl_intel_media_block_io</span> OpenCL extension, and the functionality described in this extension and <span class="monospaced">SPV_INTEL_media_block_io</span> is sufficient to implement the built-in functions defined in the <span class="monospaced">cl_intel_media_block_io</span> extension.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_functions">New API Functions</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_enums">New API Enums</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_spir_v_environment_specification">Modifications to the OpenCL SPIR-V Environment Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_span_class_monospaced_cl_intel_spirv_media_block_io_span">Add a new Section 7.1.X - <span class="monospaced">cl_intel_spirv_media_block_io</span></h3>
+<div class="paragraph"><p>If the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_media_block_io</span>, then the environment must accept SPIR-V modules that declare use of the <span class="monospaced">SPV_INTEL_media_block_io</span> extension via <strong>OpExtension</strong>.</p></div>
+<div class="paragraph"><p>If the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_media_block_io</span> and use of the <span class="monospaced">SPV_INTEL_media_block_io</span> extension is declared in the module via <strong>OpExtension</strong>, then the environment must accept modules that declare the following SPIR-V capabilities:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>SubgroupImageMediaBlockIOINTEL</strong>
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, the environment must accept the following types for <em>Result Type</em> for <strong>OpSubgroupImageMediaBlockReadINTEL</strong>, and for the type of <em>Data</em> for <strong>OpSubgroupImageMediaBlockWriteINTEL</strong>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars and <strong>OpTypeVectors</strong> with 2, 4, 8, or 16 <em>Component Count</em> components of the following <em>Component Type</em> types:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 8 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">char</span> and <span class="monospaced">uchar</span>)
+</p>
+</li>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 16 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">short</span> and <span class="monospaced">ushort</span>)
+</p>
+</li>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 32 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">int</span> and <span class="monospaced">uint</span>)
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <em>Image</em>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<em>Dim</em> must be <strong>2D</strong>
+</p>
+</li>
+<li>
+<p>
+<em>Depth</em> must be 0 (not a depth image)
+</p>
+</li>
+<li>
+<p>
+<em>Arrayed</em> must be 0 (non-arrayed content)
+</p>
+</li>
+<li>
+<p>
+<em>MS</em> must be 0 (single-sampled content)
+</p>
+</li>
+<li>
+<p>
+(equivalent to <span class="monospaced">image2d_t</span>)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <em>Coordinate</em>, the following types are supported:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeVectors</strong> with 2 <em>Component Count</em> components of <em>Component Type</em> <strong>OpTypeInt</strong> with a <em>Width</em> of 32 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">int2</span>)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <em>Width</em> and <em>Height</em>, the following type is supported:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars of <strong>OpTypeInt</strong> with a <em>Width</em> of 32 bits and a <em>Signedness</em> of 0 (equivalent to <span class="monospaced">int</span>)
+</p>
+</li>
+</ul></div>
+</div>
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_1_notes_and_restrictions">Add a new Section 7.1.X.1 - Notes and Restrictions</h3>
+<div class="paragraph"><p>Both <strong>OpSubgroupImageMediaBlockReadINTEL</strong> and <strong>OpSubgroupImageMediaBlockWriteINTEL</strong> must be encountered by all work items in the subgroup executing the kernel, otherwise the behavior is undefined (i.e. they can only be used in convergent control flow where all the work items in the subgroup are enabled).</p></div>
+<div class="paragraph"><p>The block <em>Width</em> determines the maximum <em>Height</em> for <strong>OpSubgroupImageMediaBlockReadINTEL</strong> and <strong>OpSubgroupImageMediaBlockWriteINTEL</strong>, and is summarized in the following table:</p></div>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:50%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" ><strong>Width (bytes)</strong></th>
+<th class="tableblock halign-left valign-top" ><strong>Maximum Height (rows)</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">4</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">64</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">8</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">32</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">12, 16</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">16</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">20, 24, 28, 32</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">8</p></td>
+</tr>
+</tbody>
+</table>
+<div class="paragraph"><p>Both the first component of <em>Coordinate</em>, which represents the byte offset into the <em>Image</em>, and the block <em>Width</em> must be four byte aligned.</p></div>
+<div class="paragraph"><p>Both the block <em>Width</em> and <em>Height</em> must be compile time constants.</p></div>
+<div class="paragraph"><p>The <em>Image</em> operand must only be used by other <strong>SubgroupImageMediaBlockIOINTEL</strong> instructions or image query instructions. It may not be used by any other instructions that read texels from or write texels to the <em>Image</em>.</p></div>
+<div class="paragraph"><p>Behavior is undefined if <em>Image</em> is a planar YUV image, however <em>Image</em> may represent an individual plane of a planar YUV image.</p></div>
+<div class="paragraph"><p>The <em>Image</em> operand must be created such that the image byte width, defined as the image width multiplied by the <em>Image Format</em> size, is a multiple of four bytes.</p></div>
+<div class="paragraph"><p>For <strong>OpSubgroupImageMediaBlockReadINTEL</strong>, if the <em>Image Format</em> size is smaller than the block read <em>Component Type</em>, then an out-of-bounds read will return data replicated from the nearest edge element, otherwise out-of-bounds read behavior is undefined. For example:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+For an image with <em>Image Format</em> size equal to a single byte (for example <strong>R8</strong>), and a 32-bit boundary value <span class="monospaced">B0B1B2B3</span>, replicating off the left edge may result in the 32-bit value <span class="monospaced">B0B0B0B0</span>, and replicating off the right edge may result in the 32-bit value <span class="monospaced">B3B3B3B3</span>.
+</p>
+</li>
+<li>
+<p>
+For an image with an <em>Image Format</em> size equal to two bytes (for example <strong>R16</strong>), replicating off the left edge may result in the 32-bit value <span class="monospaced">B0B1B0B1</span>, and replicating off the right edge may result in the 32-bit value <span class="monospaced">B2B3B2B3</span>.
+</p>
+</li>
+<li>
+<p>
+For an image with an <em>Image Format</em> size equal to four bytes (for example <strong>Rgba8</strong>), the entire boundary value is replicated, for both the left and right edges.
+</p>
+</li>
+<li>
+<p>
+Because the maximum <em>Component Type</em> is a four byte component type, there is no defined out-of-bounds behavior for images with an <em>Image Format</em> size greater than four bytes.
+</p>
+</li>
+<li>
+<p>
+As a special case, an image with a packed YUV <em>Image Format</em> (and hence an <em>Image Format</em> size equal to two bytes) behaves as follows:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+Replicating off of the left edge replicates the UV components and the first Y component, so, for example, replicating the 32-bit boundary value <span class="monospaced">Y0U0Y1V0</span> will result in the 32-bit value <span class="monospaced">Y0U0Y0V0</span>.
+</p>
+</li>
+<li>
+<p>
+Replicating off the right edge replicates the UV components and the second Y component, so, for example, replicating the 32-bit boundary value <span class="monospaced">Y0U0Y1V0</span> will result in the 32-bit value <span class="monospaced">Y1U0Y1V0</span>.
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <strong>OpSubgroupImageMediaBlockWriteINTEL</strong>, if the <em>Image Format</em> size is smaller than the block write <em>Component Type</em>, then out-of-bounds writes will be dropped, otherwise out-of-bounds write behavior is undefined.</p></div>
+<div class="paragraph"><p>When reading or writing a 2D <em>Image</em> created from a buffer:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+The <em>image row pitch</em> is required to be a multiple of 64 bytes, in addition to the <span class="monospaced">CL_DEVICE_IMAGE_PITCH_ALIGNMENT</span> requirements.
+</p>
+</li>
+<li>
+<p>
+If the buffer is a <span class="monospaced">cl_mem</span> that was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> must be 256-bit (32-byte) aligned.
+</p>
+</li>
+<li>
+<p>
+If the buffer is a <span class="monospaced">cl_mem</span> that is a sub-buffer, then the <em>origin</em> must be a multiple of 32 bytes. Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> for the <em>buffer</em> must be 256-bit (32-byte) aligned.
+</p>
+</li>
+<li>
+<p>
+The maximum <em>Height</em> is further restricted to 16 rows or less.
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Behavior is undefined if the size of the 2D source region (defined by the type of <em>Data</em> and <strong>SubgroupMaxSize</strong>) is smaller than the size of the 2D region to write (defined by <em>Width</em>, <em>Height</em>, and block write <em>Component Type</em>).</p></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_issues">Issues</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_revision_history">Revision History</h2>
+<div class="sectionbody">
+<table class="tableblock frame-all grid-rows"
+style="
+width:100%;
+">
+<col style="width:4%;">
+<col style="width:14%;">
+<col style="width:14%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" >Rev</th>
+<th class="tableblock halign-left valign-top" >Date</th>
+<th class="tableblock halign-left valign-top" >Author</th>
+<th class="tableblock halign-left valign-top" >Changes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2018-10-29</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>Initial revision</strong></p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2018-10-29 11:47:08 PDT
+</div>
+</div>
+</body>
+</html>
diff --git a/extensions/intel/cl_intel_spirv_subgroups.html b/extensions/intel/cl_intel_spirv_subgroups.html
new file mode 100644
index 0000000..5f67db7
--- /dev/null
+++ b/extensions/intel/cl_intel_spirv_subgroups.html
@@ -0,0 +1,1247 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc 8.6.9">
+<title>cl_intel_spirv_subgroups</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+@media screen {
+ body {
+ max-width: 50em; /* approximately 80 characters wide */
+ margin-left: 16em;
+ }
+
+ #toc {
+ position: fixed;
+ top: 0;
+ left: 0;
+ bottom: 0;
+ width: 13em;
+ padding: 0.5em;
+ padding-bottom: 1.5em;
+ margin: 0;
+ overflow: auto;
+ border-right: 3px solid #f8f8f8;
+ background-color: white;
+ }
+
+ #toc .toclevel1 {
+ margin-top: 0.5em;
+ }
+
+ #toc .toclevel2 {
+ margin-top: 0.25em;
+ display: list-item;
+ color: #aaaaaa;
+ }
+
+ #toctitle {
+ margin-top: 0.5em;
+ }
+}
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// Build the table of contents in #toc from the headers found under #content; toclevels = 1..4.
+toc: function (toclevels) {
+
+ function getText(el) { // Concatenate the data of every descendant text node of el.
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) { // Record for one header: its element, its text and its TOC level.
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) { // Collect a TocEntry for each matching header under el, in document order.
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])'); // h or H followed by a digit 1..toclevels+1 in the tag name
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") { // headers with class "float" are left out
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1); // hN is stored as toclevel N-1
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) { // nothing to do when the page has no #toc element
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "") // header has no id: give it one so the link has a target
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0) // no headers found: remove the #toc element itself
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () { // Number the span.footnote elements under #content and list their notes in #footnotes.
+ // Delete existing footnote entries in case we're reloading the footnotes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {}; // maps "#" + span id to its footnote number, for the footnoteref pass below
+ var n = 0;
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note");
+ if (!note) { // no cached note yet: extract it from the span and store it in data-note
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note);
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n;
+ }
+ }
+ if (n == 0) // no footnotes found: remove the #footnotes element
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE returns the full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) { // Regenerate footnotes (and the TOC) every 500 ms until the DOM is ready, then once more.
+ var timerId;
+
+ function reinstall() {
+ asciidoc.footnotes();
+ if (toclevels) { // a falsy toclevels skips TOC generation
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() {
+ clearInterval(timerId); // stop the 500 ms polling started below
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500);
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else
+ window.onload = reinstallAndRemoveTimer; // fallback when addEventListener is not available
+}
+
+}
+asciidoc.install(1);
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+<h1>cl_intel_spirv_subgroups</h1>
+<div id="toc">
+ <div id="toctitle">Table of Contents</div>
+ <noscript><p><b>JavaScript must be enabled in your browser to display the table of contents.</b></p></noscript>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_name_strings">Name Strings</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><span class="monospaced">cl_intel_spirv_subgroups</span></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contact">Contact</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel (ben <em>dot</em> ashbaugh <em>at</em> intel <em>dot</em> com)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contributors">Contributors</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel<br>
+Biju George, Intel<br>
+Michael Kinsner, Intel<br>
+Mariusz Merecki, Intel</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_notice">Notice</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) 2018 Intel Corporation. All rights reserved.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_status">Status</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Final Draft</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_version">Version</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Built On: 2018-10-29<br>
+Revision: 1</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_dependencies">Dependencies</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This extension is written against the OpenCL SPIR-V Environment Specification Version 2.2, Revision v2.2-3.</p></div>
+<div class="paragraph"><p>This extension requires OpenCL support for SPIR-V, either via OpenCL 2.1 or via the <span class="monospaced">cl_khr_il_program</span> extension, and support for the <span class="monospaced">cl_intel_subgroups</span> extension.</p></div>
+<div class="paragraph"><p>This extension interacts with the <span class="monospaced">cl_intel_subgroups_short</span> extension.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_overview">Overview</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>This extension defines how modules using the SPIR-V extension <span class="monospaced">SPV_INTEL_subgroups</span> may behave in an OpenCL environment.</p></div>
+<div class="paragraph"><p>This extension is a companion to the <span class="monospaced">cl_intel_subgroups</span> and <span class="monospaced">cl_intel_subgroups_short</span> OpenCL extensions, and the functionality described in this extension and <span class="monospaced">SPV_INTEL_subgroups</span> is sufficient to implement the built-in functions defined in the <span class="monospaced">cl_intel_subgroups</span> and <span class="monospaced">cl_intel_subgroups_short</span> extensions.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_functions">New API Functions</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_enums">New API Enums</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_spir_v_environment_specification">Modifications to the OpenCL SPIR-V Environment Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_span_class_monospaced_cl_intel_spirv_subgroups_span">Add a new Section 7.1.X - <span class="monospaced">cl_intel_spirv_subgroups</span></h3>
+<div class="paragraph"><p>If the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_subgroups</span>, then the environment must accept SPIR-V modules that declare use of the <span class="monospaced">SPV_INTEL_subgroups</span> extension via <strong>OpExtension</strong>.</p></div>
+<div class="paragraph"><p>If the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_subgroups</span> and use of the <span class="monospaced">SPV_INTEL_subgroups</span> extension is declared in the module via <strong>OpExtension</strong>, then the environment must accept modules that declare the following SPIR-V capabilities:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>SubgroupShuffleINTEL</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupBufferBlockIOINTEL</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupImageBlockIOINTEL</strong>
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, the environment must accept modules that use the following <strong>BuiltIns</strong>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>SubgroupSize</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupMaxSize</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>NumSubgroups</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupId</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>SubgroupLocalInvocationId</strong>
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>For an OpenCL 2.0 or newer environment, the following <strong>BuiltIns</strong> must additionally be accepted:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>NumEnqueuedSubgroups</strong>
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, the environment must accept the following instruction semantics:</p></div>
+<div class="paragraph"><p>For the control <em>Barrier Instruction</em>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpControlBarrier</strong>:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+The <em>Scope</em> for <em>Execution</em> may be <strong>Subgroup</strong>.
+</p>
+</li>
+<li>
+<p>
+The <em>Scope</em> for <em>Memory</em> may be <strong>Subgroup</strong>.
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+<div class="paragraph"><p>For the <em>Group Instructions</em>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpGroupAll</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupAny</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupBroadcast</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupIAdd</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupFAdd</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupFMin</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupUMin</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupSMin</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupFMax</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupUMax</strong>
+</p>
+</li>
+<li>
+<p>
+<strong>OpGroupSMax</strong>
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+The <em>Scope</em> for <em>Execution</em> may be <strong>Subgroup</strong>.
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+</div>
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_1_shuffle_instructions">Add a new Section 7.1.X.1 - Shuffle Instructions</h3>
+<div class="paragraph"><p>Because support for <span class="monospaced">cl_intel_subgroups</span> is required for <span class="monospaced">cl_intel_spirv_subgroups</span>, if the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_subgroups</span> and use of the <span class="monospaced">SPV_INTEL_subgroups</span> extension is declared in the module via <strong>OpExtension</strong>, then the environment must accept the following types for <em>Data</em> for the <strong>SubgroupShuffleINTEL</strong> instructions:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars and <strong>OpTypeVectors</strong> with 2, 4, 8, or 16 <em>Component Count</em> components of the following <em>Component Type</em> types:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeFloat</strong> with a <em>Width</em> of 32 bits (equivalent to <span class="monospaced">float</span>)
+</p>
+</li>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 32 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">int</span> and <span class="monospaced">uint</span>)
+</p>
+</li>
+</ul></div>
+</li>
+<li>
+<p>
+Scalars of <strong>OpTypeInt</strong> with a <em>Width</em> of 64 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">long</span> and <span class="monospaced">ulong</span>)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, if the <strong>Float16</strong> capability is declared and supported:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars of <strong>OpTypeFloat</strong> with a <em>Width</em> of 16 bits (equivalent to <span class="monospaced">half</span>)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, if the <strong>Float64</strong> capability is declared and supported:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars of <strong>OpTypeFloat</strong> with a <em>Width</em> of 64 bits (equivalent to <span class="monospaced">double</span>)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, if the OpenCL environment supports the extension <span class="monospaced">cl_intel_subgroups_short</span>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars and <strong>OpTypeVectors</strong> with 2, 4, 8, or 16 <em>Component Count</em> components of the following <em>Component Type</em> types:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 16 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">short</span> and <span class="monospaced">ushort</span>)
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+</div>
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_2_block_io_instructions">Add a new Section 7.1.X.2 - Block IO Instructions</h3>
+<div class="paragraph"><p>Because support for <span class="monospaced">cl_intel_subgroups</span> is required for <span class="monospaced">cl_intel_spirv_subgroups</span>, if the OpenCL environment supports the extension <span class="monospaced">cl_intel_spirv_subgroups</span> and use of the <span class="monospaced">SPV_INTEL_subgroups</span> extension is declared in the module via <strong>OpExtension</strong>, then the environment must accept the following types for <em>Result</em> and <em>Data</em> for the <strong>SubgroupBufferBlockIOINTEL</strong> and <strong>SubgroupImageBlockIOINTEL</strong> instructions:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars and <strong>OpTypeVectors</strong> with 2, 4, or 8 <em>Component Count</em> components of the following <em>Component Type</em> types:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 32 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">int</span> and <span class="monospaced">uint</span>)
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+<div class="paragraph"><p>Additionally, if the OpenCL environment supports the extension <span class="monospaced">cl_intel_subgroups_short</span>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Scalars and <strong>OpTypeVectors</strong> with 2, 4, or 8 <em>Component Count</em> components of the following <em>Component Type</em> types:
+</p>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeInt</strong> with a <em>Width</em> of 16 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">short</span> and <span class="monospaced">ushort</span>)
+</p>
+</li>
+</ul></div>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <em>Ptr</em>, valid <em>Storage Classes</em> are:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>CrossWorkGroup</strong> (equivalent to the <span class="monospaced">global</span> address space)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <em>Image</em>:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<em>Dim</em> must be <strong>2D</strong>
+</p>
+</li>
+<li>
+<p>
+<em>Depth</em> must be 0 (not a depth image)
+</p>
+</li>
+<li>
+<p>
+<em>Arrayed</em> must be 0 (non-arrayed content)
+</p>
+</li>
+<li>
+<p>
+<em>MS</em> must be 0 (single-sampled content)
+</p>
+</li>
+<li>
+<p>
+(equivalent to <span class="monospaced">image2d_t</span>)
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>For <em>Coordinate</em>, the following types are supported:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>OpTypeVectors</strong> with 2 <em>Component Count</em> components of <em>Component Type</em> <strong>OpTypeInt</strong> with a <em>Width</em> of 32 bits and <em>Signedness</em> of 0 (equivalent to <span class="monospaced">int2</span>)
+</p>
+</li>
+</ul></div>
+</div>
+<div class="sect2">
+<h3 id="_add_a_new_section_7_1_x_3_notes_and_restrictions">Add a new Section 7.1.X.3 - Notes and Restrictions</h3>
+<div class="paragraph"><p>The <strong>SubgroupShuffleINTEL</strong> instructions may be placed within non-uniform control flow and hence do not have to be encountered by all invocations in the subgroup; however, <em>Data</em> may only be shuffled among invocations encountering the <strong>SubgroupShuffleINTEL</strong> instruction. Shuffling <em>Data</em> from an invocation that does not encounter the <strong>SubgroupShuffleINTEL</strong> instruction will produce undefined results.</p></div>
+<div class="paragraph"><p>There is no defined behavior for out-of-range shuffle indices for the <strong>SubgroupShuffleINTEL</strong> instructions.</p></div>
+<div class="paragraph"><p>The <strong>SubgroupBufferBlockIOINTEL</strong> and <strong>SubgroupImageBlockIOINTEL</strong> instructions are only guaranteed to work correctly if placed strictly within uniform control flow within the subgroup. This ensures that if any invocation executes it, all invocations will execute it. If placed elsewhere, behavior is undefined.</p></div>
+<div class="paragraph"><p>There is no defined out-of-range behavior for the <strong>SubgroupBufferBlockIOINTEL</strong> instructions.</p></div>
+<div class="paragraph"><p>The <strong>SubgroupImageBlockIOINTEL</strong> instructions do support bounds checking; however, they bounds-check to the image width in units of <span class="monospaced">uints</span>, not in units of image elements. This means:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+If the image has an <em>Image Format</em> size equal to the size of a <span class="monospaced">uint</span> (four bytes, for example <strong>Rgba8</strong>), the image will be correctly bounds-checked. In this case, out-of-bounds reads will return the edge image element (the equivalent of <strong>ClampToEdge</strong>), and out-of-bounds writes will be ignored.
+</p>
+</li>
+<li>
+<p>
+If the image has an <em>Image Format</em> size less than the size of a <span class="monospaced">uint</span> (such as <strong>R8</strong>), the entire image is addressable; however, bounds checking will occur too late. For this reason, extra care should be taken to avoid out-of-bounds reads and writes, since out-of-bounds reads may return invalid data and out-of-bounds writes may corrupt other images or buffers unpredictably.
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>The following restrictions apply to the <strong>SubgroupBufferBlockIOINTEL</strong> instructions:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+The pointer <em>Ptr</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+<li>
+<p>
+If the pointer <em>Ptr</em> is computed from a kernel argument that is a <span class="monospaced">cl_mem</span> that was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+<li>
+<p>
+If the pointer <em>Ptr</em> is computed from a kernel argument that is a <span class="monospaced">cl_mem</span> that is a sub-buffer, then the <em>origin</em> defining the sub-buffer offset into the <em>buffer</em> must be a multiple of 4 bytes for reads, and must be a multiple of 16 bytes for writes, in addition to the <span class="monospaced">CL_DEVICE_MEM_BASE_ADDR_ALIGN</span> requirements. Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> for the <em>buffer</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+<li>
+<p>
+If the pointer <em>Ptr</em> is computed from an SVM pointer kernel argument, then the SVM pointer kernel argument must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>The following restrictions apply to the <strong>SubgroupImageBlockIOINTEL</strong> instructions:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+The behavior of the <strong>SubgroupImageBlockIOINTEL</strong> instructions is undefined for images with an element size greater than four bytes (such as <strong>Rgba32f</strong>).
+</p>
+</li>
+<li>
+<p>
+When reading or writing a 2D image created from a buffer with the <strong>SubgroupImageBlockIOINTEL</strong> instructions, the image row pitch is required to be a multiple of 64-bytes, in addition to the <span class="monospaced">CL_DEVICE_IMAGE_PITCH_ALIGNMENT</span> requirements.
+</p>
+</li>
+<li>
+<p>
+When reading or writing a 2D image created from a buffer with the <strong>SubgroupImageBlockIOINTEL</strong> instructions, if the buffer is a <span class="monospaced">cl_mem</span> that was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> must be 256-bit (32-byte) aligned.
+</p>
+</li>
+<li>
+<p>
+When reading or writing a 2D image created from a buffer with the <strong>SubgroupImageBlockIOINTEL</strong> instructions, if the buffer is a <span class="monospaced">cl_mem</span> that is a sub-buffer, then the <em>origin</em> must be a multiple of 32-bytes. Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> for the <em>buffer</em> must be 256-bit (32-byte) aligned.
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>The following restrictions apply to the <strong>OpSubgroupImageBlockWriteINTEL</strong> instruction:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Unlike the image block read instruction, which may read from any arbitrary byte offset, the x-component of the byte coordinate for the image block write instruction must be a multiple of four; in other words, the write must begin at a 32-bit boundary. There is no restriction on the y-component of the coordinate.
+</p>
+</li>
+</ul></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_issues">Issues</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_revision_history">Revision History</h2>
+<div class="sectionbody">
+<table class="tableblock frame-all grid-rows"
+style="
+width:100%;
+">
+<col style="width:4%;">
+<col style="width:14%;">
+<col style="width:14%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" >Rev</th>
+<th class="tableblock halign-left valign-top" >Date</th>
+<th class="tableblock halign-left valign-top" >Author</th>
+<th class="tableblock halign-left valign-top" >Changes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2018-10-29</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>Initial revision</strong></p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2018-10-29 11:49:46 PDT
+</div>
+</div>
+</body>
+</html>
diff --git a/extensions/intel/cl_intel_subgroups.html b/extensions/intel/cl_intel_subgroups.html
new file mode 100644
index 0000000..9750409
--- /dev/null
+++ b/extensions/intel/cl_intel_subgroups.html
@@ -0,0 +1,2062 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc 8.6.9">
+<title>cl_intel_subgroups</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+@media screen {
+ body {
+ max-width: 50em; /* approximately 80 characters wide */
+ margin-left: 16em;
+ }
+
+ #toc {
+ position: fixed;
+ top: 0;
+ left: 0;
+ bottom: 0;
+ width: 13em;
+ padding: 0.5em;
+ padding-bottom: 1.5em;
+ margin: 0;
+ overflow: auto;
+ border-right: 3px solid #f8f8f8;
+ background-color: white;
+ }
+
+ #toc .toclevel1 {
+ margin-top: 0.5em;
+ }
+
+ #toc .toclevel2 {
+ margin-top: 0.25em;
+ display: list-item;
+ color: #aaaaaa;
+ }
+
+ #toctitle {
+ margin-top: 0.5em;
+ }
+}
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// toclevels = 1..4.
+toc: function (toclevels) {
+
+ function getText(el) {
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) {
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) {
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) {
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "")
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0)
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () {
+ // Delete existing footnote entries in case we're reloading the footnotes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {};
+ var n = 0;
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note");
+ if (!note) {
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note);
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n;
+ }
+ }
+ if (n == 0)
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) {
+ var timerId;
+
+ function reinstall() {
+ asciidoc.footnotes();
+ if (toclevels) {
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() {
+ clearInterval(timerId);
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500);
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install(1);
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+<h1>cl_intel_subgroups</h1>
+<div id="toc">
+ <div id="toctitle">Table of Contents</div>
+ <noscript><p><b>JavaScript must be enabled in your browser to display the table of contents.</b></p></noscript>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_name_strings">Name Strings</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><span class="monospaced">cl_intel_subgroups</span></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contact">Contact</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel (ben <em>dot</em> ashbaugh <em>at</em> intel <em>dot</em> com)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contributors">Contributors</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel<br>
+Allen Hux, Intel<br>
+Pranayini Gudali, Intel<br>
+Dawid Dominiak, Intel<br>
+Biju George, Intel</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_notice">Notice</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) 2018 Intel Corporation. All rights reserved.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_status">Status</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Final Draft</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_version">Version</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Built On: 2018-11-16<br>
+Revision: 5</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_dependencies">Dependencies</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>OpenCL 1.2 is required.
+Some features (<span class="monospaced">get_enqueued_num_sub_groups()</span> and the <span class="monospaced">sub_group_barrier()</span> function that accepts a memory scope) require OpenCL 2.0.</p></div>
+<div class="paragraph"><p>This extension is written against revision 24 of the OpenCL 2.0 API specification, against revision 24 of the OpenCL 2.0 OpenCL C specification, and against revision 24 of the OpenCL 2.0 extension specification.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_overview">Overview</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The goal of this extension is to allow programmers to improve the performance of their applications by taking advantage of the fact that some work items in a work group execute together as a group (a "subgroup"), and that work items in a subgroup can take advantage of hardware features that are not available to work items in a work group.
+Specifically, this extension is designed to allow work items in a subgroup to share data without the use of local memory and work group barriers, and to utilize specialized hardware to load and store blocks of data.</p></div>
+<div class="paragraph"><p>There is a large amount of overlap between the functionality in this extension and the functionality in the Khronos subgroups extension <span class="monospaced">cl_khr_subgroups</span>, so this extension reuses many of the names, concepts, and functions already described by the <span class="monospaced">cl_khr_subgroups</span> extension.
+The key differences between the Intel subgroups extension and the Khronos subgroups extension are:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+The Khronos subgroups extension requires OpenCL 2.0, but the Intel subgroups extension may be available on OpenCL 1.2 devices.
+</p>
+</li>
+<li>
+<p>
+The Khronos subgroups extension guarantees that subgroups in a work group will make independent forward progress, but the Intel extension does not guarantee that subgroups in a work group will make independent forward progress.
+</p>
+</li>
+<li>
+<p>
+The Intel extension adds a rich set of subgroup "shuffle" functions to allow work items within a work group to interchange data without the use of local memory and work group barriers.
+</p>
+</li>
+<li>
+<p>
+The Intel extension adds a set of subgroup "block read and write" functions to take advantage of specialized hardware to read or write blocks of data from or to buffers or images.
+</p>
+</li>
+<li>
+<p>
+The Intel subgroups extension does not include the subgroup pipes functions that are included as part of the Khronos subgroups extension.
+</p>
+</li>
+<li>
+<p>
+The Intel subgroups extension does not include the device-side kernel query functions for subgroups that are included as part of the Khronos subgroups extension.
+</p>
+</li>
+</ul></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_functions">New API Functions</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+This function is copied unchanged from the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">cl_int</span> <span style="font-weight: bold"><span style="color: #000000">clGetKernelSubGroupInfoKHR</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">cl_kernel</span> kernel<span style="color: #990000">,</span>
+ <span style="color: #008080">cl_device_id</span> device<span style="color: #990000">,</span>
+ <span style="color: #008080">cl_kernel_sub_group_info</span> param_name<span style="color: #990000">,</span>
+ <span style="color: #008080">size_t</span> input_value_size<span style="color: #990000">,</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #009900">void</span><span style="color: #990000">*</span> input_value<span style="color: #990000">,</span>
+ <span style="color: #008080">size_t</span> param_value_size<span style="color: #990000">,</span>
+ <span style="color: #009900">void</span><span style="color: #990000">*</span> param_value<span style="color: #990000">,</span>
+ size_t<span style="color: #990000">*</span> param_value_size_ret<span style="color: #990000">)</span></tt></pre></div></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_enums">New API Enums</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These enums are copied unchanged from the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>Accepted as the <em>param_name</em> parameter of <strong>clGetKernelSubGroupInfoKHR</strong>:</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt>CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR <span style="color: #993399">0x2033</span>
+CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR <span style="color: #993399">0x2034</span></tt></pre></div></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_opencl_c_functions">New OpenCL C Functions</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These built-in functions are copied unchanged from the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_sub_group_size</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">);</span>
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_max_sub_group_size</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">);</span>
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_num_sub_groups</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">);</span>
+
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_sub_group_id</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">);</span>
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_sub_group_local_id</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">);</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_barrier</span></span><span style="color: #990000">(</span> <span style="color: #008080">cl_mem_fence_flags</span> flags <span style="color: #990000">);</span>
+
+<span style="color: #009900">int</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_all</span></span><span style="color: #990000">(</span> <span style="color: #009900">int</span> predicate <span style="color: #990000">);</span>
+<span style="color: #009900">int</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_any</span></span><span style="color: #990000">(</span> <span style="color: #009900">int</span> predicate <span style="color: #990000">);</span></tt></pre></div></div>
+<div class="paragraph"><p>If OpenCL 2.0 is supported:</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_enqueued_num_sub_groups</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_barrier</span></span><span style="color: #990000">(</span> <span style="color: #008080">cl_mem_fence_flags</span> flags<span style="color: #990000">,</span> <span style="color: #008080">memory_scope</span> scope <span style="color: #990000">);</span></tt></pre></div></div>
+<div class="paragraph"><p>For the sub_group_broadcast functions, <span class="monospaced">gentype</span> is <span class="monospaced">int</span>, <span class="monospaced">uint</span>, <span class="monospaced">long</span>, <span class="monospaced">ulong</span>, or <span class="monospaced">float</span>.</p></div>
+<div class="paragraph"><p>If cl_khr_fp16 is supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">half</span>.</p></div>
+<div class="paragraph"><p>If cl_khr_fp64 or doubles are supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">double</span>.</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_broadcast</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x<span style="color: #990000">,</span> <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">);</span></tt></pre></div></div>
+<div class="paragraph"><p>For the sub_group_reduce, sub_group_scan_exclusive, and sub_group_scan_inclusive functions, <span class="monospaced">gentype</span> is <span class="monospaced">int</span>, <span class="monospaced">uint</span>, <span class="monospaced">long</span>, <span class="monospaced">ulong</span>, or <span class="monospaced">float</span>.</p></div>
+<div class="paragraph"><p>If cl_khr_fp16 is supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">half</span>.</p></div>
+<div class="paragraph"><p>If cl_khr_fp64 or doubles are supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">double</span>.</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x<span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x<span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x<span style="color: #990000">)</span></tt></pre></div></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+These built-in functions are unique to the Intel subgroups extension and are not part of the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>For the intel_sub_group_shuffle, intel_sub_group_shuffle_down, intel_sub_group_shuffle_up, and intel_sub_group_shuffle_xor functions, <span class="monospaced">gentype</span> is <span class="monospaced">float</span>, <span class="monospaced">float2</span>, <span class="monospaced">float4</span>, <span class="monospaced">float8</span>, <span class="monospaced">float16</span>, <span class="monospaced">int</span>, <span class="monospaced">int2</span>, <span class="monospaced">int4</span>, <span class="monospaced">int8</span>, <span class="monospaced">int16</span>, <span class="monospaced">uint</span>, <span class="monospaced">uint2</span>, <span class="monospaced">uint4</span>, <span class="monospaced">uint8</span>, <span class="monospaced">uint16</span>, <span class="monospaced">long</span>, or <span class="monospaced">ulong</span>.</p></div>
+<div class="paragraph"><p>If cl_khr_fp16 is supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">half</span>.</p></div>
+<div class="paragraph"><p>If cl_khr_fp64 or doubles are supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">double</span>.</p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> data<span style="color: #990000">,</span> <span style="color: #008080">uint</span> c <span style="color: #990000">);</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_down</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> current<span style="color: #990000">,</span> <span style="color: #008080">gentype</span> next<span style="color: #990000">,</span> <span style="color: #008080">uint</span> delta <span style="color: #990000">);</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_up</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> previous<span style="color: #990000">,</span> <span style="color: #008080">gentype</span> current<span style="color: #990000">,</span> <span style="color: #008080">uint</span> delta <span style="color: #990000">);</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_xor</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> data<span style="color: #990000">,</span> <span style="color: #008080">uint</span> value <span style="color: #990000">);</span></tt></pre></div></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">);</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read2</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">);</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read4</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">);</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read8</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">);</span>
+
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">);</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read2</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">);</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read4</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">);</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read8</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">);</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write2</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write4</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write8</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">);</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write2</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write4</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">);</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write8</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">);</span></tt></pre></div></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_api_specification">Modifications to the OpenCL API Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_modifications_to_section_2_glossary">Modifications to Section 2 - "Glossary"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add memory_scope_sub_group to the description of Memory Scopes:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Memory Scopes
+</dt>
+<dd>
+<p>
+Memory scopes define a hierarchy of visibilities when analyzing the ordering constraints of memory operations.
+They are defined by the values of the <span class="monospaced">memory_scope</span> enumeration constant.
+Current values are <span class="monospaced">memory_scope_work_item</span> (memory constraints only apply to a single work item and in practice only apply to image operations), <span class="monospaced">memory_scope_sub_group</span> (memory-ordering constraints only apply to work items executing in a subgroup), <span class="monospaced">memory_scope_work_group</span> …
+</p>
+</dd>
+</dl></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+Add memory_scope_sub_group to the description of Scope inclusion:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Scope inclusion
+</dt>
+<dd>
+<p>
+Two actions <strong>A</strong> and <strong>B</strong> are defined to have an inclusive scope if they have the same scope <strong>P</strong> such that: (1) if <strong>P</strong> is <span class="monospaced">memory_scope_sub_group</span>, and <strong>A</strong> and <strong>B</strong> are executed by work items within the same subgroup, or (2) if <strong>P</strong> is <span class="monospaced">memory_scope_work_group</span>, and <strong>A</strong> and <strong>B</strong> are executed by work items within the same workgroup …
+</p>
+</dd>
+</dl></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+Change the description for Subgroups to:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Subgroup
+</dt>
+<dd>
+<p>
+Subgroups are an implementation-dependent grouping of work items within a
+work group.
+The size and number of subgroups is implementation-defined and not exposed in the core OpenCL 2.0 feature set.
+Subgroups execute concurrently within a work group, but are not guaranteed to make independent forward progress.
+Subgroups may synchronize internally using subgroup barrier operations without synchronizing with other subgroups.
+</p>
+</dd>
+</dl></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_modifications_to_section_3_2_1_execution_model_mapping_work_items_onto_an_ndrange">Modifications to Section 3.2.1 - "Execution Model: Mapping Work Items Onto an NDRange"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Change the paragraph describing subgroups to:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>An implementation of OpenCL may divide each work group into one or more subgroups.
+The size and number of subgroups is implementation-defined and not exposed in the
+core OpenCL 2.0 feature set.</p></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_modifications_to_section_3_2_2_execution_model_execution_of_kernel_instances">Modifications to Section 3.2.2 - "Execution Model: Execution of Kernel Instances"</h3>
+<div class="paragraph"><p>Remove the last paragraph describing subgroups and independent forward progress.</p></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_3_2_execution_model">Additions to Section 3.2 - "Execution Model"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+This text is largely the same as the text in the Khronos subgroups extension. Only the sentence about independent forward progress has been modified:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>Within a work group, work items may be divided into subgroups in an
+implementation-defined fashion. The mapping of work items to subgroups is implementation-defined
+and may be queried at runtime. While subgroups may be used in multi-dimensional
+work groups, each subgroup is 1-dimensional and any given work item may query which
+subgroup it is a member of.</p></div>
+<div class="paragraph"><p>Work items are mapped into subgroups through a combination of compile-time decisions
+and the parameters of the dispatch. The mapping to subgroups is invariant for the
+duration of a kernel’s execution, across dispatches of a given kernel with the same
+launch parameters, and from one work group to another within the dispatch (excluding
+the trailing edge work groups in the presence of non-uniform work group sizes). In
+addition, all subgroups within a work group will be the same size, apart from the
+subgroup with the maximum index, which may be smaller if the size of the work group
+is not evenly divisible by the size of the subgroups.</p></div>
+<div class="paragraph"><p>Subgroups execute concurrently within a given work group. Similar to work items
+within a work group, subgroups executing within a work group are not guaranteed to make
+independent forward progress. Work items in a subgroup can internally synchronize
+using subgroup barrier operations without synchronizing with other subgroups.</p></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_3_3_4_memory_model_memory_consistency_model">Additions to Section 3.3.4 - "Memory Model: Memory Consistency Model"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add memory_scope_sub_group to the bulleted descriptions of memory scopes:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="ulist"><ul>
+<li>
+<p>
+<span class="monospaced">memory_scope_sub_group</span>: memory-ordering constraints only apply to work items executing within a single subgroup.
+</p>
+</li>
+<li>
+<p>
+<span class="monospaced">memory_scope_work_group</span>: …
+</p>
+</li>
+</ul></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+In the paragraph after the bulleted descriptions of memory scopes, include memory_scope_sub_group as a valid memory scope for local memory:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>... For local memory, <span class="monospaced">memory_scope_sub_group</span> and <span class="monospaced">memory_scope_work_group</span> are valid, and may constrain visibility to the subgroup or workgroup.</p></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_3_3_5_memory_model_overview_of_atomic_and_fence_operations">Additions to Section 3.3.5 - "Memory Model: Overview of atomic and fence operations"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add memory_scope_sub_group to the definition of inclusive scope:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="ulist"><ul>
+<li>
+<p>
+<strong>P</strong> is <span class="monospaced">memory_scope_sub_group</span> and <strong>A</strong> and <strong>B</strong> are executed by work items within the same subgroup.
+</p>
+</li>
+<li>
+<p>
+<strong>P</strong> is <span class="monospaced">memory_scope_work_group</span> …
+</p>
+</li>
+</ul></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_5_9_3_kernel_object_queries">Additions to Section 5.9.3 - "Kernel Object Queries"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+This addition is copied unchanged from the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>The function</p></div>
+<div class="paragraph"><p></p></div>
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">cl_int</span> <span style="font-weight: bold"><span style="color: #000000">clGetKernelSubGroupInfoKHR</span></span><span style="color: #990000">(</span><span style="color: #008080">cl_kernel</span> kernel<span style="color: #990000">,</span>
+ <span style="color: #008080">cl_device_id</span> device<span style="color: #990000">,</span>
+ <span style="color: #008080">cl_kernel_sub_group_info</span> param_name<span style="color: #990000">,</span>
+ <span style="color: #008080">size_t</span> input_value_size<span style="color: #990000">,</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #009900">void</span> <span style="color: #990000">*</span>input_value<span style="color: #990000">,</span>
+ <span style="color: #008080">size_t</span> param_value_size<span style="color: #990000">,</span>
+ <span style="color: #009900">void</span> <span style="color: #990000">*</span>param_value<span style="color: #990000">,</span>
+ <span style="color: #008080">size_t</span> <span style="color: #990000">*</span>param_value_size_ret<span style="color: #990000">)</span></tt></pre></div></div>
+<div class="paragraph"><p>returns information about the kernel object.</p></div>
+<div class="paragraph"><p><em>kernel</em> specifies the kernel object being queried.</p></div>
+<div class="paragraph"><p><em>device</em> identifies a specific device in the list of devices associated with
+<em>kernel</em>.
+The list of devices is the list of devices in the OpenCL context that is
+associated with <em>kernel</em>.
+If the list of devices associated with <em>kernel</em> is a single device, <em>device</em>
+can be a <span class="monospaced">NULL</span> value.</p></div>
+<div class="paragraph"><p><em>param_name</em> specifies the information to query.
+The list of supported <em>param_name</em> types and the information returned in
+<em>param_value</em> by <strong>clGetKernelSubGroupInfoKHR</strong> is described in the table below.</p></div>
+<div class="paragraph"><p><em>input_value_size</em> is used to specify the size in bytes of memory pointed to
+by <em>input_value</em>.
+This size must be equal to the size of the input type as described in the table below.</p></div>
+<div class="paragraph"><p><em>input_value</em> is a pointer to memory where the appropriate parameterization
+of the query is passed from.
+If <em>input_value</em> is <span class="monospaced">NULL</span>, it is ignored.</p></div>
+<div class="paragraph"><p><em>param_value</em> is a pointer to memory where the appropriate result being
+queried is returned.
+If <em>param_value</em> is <span class="monospaced">NULL</span>, it is ignored.</p></div>
+<div class="paragraph"><p><em>param_value_size</em> is used to specify the size in bytes of memory pointed to
+by <em>param_value</em>.
+This size must be greater than or equal to the size of the return type as described in the
+table below.</p></div>
+<div class="paragraph"><p><em>param_value_size_ret</em> returns the actual size in bytes of data being
+queried by <em>param_name</em>.
+If <em>param_value_size_ret</em> is <span class="monospaced">NULL</span>, it is ignored.</p></div>
+<table class="tableblock frame-all grid-all" id="cl_khr_subgroups-kernel-subgroup-info-table"
+style="
+width:100%;
+">
+<caption class="title">Table 1. <strong>clGetKernelSubGroupInfoKHR</strong> parameter queries</caption>
+<col style="width:25%;">
+<col style="width:25%;">
+<col style="width:25%;">
+<col style="width:25%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>cl_kernel_sub_group_info</strong> </th>
+<th class="tableblock halign-left valign-top" > Input Type </th>
+<th class="tableblock halign-left valign-top" > Return Type </th>
+<th class="tableblock halign-left valign-top" > Info. returned in <em>param_value</em></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR</strong></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">size_t *</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">size_t</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the maximum sub-group size for this kernel.
+ All sub-groups must be the same size, while the last subgroup in
+ any work-group (i.e. the subgroup with the maximum index) could
+ be the same or smaller size.</p>
+<p class="tableblock"> The <em>input_value</em> must be an array of size_t values
+ corresponding to the local work size parameter of the intended
+ dispatch.
+ The number of dimensions in the ND-range will be inferred from
+ the value specified for <em>input_value_size</em>.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR</strong></p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">size_t *</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">size_t</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the number of sub-groups that will be present in each
+ work-group for a given local work size.
+ All workgroups, apart from the last work-group in each dimension
+ in the presence of non-uniform work-group sizes, will have the
+ same number of sub-groups.</p>
+<p class="tableblock"> The <em>input_value</em> must be an array of size_t values
+ corresponding to the local work size parameter of the intended
+ dispatch.
+ The number of dimensions in the ND-range will be inferred from
+ the value specified for <em>input_value_size</em>.</p></td>
+</tr>
+</tbody>
+</table>
+<div class="paragraph"><p><strong>clGetKernelSubGroupInfoKHR</strong> returns CL_SUCCESS if the function is executed
+successfully.
+Otherwise, it returns one of the following errors:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+<span class="monospaced">CL_INVALID_DEVICE</span> if <em>device</em> is not in the list of devices associated
+ with <em>kernel</em> or if <em>device</em> is <span class="monospaced">NULL</span> but there is more than one device
+ associated with <em>kernel</em>.
+</p>
+</li>
+<li>
+<p>
+<span class="monospaced">CL_INVALID_VALUE</span> if <em>param_name</em> is not valid, or if the size in bytes
+ specified by <em>param_value_size</em> is less than the size of the return type as described in
+ the table above and <em>param_value</em> is not <span class="monospaced">NULL</span>.
+</p>
+</li>
+<li>
+<p>
+<span class="monospaced">CL_INVALID_VALUE</span> if <em>param_name</em> is
+ <span class="monospaced">CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR</span> and the size in bytes specified by
+ <em>input_value_size</em> is not valid or if <em>input_value</em> is <span class="monospaced">NULL</span>.
+</p>
+</li>
+<li>
+<p>
+<span class="monospaced">CL_INVALID_KERNEL</span> if <em>kernel</em> is not a valid kernel object.
+</p>
+</li>
+<li>
+<p>
+<span class="monospaced">CL_OUT_OF_RESOURCES</span> if there is a failure to allocate resources required
+ by the OpenCL implementation on the device.
+</p>
+</li>
+<li>
+<p>
+<span class="monospaced">CL_OUT_OF_HOST_MEMORY</span> if there is a failure to allocate resources
+ required by the OpenCL implementation on the host.
+</p>
+</li>
+</ul></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_c_specification">Modifications to the OpenCL C Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_additions_to_section_6_13_1_work_item_functions">Additions to Section 6.13.1 - "Work Item Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These additions are copied unchanged from the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:50%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_sub_group_size</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the number of work items in the subgroup.
+This value is no more than the maximum subgroup size and is implementation-defined based on a combination of the compiled kernel and the dispatch dimensions.
+This will be a constant value for the lifetime of the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_max_sub_group_size</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the maximum size of a subgroup within the dispatch.
+This value will be invariant for a given set of dispatch dimensions and a kernel object compiled for a given device.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_num_sub_groups</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the number of subgroups that the current work group is divided into.</p>
+<p class="tableblock">This number will be constant for the duration of a work group’s execution.
+If the kernel is executed with a non-uniform work group size in any dimension, calls to this built-in may return different values for some work groups than for other work groups.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_sub_group_id</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the subgroup ID, which is a number from zero to <strong>get_num_sub_groups</strong> - 1.</p>
+<p class="tableblock">For <strong>clEnqueueTask</strong>, this returns 0.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_sub_group_local_id</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the unique work item ID within the current subgroup.
+The mapping from <strong>get_local_id</strong> to <strong>get_sub_group_local_id</strong> will be invariant for the lifetime of the work group.</p></td>
+</tr>
+</tbody>
+</table>
+<div class="paragraph"><p>If OpenCL 2.0 is supported:</p></div>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:50%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">get_enqueued_num_sub_groups</span></span><span style="color: #990000">(</span> <span style="color: #009900">void</span> <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the same value as that returned by <strong>get_num_sub_groups</strong> if the kernel is executed with a uniform work group size. This value will be constant for the entire NDRange.</p>
+<p class="tableblock">If the kernel is executed with a non-uniform work group size, returns the number of subgroups in a work group that makes up the uniform region of the global NDRange.</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_6_13_8_synchronization_functions">Additions to Section 6.13.8 - "Synchronization Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These additions are mostly unchanged from the Khronos subgroups extension, with only minor edits for clarity:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:50%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_barrier</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">cl_mem_fence_flags</span> flags <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">All work items in a subgroup executing the kernel on a processor must execute this function before any are allowed to continue execution beyond the subgroup barrier.
+This function must be encountered by all work items in a subgroup executing the kernel.
+These rules apply to NDRanges implemented with uniform and non-uniform work groups.</p>
+<p class="tableblock">If <strong>sub_group_barrier</strong> is inside a conditional statement then all work items within the subgroup must enter the conditional if any work item in the subgroup enters the conditional statement and executes the <strong>sub_group_barrier</strong>.</p>
+<p class="tableblock">If <strong>sub_group_barrier</strong> is inside a loop, all work items within the subgroup must execute the <strong>sub_group_barrier</strong> for each iteration of the loop before any are allowed to continue execution beyond the <strong>sub_group_barrier</strong>.</p>
+<p class="tableblock">The <strong>sub_group_barrier</strong> function also queues a memory fence (reads and writes) to ensure correct ordering of memory operations to local or global memory.</p>
+<p class="tableblock">The flags argument specifies the memory address space and can be set to a combination of the following values:</p>
+<p class="tableblock"><span class="monospaced">CLK_LOCAL_MEM_FENCE</span> - The <strong>sub_group_barrier</strong> function will either flush any variables stored in local memory or queue a memory fence to ensure correct ordering of memory operations to local memory.</p>
+<p class="tableblock"><span class="monospaced">CLK_GLOBAL_MEM_FENCE</span> - The <strong>sub_group_barrier</strong> function will queue a memory fence to ensure correct ordering of memory operations to global memory.
+This can be useful when work items, for example, write to buffer objects and then want to read the updated data from these buffer objects.</p></td>
+</tr>
+</tbody>
+</table>
+<div class="paragraph"><p>If OpenCL 2.0 is supported, add the following to the table above:</p></div>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:50%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_barrier</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">cl_mem_fence_flags</span> flags<span style="color: #990000">,</span>
+ <span style="color: #008080">memory_scope</span> scope <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">…</p>
+<p class="tableblock">The <strong>sub_group_barrier</strong> function also supports a variant that specifies the memory scope.
+For the <strong>sub_group_barrier</strong> variant that does not take a memory scope, the scope is <span class="monospaced">memory_scope_sub_group</span>.</p>
+<p class="tableblock">The scope argument specifies whether the memory accesses of work items in the subgroup to memory address space(s) identified by flags become visible to all work items in the subgroup, the work group, the device, or all SVM devices.</p>
+<p class="tableblock">…</p>
+<p class="tableblock"><span class="monospaced">CLK_IMAGE_MEM_FENCE</span> - The <strong>sub_group_barrier</strong> function will queue a memory fence to ensure correct ordering of memory operations to image objects. This can be useful when work items, for example, write to image objects and then want to read the updated data from these image objects.</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_6_13_11_atomic_functions">Additions to Section 6.13.11 - "Atomic Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Modify the bullet describing behavior for functions that do not have a memory_scope argument to say:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="ulist"><ul>
+<li>
+<p>
+The subgroup functions that do not have a <em>memory_scope</em> argument have the same semantics as the corresponding functions with the <em>memory_scope</em> argument set to <span class="monospaced">memory_scope_sub_group</span>.
+Other functions that do not have a <em>memory_scope</em> argument have the same semantics as the corresponding functions with the <em>memory_scope</em> argument set to <span class="monospaced">memory_scope_device</span>.
+</p>
+</li>
+</ul></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+The following addition is copied unchanged from the Khronos subgroups extension:
+</dt>
+<dt class="hdlist1">
+Add the following new value to the enumerated type memory_scope defined in Section 6.13.11.4:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content monospaced">
+<pre>memory_scope_sub_group</pre>
+</div></div>
+<div class="paragraph"><p>The <span class="monospaced">memory_scope_sub_group</span> specifies that the memory ordering constraints
+given by <span class="monospaced">memory_order</span> apply to work items in a subgroup.
+This memory scope can be used when performing atomic operations to global or
+local memory.</p></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_6_13_15_work_group_functions">Additions to Section 6.13.15 - "Work Group Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These additions are copied from the Khronos subgroups extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>The OpenCL C programming language implements the following built-in
+functions that operate on a subgroup level.
+These built-in functions must be encountered by all work items in a subgroup
+executing the kernel.
+We use the generic type name <span class="monospaced">gentype</span> to indicate the built-in data types
+<span class="monospaced">int</span>, <span class="monospaced">uint</span>, <span class="monospaced">long</span>, <span class="monospaced">ulong</span>, or <span class="monospaced">float</span> as the type for the arguments.</p></div>
+<div class="paragraph"><p>If <span class="monospaced">cl_khr_fp16</span> is supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">half</span>.</p></div>
+<div class="paragraph"><p>If <span class="monospaced">cl_khr_fp64</span> or doubles are supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">double</span>.</p></div>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:66%;">
+<col style="width:33%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">int</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_all</span></span><span style="color: #990000">(</span> <span style="color: #009900">int</span> predicate <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Evaluates <em>predicate</em> for all work items in the subgroup and returns a
+ non-zero value if <em>predicate</em> evaluates to non-zero for all work items in
+ the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">int</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_any</span></span><span style="color: #990000">(</span> <span style="color: #009900">int</span> predicate <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Evaluates <em>predicate</em> for all work items in the subgroup and returns a
+ non-zero value if <em>predicate</em> evaluates to non-zero for any work item in
+ the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_broadcast</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> x<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Broadcasts the value of <em>x</em> for work item identified by <em>sub_group_local_id</em> (value returned by <strong>get_sub_group_local_id</strong>) to all work items in the subgroup.
+<em>sub_group_local_id</em> must be the same value for all work items in the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the result of the specified reduction operation for all values of <em>x</em> specified by work items in a subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Performs the specified exclusive scan operation of all values <em>x</em> specified by work items in a subgroup.
+The scan results are returned for each work item.</p>
+<p class="tableblock">The scan order is defined by increasing subgroup local ID within the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Performs the specified inclusive scan operation of all values <em>x</em> specified by work items in a subgroup.
+The scan results are returned for each work item.</p>
+<p class="tableblock">The scan order is defined by increasing subgroup local ID within the subgroup.</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_add_a_new_section_6_13_x_sub_group_shuffle_functions">Add a new Section 6.13.X - "Sub Group Shuffle Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These are new functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>The OpenCL C programming language implements the following built-in functions to allow data to be exchanged among work items in a subgroup.
+These built-in functions need not be encountered by all work items in a subgroup executing the kernel; however, data may only be shuffled among work items encountering the subgroup shuffle function.
+Shuffling data from a work item that does not encounter the subgroup shuffle function will produce undefined results.
+For these functions, <span class="monospaced">gentype</span> is <span class="monospaced">float</span>, <span class="monospaced">float2</span>, <span class="monospaced">float4</span>, <span class="monospaced">float8</span>, <span class="monospaced">float16</span>, <span class="monospaced">int</span>, <span class="monospaced">int2</span>, <span class="monospaced">int4</span>, <span class="monospaced">int8</span>, <span class="monospaced">int16</span>, <span class="monospaced">uint</span>, <span class="monospaced">uint2</span>, <span class="monospaced">uint4</span>, <span class="monospaced">uint8</span>, <span class="monospaced">uint16</span>, <span class="monospaced">long</span>, or <span class="monospaced">ulong</span>.</p></div>
+<div class="paragraph"><p>If <span class="monospaced">cl_khr_fp16</span> is supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">half</span>.</p></div>
+<div class="paragraph"><p>If <span class="monospaced">cl_khr_fp64</span> or doubles are supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">double</span>.</p></div>
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:50%;">
+<col style="width:50%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> data<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Allows data to be arbitrarily transferred between work items in a subgroup.
+The data that is returned for this work item is the value of <em>data</em> for the work item identified by <em>sub_group_local_id</em>.</p>
+<p class="tableblock"><em>sub_group_local_id</em> need not be the same value for all work items in the subgroup.
+There is no defined behavior for out-of-range <em>sub_group_local_ids</em>.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_down</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> current<span style="color: #990000">,</span>
+ <span style="color: #008080">gentype</span> next<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> delta <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Allows data to be transferred from a work item in the subgroup with a higher sub_group_local_id down to a work item in the subgroup with a lower sub_group_local_id.</p>
+<p class="tableblock">There are two data sources to this built-in function: <em>current</em> and <em>next</em>.
+To determine the result of this built-in function, first let the unsigned shuffle index be equivalent to the sum of this work item’s sub_group_local_id plus the specified <em>delta</em>:</p>
+<p class="tableblock">If the shuffle index is less than the max_sub_group_size, the result of this built-in function is the value of the <em>current</em> data source for the work item with sub_group_local_id equal to the shuffle index.</p>
+<p class="tableblock">If the shuffle index is greater than or equal to the max_sub_group_size but less than twice the max_sub_group_size, the result of this built-in function is the value of the <em>next</em> data source for the work item with sub_group_local_id equal to the shuffle index minus the max_sub_group_size.</p>
+<p class="tableblock">All other values of the shuffle index are considered to be out-of-range.
+There is no defined behavior for out-of-range indices.</p>
+<p class="tableblock"><em>delta</em> need not be the same value for all work items in the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_up</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> previous<span style="color: #990000">,</span>
+ <span style="color: #008080">gentype</span> current<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> delta <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Allows data to be transferred from a work item in the subgroup with a lower sub_group_local_id up to a work item in the subgroup with a higher sub_group_local_id.</p>
+<p class="tableblock">There are two data sources to this built-in function: <em>previous</em> and <em>current</em>.
+To determine the result of this built-in function, first let the signed shuffle index be equivalent to this work item’s sub_group_local_id minus the specified <em>delta</em>:</p>
+<p class="tableblock">If the shuffle index is greater than or equal to zero and less than the max_sub_group_size, the result of this built-in function is the value of the <em>current</em> data source for the work item with sub_group_local_id equal to the shuffle index.</p>
+<p class="tableblock">If the shuffle index is less than zero but greater than or equal to the negative max_sub_group_size, the result of this built-in function is the value of the <em>previous</em> data source for the work item with sub_group_local_id equal to the shuffle index plus the max_sub_group_size.</p>
+<p class="tableblock">All other values of the shuffle index are considered to be out-of-range.
+There is no defined behavior for out-of-range indices.</p>
+<p class="tableblock"><em>delta</em> need not be the same value for all work items in the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_xor</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> data<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> value <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Allows data to be transferred between work items in a subgroup as a function of the work item’s sub_group_local_id.
+The data that is returned for this work item is the value of <em>data</em> for the work item with sub_group_local_id equal to this work item’s sub_group_local_id XOR’d with the specified <em>value</em>.
+If the result of the XOR is greater than or equal to max_sub_group_size then it is considered out-of-range.</p>
+<p class="tableblock"><em>value</em> need not be the same for all work items in the subgroup.
+There is no defined behavior for out-of-range indices.</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_add_a_new_section_6_13_x_sub_group_read_and_write_functions">Add a new Section 6.13.X - "Sub Group Read and Write Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+These are new functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:55%;">
+<col style="width:44%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" ><strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" ><strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read2</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read4</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read8</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Reads 1, 2, 4, or 8 uints of data for each work item in the subgroup from the specified pointer as a block operation.
+The data is read strided, so the first value read is:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id ]</span></p>
+<p class="tableblock">and the second value read is:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id + max_sub_group_size ]</span></p>
+<p class="tableblock">etc.</p>
+<p class="tableblock"><em>p</em> must be aligned to a 32-bit (4-byte) boundary.</p>
+<p class="tableblock">There is no defined out-of-range behavior for these functions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Reads 1, 2, 4, or 8 uints of data for each work item in the subgroup from the specified <em>image</em> at the specified coordinate as a block operation.
+Note that the coordinate is a byte coordinate, not an image element coordinate.
+Also note that the image data is read without format conversion, so each work item may read multiple image elements
+(for images with element size smaller than 32-bits).</p>
+<p class="tableblock">The data is read row-by-row, so the first value read is from the row specified in the y-component of the provided <em>byte_coord</em>, the second value is read from the row specified in the y-component of the provided <em>byte_coord</em> plus one, etc.</p>
+<p class="tableblock">Please see the note below describing out-of-bounds behavior for these functions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Writes 1, 2, 4, or 8 uints of data for each work item in the subgroup to the specified pointer as a block operation.
+The data is written strided, so the first value is written to:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id ]</span></p>
+<p class="tableblock">and the second value is written to:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id + max_sub_group_size ]</span></p>
+<p class="tableblock">etc.</p>
+<p class="tableblock"><em>p</em> must be aligned to a 128-bit (16-byte) boundary.</p>
+<p class="tableblock">There is no defined out-of-range behavior for these functions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Writes 1, 2, 4, or 8 uints of data for each work item in the subgroup to the specified <em>image</em> at the specified coordinate as a block operation.
+Note that the coordinate is a byte coordinate, not an image element coordinate.
+Unlike the image block read functions, which may read from any arbitrary byte offset, the x-component of the byte coordinate for the image block write functions must be a multiple of four;
+in other words, the write must begin at a 32-bit boundary.
+There is no restriction on the y-component of the coordinate.
+Also, note that the image <em>data</em> is written without format conversion, so each work item may write multiple image elements (for images with element size smaller than 32-bits).</p>
+<p class="tableblock">The data is written row-by-row, so the first value is written to the row specified by the y-component of the provided <em>byte_coord</em>, the second value is written to the row specified by the y-component of the provided <em>byte_coord</em> plus one, etc.</p>
+<p class="tableblock">Please see the note below describing out-of-bounds behavior for these functions.</p></td>
+</tr>
+</tbody>
+</table>
+<div class="paragraph"><p>Note: The subgroup image block read and write built-ins do support bounds checking; however, these built-ins bounds-check to the image width in units of uints, not in units of image elements.
+This means:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+If the image has an element size equal to the size of a uint (four bytes, for example <span class="monospaced">CL_RGBA</span> + <span class="monospaced">CL_UNORM_INT8</span>), the image will be correctly bounds-checked.
+In this case, out-of-bounds reads will return the edge image element (the equivalent of <span class="monospaced">CLK_ADDRESS_CLAMP_TO_EDGE</span>), and out-of-bounds writes will be ignored.
+</p>
+</li>
+<li>
+<p>
+If the image has an element size less than the size of a uint (such as <span class="monospaced">CL_R</span> + <span class="monospaced">CL_UNSIGNED_INT8</span>), the entire image is addressable; however, bounds checking will occur too late.
+For this reason, extra care should be taken to avoid out-of-bounds reads and writes, since out-of-bounds reads may return invalid data and out-of-bounds writes may corrupt other images or buffers unpredictably.
+</p>
+</li>
+</ul></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+Add a new sub-section 6.13.X.1 - Restrictions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>The following restrictions apply to the subgroup buffer block read and write functions:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+The pointer <em>p</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+<li>
+<p>
+If the pointer <em>p</em> is computed from a kernel argument that is a cl_mem that was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+<li>
+<p>
+If the pointer <em>p</em> is computed from a kernel argument that is a cl_mem that is a sub-buffer, then the <em>origin</em> defining the sub-buffer offset into the <em>buffer</em> must be a multiple of 4 bytes for reads, and must be a multiple of 16 bytes for writes, in addition to the <span class="monospaced">CL_DEVICE_MEM_BASE_ADDR_ALIGN</span> requirements.
+Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> for the <em>buffer</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+<li>
+<p>
+If the pointer <em>p</em> is computed from an SVM pointer kernel argument, then the SVM pointer kernel argument must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>The following restrictions apply to the subgroup image block read and write functions:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+The behavior of the subgroup image block read and write built-ins is undefined for images with an element size greater than four bytes (such as <span class="monospaced">CL_RGBA</span> + <span class="monospaced">CL_FLOAT</span>).
+</p>
+</li>
+<li>
+<p>
+When reading or writing a 2D image created from a buffer with the subgroup block read and write built-ins, the image row pitch is required to be a multiple of 64-bytes, in addition to the <span class="monospaced">CL_DEVICE_IMAGE_PITCH_ALIGNMENT</span> requirements.
+</p>
+</li>
+<li>
+<p>
+When reading or writing a 2D image created from a buffer with the subgroup block read and write built-ins, if the buffer is a cl_mem that was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> must be 256-bit (32-byte) aligned.
+</p>
+</li>
+<li>
+<p>
+When reading or writing a 2D image created from a buffer with the subgroup block read and write built-ins, if the buffer is a cl_mem that is a sub-buffer, then the <em>origin</em> must be a multiple of 32-bytes.
+Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <span class="monospaced">CL_MEM_USE_HOST_PTR</span>, then the <em>host_ptr</em> for the <em>buffer</em> must be 256-bit (32-byte) aligned.
+</p>
+</li>
+</ul></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_issues">Issues</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_revision_history">Revision History</h2>
+<div class="sectionbody">
+<table class="tableblock frame-all grid-rows"
+style="
+width:100%;
+">
+<col style="width:4%;">
+<col style="width:14%;">
+<col style="width:14%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" >Rev</th>
+<th class="tableblock halign-left valign-top" >Date</th>
+<th class="tableblock halign-left valign-top" >Author</th>
+<th class="tableblock halign-left valign-top" >Changes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2014-12-01</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>First public revision.</strong></p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2015-03-12</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Fixed minor formatting errors, added restriction for subgroup image block read and write built-ins with large image formats.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">3</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2016-02-12</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Fixed a small bug in the shuffle up and shuffle down descriptions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">4</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2016-08-28</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Added additional restrictions and programming notes for the subgroup shuffle and block read built-ins.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">5</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2018-11-15</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Converted to asciidoc.</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2018-11-16 09:14:13 PST
+</div>
+</div>
+</body>
+</html>
diff --git a/extensions/intel/cl_intel_subgroups.txt b/extensions/intel/cl_intel_subgroups.txt
index 98c6ed3..710e0e9 100644
--- a/extensions/intel/cl_intel_subgroups.txt
+++ b/extensions/intel/cl_intel_subgroups.txt
@@ -1,877 +1,11 @@
-Name String
+The cl_intel_subgroups extension is no longer authored in plain text.
- cl_intel_subgroups
+Please refer to the HTML extension specification instead,
+which may be found here:
-Contributors
+https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.html
- Ben Ashbaugh, Intel
- Allen Hux, Intel
- Pranayini Gudali, Intel
- Dawid Dominiak, Intel
- Biju George, Intel
+The asciidoc source for this extension specification may
+be found here:
-Contact
-
- Ben Ashbaugh, Intel (ben.ashbaugh 'at' intel.com)
-
-Version
-
- Version 4, August 28, 2016
-
-Number
-
- OpenCL Extension #35
-
-Status
-
- Final Draft
-
-Dependencies
-
- OpenCL 1.2 is required. Some features (get_num_enqueued_sub_groups() and
- the sub_group_barrier() function that accept a memory scope) require OpenCL
- 2.0.
-
- This extension is written against revision 24 of the OpenCL 2.0 API
- specification, against revision 24 of the OpenCL 2.0 OpenCL C specification,
- and against revision 24 of the OpenCL 2.0 extension specification.
-
-Overview
-
- The goal of this extension is to allow programmers to improve the performance
- of their applications by taking advantage of the fact that some work items in a
- work group execute together as a group (a "subgroup"), and that work items in a
- subgroup can take advantage of hardware features that are not available to work
- items in a work group. Specifically, this extension is designed to allow work
- items in a subgroup to share data without the use of local memory and work group
- barriers, and to utilize specialized hardware to load and store blocks of data.
-
- There is a large amount of overlap between the functionality in this extension
- and the functionality in the Khronos OpenCL 2.0 "cl_khr_subgroups" extension, so
- this extension reuses many of the names, concepts, and functions already described
- in the cl_khr_subgroups extension. The key differences between the Intel
- subgroups extension and the Khronos subgroups extension are:
-
- * The Khronos subgroups extension requires OpenCL 2.0, but the Intel subgroups
- extension may be available on OpenCL 1.2 devices.
-
- * The Khronos subgroups extension guarantees that subgroups in a work group
- will make independent forward progress, but the Intel extension does not
- guarantee that subgroups in a work group will make independent forward
- progress.
-
- * The Intel extension adds a rich set of subgroup "shuffle" functions to
- allow work items within a work group to interchange data without the use
- of local memory and work group barriers.
-
- * The Intel extension adds a set of subgroup "block read and write" functions
- to take advantage of specialized hardware to read or write blocks of data
- from or to buffers or images.
-
- * The Intel subgroups extension does not include the subgroup pipes functions
- that are included as part of the Khronos subgroups extension.
-
- * The Intel subgroups extension does not include the device-side kernel query
- functions for subgroups that are included as part of the Khronos subgroups
- extension.
-
-New API Functions
-
- This function is copied unchanged from the Khronos subgroups extension:
-
- cl_int clGetKernelSubGroupInfoKHR(
- cl_kernel kernel,
- cl_device_id device,
- cl_kernel_sub_group_info param_name,
- size_t input_value_size,
- const void* input_value,
- size_t param_value_size,
- void* param_value,
- size_t* param_value_size_ret)
-
-New API Enums
-
- These enums are copied unchanged from the Khronos subgroups extension:
-
- Accepted as the <param_name> parameter of clGetKernelSubGroupInfoKHR.
-
- CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
- CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
-
-New OpenCL C Functions
-
- These built-in functions are copied unchanged from the Khronos subgroups
- extension:
-
- uint get_sub_group_size( void );
- uint get_max_sub_group_size( void );
- uint get_num_sub_groups( void );
-
- uint get_sub_group_id( void );
- uint get_sub_group_local_id( void );
-
- void sub_group_barrier( cl_mem_fence_flags flags );
-
- int sub_group_all( int predicate );
- int sub_group_any( int predicate );
-
- If OpenCL 2.0 is supported:
-
- uint get_enqueued_num_sub_groups( void );
- void sub_group_barrier( cl_mem_fence_flags flags, memory_scope scope );
-
- For the sub_group_broadcast functions, <gentype> is <int>, <uint>,
- <long>, <ulong>, or <float>.
-
- If cl_khr_fp16 is supported, <gentype> also includes <half>.
- If cl_khr_fp64 or doubles are supported, <gentype> also includes <double>.
-
- <gentype> sub_group_broadcast( <gentype> x, uint sub_group_local_id );
-
- For the sub_group_reduce, sub_group_scan_exclusive, and
- sub_group_scan_inclusive functions, <gentype> is <int>, <uint>, <long>,
- <ulong>, or <float>. <op> is <add>, <min>, or <max>.
-
- If cl_khr_fp16 is supported, <gentype> also includes <half>.
- If cl_khr_fp64 or doubles are supported, <gentype> also includes <double>.
-
- <gentype> sub_group_reduce_<op>( <gentype> x );
- <gentype> sub_group_scan_exclusive_<op>( <gentype> x );
- <gentype> sub_group_scan_inclusive_<op>( <gentype> x );
-
- These built-in functions are unique to the Intel subgroups extension and are not
- part of the Khronos subgroups extension:
-
- For the sub_group_shuffle, sub_group_shuffle_down, sub_group_shuffle_up, and
- sub_group_shuffle_xor functions, <gentype> is <float>, <float2>, <float4>,
- <float8>, <float16>, <int>, <int2>, <int4>, <int8>, <int16>, <uint>, <uint2>,
- <uint4>, <uint8>, <uint16>, <long>, or <ulong>.
-
- If cl_khr_fp16 is supported, <gentype> also includes <half>.
- If cl_khr_fp64 or doubles are supported, <gentype> also includes <double>.
-
- <gentype> intel_sub_group_shuffle( <gentype> data, uint c );
- <gentype> intel_sub_group_shuffle_down(
- <gentype> current, <gentype> next, uint delta );
- <gentype> intel_sub_group_shuffle_up(
- <gentype> previous, <gentype> current, uint delta );
- <gentype> intel_sub_group_shuffle_xor( <gentype> data, uint value );
-
-
- uint intel_sub_group_block_read( const __global uint* p );
- uint2 intel_sub_group_block_read2( const __global uint* p );
- uint4 intel_sub_group_block_read4( const __global uint* p );
- uint8 intel_sub_group_block_read8( const __global uint* p );
-
- uint intel_sub_group_block_read( image2d_t image, int2 byte_coord );
- uint2 intel_sub_group_block_read2( image2d_t image, int2 byte_coord );
- uint4 intel_sub_group_block_read4( image2d_t image, int2 byte_coord );
- uint8 intel_sub_group_block_read8( image2d_t image, int2 byte_coord );
-
- void intel_sub_group_block_write( __global uint* p, uint data );
- void intel_sub_group_block_write2( __global uint* p, uint2 data );
- void intel_sub_group_block_write4( __global uint* p, uint4 data );
- void intel_sub_group_block_write8( __global uint* p, uint8 data );
-
- void intel_sub_group_block_write( image2d_t image, int2 byte_coord, uint data );
- void intel_sub_group_block_write2( image2d_t image, int2 byte_coord, uint2 data );
- void intel_sub_group_block_write4( image2d_t image, int2 byte_coord, uint4 data );
- void intel_sub_group_block_write8( image2d_t image, int2 byte_coord, uint8 data );
-
-New OpenCL C Enums
-
- This enum is copied unchanged from the Khronos subgroups extension:
-
- Add the following new value to the enumerated type <memory_scope>:
-
- memory_scope_sub_group
-
-Modifications to Section 2 - "Glossary" of the OpenCL 2.0 API Specification
-
- Add memory_scope_sub_group to the description of Memory Scopes:
-
- "Memory Scopes: Memory scopes define a hierarchy of visibilities when analyzing the
- ordering constraints of memory operations. They are defined by the values of the
- memory_scope enumeration constant. Current values are memory_scope_work_item (memory
- constraints only apply to a single work item and in practice only apply to image
- operations), memory_scope_sub_group (memory-ordering constraints only apply to work
- items executing in a subgroup), memory_scope_work_group ..."
-
- Add memory_scope_sub_group to the description of Scope inclusion:
-
- "Scope inclusion: Two actions A and B are defined to have an inclusive scope if they
- have the same scope P such that: (1) if P is memory_scope_sub_group, and A and B are
- executed by work items within the same subgroup, or (2) if P is memory_scope_work_group,
- and A and B are executed by work items within the same workgroup ..."
-
- Change the description for Subgroups to:
-
- "Subgroup: Subgroups are an implementation-dependent grouping of work items within a
- work group. The size and number of subgroups is implementation-defined and not
- exposed in the core OpenCL 2.0 feature set. Subgroups execute concurrently within
- a work group, but are not guaranteed to make independent forward progress.
- Subgroups may synchronize internally using subgroup barrier operations without
- synchronizing with other subgroups."
-
-Modifications to Section 3.2.1 - "Execution Model: Mapping Work Items Onto an NDRange" of
-the OpenCL 2.0 API Specification
-
- Change the paragraph describing subgroups to:
-
- "An implementation of OpenCL may divide each work group into one or more subgroups.
- The size and number of subgroups is implementation-defined and not exposed in the
- core OpenCL 2.0 feature set."
-
-Modifications to Section 3.2.2 - "Execution Model: Execution of Kernel Instances" of the
-OpenCL 2.0 API Specification
-
- Remove the last paragraph describing subgroups and independent forward progress.
-
-Additions to Section 3.2 - "Execution Model" of the OpenCL 2.0 API Specification
-
- This text is largely the same as the text in the Khronos subgroups extension.
- Only the sentence about independent forward progress has been modified.
-
- "Within a work group, work items may be divided into subgroups in an implementation-
- defined fashion. The mapping of work items to subgroups is implementation-defined
- and may be queried at runtime. While subgroups may be used in multi-dimensional
- work groups, each subgroup is 1-dimensional and any given work item may query which
- subgroup it is a member of.
-
- Work items are mapped into subgroups through a combination of compile-time decisions
- and the parameters of the dispatch. The mapping to subgroups is invariant for the
- duration of a kernel's execution, across dispatches of a given kernel with the same
- launch parameters, and from one work group to another within the dispatch (excluding
- the trailing edge work groups in the presence of non-uniform work group sizes). In
- addition, all subgroups within a work group will be the same size, apart from the
- subgroup with the maximum index, which may be smaller if the size of the work group
- is not evenly divisible by the size of the subgroups.
-
- Subgroups execute concurrently within a given work group. Similar to work items
- within a work group, subgroups executing within a work group are not guaranteed to make
- independent forward progress. Work items in a subgroup can internally synchronize
- using subgroup barrier operations without synchronizing with other subgroups."
-
-Additions to Section 3.3.4 - "Memory Model: Memory Consistency Model"
-
- Add memory_scope_sub_group to the bulleted descriptions of memory scopes:
-
- " * memory_scope_sub_group: memory-ordering constraints only apply to work items
- executing within a single subgroup.
- * memory_scope_work_group: ..."
-
- In the paragraph after the bulleted descriptions of memory scopes, include
- memory_scope_sub_group as a valid memory scope for local memory:
-
- "... For local memory, memory_scope_sub_group and memory_scope_work_group are valid,
- and may constrain visibility to the subgroup or workgroup."
-
-Additions to Section 3.3.5 - "Memory Model: Overview of atomic and fence operations"
-
- Add memory_scope_sub_group to the definition of inclusive scope:
-
- " * P is memory_scope_sub_group and A and B are executed by work items within the same
- subgroup.
- * P is memory_scope_work_group ..."
-
-Additions to Section 5.9.3 - "Kernel Object Queries" of the OpenCL 2.0 API Specification
-
- This addition is copied unchanged from the Khronos subgroups extension:
-
- "The function
-
- cl_int clGetKernelSubGroupInfoKHR(
- cl_kernel kernel,
- cl_device_id device,
- cl_kernel_sub_group_info param_name,
- size_t input_value_size,
- const void* input_value,
- size_t param_value_size,
- void* param_value,
- size_t* param_value_size_ret)
-
- returns information about the kernel object.
-
- <kernel> specifies the kernel object being queries.
-
- <device> identifies a specific device in the list of devices associated with <kernel>.
- The list of devices is the list of devices in the OpenCL context that is associated
- with <kernel>. If the list of devices associated with <kernel> is a single device,
- <device> can be a NULL value.
-
- <param_name> specifies the information to query. The list of supported <param_name>
- types and the information returned in <param_value> by clGetKernelSubGroupInfoKHR is
- described in the table below.
-
- <input_value_size> is used to specify the size in bytes of memory pointed to by
- <input_value>. This size must be equal to the size of the input type as described
- in the table below.
-
- <input_value> is a pointer to memory where the appropriate parameterization of the
- query is passed from. If <input_value> is NULL it is ignored.
-
- <param_value_size> is used to specify the size in bytes of memory pointed to by
- <param_value>. This size must be greater than or equal to the size of the return type
- as described in the table below.
-
- <param_value_size_ret> returns the actual size in bytes of data copied to <param_value>.
- If <param_value_size_ret> is NULL it is ignored.
-
- --------------------------------------------------------------------------------------
- cl_kernel_sub_group_info Input Type Return Type Description
- ------------------------ ---------- ----------- -----------------------------------
- CL_KERNEL_MAX_SUB_GROUP_ size_t* size_t Returns the maximum subgroup size
- SIZE_FOR_NDRANGE for this kernel. All subgroups must
- be the same size, while the last
- subgroup in any work group (i.e. the
- subgroup with the maximum index)
- could be the same or smaller size.
-
- The <input_value> must be an array
- of size_t values corresponding to
- the local work size parameter of the
- intended dispatch. The number of
- dimensions in the NDRange will be
- inferred form the value specified
- for <input_value_size>.
-
- CL_KERNEL_SUB_GROUP_ size_t* size_t Returns the number of subgroups that
- COUNT_FOR_NDRANGE will be present in each work group
- for a given local work size. All
- work groups, apart from the last
- work group in each dimension in the
- presence of non-uniform work group
- sizes, will have the same number of
- subgroups.
-
- The <input_value> must be an array
- of size_t values corresponding to
- the local work size parameter of the
- intended dispatch. The number of
- dimensions in the NDRange will be
- inferred from the value specified
- for <input_value_size>.
- --------------------------------------------------------------------------------------
-
- clGetKernelSubGroupInfoKHR returns CL_SUCCESS if the function executed successfully.
- Otherwise, it returns one of the following errors:
-
- * CL_INVALID_DEVICE if <device> is not in the list of devices associated with <kernel>,
- or if <device> is NULL but there is more than one device associated with <kernel>.
-
- * CL_INVALID_VALUE if <param_name> is not valid, or if the size in bytes specified by
- <param_value_size> is less than the size of the return type as described in the
- table above and <param_value> is not NULL.
-
- * CL_INVALID_VALUE if <param_name> is CL_KERNEL_SUB_GROUP_SIZE_FOR_NDRANGE and the
- size in bytes specified by <input_value_size> is not valid or if <input_value> is
- NULL.
-
- * CL_INVALID_KERNEL if <kernel> is not a valid kernel object.
-
- * CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the
- OpenCL implementation on the device.
-
- * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by
- the OpenCL implementation on the host."
-
-Additions to Section 6.13.1 - "Work Item Functions" of the OpenCL 2.0 C Specification
-
- These additions are copied unchanged from the Khronos subgroups extension:
-
- "--------------------------------------------------------------------------------------
- Function Description
- ------------------------------------- -----------------------------------------------
- uint get_sub_group_size( void ) Returns the number of work items in the
- subgroup. This value is no more than the
- maximum subgroup size and is implementation-
- defined based on a combination of the compiled
- compiled kernel and the dispatch dimensions.
- This will be a constant value for the lifetime
- of the subgroup.
-
- uint get_max_sub_group_size( void ) Returns the maximum size of a subgroup with the
- dispatch. This value will be invariant for a
- given set of dispatch dimensions and a kernel
- object compiled for a given device.
-
- uint get_num_sub_groups( void ) Returns the number of subgroups that the current
- work group is divided into.
-
- This number will be constant for the duration of
- a work group's execution. If the kernel is
- executed with a non-uniform work group size in
- any dimension, calls to this built-in may return
- a different values for some work groups than for
- other work groups.
-
- uint get_sub_group_id( void ) Returns the subgroup ID, which is a number from
- zero to get_num_sub_groups - 1.
-
- For clEnqueueTask, this returns zero.
-
- uint get_sub_group_local_id( void ) Returns the unique work item ID within the
- current subgroup. The mapping from get_local_id
- to get_sub_group_local_id will be invariant for
- the lifetime of the work group.
-
- --------------------------------------------------------------------------------------"
-
- If OpenCL 2.0 is supported:
-
- "--------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- --------------------------------------------
- uint get_enqueued_num_sub_groups( void ) Returns the same value as that returned by
- get_num_sub_groups if the kernel is executed
- with a uniform work group size. This value
- will be constant for the entire NDRange.
-
- If the kernel is executed with a non-uniform
- work group size, returns the number of
- subgroups in a work group that makes up the
- uniform region of the global NDRange.
- --------------------------------------------------------------------------------------"
-
-Additions to Section 6.13.8 - "Synchronization Functions" of the OpenCL 2.0 C Specification
-
- These additions are mostly unchanged from the Khronos subgroups extension. There is
- no new functionality, only minor edits for clarity:
-
- "--------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- --------------------------------------------
- void sub_group_barrier( All work items in a subgroup executing the
- cl_mem_fence_flags flags ) kernel on a processor must execute this
- function before any are allowed to continue
- execution beyond the subgroup barrier. This
- function must be encountered by all work
- items in a subgroup executing the kernel.
- These rules apply to NDRanges implemented
- with uniform and non-uniform work groups.
-
- If sub_group_barrier is inside a conditional
- statement then all work items within the
- subgroup must enter the conditional if
- any work item in the subgroup enters the
- conditional statement and executes the
- sub_group_barrier.
-
- If sub_group_barrier is inside a loop, all
- work items within the subgroup must execute
- the sub_group_barrier for each iteration of
- the loop before any are allowed to continue
- execution beyond the sub_group_barrier.
-
- The sub_group_barrier function also queues a
- memory fence (reads and writes) to ensure
- correct ordering of memory operations to
- local or global memory.
-
- The flags argument specifies the memory
- address space and can be set to a
- combination of the following values:
-
- CLK_LOCAL_MEM_FENCE - The sub_group_barrier
- function will either flush any variables
- stored in local memory or queue a memory
- fence to ensure correct ordering of memory
- operations to local memory.
-
- CLK_GLOBAL_MEM_FENCE - The sub_group_barrier
- function will queue a memory fence to ensure
- correct ordering of memory operations to
- global memory. This can be useful when work
- items, for example, write to buffer objects
- and then want to read the updated data from
- these buffer objects.
- --------------------------------------------------------------------------------------"
-
- If OpenCL 2.0 is supported, add the following to the table above:
-
- "--------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- --------------------------------------------
- void sub_group_barrier( ...
- cl_mem_fence_flags flags, The sub_group_barrier function also supports
- memory_scope scope ) a variant that specifies the memory scope.
- For the sub_group_barrier variant that does
- not take a memory scope, the scope is
- memory_scope_sub_group.
-
- The scope argument specifies whether the
- memory accesses of work items in the
- subgroup to memory address space(s)
- identified by flags become visible to all
- work items in the subgroup, the work group,
- the device, or all SVM devices.
- ...
- CLK_IMAGE_MEM_FENCE - The sub_group_barrier
- function will queue a memory fence to ensure
- correct ordering of memory operations to
- image objects. This can be useful when work
- items, for example, write to image objects
- and then want to read the updated data from
- these image objects.
- --------------------------------------------------------------------------------------"
-
-Additions to Section 6.13.11 - "Atomic Functions" of the OpenCL 2.0 C Specification
-
- Modify the bullet describing behavior for functions that do not have a memory_scope
- argument to say:
-
- " * The subgroup functions that do not have a memory_scope argument have the same
- semantics as the corresponding functions with the memory_scope argument set to
- memory_scope_sub_group. Other functions that do not have a memory_scope
- argument have the same semantics as the corresponding functions with the
- memory_scope argument set to memory_scope_device."
-
- This addition is copied unchanged from the Khronos subgroups extension:
-
- Add the following new value to the enumerated type <memory_scope> defined in Section
- 6.13.11.4:
-
- "<memory_scope_sub_group>
-
- The <memory_scope_sub_group> specifies that the memory ordering constraints given by
- <memory_order> apply to work items in a subgroup. This memory scope can be used when
- performing atomic operations to global or local memory."
-
-Additions to Section 6.13.15 - "Work Group Functions" of the OpenCL 2.0 C Specification
-
- These additions are copied from the Khronos subgroups extension:
-
- "The OpenCL C programming language implements the following built-in functions that
- operate on a subgroup level. These built-in functions must be encountered by all work
- items in a subgroup executing the kernel. We use the generic term <gentype> to indicate
- the built-in data types <int>, <uint>, <long>, <ulong>, or <float> as the type for the
- arguments.
-
- If cl_khr_fp16 is supported, <gentype> also includes <half>.
- If cl_khr_fp64 or doubles are supported, <gentype> also includes <double>.
-
- --------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- --------------------------------------------
- int sub_group_all( int predicate ) Evaluates predicate for all work items in
- the subgroup and returns a non-zero value
- if predicate evaluates to non-zero for all
- work items in the subgroup.
-
- int sub_group_any( int predicate ) Evaluates predicate for all work items in
- the subgroup and returns a non-zero value if
- predicate evaluates to non-zero for any work
- item in the subgroup.
-
- <gentype> sub_group_broadcast( Broadcasts the value of x for the work item
- <gentype> x, identified by sub_group_local_id (value
- uint sub_group_local_id ) returned by get_sub_group_local_id) to all
- work items in the subgroup.
- sub_group_local_id must be the same value
- for all work items in the subgroup.
-
- <gentype> sub_group_reduce_<op>( Returns the result of the reduction operation
- <gentype> x ) specified by <op> for all values x specified
- by work items in a subgroup.
-
- <gentype> sub_group_scan_exclusive_<op>)( Does an exclusive scan operation specified by
- <gentype> x ) <op> of all values specified by work items
- in a subgroup. The scan results are
- returned for each work item.
-
- The scan order is defined by increasing
- sub_group_local_id within the subgroup.
-
- <gentype> sub_group_scan_inclusive_<op>( Does an inclusive scan operation specified by
- <gentype> x ) <op> of all values specified by work items
- in a subgroup. The scan results are
- returned for each work item
-
- The scan order is defined by increasing
- sub_group_local_id within the subgroup.
- --------------------------------------------------------------------------------------"
-
-Add a new Section 6.13.X - "Sub Group Shuffle Functions" to the OpenCL 2.0 C Specification
-
- These additions are unique to the Intel subgroups extension and are not part of the
- Khronos subgroups extension:
-
- "The OpenCL C programming language implements the following subgroup shuffle built-in
- functions to allow data to be exchanged among work items in a subgroup. These
- built-in functions need not be encountered by all work items in a subgroup executing
- the kernel, however, data may only be shuffled among work items encountering the
- subgroup shuffle function. Shuffling data from a work item that does not encounter
- the subgroup shuffle function will produce undefined results.
-
- For these functions, <gentype> is <float>, <float2>, <float4>, <float8>, <float16>,
- <int>, <int2>, <int4>, <int8>, <int16>, <uint>, <uint2>, <uint4>, <uint8>, <uint16>,
- <long>, or <ulong>.
-
- If cl_khr_fp16 is supported, <gentype> also includes <half>.
- If cl_khr_fp64 or doubles are supported, <gentype> also includes <double>.
-
- --------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- --------------------------------------------
- <gentype> intel_sub_group_shuffle( Allows data to be arbitrarily transferred
- <gentype> data, between work items in a subgroup. The data
- uint sub_group_local_id ) that is returned for this work item is the
- value of data for the work item identified
- by sub_group_local_id.
-
- sub_group_local_id need not be the same
- value for all work items in the subgroup.
- There is no defined behavior for out-of-
- range sub_group_local_ids.
-
- <gentype> intel_sub_group_shuffle_down( Allows data to be transferred from a work
- <gentype> current, item in the subgroup with a higher
- <gentype> next, sub_group_local_id down to a work item in
- uint delta ) the subgroup with a lower sub_group_local_id.
-
- There are two data sources to this built-in
- function: current and next. To determine the
- result of this built-in function, first let
- the unsigned shuffle index be equivalent to
- the sum of this work item's sub_group_local_id
- plus the specified delta:
-
- If the shuffle index is less than the
- max_sub_group_size, the result of this built-in
- function is the value of the current data
- source for the work item with
- sub_group_local_id equal to the shuffle index.
-
- If the shuffle index is greater or equal to the
- max_sub_group_size but less than twice the
- max_sub_group_size, the result of this
- built-in function is the value of the next
- data source for the work item with
- sub_group_local_id equal to the shuffle index
- minus the max_sub_group_size.
-
- All other values of the shuffle index are
- considered to be out-of-range. There is no
- defined behavior for out-of-range indices.
-
- delta need not be the same value for all work
- items in the subgroup.
-
- <gentype> intel_sub_group_shuffle_up( Allows data to be transferred from a work
- <gentype> previous, item in the subgroup with a lower
- <gentype> current, sub_group_local_id up to a work item in the
- uint delta ) subgroup with a higher sub_group_local_id.
-
- There are two data sources to this built-in
- function: previous and current. To determine
- the result of this built-in function, first
- let the signed shuffle index be equivalent to
- this work item's sub_group_local_id minus the
- specified delta:
-
- If the shuffle index is greater than or equal
- to zero and less than the max_sub_group_size,
- the result of this built-in function is the
- value of the current data source for the work
- item with sub_group_local_id equal to the
- shuffle index.
-
- If the shuffle index is less than zero but
- greater than or equal to the negative
- max_sub_group_size, the result of this
- built-in function is the value of the previous
- data source for the work item with
- sub_group_local_id equal to the shuffle index
- plus the max_sub_group_size.
-
- All other values of the shuffle index are
- considered to be out-of-range. There is no
- defined behavior for out-of-range indices.
-
- delta need not be the same value for all work
- items in the subgroup.
-
- <gentype> intel_sub_group_shuffle_xor( Allows data to be transferred between work
- <gentype> data, items in a subgroup as a function of the work
- uint value ) item's sub_group_local_id. The data that is
- returned for this work item is the value of
- data for the work item with sub_group_local_id
- equal to this work item's sub_group_local_id
- XOR'd with the specified value. If the result
- of the XOR is greater than max_sub_group_size
- then it is considered out-of-range.
-
- value need not be the same for all work items
- in the subgroup. There is no defined behavior
- for out-of-range indices.
- --------------------------------------------------------------------------------------"
-
-Add a new Section 6.13.X - "Sub Group Read and Write Functions" to the OpenCL 2.0 C
-Specification
-
- These additions are unique to the Intel subgroups extension and are not part of the
- Khronos subgroups extension:
-
- "The OpenCL C programming language implements the following built-in functions to allow
- data to be read or written as a block by all work items in a subgroup. These built-in
- functions must be encountered by all work items in a subgroup executing the kernel.
- Furthermore, since these are block operations, the pointer, image, and coordinate
- arguments to these built-in functions must be the same for all work items in the
- subgroup (when applicable, only the data argument may be different).
-
- --------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- --------------------------------------------
- uint intel_sub_group_block_read( Reads 1, 2, 4, or 8 uints of data for each
- const __global uint* p ) work item in the subgroup from the specified
- uint2 intel_sub_group_block_read2( pointer as a block operation.
- const __global uint* p ) The data is read strided, so the first
- uint4 intel_sub_group_block_read4( value read is:
- const __global uint* p ) p[ sub_group_local_id ]
- uint8 intel_sub_group_block_read8( and the second value read is:
- const __global uint* p ) p[ sub_group_local_id + max_sub_group_size ]
- etc.
-
- There is no defined out-of-range behavior
- for these functions.
-
- uint intel_sub_group_block_read( Reads 1, 2, 4, or 8 uints of data for each
- image2d_t image, work item in the subgroup from the specified
- int2 byte_coord ) image at the specified coordinate as a block
- uint2 intel_sub_group_block_read2( operation. Note that the coordinate is a
- image2d_t image, byte coordinate, not an image element
- int2 byte_coord ) coordinate. Also note that the image data
- uint4 intel_sub_group_block_read4( is read without format conversion, so each
- image2d_t image, work item may read multiple image elements
- int2 byte_coord ) (for images with element size smaller than
- uint8 intel_sub_group_block_read8( 32-bits).
- image2d_t image,
- int2 byte_coord ) The data is read row-by-row, so the first
- value read is from the row specified in the
- y-component of the provided byte_coord, the
- second value is read from the y-component
- of the provided byte_coord plus one, etc.
-
- Please see the note below describing out-of-
- bounds behavior for the subgroup image block
- read functions.
-
- void intel_sub_group_block_write( Writes 1, 2, 4, or 8 uints of data for each
- __global uint* p, uint data ) work item in the subgroup to the specified
- void intel_sub_group_block_write2( pointer as a block operation.
- __global uint* p, uint2 data ) The data is written strided, so the first
- void intel_sub_group_block_write4( value is written to:
- __global uint* p, uint4 data ) p[ sub_group_local_id ]
- void intel_sub_group_block_write8( and the second value is written to:
- __global uint* p, uint8 data ) p[ sub_group_local_id + max_sub_group_size ]
- etc.
-
- There is no defined out-of-range behavior
- for these functions.
-
- void intel_sub_group_block_write( Writes 1, 2, 4, or 8 uints of data for each
- image2d_t image, work item in the subgroup to the specified
- int2 byte_coord, uint data ) image at the specified coordinate as a block
- void intel_sub_group_block_write2( operation. Note that the coordinate is a
- image2d_t image, byte coordinate, not an image element
- int2 byte_coord, uint2 data ) coordinate. Unlike the image block read
- void intel_sub_group_block_write4( function, which may read from any arbitrary
- image2d_t image, byte offset, the x-component of the byte
- int2 byte_coord, uint4 data ) coordinate for the image block write
- void intel_sub_group_block_write8( functions must be a multiple of four; in
- image2d_t image, other words, the write must begin at a
- int2 byte_coord, uint8 data ) 32-bit boundary. There is no restriction on
- the y-component of the coordinate. Also, note
- that the image data is written without format
- conversion, so each work item may write
- multiple image elements (for images with
- element size smaller than 32-bits).
-
- The data is written row-by-row, so the first
- value written is from the row specified by
- the y-component of the provided byte_coord,
- the second value is written from the y-
- component of the provided byte_coord plus
- one, etc.
-
- Please see the note below describing out-of-
- bounds behavior for the subgroup image block
- write functions.
- -------------------------------------------------------------------------------------
-
- Note: The subgroup image block read and write built-ins do support bounds checking,
- however these built-ins bounds-check to the image width in units of uints, not in
- units of image elements. This means:
-
- * If the image has an element size equal to the size of a uint (four bytes, for
- example CL_RGBA + CL_UNORM_INT8), the image will be correctly bounds-checked.
- In this case, out-of-bounds reads will return the edge image element (the
- equivalent of CLK_ADDRESS_CLAMP_TO_EDGE), and out-of-bounds writes will be
- ignored.
-
- * If the image has element size less than the size of a uint (such as CL_R +
- CL_UNSIGNED_INT8), the entire image is addressable, however bounds checking
- will occur too late. For this reason, extra care should be taken to avoid out-
- of-bounds reads and writes, since out-of-bounds reads may return invalid data
- and out-of-bounds writes may corrupt other images or buffers unpredictably.
-
- 6.13.X.1 - Restrictions
-
- The following restrictions apply to the subgroup buffer block read and write
- functions:
-
- * The pointer 'p' must be 32-bit (4-byte) aligned for reads, and must be
- 128-bit (16-byte) aligned for writes.
-
- * If the pointer 'p' is computed from a kernel argument that is a cl_mem
- that was created with CL_MEM_USE_HOST_PTR, then the <host_ptr> must be
- 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned
- for writes.
-
- * If the pointer 'p' is computed from a kernel argument that is a cl_mem
- that is a sub-buffer, then the <origin> defining the sub-buffer offset into
- the <buffer> must be a multiple of 4 bytes for reads, and must be a multiple
- of 16 bytes for write, in addition to the CL_DEVICE_MEM_BASE_ADDR_ALIGN
- requirements. Additionally, if the <buffer> that the sub-buffer is created
- from was created with CL_MEM_USE_HOST_PTR, then the <host_ptr> for the
- <buffer> must be 32-bit (4-byte) aligned for reads, and must be 128-bit
- (16-byte) aligned for writes.
-
- * If the pointer 'p' is computed from an SVM pointer kernel argument, then the
- SVM pointer kernel argument must be 32-bit (4-byte) aligned for reads, and
- must be 128-bit (16-byte) aligned for writes.
-
- The following restrictions apply to the subgroup image block read and write
- functions:
-
- * The behavior of the subgroup image block read and write built-ins is
- undefined for images with an element size greater than four bytes
- (such as CL_RGBA + CL_FLOAT).
-
- * When reading or writing a 2D image created from a buffer with the subgroup
- block read and write built-ins, the image row pitch is required to be a
- multiple of 64-bytes, in addition to the CL_DEVICE_IMAGE_PITCH_ALIGNMENT
- requirements.
-
- * When reading or writing a 2D image created from a buffer with the subgroup
- block read and write built-ins, if the buffer is a cl_mem that was created
- with CL_MEM_USE_HOST_PTR, then the <host_ptr> must be 256-bit (32-byte)
- aligned.
-
- * When reading or writing a 2D image created from a buffer with the subgroup
- block read and write built-ins, if the buffer is a cl_mem that is a
- sub-buffer, then the <origin> must be a multiple of 32-bytes. Additionally,
- if the <buffer> that the sub-buffer is created from was created with
- CL_MEM_USE_HOST_PTR, then the <host_ptr> for the <buffer> must be 256-bit
- (32-byte) aligned."
-
-Revision History
-
- Version 1 (2014/12/02): First public revision.
- Version 2 (2015/03/12): Fixed minor formatting errors, added restriction for
- subgroup image block read and write built-ins with large
- image formats.
- Version 3 (2016/02/12): Fixed a small bug in the shuffle up and shuffle down
- descriptions.
- Version 4 (2016/08/28): Added additional restrictions and programming notes for the
- subgroup shuffle and block read built-ins.
+https://github.com/KhronosGroup/OpenCL-Docs/blob/master/extensions/cl_intel_subgroups.asciidoc
diff --git a/extensions/intel/cl_intel_subgroups_short.html b/extensions/intel/cl_intel_subgroups_short.html
new file mode 100644
index 0000000..5f4a38e
--- /dev/null
+++ b/extensions/intel/cl_intel_subgroups_short.html
@@ -0,0 +1,1435 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta name="generator" content="AsciiDoc 8.6.9">
+<title>cl_intel_subgroups_short</title>
+<style type="text/css">
+/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
+
+/* Default font. */
+body {
+ font-family: Georgia,serif;
+}
+
+/* Title font. */
+h1, h2, h3, h4, h5, h6,
+div.title, caption.title,
+thead, p.table.header,
+#toctitle,
+#author, #revnumber, #revdate, #revremark,
+#footer {
+ font-family: Arial,Helvetica,sans-serif;
+}
+
+body {
+ margin: 1em 5% 1em 5%;
+}
+
+a {
+ color: blue;
+ text-decoration: underline;
+}
+a:visited {
+ color: fuchsia;
+}
+
+em {
+ font-style: italic;
+ color: navy;
+}
+
+strong {
+ font-weight: bold;
+ color: #083194;
+}
+
+h1, h2, h3, h4, h5, h6 {
+ color: #527bbd;
+ margin-top: 1.2em;
+ margin-bottom: 0.5em;
+ line-height: 1.3;
+}
+
+h1, h2, h3 {
+ border-bottom: 2px solid silver;
+}
+h2 {
+ padding-top: 0.5em;
+}
+h3 {
+ float: left;
+}
+h3 + * {
+ clear: left;
+}
+h5 {
+ font-size: 1.0em;
+}
+
+div.sectionbody {
+ margin-left: 0;
+}
+
+hr {
+ border: 1px solid silver;
+}
+
+p {
+ margin-top: 0.5em;
+ margin-bottom: 0.5em;
+}
+
+ul, ol, li > p {
+ margin-top: 0;
+}
+ul > li { color: #aaa; }
+ul > li > * { color: black; }
+
+.monospaced, code, pre {
+ font-family: "Courier New", Courier, monospace;
+ font-size: inherit;
+ color: navy;
+ padding: 0;
+ margin: 0;
+}
+pre {
+ white-space: pre-wrap;
+}
+
+#author {
+ color: #527bbd;
+ font-weight: bold;
+ font-size: 1.1em;
+}
+#email {
+}
+#revnumber, #revdate, #revremark {
+}
+
+#footer {
+ font-size: small;
+ border-top: 2px solid silver;
+ padding-top: 0.5em;
+ margin-top: 4.0em;
+}
+#footer-text {
+ float: left;
+ padding-bottom: 0.5em;
+}
+#footer-badges {
+ float: right;
+ padding-bottom: 0.5em;
+}
+
+#preamble {
+ margin-top: 1.5em;
+ margin-bottom: 1.5em;
+}
+div.imageblock, div.exampleblock, div.verseblock,
+div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
+div.admonitionblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.admonitionblock {
+ margin-top: 2.0em;
+ margin-bottom: 2.0em;
+ margin-right: 10%;
+ color: #606060;
+}
+
+div.content { /* Block element content. */
+ padding: 0;
+}
+
+/* Block element titles. */
+div.title, caption.title {
+ color: #527bbd;
+ font-weight: bold;
+ text-align: left;
+ margin-top: 1.0em;
+ margin-bottom: 0.5em;
+}
+div.title + * {
+ margin-top: 0;
+}
+
+td div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content div.title:first-child {
+ margin-top: 0.0em;
+}
+div.content + div.title {
+ margin-top: 0.0em;
+}
+
+div.sidebarblock > div.content {
+ background: #ffffee;
+ border: 1px solid #dddddd;
+ border-left: 4px solid #f0f0f0;
+ padding: 0.5em;
+}
+
+div.listingblock > div.content {
+ border: 1px solid #dddddd;
+ border-left: 5px solid #f0f0f0;
+ background: #f8f8f8;
+ padding: 0.5em;
+}
+
+div.quoteblock, div.verseblock {
+ padding-left: 1.0em;
+ margin-left: 1.0em;
+ margin-right: 10%;
+ border-left: 5px solid #f0f0f0;
+ color: #888;
+}
+
+div.quoteblock > div.attribution {
+ padding-top: 0.5em;
+ text-align: right;
+}
+
+div.verseblock > pre.content {
+ font-family: inherit;
+ font-size: inherit;
+}
+div.verseblock > div.attribution {
+ padding-top: 0.75em;
+ text-align: left;
+}
+/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
+div.verseblock + div.attribution {
+ text-align: left;
+}
+
+div.admonitionblock .icon {
+ vertical-align: top;
+ font-size: 1.1em;
+ font-weight: bold;
+ text-decoration: underline;
+ color: #527bbd;
+ padding-right: 0.5em;
+}
+div.admonitionblock td.content {
+ padding-left: 0.5em;
+ border-left: 3px solid #dddddd;
+}
+
+div.exampleblock > div.content {
+ border-left: 3px solid #dddddd;
+ padding-left: 0.5em;
+}
+
+div.imageblock div.content { padding-left: 0; }
+span.image img { border-style: none; vertical-align: text-bottom; }
+a.image:visited { color: white; }
+
+dl {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+dt {
+ margin-top: 0.5em;
+ margin-bottom: 0;
+ font-style: normal;
+ color: navy;
+}
+dd > *:first-child {
+ margin-top: 0.1em;
+}
+
+ul, ol {
+ list-style-position: outside;
+}
+ol.arabic {
+ list-style-type: decimal;
+}
+ol.loweralpha {
+ list-style-type: lower-alpha;
+}
+ol.upperalpha {
+ list-style-type: upper-alpha;
+}
+ol.lowerroman {
+ list-style-type: lower-roman;
+}
+ol.upperroman {
+ list-style-type: upper-roman;
+}
+
+div.compact ul, div.compact ol,
+div.compact p, div.compact p,
+div.compact div, div.compact div {
+ margin-top: 0.1em;
+ margin-bottom: 0.1em;
+}
+
+tfoot {
+ font-weight: bold;
+}
+td > div.verse {
+ white-space: pre;
+}
+
+div.hdlist {
+ margin-top: 0.8em;
+ margin-bottom: 0.8em;
+}
+div.hdlist tr {
+ padding-bottom: 15px;
+}
+dt.hdlist1.strong, td.hdlist1.strong {
+ font-weight: bold;
+}
+td.hdlist1 {
+ vertical-align: top;
+ font-style: normal;
+ padding-right: 0.8em;
+ color: navy;
+}
+td.hdlist2 {
+ vertical-align: top;
+}
+div.hdlist.compact tr {
+ margin: 0;
+ padding-bottom: 0;
+}
+
+.comment {
+ background: yellow;
+}
+
+.footnote, .footnoteref {
+ font-size: 0.8em;
+}
+
+span.footnote, span.footnoteref {
+ vertical-align: super;
+}
+
+#footnotes {
+ margin: 20px 0 20px 0;
+ padding: 7px 0 0 0;
+}
+
+#footnotes div.footnote {
+ margin: 0 0 5px 0;
+}
+
+#footnotes hr {
+ border: none;
+ border-top: 1px solid silver;
+ height: 1px;
+ text-align: left;
+ margin-left: 0;
+ width: 20%;
+ min-width: 100px;
+}
+
+div.colist td {
+ padding-right: 0.5em;
+ padding-bottom: 0.3em;
+ vertical-align: top;
+}
+div.colist td img {
+ margin-top: 0.3em;
+}
+
+@media print {
+ #footer-badges { display: none; }
+}
+
+#toc {
+ margin-bottom: 2.5em;
+}
+
+#toctitle {
+ color: #527bbd;
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 1.0em;
+ margin-bottom: 0.1em;
+}
+
+div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+div.toclevel2 {
+ margin-left: 2em;
+ font-size: 0.9em;
+}
+div.toclevel3 {
+ margin-left: 4em;
+ font-size: 0.9em;
+}
+div.toclevel4 {
+ margin-left: 6em;
+ font-size: 0.9em;
+}
+
+span.aqua { color: aqua; }
+span.black { color: black; }
+span.blue { color: blue; }
+span.fuchsia { color: fuchsia; }
+span.gray { color: gray; }
+span.green { color: green; }
+span.lime { color: lime; }
+span.maroon { color: maroon; }
+span.navy { color: navy; }
+span.olive { color: olive; }
+span.purple { color: purple; }
+span.red { color: red; }
+span.silver { color: silver; }
+span.teal { color: teal; }
+span.white { color: white; }
+span.yellow { color: yellow; }
+
+span.aqua-background { background: aqua; }
+span.black-background { background: black; }
+span.blue-background { background: blue; }
+span.fuchsia-background { background: fuchsia; }
+span.gray-background { background: gray; }
+span.green-background { background: green; }
+span.lime-background { background: lime; }
+span.maroon-background { background: maroon; }
+span.navy-background { background: navy; }
+span.olive-background { background: olive; }
+span.purple-background { background: purple; }
+span.red-background { background: red; }
+span.silver-background { background: silver; }
+span.teal-background { background: teal; }
+span.white-background { background: white; }
+span.yellow-background { background: yellow; }
+
+span.big { font-size: 2em; }
+span.small { font-size: 0.6em; }
+
+span.underline { text-decoration: underline; }
+span.overline { text-decoration: overline; }
+span.line-through { text-decoration: line-through; }
+
+div.unbreakable { page-break-inside: avoid; }
+
+
+/*
+ * xhtml11 specific
+ *
+ * */
+
+div.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+div.tableblock > table {
+ border: 3px solid #527bbd;
+}
+thead, p.table.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.table {
+ margin-top: 0;
+}
+/* Because the table frame attribute is overridden by CSS in most browsers. */
+div.tableblock > table[frame="void"] {
+ border-style: none;
+}
+div.tableblock > table[frame="hsides"] {
+ border-left-style: none;
+ border-right-style: none;
+}
+div.tableblock > table[frame="vsides"] {
+ border-top-style: none;
+ border-bottom-style: none;
+}
+
+
+/*
+ * html5 specific
+ *
+ * */
+
+table.tableblock {
+ margin-top: 1.0em;
+ margin-bottom: 1.5em;
+}
+thead, p.tableblock.header {
+ font-weight: bold;
+ color: #527bbd;
+}
+p.tableblock {
+ margin-top: 0;
+}
+table.tableblock {
+ border-width: 3px;
+ border-spacing: 0px;
+ border-style: solid;
+ border-color: #527bbd;
+ border-collapse: collapse;
+}
+th.tableblock, td.tableblock {
+ border-width: 1px;
+ padding: 4px;
+ border-style: solid;
+ border-color: #527bbd;
+}
+
+table.tableblock.frame-topbot {
+ border-left-style: hidden;
+ border-right-style: hidden;
+}
+table.tableblock.frame-sides {
+ border-top-style: hidden;
+ border-bottom-style: hidden;
+}
+table.tableblock.frame-none {
+ border-style: hidden;
+}
+
+th.tableblock.halign-left, td.tableblock.halign-left {
+ text-align: left;
+}
+th.tableblock.halign-center, td.tableblock.halign-center {
+ text-align: center;
+}
+th.tableblock.halign-right, td.tableblock.halign-right {
+ text-align: right;
+}
+
+th.tableblock.valign-top, td.tableblock.valign-top {
+ vertical-align: top;
+}
+th.tableblock.valign-middle, td.tableblock.valign-middle {
+ vertical-align: middle;
+}
+th.tableblock.valign-bottom, td.tableblock.valign-bottom {
+ vertical-align: bottom;
+}
+
+
+/*
+ * manpage specific
+ *
+ * */
+
+body.manpage h1 {
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+ border-top: 2px solid silver;
+ border-bottom: 2px solid silver;
+}
+body.manpage h2 {
+ border-style: none;
+}
+body.manpage div.sectionbody {
+ margin-left: 3em;
+}
+
+@media print {
+ body.manpage div#toc { display: none; }
+}
+
+
+@media screen {
+ body {
+ max-width: 50em; /* approximately 80 characters wide */
+ margin-left: 16em;
+ }
+
+ #toc {
+ position: fixed;
+ top: 0;
+ left: 0;
+ bottom: 0;
+ width: 13em;
+ padding: 0.5em;
+ padding-bottom: 1.5em;
+ margin: 0;
+ overflow: auto;
+ border-right: 3px solid #f8f8f8;
+ background-color: white;
+ }
+
+ #toc .toclevel1 {
+ margin-top: 0.5em;
+ }
+
+ #toc .toclevel2 {
+ margin-top: 0.25em;
+ display: list-item;
+ color: #aaaaaa;
+ }
+
+ #toctitle {
+ margin-top: 0.5em;
+ }
+}
+</style>
+<script type="text/javascript">
+/*<![CDATA[*/
+var asciidoc = { // Namespace.
+
+/////////////////////////////////////////////////////////////////////
+// Table Of Contents generator
+/////////////////////////////////////////////////////////////////////
+
+/* Author: Mihai Bazon, September 2002
+ * http://students.infoiasi.ro/~mishoo
+ *
+ * Table Of Content generator
+ * Version: 0.4
+ *
+ * Feel free to use this script under the terms of the GNU General Public
+ * License, as long as you do not remove or alter this notice.
+ */
+
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
+
+// Rebuilds the #toc element from the headings found under #content; toclevels = 1..4.
+toc: function (toclevels) {
+
+ function getText(el) { // Concatenates the data of all descendant text nodes of el.
+ var text = "";
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
+ text += i.data;
+ else if (i.firstChild != null)
+ text += getText(i);
+ }
+ return text;
+ }
+
+ function TocEntry(el, text, toclevel) {
+ this.element = el;
+ this.text = text;
+ this.toclevel = toclevel;
+ }
+
+ function tocEntries(el, toclevels) {
+ var result = new Array;
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])'); // Matches tag names h1..h(toclevels+1).
+ // Function that scans the DOM tree for header elements (the DOM2
+ // nodeIterator API would be a better technique but not supported by all
+ // browsers).
+ var iterate = function (el) {
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
+ var mo = re.exec(i.tagName);
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1); // h1 -> level 0, h2 -> level 1, ...
+ }
+ iterate(i);
+ }
+ }
+ }
+ iterate(el);
+ return result;
+ }
+
+ var toc = document.getElementById("toc");
+ if (!toc) {
+ return;
+ }
+
+ // Delete existing TOC entries in case we're reloading the TOC.
+ var tocEntriesToRemove = [];
+ var i;
+ for (i = 0; i < toc.childNodes.length; i++) {
+ var entry = toc.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div'
+ && entry.getAttribute("class")
+ && entry.getAttribute("class").match(/^toclevel/))
+ tocEntriesToRemove.push(entry);
+ }
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
+ toc.removeChild(tocEntriesToRemove[i]);
+ }
+
+ // Rebuild TOC entries.
+ var entries = tocEntries(document.getElementById("content"), toclevels);
+ for (var i = 0; i < entries.length; ++i) {
+ var entry = entries[i];
+ if (entry.element.id == "") // Headings without an id get a generated one to link to.
+ entry.element.id = "_toc_" + i;
+ var a = document.createElement("a");
+ a.href = "#" + entry.element.id;
+ a.appendChild(document.createTextNode(entry.text));
+ var div = document.createElement("div");
+ div.appendChild(a);
+ div.className = "toclevel" + entry.toclevel;
+ toc.appendChild(div);
+ }
+ if (entries.length == 0) // No headings found: remove the empty TOC container.
+ toc.parentNode.removeChild(toc);
+},
+
+
+/////////////////////////////////////////////////////////////////////
+// Footnotes generator
+/////////////////////////////////////////////////////////////////////
+
+/* Based on footnote generation code from:
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
+ */
+
+footnotes: function () { // Moves span.footnote text into #footnotes and leaves numbered links behind.
+ // Delete existing footnote entries in case we're reloading the footnotes.
+ var i;
+ var noteholder = document.getElementById("footnotes");
+ if (!noteholder) {
+ return;
+ }
+ var entriesToRemove = [];
+ for (i = 0; i < noteholder.childNodes.length; i++) {
+ var entry = noteholder.childNodes[i];
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
+ entriesToRemove.push(entry);
+ }
+ for (i = 0; i < entriesToRemove.length; i++) {
+ noteholder.removeChild(entriesToRemove[i]);
+ }
+
+ // Rebuild footnote entries.
+ var cont = document.getElementById("content");
+ var spans = cont.getElementsByTagName("span");
+ var refs = {};
+ var n = 0;
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnote") {
+ n++;
+ var note = spans[i].getAttribute("data-note");
+ if (!note) {
+ // Use [\s\S] in place of . so multi-line matches work.
+ // Because JavaScript has no s (dotall) regex flag.
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
+ spans[i].innerHTML =
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ spans[i].setAttribute("data-note", note);
+ }
+ noteholder.innerHTML +=
+ "<div class='footnote' id='_footnote_" + n + "'>" +
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
+ n + "</a>. " + note + "</div>";
+ var id =spans[i].getAttribute("id");
+ if (id != null) refs["#"+id] = n; // Remembered so footnoterefs below can reuse this number.
+ }
+ }
+ if (n == 0)
+ noteholder.parentNode.removeChild(noteholder);
+ else {
+ // Process footnoterefs.
+ for (i=0; i<spans.length; i++) {
+ if (spans[i].className == "footnoteref") {
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
+ href = href.match(/#.*/)[0]; // Because IE returns the full URL.
+ n = refs[href];
+ spans[i].innerHTML =
+ "[<a href='#_footnote_" + n +
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
+ }
+ }
+ }
+},
+
+install: function(toclevels) { // Regenerate footnotes (and the TOC when toclevels is truthy) every 500 ms until the load event fires, then one final time.
+ var timerId;
+
+ function reinstall() {
+ asciidoc.footnotes();
+ if (toclevels) { // A falsy toclevels disables TOC generation.
+ asciidoc.toc(toclevels);
+ }
+ }
+
+ function reinstallAndRemoveTimer() { // Final pass: stop the polling timer, then rebuild once more.
+ clearInterval(timerId);
+ reinstall();
+ }
+
+ timerId = setInterval(reinstall, 500); // Poll so content is rebuilt as it arrives, before the load event.
+ if (document.addEventListener)
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
+ else // Fallback for browsers without addEventListener.
+ window.onload = reinstallAndRemoveTimer;
+}
+
+}
+asciidoc.install(1); // toclevels = 1; presumably limits the TOC to top-level sections -- confirm against toc().
+/*]]>*/
+</script>
+</head>
+<body class="article">
+<div id="header">
+<h1>cl_intel_subgroups_short</h1>
+<div id="toc">
+ <div id="toctitle">Table of Contents</div>
+ <noscript><p><b>JavaScript must be enabled in your browser to display the table of contents.</b></p></noscript>
+</div>
+</div>
+<div id="content">
+<div class="sect1">
+<h2 id="_name_strings">Name Strings</h2>
+<div class="sectionbody">
+<div class="paragraph"><p><span class="monospaced">cl_intel_subgroups_short</span></p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contact">Contact</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel (ben <em>dot</em> ashbaugh <em>at</em> intel <em>dot</em> com)</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_contributors">Contributors</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Ben Ashbaugh, Intel<br>
+Felix J Degrood, Intel<br>
+Biju George, Intel<br>
+Raun M Krisch, Intel<br>
+Konstantin A Pyjov, Intel<br>
+Insoo Woo, Intel</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_notice">Notice</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Copyright (c) 2018 Intel Corporation. All rights reserved.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_status">Status</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Final Draft</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_version">Version</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>Built On: 2018-11-16<br>
+Revision: 2</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_dependencies">Dependencies</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>OpenCL 1.2 and support for <span class="monospaced">cl_intel_subgroups</span> are required.
+This extension is written against the OpenCL API Specification Version 2.2 (revision v2.2-7), against the OpenCL C Language Specification Version 2.0 (revision v2.2-7), and against version 4 of the <span class="monospaced">cl_intel_subgroups</span> specification.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_overview">Overview</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>The goal of this extension is to allow programmers to improve the performance of applications operating on 16-bit data types by extending the subgroup functions described in the <span class="monospaced">cl_intel_subgroups</span> extension to support 16-bit integer data types (<span class="monospaced">shorts</span> and <span class="monospaced">ushorts</span>).
+Specifically, the extension:</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+Extends the subgroup broadcast function to allow 16-bit integer values to be broadcast from one work item to all other work items in the subgroup.
+</p>
+</li>
+<li>
+<p>
+Extends the subgroup scan and reduction functions to operate on 16-bit integer data types.
+</p>
+</li>
+<li>
+<p>
+Extends the Intel subgroup shuffle functions to allow arbitrarily exchanging 16-bit integer values among work items in the subgroup.
+</p>
+</li>
+<li>
+<p>
+Extends the Intel subgroup block read and write functions to allow reading and writing 16-bit integer data from images and buffers.
+</p>
+</li>
+</ul></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_functions">New API Functions</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_api_enums">New API Enums</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_new_opencl_c_functions">New OpenCL C Functions</h2>
+<div class="sectionbody">
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add <span class="monospaced">short</span> and <span class="monospaced">ushort</span> to the list of supported data types for the subgroup broadcast, scan, and reduction functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_broadcast</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x<span style="color: #990000">,</span> <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_broadcast</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x<span style="color: #990000">,</span> <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span></tt></pre></div></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+Add <span class="monospaced">short</span>, <span class="monospaced">short2</span>, <span class="monospaced">short4</span>, <span class="monospaced">short8</span>, <span class="monospaced">short16</span>, <span class="monospaced">ushort</span>, <span class="monospaced">ushort2</span>, <span class="monospaced">ushort4</span>, <span class="monospaced">ushort8</span>, and <span class="monospaced">ushort16</span> to the list of <span class="monospaced">gentype</span> data types supported by the <span class="monospaced">intel_sub_group_shuffle</span>, <span class="monospaced">intel_sub_group_shuffle_down</span>, <span class="monospaced">intel_sub_group_shuffle_up</span>, and <span class="monospaced">intel_sub_group_shuffle_xor</span> functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> data<span style="color: #990000">,</span> <span style="color: #008080">uint</span> c <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_down</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> current<span style="color: #990000">,</span> <span style="color: #008080">gentype</span> next<span style="color: #990000">,</span> <span style="color: #008080">uint</span> delta <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_up</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> previous<span style="color: #990000">,</span> <span style="color: #008080">gentype</span> current<span style="color: #990000">,</span> <span style="color: #008080">uint</span> delta <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_shuffle_xor</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> data<span style="color: #990000">,</span> <span style="color: #008080">uint</span> value <span style="color: #990000">)</span></tt></pre></div></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+Add <span class="monospaced">ushort</span> variants of the subgroup block read and write functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us2</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us4</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us8</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">ushort2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us2</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">ushort4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us4</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">ushort8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us8</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us2</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us4</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us8</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort8</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us2</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us4</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us8</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort8</span> data <span style="color: #990000">)</span></tt></pre></div></div>
+</div></div>
+</dd>
+<dt class="hdlist1">
+For naming consistency, also add suffixed aliases of the <span class="monospaced">uint</span> subgroup block read and write functions described in the <span class="monospaced">cl_intel_subgroups</span> extension:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui2</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui4</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui8</span></span><span style="color: #990000">(</span> <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui2</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui4</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui8</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui2</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui4</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui8</span></span><span style="color: #990000">(</span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui2</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui4</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui8</span></span><span style="color: #990000">(</span> <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span> <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span></tt></pre></div></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_modifications_to_the_opencl_c_specification">Modifications to the OpenCL C Specification</h2>
+<div class="sectionbody">
+<div class="sect2">
+<h3 id="_additions_to_section_6_13_15_work_group_functions">Additions to Section 6.13.15 - "Work Group Functions"</h3>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add <span class="monospaced">short</span> and <span class="monospaced">ushort</span> to the list of supported data types for the subgroup broadcast, scan, and reduction functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:66%;">
+<col style="width:33%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" > <strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" > <strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_broadcast</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">gentype</span> x<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_broadcast</span></span><span style="color: #990000">(</span>
+ <span style="color: #009900">short</span> x<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_broadcast</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">ushort</span> x<span style="color: #990000">,</span>
+ <span style="color: #008080">uint</span> sub_group_local_id <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Broadcasts the value of <em>x</em> for work item identified by <em>sub_group_local_id</em> (value returned by <strong>get_sub_group_local_id</strong>) to all work items in the subgroup.
+<em>sub_group_local_id</em> must be the same value for all work items in the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_reduce_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Returns the result of the specified reduction operation for all values of <em>x</em> specified by work items in a subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_exclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Performs the specified exclusive scan operation of all values <em>x</em> specified by work items in a subgroup.
+The scan results are returned for each work item.</p>
+<p class="tableblock">The scan order is defined by increasing subgroup local ID within the subgroup.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">gentype</span> <span style="font-weight: bold"><span style="color: #000000">sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">gentype</span> x <span style="color: #990000">)</span>
+
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_add</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_min</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span>
+<span style="color: #009900">short</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #009900">short</span> x <span style="color: #990000">)</span>
+<span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_scan_inclusive_max</span></span><span style="color: #990000">(</span> <span style="color: #008080">ushort</span> x <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Performs the specified inclusive scan operation of all values <em>x</em> specified by work items in a subgroup.
+The scan results are returned for each work item.</p>
+<p class="tableblock">The scan order is defined by increasing subgroup local ID within the subgroup.</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_additions_to_section_6_13_x_sub_group_shuffle_functions">Additions to Section 6.13.X - "Sub Group Shuffle Functions"</h3>
+<div class="paragraph"><p>This section was added by the <span class="monospaced">cl_intel_subgroups</span> extension.</p></div>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add <span class="monospaced">short</span>, <span class="monospaced">short2</span>, <span class="monospaced">short4</span>, <span class="monospaced">short8</span>, <span class="monospaced">short16</span>, <span class="monospaced">ushort</span>, <span class="monospaced">ushort2</span>, <span class="monospaced">ushort4</span>, <span class="monospaced">ushort8</span>, and <span class="monospaced">ushort16</span> to the list of data types supported by the <span class="monospaced">sub_group_shuffle</span>, <span class="monospaced">sub_group_shuffle_down</span>, <span class="monospaced">sub_group_shuffle_up</span>, and <span class="monospaced">sub_group_shuffle_xor</span> functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<div class="paragraph"><p>The OpenCL C programming language implements the following built-in functions to allow data to be exchanged among work items in a subgroup.
+These built-in functions need not be encountered by all work items in a subgroup executing the kernel, however, data may only be shuffled among work items encountering the subgroup shuffle function.
+Shuffling data from a work item that does not encounter the subgroup shuffle function will produce undefined results.
+For these functions, <span class="monospaced">gentype</span> is <span class="monospaced">float</span>, <span class="monospaced">float2</span>, <span class="monospaced">float4</span>, <span class="monospaced">float8</span>, <span class="monospaced">float16</span>, <span class="monospaced">short</span>, <span class="monospaced">short2</span>, <span class="monospaced">short4</span>, <span class="monospaced">short8</span>, <span class="monospaced">short16</span>, <span class="monospaced">ushort</span>, <span class="monospaced">ushort2</span>, <span class="monospaced">ushort4</span>, <span class="monospaced">ushort8</span>, <span class="monospaced">ushort16</span>, <span class="monospaced">int</span>, <span class="monospaced">int2</span>, <span class="monospaced">int4</span>, <span class="monospaced">int8</span>, <span class="monospaced">int16</span>, <span class="monospaced">uint</span>, <span class="monospaced">uint2</span>, <span class="monospaced">uint4</span>, <span class="monospaced">uint8</span>, <span class="monospaced">uint16</span>, <span class="monospaced">long</span>, or <span class="monospaced">ulong</span>.</p></div>
+<div class="paragraph"><p>If <span class="monospaced">cl_khr_fp16</span> is supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">half</span>.</p></div>
+<div class="paragraph"><p>If <span class="monospaced">cl_khr_fp64</span> or doubles are supported, <span class="monospaced">gentype</span> also includes <span class="monospaced">double</span>.</p></div>
+</div></div>
+</dd>
+</dl></div>
+</div>
+<div class="sect2">
+<h3 id="_modifications_to_section_6_13_x_sub_group_read_and_write_functions">Modifications to Section 6.13.X "Sub Group Read and Write Functions"</h3>
+<div class="paragraph"><p>This section was added by the <span class="monospaced">cl_intel_subgroups</span> extension.</p></div>
+<div class="dlist"><dl>
+<dt class="hdlist1">
+Add suffixed aliases of the previously un-suffixed 32-bit block read and write functions. There is no change to the description or behavior of these functions:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:55%;">
+<col style="width:44%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" ><strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" ><strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read2</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read4</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read8</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui2</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui4</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui8</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Reads 1, 2, 4, or 8 uints of data for each work item in the subgroup from the specified pointer as a block operation…</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+
+<span style="color: #008080">uint</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">uint8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_ui8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Reads 1, 2, 4, or 8 uints of data for each work item in the subgroup from the specified image at the specified coordinate as a block operation…</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> uint<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Writes 1, 2, 4, or 8 uints of data for each work item in the subgroup to the specified pointer as a block operation…</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span>
+
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_ui8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">uint8</span> data <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Writes 1, 2, 4, or 8 uints of data for each work item in the subgroup to the specified image at the specified coordinate as a block operation…</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+<dt class="hdlist1">
+Also, add <span class="monospaced">ushort</span> variants of the block read and write functions. In the descriptions of these functions, the "note below describing out-of-bounds behavior" is in the <span class="monospaced">cl_intel_subgroups</span> extension specification:
+</dt>
+<dd>
+<div class="openblock">
+<div class="content">
+<table class="tableblock frame-all grid-all"
+style="
+width:100%;
+">
+<col style="width:55%;">
+<col style="width:44%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" ><strong>Function</strong></th>
+<th class="tableblock halign-left valign-top" ><strong>Description</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us2</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us4</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span>
+<span style="color: #008080">ushort8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us8</span></span><span style="color: #990000">(</span>
+ <span style="font-weight: bold"><span style="color: #0000FF">const</span></span> <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Reads 1, 2, 4, or 8 ushorts of data for each work item in the subgroup from the specified pointer as a block operation.
+The data is read strided, so the first value read is:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id ]</span></p>
+<p class="tableblock">and the second value read is:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id + max_sub_group_size ]</span></p>
+<p class="tableblock">etc.</p>
+<p class="tableblock"><em>p</em> must be aligned to a 32-bit (4-byte) boundary.</p>
+<p class="tableblock">There is no defined out-of-range behavior for these functions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #008080">ushort</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">ushort2</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">ushort4</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span>
+<span style="color: #008080">ushort8</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_read_us8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Reads 1, 2, 4, or 8 ushorts of data for each work item in the subgroup from the specified <em>image</em> at the specified coordinate as a block operation.
+Note that the coordinate is a byte coordinate, not an image element coordinate.
+Also note that the image data is read without format conversion, so each work item may read multiple image elements
+(for images with element size smaller than 16-bits).</p>
+<p class="tableblock">The data is read row-by-row, so the first value read is from the row specified in the y-component of the provided <em>byte_coord</em>, the second value is read from the y-component of the provided <em>byte_coord</em> plus one, etc.</p>
+<p class="tableblock">Please see the note below describing out-of-bounds behavior for these functions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">__global</span> ushort<span style="color: #990000">*</span> p<span style="color: #990000">,</span> <span style="color: #008080">ushort8</span> data <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Writes 1, 2, 4, or 8 ushorts of data for each work item in the subgroup to the specified pointer as a block operation.
+The data is written strided, so the first value is written to:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id ]</span></p>
+<p class="tableblock">and the second value is written to:</p>
+<p class="tableblock"><span class="monospaced">p[ sub_group_local_id + max_sub_group_size ]</span></p>
+<p class="tableblock">etc.</p>
+<p class="tableblock"><em>p</em> must be aligned to a 128-bit (16-byte) boundary.</p>
+<p class="tableblock">There is no defined out-of-range behavior for these functions.</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><div><div class="listingblock">
+<div class="content"><!-- Generator: GNU source-highlight 3.1.8
+by Lorenzo Bettini
+http://www.lorenzobettini.it
+http://www.gnu.org/software/src-highlite -->
+<pre><tt><span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us2</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort2</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us4</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort4</span> data <span style="color: #990000">)</span>
+<span style="color: #009900">void</span> <span style="font-weight: bold"><span style="color: #000000">intel_sub_group_block_write_us8</span></span><span style="color: #990000">(</span>
+ <span style="color: #008080">image2d_t</span> image<span style="color: #990000">,</span>
+ <span style="color: #008080">int2</span> byte_coord<span style="color: #990000">,</span> <span style="color: #008080">ushort8</span> data <span style="color: #990000">)</span></tt></pre></div></div></div></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Writes 1, 2, 4, or 8 ushorts of data for each work item in the subgroup to the specified <em>image</em> at the specified coordinate as a block operation.
+Note that the coordinate is a byte coordinate, not an image element coordinate.
+Unlike the image block read function, which may read from any arbitrary byte offset, the x-component of the byte coordinate for the image block write functions must be a multiple of four;
+in other words, the write must begin at a 32-bit boundary.
+There is no restriction on the y-component of the coordinate.
+Also, note that the image <em>data</em> is written without format conversion, so each work item may write multiple image elements (for images with element size smaller than 16-bits).</p>
+<p class="tableblock">The data is written row-by-row, so the first value is written to the row specified by the y-component of the provided <em>byte_coord</em>, the second value is written to the row specified by the y-component of the provided <em>byte_coord</em> plus one, etc.</p>
+<p class="tableblock">Please see the note below describing out-of-bounds behavior for these functions.</p></td>
+</tr>
+</tbody>
+</table>
+</div></div>
+</dd>
+</dl></div>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_issues">Issues</h2>
+<div class="sectionbody">
+<div class="paragraph"><p>None.</p></div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_revision_history">Revision History</h2>
+<div class="sectionbody">
+<table class="tableblock frame-all grid-rows"
+style="
+width:100%;
+">
+<col style="width:4%;">
+<col style="width:14%;">
+<col style="width:14%;">
+<col style="width:66%;">
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top" >Rev</th>
+<th class="tableblock halign-left valign-top" >Date</th>
+<th class="tableblock halign-left valign-top" >Author</th>
+<th class="tableblock halign-left valign-top" >Changes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2016-10-20</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock"><strong>First public revision.</strong></p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">2018-11-15</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Ben Ashbaugh</p></td>
+<td class="tableblock halign-left valign-top" ><p class="tableblock">Conversion to asciidoc.</p></td>
+</tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+<div id="footnotes"><hr></div>
+<div id="footer">
+<div id="footer-text">
+Last updated
+ 2018-11-16 09:32:34 PST
+</div>
+</div>
+</body>
+</html>
diff --git a/extensions/intel/cl_intel_subgroups_short.txt b/extensions/intel/cl_intel_subgroups_short.txt
index 77a6272..74f225c 100644
--- a/extensions/intel/cl_intel_subgroups_short.txt
+++ b/extensions/intel/cl_intel_subgroups_short.txt
@@ -1,370 +1,11 @@
-Name String
+The cl_intel_subgroups_short extension is no longer authored in plain text.
- cl_intel_subgroups_short
+Please refer to the HTML extension specification instead,
+which may be found here:
-Contributors
+https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups_short.html
- Ben Ashbaugh, Intel
- Felix J Degrood, Intel
- Biju George, Intel
- Raun M Krisch, Intel
- Konstantin A Pyjov, Intel
- Insoo Woo, Intel
+The asciidoc source for this extension specification may
+be found here:
-Contact
-
- Ben Ashbaugh, Intel (ben.ashbaugh 'at' intel.com)
-
-Version
-
- Version 1, October 20, 2016
-
-Number
-
- OpenCL Extension #48
-
-Status
-
- Final Draft
-
-Dependencies
-
- OpenCL 1.2 and support for cl_intel_subgroups is required. This extension is written
- against revision 29 of the OpenCL 2.0 API specification, against revision 33 of the
- OpenCL 2.0 OpenCL C specification, against revision 32 of the OpenCL 2.0 Extension
- specification, and against version 4 of the cl_intel_subgroups specification.
-
-Overview
-
- The goal of this extension is to allow programmers to improve the performance of
- applications operating on 16-bit data types by extending the subgroup functions
- described in the cl_intel_subgroups extension to support 16-bit integer data types
- (shorts and ushorts). Specifically, the extension:
-
- * Extends the subgroup broadcast function to allow 16-bit integer values to be
- broadcast from one work item to all other work items in the subgroup.
-
- * Extends the subgroup scan and reduction functions to operate on 16-bit integer
- data types.
-
- * Extends the Intel subgroup shuffle functions to allow arbitrarily exchanging
- 16-bit integer values among work items in the subgroup.
-
- * Extends the Intel subgroup block read and write functions to allow reading
- and writing 16-bit integer data from images and buffers.
-
-New OpenCL C Functions
-
- Add <short> and <ushort> to the list of supported data types for the subgroup
- broadcast, scan, and reduction functions:
-
- short intel_sub_group_broadcast( short x, uint sub_group_local_id );
- ushort intel_sub_group_broadcast( ushort x, uint sub_group_local_id )
-
- For the sub_group_reduce, sub_group_scan_exclusive, and
- sub_group_scan_inclusive functions, <op> is <add>, <min>, or <max>.
-
- short intel_sub_group_reduce_<op>( short x );
- short intel_sub_group_scan_exclusive_<op>( short x );
- short intel_sub_group_scan_inclusive_<op>( short x );
- ushort intel_sub_group_reduce_<op>( ushort x );
- ushort intel_sub_group_scan_exclusive_<op>( ushort x );
- ushort intel_sub_group_scan_inclusive_<op>( ushort x );
-
- Add <short>, <short2>, <short4>, <short8>, <short16>, <ushort>, <ushort2>,
- <ushort4>, <ushort8>, and <ushort16> to the list of data types supported by
- the sub_group_shuffle, sub_group_shuffle_down, sub_group_shuffle_up, and
- sub_group_suffle_xor functions:
-
- <gentype> intel_sub_group_shuffle( <gentype> data, uint c );
- <gentype> intel_sub_group_shuffle_down(
- <gentype> current, <gentype> next, uint delta );
- <gentype> intel_sub_group_shuffle_up(
- <gentype> previous, <gentype> current, uint delta );
- <gentype> intel_sub_group_shuffle_xor( <gentype> data, uint value );
-
- Add <ushort> variants of the subgroup block read and write functions:
-
- ushort intel_sub_group_block_read_us( const __global ushort* p );
- ushort2 intel_sub_group_block_read_us2( const __global ushort* p );
- ushort4 intel_sub_group_block_read_us4( const __global ushort* p );
- ushort8 intel_sub_group_block_read_us8( const __global ushort* p );
- ushort intel_sub_group_block_read_us( image2d_t image, int2 byte_coord );
- ushort2 intel_sub_group_block_read_us2( image2d_t image, int2 byte_coord );
- ushort4 intel_sub_group_block_read_us4( image2d_t image, int2 byte_coord );
- ushort8 intel_sub_group_block_read_us8( image2d_t image, int2 byte_coord );
-
- void intel_sub_group_block_write_us( __global ushort* p, ushort data );
- void intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
- void intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
- void intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
- void intel_sub_group_block_write_us( image2d_t image, int2 byte_coord, ushort data );
- void intel_sub_group_block_write_us2( image2d_t image, int2 byte_coord, ushort2 data );
- void intel_sub_group_block_write_us4( image2d_t image, int2 byte_coord, ushort4 data );
- void intel_sub_group_block_write_us8( image2d_t image, int2 byte_coord, ushort8 data );
-
- For naming consistency, also adds suffixed aliases of the <uint> subgroup block read
- and write functions described in the cl_intel_subgroups extension:
-
- uint intel_sub_group_block_read_ui( const __global uint* p );
- uint2 intel_sub_group_block_read_ui2( const __global uint* p );
- uint4 intel_sub_group_block_read_ui4( const __global uint* p );
- uint8 intel_sub_group_block_read_ui8( const __global uint* p );
- uint intel_sub_group_block_read_ui( image2d_t image, int2 byte_coord );
- uint2 intel_sub_group_block_read_ui2( image2d_t image, int2 byte_coord );
- uint4 intel_sub_group_block_read_ui4( image2d_t image, int2 byte_coord );
- uint8 intel_sub_group_block_read_ui8( image2d_t image, int2 byte_coord );
-
- void intel_sub_group_block_write_ui( __global uint* p, uint data );
- void intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
- void intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
- void intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
- void intel_sub_group_block_write_ui( image2d_t image, int2 byte_coord, uint data );
- void intel_sub_group_block_write_ui2( image2d_t image, int2 byte_coord, uint2 data );
- void intel_sub_group_block_write_ui4( image2d_t image, int2 byte_coord, uint4 data );
- void intel_sub_group_block_write_ui8( image2d_t image, int2 byte_coord, uint8 data );
-
-Additions to Section 6.13.15 - "Work Group Functions" of the OpenCL 2.0 C Specification:
-
- Add <short> and <ushort> to the list of supported data types for the subgroup
- broadcast, scan, and reduction functions:
-
- "----------------------------------------------------------------------------------------
- Function Description
- ------------------------------------------ --------------------------------------------
- <gentype> sub_group_broadcast( Broadcasts the value of x for the work item
- <gentype> x, identified by sub_group_local_id (value
- uint sub_group_local_id ) returned by get_sub_group_local_id) to all
- short intel_sub_group_broadcast( work items in the subgroup.
- short x, sub_group_local_id must be the same value
- uint sub_group_local_id ) for all work items in the subgroup.
- ushort intel_sub_group_broadcast(
- ushort x,
- uint sub_group_local_id )
-
- <gentype> sub_group_reduce_<op>( Returns the result of the reduction operation
- <gentype> x ) specified by <op> for all values x specified
- short intel_sub_group_reduce_<op>( by work items in a subgroup.
- short x )
- ushort intel_sub_group_reduce_<op>(
- ushort x )
-
- <gentype> sub_group_scan_exclusive_<op>( Does an exclusive scan operation specified by
- <gentype> x ) <op> of all values specified by work items
- short intel_sub_group_scan_exclusive_<op>( in a subgroup. The scan results are
- short x ) returned for each work item.
- ushort intel_sub_group_scan_exclusive_<op>(
- ushort x ) The scan order is defined by increasing
- sub_group_local_id within the subgroup.
-
- <gentype> sub_group_scan_inclusive_<op>( Does an inclusive scan operation specified by
- <gentype> x ) <op> of all values specified by work items
- short intel_sub_group_scan_inclusive_<op>( in a subgroup. The scan results are
- short x ) returned for each work item
- ushort intel_sub_group_scan_inclusive_<op>(
- ushort x ) The scan order is defined by increasing
- sub_group_local_id within the subgroup.
- ----------------------------------------------------------------------------------------"
-
-Additions to Section 6.13.X - "Sub Group Shuffle Functions" of the OpenCL 2.0 C Specification,
-which was added by the cl_intel_subgroups Extension:
-
- Add <short>, <short2>, <short4>, <short8>, <short16>, <ushort>, <ushort2>,
- <ushort4>, <ushort8>, and <ushort16> to the list of data types supported by
- the sub_group_shuffle, sub_group_shuffle_down, sub_group_shuffle_up, and
- sub_group_suffle_xor functions:
-
- "The OpenCL C programming language implements the following built-in functions to allow
- data to be exchanged among work items in a subgroup. These built-in functions need not
- be encountered by all work items in a subgroup executing the kernel. For these
- functions, <gentype> is <float>, <float2>, <float4>, <float8>, <float16>, <short>,
- <short2>, <short4>, <short8>, <short16>, <ushort>, <ushort2>, <ushort4>, <ushort8>,
- <ushort16>, <int>, <int2>, <int4>, <int8>, <int16>, <uint>, <uint2>, <uint4>, <uint8>,
- <uint16>, <long>, or <ulong>.
-
- If cl_khr_fp16 is supported, <gentype> also includes <half>.
- If cl_khr_fp64 or doubles are supported, <gentype> also includes <double>."
-
-Modifications to Section 6.13.X "Sub Group Read and Write Functions" of the OpenCL 2.0 C
-Specification, which was added by the cl_intel_subgroups Extension:
-
- Add suffixed aliases of the previously un-suffixed 32-bit block read and write functions.
- There is no change to the description or behavior of these functions:
-
- "---------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- ------------------------------------------
- uint intel_sub_group_block_read( Reads 1, 2, 4, or 8 uints of data for each
- const __global uint* p ) work item in the subgroup from the specified
- uint2 intel_sub_group_block_read2( pointer as a block operation...
- const __global uint* p )
- uint4 intel_sub_group_block_read4(
- const __global uint* p )
- uint8 intel_sub_group_block_read8(
- const __global uint* p )
- uint intel_sub_group_block_read_ui(
- const __global uint* p )
- uint2 intel_sub_group_block_read_ui2(
- const __global uint* p )
- uint4 intel_sub_group_block_read_ui4(
- const __global uint* p )
- uint8 intel_sub_group_block_read_ui8(
- const __global uint* p )
-
- uint intel_sub_group_block_read( Reads 1, 2, 4, or 8 uints of data for each
- image2d_t image, work item in the subgroup from the specified
- int2 byte_coord ) image at the specified coordinate as a block
- uint2 intel_sub_group_block_read2( operation...
- image2d_t image,
- int2 byte_coord )
- uint4 intel_sub_group_block_read4(
- image2d_t image,
- int2 byte_coord )
- uint8 intel_sub_group_block_read8(
- image2d_t image,
- int2 byte_coord )
- uint intel_sub_group_block_read_ui(
- image2d_t image,
- int2 byte_coord )
- uint2 intel_sub_group_block_read_ui2(
- image2d_t image,
- int2 byte_coord )
- uint4 intel_sub_group_block_read_ui4(
- image2d_t image,
- int2 byte_coord )
- uint8 intel_sub_group_block_read_ui8(
- image2d_t image,
- int2 byte_coord )
-
- void intel_sub_group_block_write( Writes 1, 2, 4, or 8 uints of data for each
- __global uint* p, uint data ) work item in the subgroup to the specified
- void intel_sub_group_block_write2( pointer as a block operation...
- __global uint* p, uint2 data )
- void intel_sub_group_block_write4(
- __global uint* p, uint4 data )
- void intel_sub_group_block_write8(
- __global uint* p, uint8 data )
- void intel_sub_group_block_write_ui(
- __global uint* p, uint data )
- void intel_sub_group_block_write_ui2(
- __global uint* p, uint2 data )
- void intel_sub_group_block_write_ui4(
- __global uint* p, uint4 data )
- void intel_sub_group_block_write_ui8(
- __global uint* p, uint8 data )
-
- void intel_sub_group_block_write( Writes 1, 2, 4, or 8 uints of data for each
- image2d_t image, work item in the subgroup to the specified
- int2 byte_coord, uint data ) image at the specified coordinate as a block
- void intel_sub_group_block_write2( operation...
- image2d_t image,
- int2 byte_coord, uint2 data )
- void intel_sub_group_block_write4(
- image2d_t image,
- int2 byte_coord, uint4 data )
- void intel_sub_group_block_write8(
- image2d_t image,
- int2 byte_coord, uint8 data )
- void intel_sub_group_block_write_ui(
- image2d_t image,
- int2 byte_coord, uint data )
- void intel_sub_group_block_write_ui2(
- image2d_t image,
- int2 byte_coord, uint2 data )
- void intel_sub_group_block_write_ui4(
- image2d_t image,
- int2 byte_coord, uint4 data )
- void intel_sub_group_block_write_ui8(
- image2d_t image,
- int2 byte_coord, uint8 data )
- --------------------------------------------------------------------------------------"
-
- Also, add <ushort> variants of the block read and write functions. In the descriptions
- of these functions, the "note below describing out-of-bounds behavior" is in the
- cl_intel_subgroups extension specification:
-
- "--------------------------------------------------------------------------------------
- Function Description
- ---------------------------------------- -------------------------------------------
- ushort intel_sub_group_block_read_us( Reads 1, 2, 4, or 8 ushorts of data for each
- const __global ushort* p ) work item in the subgroup from the specified
- ushort2 intel_sub_group_block_read_us2( pointer as a block operation.
- const __global ushort* p ) The data is read strided, so the first
- ushort4 intel_sub_group_block_read4( value read is:
- const __global ushort* p ) p[ sub_group_local_id ]
- ushort8 intel_sub_group_block_read_us8( and the second value read is:
- const __global ushort* p ) p[ sub_group_local_id + max_sub_group_size ]
- etc.
- p must be aligned to a 32-bit (4-byte)
- boundary.
-
- There is no defined out-of-range behavior
- for these functions.
-
- ushort intel_sub_group_block_read_us( Reads 1, 2, 4, or 8 ushorts of data for each
- image2d_t image, work item in the subgroup from the specified
- int2 byte_coord ) image at the specified coordinate as a block
- ushort2 intel_sub_group_block_read_us2( operation. Note that the coordinate is a
- image2d_t image, byte coordinate, not an image element
- int2 byte_coord ) coordinate. Also note that the image data
- ushort4 intel_sub_group_block_read_us4( is read without format conversion, so each
- image2d_t image, work item may read multiple image elements
- int2 byte_coord ) (for images with element size smaller than
- ushort8 intel_sub_group_block_read_us8( 16-bits).
- image2d_t image,
- int2 byte_coord ) The data is read row-by-row, so the first
- value read is from the row specified in the
- y-component of the provided byte_coord, the
- second value is read from the y-component
- of the provided byte_coord plus one, etc.
-
- Please see the note below describing out-of-
- bounds behavior for these functions.
-
- void intel_sub_group_block_write_us( Writes 1, 2, 4, or 8 ushorts of data for each
- __global ushort* p, work item in the subgroup to the specified
- ushort data ) pointer as a block operation.
- void intel_sub_group_block_write_us2( The data is written strided, so the first
- __global ushort* p, value is written to:
- ushort2 data ) p[ sub_group_local_id ]
- void intel_sub_group_block_write_us4( and the second value is written to:
- __global ushort* p, p[ sub_group_local_id + max_sub_group_size ]
- ushort4 data ) etc.
- void intel_sub_group_block_write_us8( p must be aligned to a 128-bit (16-byte)
- __global ushort* p, boundary.
- ushort8 data )
- There is no defined out-of-range behavior
- for these functions.
-
- void intel_sub_group_block_write_us( Writes 1, 2, 4, or 8 ushorts of data for
- image2d_t image, each work item in the subgroup to the specified
- int2 byte_coord, ushort data ) image at the specified coordinate as a block
- void intel_sub_group_block_write_us2( operation. Note that the coordinate is a
- image2d_t image, byte coordinate, not an image element
- int2 byte_coord, ushort2 data ) coordinate. Unlike the image block read
- void intel_sub_group_block_write_us4( function, which may read from any arbitrary
- image2d_t image, byte offset, the x-component of the byte
- int2 byte_coord, ushort4 data ) coordinate for the image block write
- void intel_sub_group_block_write_us8( functions must be a multiple of four; in
- image2d_t image, other words, the write must begin at 32-bit
- int2 byte_coord, ushort8 data ) boundary. There is no restriction on the
- y-component of the coordinate. Also, note that
- the image data is written without format
- conversion, so each work item may write
- multiple image elements (for images with
- element size smaller than 16-bits).
-
- The data is written row-by-row, so the first
- value written is from the row specified by
- the y-component of the provided byte_coord,
- the second value is written from the y-
- component of the provided byte_coord plus
- one, etc.
-
- Please see the note below describing out-of-
- bounds behavior for these functions.
- --------------------------------------------------------------------------------------"
-
-Revision History
-
- Version 1 (2016/10/20): First public revision.
+https://github.com/KhronosGroup/OpenCL-Docs/blob/master/extensions/cl_intel_subgroups_short.asciidoc
diff --git a/extensions/registry.py b/extensions/registry.py
index 7a4ac11..7489dab 100644
--- a/extensions/registry.py
+++ b/extensions/registry.py
@@ -174,22 +174,37 @@
'cl_intel_required_subgroup_size' : {
'number' : 43,
'flags' : { 'public' },
- 'url' : 'extensions/intel/cl_intel_required_subgroup_size.txt',
+ 'url' : 'extensions/intel/cl_intel_required_subgroup_size.html',
},
'cl_intel_simultaneous_sharing' : {
'number' : 34,
'flags' : { 'public' },
'url' : 'extensions/intel/cl_intel_simultaneous_sharing.txt',
},
+ 'cl_intel_spirv_device_side_avc_motion_estimation' : {
+ 'number' : 57,
+ 'flags' : { 'public' },
+ 'url' : 'extensions/intel/cl_intel_spirv_device_side_avc_motion_estimation.html',
+ },
+ 'cl_intel_spirv_media_block_io' : {
+ 'number' : 56,
+ 'flags' : { 'public' },
+ 'url' : 'extensions/intel/cl_intel_spirv_media_block_io.html',
+ },
+ 'cl_intel_spirv_subgroups' : {
+ 'number' : 55,
+ 'flags' : { 'public' },
+ 'url' : 'extensions/intel/cl_intel_spirv_subgroups.html',
+ },
'cl_intel_subgroups' : {
'number' : 35,
'flags' : { 'public' },
- 'url' : 'extensions/intel/cl_intel_subgroups.txt',
+ 'url' : 'extensions/intel/cl_intel_subgroups.html',
},
'cl_intel_subgroups_short' : {
'number' : 48,
'flags' : { 'public' },
- 'url' : 'extensions/intel/cl_intel_subgroups_short.txt',
+ 'url' : 'extensions/intel/cl_intel_subgroups_short.html',
},
'cl_intel_thread_local_exec' : {
'number' : 16,