| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <meta name="generator" content="Asciidoctor 2.0.16"> |
| <title>cl_intel_subgroups</title> |
| <style> |
| /*! normalize.css v2.1.2 | MIT License | git.io/normalize */ |
| /* ========================================================================== HTML5 display definitions ========================================================================== */ |
| /** IE 8/9 lack a default `block` display for these HTML5 elements. */ |
| article, |
| aside, |
| details, |
| figcaption, |
| figure, |
| footer, |
| header, |
| hgroup, |
| main, |
| nav, |
| section, |
| summary { |
|   display: block; |
| } |
| |
| /** IE 8/9 lack a default `inline-block` display for these media elements. */ |
| audio, |
| canvas, |
| video { |
|   display: inline-block; |
| } |
| |
| /** Keep control-less `audio` from rendering in modern browsers and drop its excess height on iOS 5 devices. */ |
| audio:not([controls]) { |
|   display: none; |
|   height: 0; |
| } |
| |
| /** Supply the `[hidden]` styling missing in IE 8/9; also hide `template` in IE, Safari, and Firefox < 22. */ |
| [hidden], |
| template { |
|   display: none; |
| } |
| |
| /** Never render `script` elements; `!important` keeps later rules from re-showing them. */ |
| script { |
|   display: none !important; |
| } |
| |
| /* ========================================================================== Base ========================================================================== */ |
| /** |
|  * 1. Default to a sans-serif font family. |
|  * 2. Stop iOS from adjusting the text size after an orientation change, without disabling user zoom. |
|  */ |
| html { |
|   font-family: sans-serif; /* 1 */ |
|   -ms-text-size-adjust: 100%; /* 2 */ |
|   -webkit-text-size-adjust: 100%; /* 2 */ |
| } |
| |
| /** Drop the browser's default page margin. */ |
| body { |
|   margin: 0; |
| } |
| |
| /* ========================================================================== Links ========================================================================== */ |
| /** IE 10 paints a gray background on active links; clear it. */ |
| a { |
|   background: transparent; |
| } |
| |
| /** Give focused links the same outline in Chrome as in other browsers. */ |
| a:focus { |
|   outline: thin dotted; |
| } |
| |
| /** Drop the outline while a link is active or hovered, for readability. */ |
| a:active, |
| a:hover { |
|   outline: 0; |
| } |
| |
| /* ========================================================================== Typography ========================================================================== */ |
| /** Address variable `h1` font-size and margin within `section` and `article` contexts in Firefox 4+, Safari 5, and Chrome. */ |
| h1 { font-size: 2em; margin: 0.67em 0; } |
| |
| /** Address styling not present in IE 8/9, Safari 5, and Chrome. */ |
| abbr[title] { border-bottom: 1px dotted; } |
| |
| /** Address style set to `bolder` in Firefox 4+, Safari 5, and Chrome. */ |
| b, strong { font-weight: bold; } |
| |
| /** Address styling not present in Safari 5 and Chrome. */ |
| dfn { font-style: italic; } |
| |
| /** Address differences between Firefox and other browsers. */ |
| hr { -moz-box-sizing: content-box; box-sizing: content-box; height: 0; } |
| |
| /** Address styling not present in IE 8/9. */ |
| mark { background: #ff0; color: #000; } |
| |
| /** Correct font family set oddly in Safari 5 and Chrome. */ |
| code, kbd, pre, samp { font-family: monospace, serif; font-size: 1em; } |
| |
| /** Improve readability of pre-formatted text in all browsers. */ |
| pre { white-space: pre-wrap; } |
| |
| /** Set consistent quote types. */ |
| q { quotes: "\201C" "\201D" "\2018" "\2019"; } |
| |
| /** Address inconsistent and variable font size in all browsers. */ |
| small { font-size: 80%; } |
| |
| /** Prevent `sub` and `sup` affecting `line-height` in all browsers. */ |
| sub, sup { font-size: 75%; line-height: 0; position: relative; vertical-align: baseline; } |
| |
| sup { top: -0.5em; } |
| |
| sub { bottom: -0.25em; } |
| |
| /* ========================================================================== Embedded content ========================================================================== */ |
| /** Remove border when inside `a` element in IE 8/9. */ |
| img { border: 0; } |
| |
| /** Correct overflow displayed oddly in IE 9. */ |
| svg:not(:root) { overflow: hidden; } |
| |
| /* ========================================================================== Figures ========================================================================== */ |
| /** Address margin not present in IE 8/9 and Safari 5. */ |
| figure { margin: 0; } |
| |
| /* ========================================================================== Forms ========================================================================== */ |
| /** Define consistent border, margin, and padding. */ |
| fieldset { border: 1px solid #c0c0c0; margin: 0 2px; padding: 0.35em 0.625em 0.75em; } |
| |
| /** 1. Correct `color` not being inherited in IE 8/9. 2. Remove padding so people aren't caught out if they zero out fieldsets. */ |
| legend { border: 0; /* 1 */ padding: 0; /* 2 */ } |
| |
| /** 1. Correct font family not being inherited in all browsers. 2. Correct font size not being inherited in all browsers. 3. Address margins set differently in Firefox 4+, Safari 5, and Chrome. */ |
| button, input, select, textarea { font-family: inherit; /* 1 */ font-size: 100%; /* 2 */ margin: 0; /* 3 */ } |
| |
| /** Address Firefox 4+ setting `line-height` on `input` using `!important` in the UA stylesheet. */ |
| button, input { line-height: normal; } |
| |
| /** Address inconsistent `text-transform` inheritance for `button` and `select`. All other form control elements do not inherit `text-transform` values. Correct `button` style inheritance in Chrome, Safari 5+, and IE 8+. Correct `select` style inheritance in Firefox 4+ and Opera. */ |
| button, select { text-transform: none; } |
| |
| /** 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio` and `video` controls. 2. Correct inability to style clickable `input` types in iOS. 3. Improve usability and consistency of cursor style between image-type `input` and others. */ |
| button, html input[type="button"], input[type="reset"], input[type="submit"] { -webkit-appearance: button; /* 2 */ cursor: pointer; /* 3 */ } |
| |
| /** Re-set default cursor for disabled elements. */ |
| button[disabled], html input[disabled] { cursor: default; } |
| |
| /** 1. Address box sizing set to `content-box` in IE 8/9. 2. Remove excess padding in IE 8/9. */ |
| input[type="checkbox"], input[type="radio"] { box-sizing: border-box; /* 1 */ padding: 0; /* 2 */ } |
| |
| /** 1. Address `appearance` set to `searchfield` in Safari 5 and Chrome. 2. Address `box-sizing` set to `border-box` in Safari 5 and Chrome (include `-moz` to future-proof). */ |
| input[type="search"] { -webkit-appearance: textfield; /* 1 */ -moz-box-sizing: content-box; -webkit-box-sizing: content-box; /* 2 */ box-sizing: content-box; } |
| |
| /** Remove inner padding and search cancel button in Safari 5 and Chrome on OS X. */ |
| input[type="search"]::-webkit-search-cancel-button, input[type="search"]::-webkit-search-decoration { -webkit-appearance: none; } |
| |
| /** Remove inner padding and border in Firefox 4+. */ |
| button::-moz-focus-inner, input::-moz-focus-inner { border: 0; padding: 0; } |
| |
| /** 1. Remove default vertical scrollbar in IE 8/9. 2. Improve readability and alignment in all browsers. */ |
| textarea { overflow: auto; /* 1 */ vertical-align: top; /* 2 */ } |
| |
| /* ========================================================================== Tables ========================================================================== */ |
| /** Remove most spacing between table cells. */ |
| table { border-collapse: collapse; border-spacing: 0; } |
| |
| meta.foundation-mq-small { font-family: "only screen and (min-width: 768px)"; width: 768px; } |
| |
| meta.foundation-mq-medium { font-family: "only screen and (min-width:1280px)"; width: 1280px; } |
| |
| meta.foundation-mq-large { font-family: "only screen and (min-width:1440px)"; width: 1440px; } |
| |
| *, *:before, *:after { -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box; } |
| |
| html, body { font-size: 100%; } |
| |
| body { background: #fff; color: #222; padding: 0; margin: 0; font-family: "Helvetica Neue", "Helvetica", Helvetica, Arial, sans-serif; font-weight: normal; font-style: normal; line-height: 1; position: relative; cursor: auto; } |
| |
| a:hover { cursor: pointer; } |
| |
| img, object, embed { max-width: 100%; height: auto; } |
| |
| object, embed { height: 100%; } |
| |
| img { -ms-interpolation-mode: bicubic; } |
| |
| #map_canvas img, #map_canvas embed, #map_canvas object, .map_canvas img, .map_canvas embed, .map_canvas object { max-width: none !important; } |
| |
| /* Float helpers. */ |
| .left { |
|   float: left !important; |
| } |
| |
| .right { |
|   float: right !important; |
| } |
| |
| /* Text alignment helpers. */ |
| .text-left { |
|   text-align: left !important; |
| } |
| |
| .text-right { |
|   text-align: right !important; |
| } |
| |
| .text-center { |
|   text-align: center !important; |
| } |
| |
| .text-justify { |
|   text-align: justify !important; |
| } |
| |
| /* Visibility and font-smoothing helpers. */ |
| .hide { |
|   display: none; |
| } |
| |
| .antialiased { |
|   -webkit-font-smoothing: antialiased; |
| } |
| |
| /* Images sit inline and align to the middle of the line. */ |
| img { |
|   display: inline-block; |
|   vertical-align: middle; |
| } |
| |
| /* Form control sizing. */ |
| textarea { |
|   height: auto; |
|   min-height: 50px; |
| } |
| |
| select { |
|   width: 100%; |
| } |
| |
| /* Embedded objects and SVG get the same inline treatment as images. */ |
| object, |
| svg { |
|   display: inline-block; |
|   vertical-align: middle; |
| } |
| |
| /* Horizontal centering and full-width helpers. */ |
| .center { |
|   margin-left: auto; |
|   margin-right: auto; |
| } |
| |
| .spread { |
|   width: 100%; |
| } |
| |
| p.lead, .paragraph.lead > p, #preamble > .sectionbody > .paragraph:first-of-type p { font-size: 1.21875em; line-height: 1.6; } |
| |
| .subheader, .admonitionblock td.content > .title, .audioblock > .title, .exampleblock > .title, .imageblock > .title, .listingblock > .title, .literalblock > .title, .stemblock > .title, .openblock > .title, .paragraph > .title, .quoteblock > .title, table.tableblock > .title, .verseblock > .title, .videoblock > .title, .dlist > .title, .olist > .title, .ulist > .title, .qlist > .title, .hdlist > .title { line-height: 1.4; color: black; font-weight: 300; margin-top: 0.2em; margin-bottom: 0.5em; } |
| |
| /* Typography resets */ |
| div, dl, dt, dd, ul, ol, li, h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6, pre, form, p, blockquote, th, td { margin: 0; padding: 0; direction: ltr; } |
| |
| /* Default Link Styles */ |
| a { color: #0068b0; text-decoration: none; line-height: inherit; } |
| a:hover, a:focus { color: #333; } |
| a img { border: none; } |
| |
| /* Default paragraph styles */ |
| p { font-family: Noto, sans-serif; font-weight: normal; font-size: 1em; line-height: 1.6; margin-bottom: 0.75em; text-rendering: optimizeLegibility; } |
| p aside { font-size: 0.875em; line-height: 1.35; font-style: italic; } |
| |
| /* Default header styles */ |
| h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { font-family: Noto, sans-serif; font-weight: normal; font-style: normal; color: black; text-rendering: optimizeLegibility; margin-top: 0.5em; margin-bottom: 0.5em; line-height: 1.2125em; } |
| h1 small, h2 small, h3 small, #toctitle small, .sidebarblock > .content > .title small, h4 small, h5 small, h6 small { font-size: 60%; color: #4d4d4d; line-height: 0; } |
| |
| /* Base heading scale (overridden at wider viewports by the 768px media query). */ |
| h1 { |
|   font-size: 2.125em; |
| } |
| |
| h2 { |
|   font-size: 1.6875em; |
| } |
| |
| h3, |
| #toctitle, |
| .sidebarblock > .content > .title { |
|   font-size: 1.375em; |
| } |
| |
| /* h4 and h5 share one size at this breakpoint. */ |
| h4, |
| h5 { |
|   font-size: 1.125em; |
| } |
| |
| h6 { |
|   font-size: 1em; |
| } |
| |
| /* Horizontal rule: a single 1px top border that also clears floats. */ |
| hr { |
|   border: solid #ddd; |
|   border-width: 1px 0 0; |
|   clear: both; |
|   margin: 1.25em 0 1.1875em; |
|   height: 0; |
| } |
| |
| /* Helpful Typography Defaults */ |
| em, i { font-style: italic; line-height: inherit; } |
| |
| strong, b { font-weight: bold; line-height: inherit; } |
| |
| small { font-size: 60%; line-height: inherit; } |
| |
| code { font-family: Consolas, "Liberation Mono", Courier, monospace; font-weight: normal; color: #264357; } |
| |
| /* Lists */ |
| ul, ol, dl { font-size: 1em; line-height: 1.6; margin-bottom: 0.75em; list-style-position: outside; font-family: Noto, sans-serif; } |
| |
| ul, ol { margin-left: 1.5em; } |
| ul.no-bullet, ol.no-bullet { margin-left: 1.5em; } |
| |
| /* Unordered Lists */ |
| ul li ul, ul li ol { margin-left: 1.25em; margin-bottom: 0; font-size: 1em; /* Override nested font-size change */ } |
| ul.square li ul, ul.circle li ul, ul.disc li ul { list-style: inherit; } |
| ul.square { list-style-type: square; } |
| ul.circle { list-style-type: circle; } |
| ul.disc { list-style-type: disc; } |
| ul.no-bullet { list-style: none; } |
| |
| /* Ordered Lists */ |
| ol li ul, ol li ol { margin-left: 1.25em; margin-bottom: 0; } |
| |
| /* Definition Lists */ |
| dl dt { margin-bottom: 0.3em; font-weight: bold; } |
| dl dd { margin-bottom: 0.75em; } |
| |
| /* Abbreviations */ |
| abbr, acronym { text-transform: uppercase; font-size: 90%; color: black; border-bottom: 1px dotted #ddd; cursor: help; } |
| |
| abbr { text-transform: none; } |
| |
| /* Blockquotes */ |
| blockquote { margin: 0 0 0.75em; padding: 0.5625em 1.25em 0 1.1875em; border-left: 1px solid #ddd; } |
| blockquote cite { display: block; font-size: 0.8125em; color: #365E7A; } |
| blockquote cite:before { content: "\2014 \0020"; } |
| blockquote cite a, blockquote cite a:visited { color: #365E7A; } |
| |
| blockquote, blockquote p { line-height: 1.6; color: #333; } |
| |
| /* Microformats */ |
| .vcard { display: inline-block; margin: 0 0 1.25em 0; border: 1px solid #ddd; padding: 0.625em 0.75em; } |
| .vcard li { margin: 0; display: block; } |
| .vcard .fn { font-weight: bold; font-size: 0.9375em; } |
| |
| .vevent .summary { font-weight: bold; } |
| .vevent abbr { cursor: auto; text-decoration: none; font-weight: bold; border: none; padding: 0 0.0625em; } |
| |
| /* From 768px up, headings use a 1.4 line height and h1 to h4 grow. */ |
| @media only screen and (min-width: 768px) { |
|   h1, |
|   h2, |
|   h3, |
|   #toctitle, |
|   .sidebarblock > .content > .title, |
|   h4, |
|   h5, |
|   h6 { |
|     line-height: 1.4; |
|   } |
| |
|   h1 { |
|     font-size: 2.75em; |
|   } |
| |
|   h2 { |
|     font-size: 2.3125em; |
|   } |
| |
|   h3, |
|   #toctitle, |
|   .sidebarblock > .content > .title { |
|     font-size: 1.6875em; |
|   } |
| |
|   h4 { |
|     font-size: 1.4375em; |
|   } |
| } |
| /* Tables */ |
| table { background: #fff; margin-bottom: 1.25em; border: solid 1px #d8d8ce; } |
| table thead, table tfoot { background: #eee; font-weight: bold; } |
| table thead tr th, table thead tr td, table tfoot tr th, table tfoot tr td { padding: 0.5em 0.625em 0.625em; font-size: inherit; color: #222; text-align: left; } |
| table tr th, table tr td { padding: 0.5625em 0.625em; font-size: inherit; color: #6d6e71; } |
| table tr.even, table tr.alt, table tr:nth-of-type(even) { background: #f8f8f8; } |
| table thead tr th, table tfoot tr th, table tbody tr td, table tr td, table tfoot tr td { display: table-cell; line-height: 1.4; } |
| |
| body { -moz-osx-font-smoothing: grayscale; -webkit-font-smoothing: antialiased; tab-size: 4; } |
| |
| h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { line-height: 1.4; } |
| |
| a:hover, a:focus { text-decoration: underline; } |
| |
| .clearfix:before, .clearfix:after, .float-group:before, .float-group:after { content: " "; display: table; } |
| .clearfix:after, .float-group:after { clear: both; } |
| |
| *:not(pre) > code { font-size: inherit; font-style: normal !important; letter-spacing: 0; padding: 0; background-color: transparent; -webkit-border-radius: 0; border-radius: 0; line-height: inherit; word-wrap: break-word; } |
| *:not(pre) > code.nobreak { word-wrap: normal; } |
| *:not(pre) > code.nowrap { white-space: nowrap; } |
| |
| pre, pre > code { line-height: 1.6; color: #264357; font-family: Consolas, "Liberation Mono", Courier, monospace; font-weight: normal; } |
| |
| em em { font-style: normal; } |
| |
| strong strong { font-weight: normal; } |
| |
| .keyseq { color: #333333; } |
| |
| kbd { font-family: Consolas, "Liberation Mono", Courier, monospace; display: inline-block; color: black; font-size: 0.65em; line-height: 1.45; background-color: #f7f7f7; border: 1px solid #ccc; -webkit-border-radius: 3px; border-radius: 3px; -moz-box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; -webkit-box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; margin: 0 0.15em; padding: 0.2em 0.5em; vertical-align: middle; position: relative; top: -0.1em; white-space: nowrap; } |
| |
| .keyseq kbd:first-child { margin-left: 0; } |
| |
| .keyseq kbd:last-child { margin-right: 0; } |
| |
| .menuseq, .menuref { color: #000; } |
| |
| .menuseq b:not(.caret), .menuref { font-weight: inherit; } |
| |
| .menuseq { word-spacing: -0.02em; } |
| .menuseq b.caret { font-size: 1.25em; line-height: 0.8; } |
| .menuseq i.caret { font-weight: bold; text-align: center; width: 0.45em; } |
| |
| /* UI button labels are wrapped in generated square brackets, nudged up by 1px. */ |
| b.button:before, |
| b.button:after { |
|   position: relative; |
|   top: -1px; |
|   font-weight: normal; |
| } |
| |
| /* Opening bracket. */ |
| b.button:before { |
|   content: "["; |
|   padding: 0 3px 0 2px; |
| } |
| |
| /* Closing bracket, with the horizontal padding mirrored. */ |
| b.button:after { |
|   content: "]"; |
|   padding: 0 2px 0 3px; |
| } |
| |
| #header, #content, #footnotes, #footer { width: 100%; margin-left: auto; margin-right: auto; margin-top: 0; margin-bottom: 0; max-width: 62.5em; *zoom: 1; position: relative; padding-left: 1.5em; padding-right: 1.5em; } |
| #header:before, #header:after, #content:before, #content:after, #footnotes:before, #footnotes:after, #footer:before, #footer:after { content: " "; display: table; } |
| #header:after, #content:after, #footnotes:after, #footer:after { clear: both; } |
| |
| #content { margin-top: 1.25em; } |
| |
| #content:before { content: none; } |
| |
| #header > h1:first-child { color: black; margin-top: 2.25rem; margin-bottom: 0; } |
| #header > h1:first-child + #toc { margin-top: 8px; border-top: 1px solid #ddd; } |
| #header > h1:only-child, body.toc2 #header > h1:nth-last-child(2) { border-bottom: 1px solid #ddd; padding-bottom: 8px; } |
| #header .details { border-bottom: 1px solid #ddd; line-height: 1.45; padding-top: 0.25em; padding-bottom: 0.25em; padding-left: 0.25em; color: #365E7A; display: -ms-flexbox; display: -webkit-flex; display: flex; -ms-flex-flow: row wrap; -webkit-flex-flow: row wrap; flex-flow: row wrap; } |
| #header .details span:first-child { margin-left: -0.125em; } |
| #header .details span.email a { color: #333; } |
| #header .details br { display: none; } |
| #header .details br + span:before { content: "\00a0\2013\00a0"; } |
| #header .details br + span.author:before { content: "\00a0\22c5\00a0"; color: #333; } |
| #header .details br + span#revremark:before { content: "\00a0|\00a0"; } |
| #header #revnumber { text-transform: capitalize; } |
| #header #revnumber:after { content: "\00a0"; } |
| |
| #content > h1:first-child:not([class]) { color: black; border-bottom: 1px solid #ddd; padding-bottom: 8px; margin-top: 0; padding-top: 1rem; margin-bottom: 1.25rem; } |
| |
| #toc { border-bottom: 0 solid #ddd; padding-bottom: 0.5em; } |
| #toc > ul { margin-left: 0.125em; } |
| #toc ul.sectlevel0 > li > a { font-style: italic; } |
| #toc ul.sectlevel0 ul.sectlevel1 { margin: 0.5em 0; } |
| #toc ul { font-family: Noto, sans-serif; list-style-type: none; } |
| #toc li { line-height: 1.3334; margin-top: 0.3334em; } |
| #toc a { text-decoration: none; } |
| #toc a:active { text-decoration: underline; } |
| |
| #toctitle { color: black; font-size: 1.2em; } |
| |
| /* From 768px up: `toc2` pins the table of contents in a fixed 15em column and pads the body to make room. */ |
| @media only screen and (min-width: 768px) { |
|   #toctitle { |
|     font-size: 1.375em; |
|   } |
| |
|   body.toc2 { |
|     padding-left: 15em; |
|     padding-right: 0; |
|   } |
| |
|   #toc.toc2 { |
|     margin-top: 0 !important; |
|     background-color: #fff; |
|     position: fixed; |
|     width: 15em; |
|     left: 0; |
|     top: 0; |
|     border-right: 1px solid #ddd; |
|     border-top-width: 0 !important; |
|     border-bottom-width: 0 !important; |
|     z-index: 1000; |
|     padding: 1.25em 1em; |
|     height: 100%; |
|     overflow: auto; |
|   } |
| |
|   #toc.toc2 #toctitle { |
|     margin-top: 0; |
|     margin-bottom: 0.8rem; |
|     font-size: 1.2em; |
|   } |
| |
|   #toc.toc2 > ul { |
|     font-size: 0.9em; |
|     margin-bottom: 0; |
|   } |
| |
|   #toc.toc2 ul ul { |
|     margin-left: 0; |
|     padding-left: 1em; |
|   } |
| |
|   #toc.toc2 ul.sectlevel0 ul.sectlevel1 { |
|     padding-left: 0; |
|     margin-top: 0.5em; |
|     margin-bottom: 0.5em; |
|   } |
| |
|   /* `toc-right` mirrors the layout: padding and the column move to the right edge. */ |
|   body.toc2.toc-right { |
|     padding-left: 0; |
|     padding-right: 15em; |
|   } |
| |
|   body.toc2.toc-right #toc.toc2 { |
|     border-right-width: 0; |
|     border-left: 1px solid #ddd; |
|     left: auto; |
|     right: 0; |
|   } |
| } |
| |
| /* From 1280px up: the pinned column widens to 20em and its text grows slightly. */ |
| @media only screen and (min-width: 1280px) { |
|   body.toc2 { |
|     padding-left: 20em; |
|     padding-right: 0; |
|   } |
| |
|   #toc.toc2 { |
|     width: 20em; |
|   } |
| |
|   #toc.toc2 #toctitle { |
|     font-size: 1.375em; |
|   } |
| |
|   #toc.toc2 > ul { |
|     font-size: 0.95em; |
|   } |
| |
|   #toc.toc2 ul ul { |
|     padding-left: 1.25em; |
|   } |
| |
|   body.toc2.toc-right { |
|     padding-left: 0; |
|     padding-right: 20em; |
|   } |
| } |
| #content #toc { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: #fff; -webkit-border-radius: 0; border-radius: 0; } |
| #content #toc > :first-child { margin-top: 0; } |
| #content #toc > :last-child { margin-bottom: 0; } |
| |
| /* Footer spans the full width (lifting the max-width shared with #header/#content). `transparent` replaces the former `none`, which is not a color value and was discarded by CSS parsers. */ |
| #footer { max-width: 100%; background-color: transparent; padding: 1.25em; } |
| |
| #footer-text { color: black; line-height: 1.44; } |
| |
| #content { margin-bottom: 0.625em; } |
| |
| .sect1 { padding-bottom: 0.625em; } |
| |
| @media only screen and (min-width: 768px) { #content { margin-bottom: 1.25em; } |
| .sect1 { padding-bottom: 1.25em; } } |
| .sect1:last-child { padding-bottom: 0; } |
| |
| .sect1 + .sect1 { border-top: 0 solid #ddd; } |
| |
| #content h1 > a.anchor, h2 > a.anchor, h3 > a.anchor, #toctitle > a.anchor, .sidebarblock > .content > .title > a.anchor, h4 > a.anchor, h5 > a.anchor, h6 > a.anchor { position: absolute; z-index: 1001; width: 1.5ex; margin-left: -1.5ex; display: block; text-decoration: none !important; visibility: hidden; text-align: center; font-weight: normal; } |
| #content h1 > a.anchor:before, h2 > a.anchor:before, h3 > a.anchor:before, #toctitle > a.anchor:before, .sidebarblock > .content > .title > a.anchor:before, h4 > a.anchor:before, h5 > a.anchor:before, h6 > a.anchor:before { content: "\00A7"; font-size: 0.85em; display: block; padding-top: 0.1em; } |
| #content h1:hover > a.anchor, #content h1 > a.anchor:hover, h2:hover > a.anchor, h2 > a.anchor:hover, h3:hover > a.anchor, #toctitle:hover > a.anchor, .sidebarblock > .content > .title:hover > a.anchor, h3 > a.anchor:hover, #toctitle > a.anchor:hover, .sidebarblock > .content > .title > a.anchor:hover, h4:hover > a.anchor, h4 > a.anchor:hover, h5:hover > a.anchor, h5 > a.anchor:hover, h6:hover > a.anchor, h6 > a.anchor:hover { visibility: visible; } |
| #content h1 > a.link, h2 > a.link, h3 > a.link, #toctitle > a.link, .sidebarblock > .content > .title > a.link, h4 > a.link, h5 > a.link, h6 > a.link { color: black; text-decoration: none; } |
| #content h1 > a.link:hover, h2 > a.link:hover, h3 > a.link:hover, #toctitle > a.link:hover, .sidebarblock > .content > .title > a.link:hover, h4 > a.link:hover, h5 > a.link:hover, h6 > a.link:hover { color: black; } |
| |
| .audioblock, .imageblock, .literalblock, .listingblock, .stemblock, .videoblock { margin-bottom: 1.25em; } |
| |
| .admonitionblock td.content > .title, .audioblock > .title, .exampleblock > .title, .imageblock > .title, .listingblock > .title, .literalblock > .title, .stemblock > .title, .openblock > .title, .paragraph > .title, .quoteblock > .title, table.tableblock > .title, .verseblock > .title, .videoblock > .title, .dlist > .title, .olist > .title, .ulist > .title, .qlist > .title, .hdlist > .title { text-rendering: optimizeLegibility; text-align: left; } |
| |
| table.tableblock > caption.title { white-space: nowrap; overflow: visible; max-width: 0; } |
| |
| .paragraph.lead > p, #preamble > .sectionbody > .paragraph:first-of-type p { color: black; } |
| |
| table.tableblock #preamble > .sectionbody > .paragraph:first-of-type p { font-size: inherit; } |
| |
| .admonitionblock > table { border-collapse: separate; border: 0; background: none; width: 100%; } |
| .admonitionblock > table td.icon { text-align: center; width: 80px; } |
| /* Admonition icons are exempt from the global `img { max-width: 100% }` cap. `none` is the initial value of max-width, written out because Internet Explorer does not understand the `initial` keyword. */ |
| .admonitionblock > table td.icon img { max-width: none; } |
| .admonitionblock > table td.icon .title { font-weight: bold; font-family: Noto, sans-serif; text-transform: uppercase; } |
| .admonitionblock > table td.content { padding-left: 1.125em; padding-right: 1.25em; border-left: 1px solid #ddd; color: #365E7A; } |
| .admonitionblock > table td.content > :last-child > :last-child { margin-bottom: 0; } |
| |
| .exampleblock > .content { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: #fff; -webkit-border-radius: 0; border-radius: 0; } |
| .exampleblock > .content > :first-child { margin-top: 0; } |
| .exampleblock > .content > :last-child { margin-bottom: 0; } |
| |
| .sidebarblock { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: #fff; -webkit-border-radius: 0; border-radius: 0; } |
| .sidebarblock > :first-child { margin-top: 0; } |
| .sidebarblock > :last-child { margin-bottom: 0; } |
| .sidebarblock > .content > .title { color: black; margin-top: 0; } |
| |
| .exampleblock > .content > :last-child > :last-child, .exampleblock > .content .olist > ol > li:last-child > :last-child, .exampleblock > .content .ulist > ul > li:last-child > :last-child, .exampleblock > .content .qlist > ol > li:last-child > :last-child, .sidebarblock > .content > :last-child > :last-child, .sidebarblock > .content .olist > ol > li:last-child > :last-child, .sidebarblock > .content .ulist > ul > li:last-child > :last-child, .sidebarblock > .content .qlist > ol > li:last-child > :last-child { margin-bottom: 0; } |
| |
| .literalblock pre, .listingblock pre:not(.highlight), .listingblock pre[class="highlight"], .listingblock pre[class^="highlight "], .listingblock pre.CodeRay, .listingblock pre.prettyprint { background: #eee; } |
| .sidebarblock .literalblock pre, .sidebarblock .listingblock pre:not(.highlight), .sidebarblock .listingblock pre[class="highlight"], .sidebarblock .listingblock pre[class^="highlight "], .sidebarblock .listingblock pre.CodeRay, .sidebarblock .listingblock pre.prettyprint { background: #f2f1f1; } |
| |
| .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { border: 1px hidden #666; -webkit-border-radius: 0; border-radius: 0; word-wrap: break-word; padding: 1.25em 1.5625em 1.125em 1.5625em; font-size: 0.8125em; } |
| .literalblock pre.nowrap, .literalblock pre[class].nowrap, .listingblock pre.nowrap, .listingblock pre[class].nowrap { overflow-x: auto; white-space: pre; word-wrap: normal; } |
| @media only screen and (min-width: 768px) { .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { font-size: 0.90625em; } } |
| @media only screen and (min-width: 1280px) { .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { font-size: 1em; } } |
| |
| .literalblock.output pre { color: #eee; background-color: #264357; } |
| |
| .listingblock pre.highlightjs { padding: 0; } |
| .listingblock pre.highlightjs > code { padding: 1.25em 1.5625em 1.125em 1.5625em; -webkit-border-radius: 0; border-radius: 0; } |
| |
| .listingblock > .content { position: relative; } |
| |
| .listingblock code[data-lang]:before { display: none; content: attr(data-lang); position: absolute; font-size: 0.75em; top: 0.425rem; right: 0.5rem; line-height: 1; text-transform: uppercase; color: #999; } |
| |
| .listingblock:hover code[data-lang]:before { display: block; } |
| |
| .listingblock.terminal pre .command:before { content: attr(data-prompt); padding-right: 0.5em; color: #999; } |
| |
| .listingblock.terminal pre .command:not([data-prompt]):before { content: "$"; } |
| |
| table.pyhltable { border-collapse: separate; border: 0; margin-bottom: 0; background: none; } |
| |
| table.pyhltable td { vertical-align: top; padding-top: 0; padding-bottom: 0; line-height: 1.6; } |
| |
| table.pyhltable td.code { padding-left: .75em; padding-right: 0; } |
| |
| pre.pygments .lineno, table.pyhltable td:not(.code) { color: #999; padding-left: 0; padding-right: .5em; border-right: 1px solid #ddd; } |
| |
| pre.pygments .lineno { display: inline-block; margin-right: .25em; } |
| |
| table.pyhltable .linenodiv { background: none !important; padding-right: 0 !important; } |
| |
| .quoteblock { margin: 0 1em 0.75em 1.5em; display: table; } |
| .quoteblock > .title { margin-left: -1.5em; margin-bottom: 0.75em; } |
| .quoteblock blockquote, .quoteblock blockquote p { color: #333; font-size: 1.15rem; line-height: 1.75; word-spacing: 0.1em; letter-spacing: 0; font-style: italic; text-align: justify; } |
| .quoteblock blockquote { margin: 0; padding: 0; border: 0; } |
| .quoteblock blockquote:before { content: "\201c"; float: left; font-size: 2.75em; font-weight: bold; line-height: 0.6em; margin-left: -0.6em; color: black; text-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); } |
| .quoteblock blockquote > .paragraph:last-child p { margin-bottom: 0; } |
| .quoteblock .attribution { margin-top: 0.5em; margin-right: 0.5ex; text-align: right; } |
| .quoteblock .quoteblock { margin-left: 0; margin-right: 0; padding: 0.5em 0; border-left: 3px solid #365E7A; } |
| .quoteblock .quoteblock blockquote { padding: 0 0 0 0.75em; } |
| .quoteblock .quoteblock blockquote:before { display: none; } |
| |
| .verseblock { margin: 0 1em 0.75em 1em; } |
| /* Verse text is set in a proportional face rather than the monospace default for `pre`. The fallback must be the generic keyword `sans-serif`; `sans` is not a CSS generic family and would be looked up as a font named "sans". */ |
| .verseblock pre { font-family: "Open Sans", "DejaVu Sans", sans-serif; font-size: 1.15rem; color: #333; font-weight: 300; text-rendering: optimizeLegibility; } |
| .verseblock pre strong { font-weight: 400; } |
| .verseblock .attribution { margin-top: 1.25rem; margin-left: 0.5ex; } |
| |
| .quoteblock .attribution, .verseblock .attribution { font-size: 0.8125em; line-height: 1.45; font-style: italic; } |
| .quoteblock .attribution br, .verseblock .attribution br { display: none; } |
| .quoteblock .attribution cite, .verseblock .attribution cite { display: block; letter-spacing: -0.025em; color: #365E7A; } |
| |
| .quoteblock.abstract { margin: 0 0 0.75em 0; display: block; } |
| .quoteblock.abstract blockquote, .quoteblock.abstract blockquote p { text-align: left; word-spacing: 0; } |
| .quoteblock.abstract blockquote:before, .quoteblock.abstract blockquote p:first-of-type:before { display: none; } |
| |
| table.tableblock { max-width: 100%; border-collapse: separate; } /* table blocks use separated borders and never exceed their container's width */ |
| table.tableblock td > .paragraph:last-child p > p:last-child, table.tableblock th > p:last-child, table.tableblock td > p:last-child { margin-bottom: 0; } /* removes the bottom margin of the last paragraph in a cell. NOTE(review): the first selector needs a `p` that is a direct child of another `p`, which HTML parsers do not normally produce, so it probably never matches; confirm the intended selector */ |
| /* Border style and color shared by the table and its cells; the width is 0 here and is set per side by the grid and frame classes. */ |
| table.tableblock, th.tableblock, td.tableblock { border: 0 solid #d8d8ce; } |
| |
| table.grid-all > thead > tr > .tableblock, table.grid-all > tbody > tr > .tableblock { border-width: 0 1px 1px 0; } |
| |
| table.grid-all > tfoot > tr > .tableblock { border-width: 1px 1px 0 0; } |
| |
| table.grid-cols > * > tr > .tableblock { border-width: 0 1px 0 0; } |
| |
| table.grid-rows > thead > tr > .tableblock, table.grid-rows > tbody > tr > .tableblock { border-width: 0 0 1px 0; } |
| |
| table.grid-rows > tfoot > tr > .tableblock { border-width: 1px 0 0 0; } |
| |
| table.grid-all > * > tr > .tableblock:last-child, table.grid-cols > * > tr > .tableblock:last-child { border-right-width: 0; } |
| |
| table.grid-all > tbody > tr:last-child > .tableblock, table.grid-all > thead:last-child > tr > .tableblock, table.grid-rows > tbody > tr:last-child > .tableblock, table.grid-rows > thead:last-child > tr > .tableblock { border-bottom-width: 0; } |
| |
| /* Outer frame of a table: all four sides, left/right only, or top/bottom only. */ |
| table.frame-all { |
|   border-width: 1px; |
| } |
| |
| table.frame-sides { |
|   border-width: 0 1px; |
| } |
| |
| table.frame-topbot { |
|   border-width: 1px 0; |
| } |
| |
| /* Horizontal alignment of header and data cells. */ |
| th.halign-left, |
| td.halign-left { |
|   text-align: left; |
| } |
| |
| th.halign-right, |
| td.halign-right { |
|   text-align: right; |
| } |
| |
| th.halign-center, |
| td.halign-center { |
|   text-align: center; |
| } |
| |
| /* Vertical alignment of header and data cells. */ |
| th.valign-top, |
| td.valign-top { |
|   vertical-align: top; |
| } |
| |
| th.valign-bottom, |
| td.valign-bottom { |
|   vertical-align: bottom; |
| } |
| |
| th.valign-middle, |
| td.valign-middle { |
|   vertical-align: middle; |
| } |
| |
| table thead th, table tfoot th { font-weight: bold; } |
| |
| tbody tr th { display: table-cell; line-height: 1.4; background: #eee; } |
| |
| tbody tr th, tbody tr th p, tfoot tr th, tfoot tr th p { color: #222; font-weight: bold; } |
| |
| p.tableblock > code:only-child { background: none; padding: 0; } |
| |
| p.tableblock { font-size: 1em; } |
| |
| td > div.verse { white-space: pre; } |
| |
| ol { margin-left: 1.75em; } |
| |
| ul li ol { margin-left: 1.5em; } |
| |
| dl dd { margin-left: 1.125em; } |
| |
| dl dd:last-child, dl dd:last-child > :last-child { margin-bottom: 0; } |
| |
| ol > li p, ul > li p, ul dd, ol dd, .olist .olist, .ulist .ulist, .ulist .olist, .olist .ulist { margin-bottom: 0.375em; } |
| |
| ul.checklist, ul.none, ol.none, ul.no-bullet, ol.no-bullet, ol.unnumbered, ul.unstyled, ol.unstyled { list-style-type: none; } |
| |
| ul.no-bullet, ol.no-bullet, ol.unnumbered { margin-left: 0.625em; } |
| |
| ul.unstyled, ol.unstyled { margin-left: 0; } |
| |
| ul.checklist { margin-left: 0.625em; } |
| |
| ul.checklist li > p:first-child > .fa-square-o:first-child, ul.checklist li > p:first-child > .fa-check-square-o:first-child { width: 1.25em; font-size: 0.8em; position: relative; bottom: 0.125em; } |
| |
| ul.checklist li > p:first-child > input[type="checkbox"]:first-child { margin-right: 0.25em; } |
| |
| ul.inline { display: -ms-flexbox; display: -webkit-box; display: flex; -ms-flex-flow: row wrap; -webkit-flex-flow: row wrap; flex-flow: row wrap; list-style: none; margin: 0 0 0.375em -0.75em; } |
| |
| ul.inline > li { margin-left: 0.75em; } |
| |
| .unstyled dl dt { font-weight: normal; font-style: normal; } |
| |
| ol.arabic { list-style-type: decimal; } |
| |
| ol.decimal { list-style-type: decimal-leading-zero; } |
| |
| ol.loweralpha { list-style-type: lower-alpha; } |
| |
| ol.upperalpha { list-style-type: upper-alpha; } |
| |
| ol.lowerroman { list-style-type: lower-roman; } |
| |
| ol.upperroman { list-style-type: upper-roman; } |
| |
| ol.lowergreek { list-style-type: lower-greek; } |
| |
| .hdlist > table, .colist > table { border: 0; background: none; } |
| .hdlist > table > tbody > tr, .colist > table > tbody > tr { background: none; } |
| |
| td.hdlist1, td.hdlist2 { vertical-align: top; padding: 0 0.625em; } |
| |
| td.hdlist1 { font-weight: bold; padding-bottom: 0.75em; } |
| |
| .literalblock + .colist, .listingblock + .colist { margin-top: -0.5em; } |
| |
| .colist > table tr > td:first-of-type { padding: 0.4em 0.75em 0 0.75em; line-height: 1; vertical-align: top; } |
| .colist > table tr > td:first-of-type img { max-width: initial; } |
| .colist > table tr > td:last-of-type { padding: 0.25em 0; } |
| |
| .thumb, .th { line-height: 0; display: inline-block; border: solid 4px #fff; -webkit-box-shadow: 0 0 0 1px #ddd; box-shadow: 0 0 0 1px #ddd; } |
| |
| .imageblock.left, .imageblock[style*="float: left"] { margin: 0.25em 0.625em 1.25em 0; } |
| .imageblock.right, .imageblock[style*="float: right"] { margin: 0.25em 0 1.25em 0.625em; } |
| .imageblock > .title { margin-bottom: 0; } |
| .imageblock.thumb, .imageblock.th { border-width: 6px; } |
| .imageblock.thumb > .title, .imageblock.th > .title { padding: 0 0.125em; } |
| |
| .image.left, .image.right { margin-top: 0.25em; margin-bottom: 0.25em; display: inline-block; line-height: 0; } |
| .image.left { margin-right: 0.625em; } |
| .image.right { margin-left: 0.625em; } |
| |
| a.image { text-decoration: none; display: inline-block; } |
| a.image object { pointer-events: none; } |
| |
| sup.footnote, sup.footnoteref { font-size: 0.875em; position: static; vertical-align: super; } |
| sup.footnote a, sup.footnoteref a { text-decoration: none; } |
| sup.footnote a:active, sup.footnoteref a:active { text-decoration: underline; } |
| |
| #footnotes { padding-top: 0.75em; padding-bottom: 0.75em; margin-bottom: 0.625em; } |
| #footnotes hr { width: 20%; min-width: 6.25em; margin: -0.25em 0 0.75em 0; border-width: 1px 0 0 0; } |
| #footnotes .footnote { padding: 0 0.375em 0 0.225em; line-height: 1.3334; font-size: 0.875em; margin-left: 1.2em; margin-bottom: 0.2em; } |
| #footnotes .footnote a:first-of-type { font-weight: bold; text-decoration: none; margin-left: -1.05em; } |
| #footnotes .footnote:last-of-type { margin-bottom: 0; } |
| #content #footnotes { margin-top: -0.625em; margin-bottom: 0; padding: 0.75em 0; } |
| |
| .gist .file-data > table { border: 0; background: #fff; width: 100%; margin-bottom: 0; } |
| .gist .file-data > table td.line-data { width: 99%; } |
| |
| div.unbreakable { page-break-inside: avoid; } |
| |
| .big { font-size: larger; } |
| |
| .small { font-size: smaller; } |
| |
| .underline { text-decoration: underline; } |
| |
| .overline { text-decoration: overline; } |
| |
| .line-through { text-decoration: line-through; } |
| |
| .aqua { color: #00bfbf; } |
| |
| .aqua-background { background-color: #00fafa; } |
| |
| .black { color: black; } |
| |
| .black-background { background-color: black; } |
| |
| .blue { color: #0000bf; } |
| |
| .blue-background { background-color: #0000fa; } |
| |
| .fuchsia { color: #bf00bf; } |
| |
| .fuchsia-background { background-color: #fa00fa; } |
| |
| .gray { color: #606060; } |
| |
| .gray-background { background-color: #7d7d7d; } |
| |
| .green { color: #006000; } |
| |
| .green-background { background-color: #007d00; } |
| |
| .lime { color: #00bf00; } |
| |
| .lime-background { background-color: #00fa00; } |
| |
| .maroon { color: #600000; } |
| |
| .maroon-background { background-color: #7d0000; } |
| |
| .navy { color: #000060; } |
| |
| .navy-background { background-color: #00007d; } |
| |
| .olive { color: #606000; } |
| |
| .olive-background { background-color: #7d7d00; } |
| |
| .purple { color: #600060; } |
| |
| .purple-background { background-color: #7d007d; } |
| |
| .red { color: #bf0000; } |
| |
| .red-background { background-color: #fa0000; } |
| |
| .silver { color: #909090; } |
| |
| .silver-background { background-color: #bcbcbc; } |
| |
| .teal { color: #006060; } |
| |
| .teal-background { background-color: #007d7d; } |
| |
| .white { color: #bfbfbf; } |
| |
| .white-background { background-color: #fafafa; } |
| |
| .yellow { color: #bfbf00; } |
| |
| .yellow-background { background-color: #fafa00; } |
| |
| span.icon > .fa { cursor: default; } |
| a span.icon > .fa { cursor: inherit; } |
| |
| .admonitionblock td.icon [class^="fa icon-"] { font-size: 2.5em; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5); cursor: default; } |
| .admonitionblock td.icon .icon-note:before { content: "\f05a"; color: #29475c; } |
| .admonitionblock td.icon .icon-tip:before { content: "\f0eb"; text-shadow: 1px 1px 2px rgba(155, 155, 0, 0.8); color: #111; } |
| .admonitionblock td.icon .icon-warning:before { content: "\f071"; color: #bf6900; } |
| .admonitionblock td.icon .icon-caution:before { content: "\f06d"; color: #bf3400; } |
| .admonitionblock td.icon .icon-important:before { content: "\f06a"; color: #bf0000; } |
| |
| .conum[data-value] { display: inline-block; color: #fff !important; background-color: black; -webkit-border-radius: 100px; border-radius: 100px; text-align: center; font-size: 0.75em; width: 1.67em; height: 1.67em; line-height: 1.67em; font-family: "Open Sans", "DejaVu Sans", sans-serif; font-style: normal; font-weight: bold; } |
| .conum[data-value] * { color: #fff !important; } |
| .conum[data-value] + b { display: none; } |
| .conum[data-value]:after { content: attr(data-value); } |
| pre .conum[data-value] { position: relative; top: -0.125em; } |
| |
| b.conum * { color: inherit !important; } |
| |
| .conum:not([data-value]):empty { display: none; } |
| |
| h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { border-bottom: 1px solid #ddd; } |
| |
| .sect1 { padding-bottom: 0; } |
| |
| #toctitle { color: #00406F; font-weight: normal; margin-top: 1.5em; } |
| |
| .sidebarblock { border-color: #aaa; } |
| |
| code { -webkit-border-radius: 4px; border-radius: 4px; } |
| |
| p.tableblock.header { color: #6d6e71; } |
| |
| .literalblock pre, .listingblock pre { background: #eee; } |
| |
| /* From https://github.com/KhronosGroup/Vulkan-Docs/pull/901 */ |
| a code { color: inherit; } |
| |
| /* From https://github.com/KhronosGroup/Vulkan-Docs/pull/1157 */ |
| /* Make VUID anchor handles*/ |
| li > p > a[id^="VUID-"] { visibility: hidden; position: absolute; z-index: 1001; width: 2.2ex; margin-left: -2.2ex; display: block; text-decoration: none !important; text-align: center; font-weight: normal; } |
| |
| li > p > a[id^="VUID-"]:before { content: "\00A7"; font-size: 1em; display: block; padding-top: 0em; background: #fff; } |
| |
| li > p:hover > a[id^="VUID-"], li > p > a[id^="VUID-"]:hover { visibility: visible; } |
| |
| li > p > a[id^="VUID-"].link { color: black; text-decoration: none; } |
| |
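| /* The two .link rules (directly above and below) keep VUID anchors that carry the "link" class black, including on hover. TODO: confirm they are still needed. */ |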
| li > p > a[id^="VUID-"].link:hover { color: black; } |
| |
| .vuid { color: #4d4d4d; font-family: monospace; } |
| |
| </style> |
| <link rel="stylesheet" href="../katex/katex.min.css"> |
| <script src="../katex/katex.min.js"></script> |
| <script src="../katex/contrib/auto-render.min.js"></script> |
| <!-- Use KaTeX to render math once document is loaded, see |
| https://github.com/Khan/KaTeX/tree/master/contrib/auto-render --> |
| <script> |
| document.addEventListener("DOMContentLoaded", function () { |
| renderMathInElement( |
| document.body, |
| { |
| delimiters: [ |
| { left: "$$", right: "$$", display: true}, |
| { left: "\\[", right: "\\]", display: true}, |
| { left: "$", right: "$", display: false}, |
| { left: "\\(", right: "\\)", display: false} |
| ] |
| } |
| ); |
| }); |
| </script></head> |
| <body class="book"> |
| <div id="header"> |
| <h1>cl_intel_subgroups</h1> |
| <div class="details"> |
| <span id="revnumber">version v3.0.14-10-gff88d06,</span> |
| <span id="revdate">Mon, 12 Jun 2023 23:00:00 +0000</span> |
| <br><span id="revremark">from git branch: main commit: ff88d0674a775a7b458bf1500d052f2f67a2c2fe</span> |
| </div> |
| </div> |
| <div id="content"> |
| <div class="sect1"> |
| <h2 id="_name_strings"><a class="anchor" href="#_name_strings"></a>Name Strings</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p><code>cl_intel_subgroups</code></p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_contact"><a class="anchor" href="#_contact"></a>Contact</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Ben Ashbaugh, Intel (ben 'dot' ashbaugh 'at' intel 'dot' com)</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_contributors"><a class="anchor" href="#_contributors"></a>Contributors</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Ben Ashbaugh, Intel<br> |
| Allen Hux, Intel<br> |
| Pranayini Gudali, Intel<br> |
| Dawid Dominiak, Intel<br> |
| Biju George, Intel</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_notice"><a class="anchor" href="#_notice"></a>Notice</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Copyright (c) 2018-2023 Intel Corporation. All rights reserved.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_status"><a class="anchor" href="#_status"></a>Status</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Final Draft</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_version"><a class="anchor" href="#_version"></a>Version</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Built On: 2023-06-12<br> |
| Revision: 8</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_dependencies"><a class="anchor" href="#_dependencies"></a>Dependencies</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>OpenCL 1.2 is required. |
| Some features (<code>get_enqueued_num_sub_groups()</code> and the <code>sub_group_barrier()</code> function that accepts a memory scope) require OpenCL 2.0.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension is written against revision 24 of the OpenCL 2.0 API specification, against revision 24 of the OpenCL 2.0 OpenCL C specification, and against revision 24 of the OpenCL 2.0 extension specification.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_overview"><a class="anchor" href="#_overview"></a>Overview</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>The goal of this extension is to allow programmers to improve the performance of their applications by taking advantage of the fact that some work items in a work-group execute together as a group (a "sub-group"), and that work items in a sub-group can take advantage of hardware features that are not available to work items in a work-group. |
| Specifically, this extension is designed to allow work items in a sub-group to share data without the use of local memory and work-group barriers, and to utilize specialized hardware to load and store blocks of data.</p> |
| </div> |
| <div class="paragraph"> |
| <p>There is a large amount of overlap between the functionality in this extension and the functionality in the Khronos sub-groups extension <code>cl_khr_subgroups</code>, so this extension reuses many of the names, concepts, and functions already described by the <code>cl_khr_subgroups</code> extension. |
| The key differences between the Intel sub-groups extension and the Khronos sub-groups extension are:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The Khronos sub-groups extension requires OpenCL 2.0, but the Intel sub-groups extension may be available on OpenCL 1.2 devices.</p> |
| </li> |
| <li> |
| <p>The Khronos sub-groups extension guarantees that sub-groups in a work-group will make independent forward progress, but the Intel extension does not guarantee that sub-groups in a work-group will make independent forward progress.</p> |
| </li> |
| <li> |
| <p>The Intel extension adds a rich set of sub-group "shuffle" functions to allow work items within a sub-group to interchange data without the use of local memory and work-group barriers.</p> |
| </li> |
| <li> |
| <p>The Intel extension adds a set of sub-group "block read and write" functions to take advantage of specialized hardware to read or write blocks of data from or to buffers or images.</p> |
| </li> |
| <li> |
| <p>The Intel sub-groups extension does not include the sub-group pipes functions that are included as part of the Khronos sub-groups extension.</p> |
| </li> |
| <li> |
| <p>The Intel sub-groups extension does not include the device-side kernel query functions for sub-groups that are included as part of the Khronos sub-groups extension.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_new_api_functions"><a class="anchor" href="#_new_api_functions"></a>New API Functions</h2> |
| <div class="sectionbody"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">This function is copied unchanged from the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>cl_int clGetKernelSubGroupInfoKHR( |
| cl_kernel kernel, |
| cl_device_id device, |
| cl_kernel_sub_group_info param_name, |
| size_t input_value_size, |
| const void* input_value, |
| size_t param_value_size, |
| void* param_value, |
| size_t* param_value_size_ret)</code></pre> |
| </div> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_new_api_enums"><a class="anchor" href="#_new_api_enums"></a>New API Enums</h2> |
| <div class="sectionbody"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These enums are copied unchanged from the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>Accepted as the <em>param_name</em> parameter of <strong>clGetKernelSubGroupInfoKHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 |
| CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034</code></pre> |
| </div> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_new_opencl_c_functions"><a class="anchor" href="#_new_opencl_c_functions"></a>New OpenCL C Functions</h2> |
| <div class="sectionbody"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These built-in functions are copied unchanged from the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>uint get_sub_group_size( void ); |
| uint get_max_sub_group_size( void ); |
| uint get_num_sub_groups( void ); |
| |
| uint get_sub_group_id( void ); |
| uint get_sub_group_local_id( void ); |
| |
| void sub_group_barrier( cl_mem_fence_flags flags ); |
| |
| int sub_group_all( int predicate ); |
| int sub_group_any( int predicate );</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>If OpenCL 2.0 is supported:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>uint get_enqueued_num_sub_groups( void ); |
| void sub_group_barrier( cl_mem_fence_flags flags, memory_scope scope );</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>For the sub_group_broadcast functions, <code>gentype</code> is <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, or <code>float</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If cl_khr_fp16 is supported, <code>gentype</code> also includes <code>half</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If cl_khr_fp64 or doubles are supported, <code>gentype</code> also includes <code>double</code>.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>gentype sub_group_broadcast( gentype x, uint sub_group_local_id );</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>For the sub_group_reduce, sub_group_scan_exclusive, and sub_group_scan_inclusive functions, <code>gentype</code> is <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, or <code>float</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If cl_khr_fp16 is supported, <code>gentype</code> also includes <code>half</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If cl_khr_fp64 or doubles are supported, <code>gentype</code> also includes <code>double</code>.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>gentype sub_group_reduce_add( gentype x ); |
| gentype sub_group_reduce_min( gentype x ); |
| gentype sub_group_reduce_max( gentype x ); |
| |
| gentype sub_group_scan_exclusive_add( gentype x ); |
| gentype sub_group_scan_exclusive_min( gentype x ); |
| gentype sub_group_scan_exclusive_max( gentype x ); |
| |
| gentype sub_group_scan_inclusive_add( gentype x ); |
| gentype sub_group_scan_inclusive_min( gentype x ); |
| gentype sub_group_scan_inclusive_max( gentype x );</code></pre> |
| </div> |
| </div> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">These built-in functions are unique to the Intel sub-groups extension and are not part of the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>For the intel_sub_group_shuffle, intel_sub_group_shuffle_down, intel_sub_group_shuffle_up, and intel_sub_group_shuffle_xor functions, <code>gentype</code> is <code>float</code>, <code>float2</code>, <code>float3</code>, <code>float4</code>, <code>float8</code>, <code>float16</code>, <code>int</code>, <code>int2</code>, <code>int3</code>, <code>int4</code>, <code>int8</code>, <code>int16</code>, <code>uint</code>, <code>uint2</code>, <code>uint3</code>, <code>uint4</code>, <code>uint8</code>, <code>uint16</code>, <code>long</code>, or <code>ulong</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If cl_khr_fp16 is supported, <code>gentype</code> also includes <code>half</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If cl_khr_fp64 or doubles are supported, <code>gentype</code> also includes <code>double</code>.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>gentype intel_sub_group_shuffle( gentype data, uint c ); |
| gentype intel_sub_group_shuffle_down( |
| gentype current, gentype next, uint delta ); |
| gentype intel_sub_group_shuffle_up( |
| gentype previous, gentype current, uint delta ); |
| gentype intel_sub_group_shuffle_xor( gentype data, uint value );</code></pre> |
| </div> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code>uint intel_sub_group_block_read( const __global uint* p ); |
| uint2 intel_sub_group_block_read2( const __global uint* p ); |
| uint4 intel_sub_group_block_read4( const __global uint* p ); |
| uint8 intel_sub_group_block_read8( const __global uint* p ); |
| |
| uint intel_sub_group_block_read( image2d_t image, int2 byte_coord ); |
| uint2 intel_sub_group_block_read2( image2d_t image, int2 byte_coord ); |
| uint4 intel_sub_group_block_read4( image2d_t image, int2 byte_coord ); |
| uint8 intel_sub_group_block_read8( image2d_t image, int2 byte_coord ); |
| |
| void intel_sub_group_block_write( __global uint* p, uint data ); |
| void intel_sub_group_block_write2( __global uint* p, uint2 data ); |
| void intel_sub_group_block_write4( __global uint* p, uint4 data ); |
| void intel_sub_group_block_write8( __global uint* p, uint8 data ); |
| |
| void intel_sub_group_block_write( image2d_t image, int2 byte_coord, uint data ); |
| void intel_sub_group_block_write2( image2d_t image, int2 byte_coord, uint2 data ); |
| void intel_sub_group_block_write4( image2d_t image, int2 byte_coord, uint4 data ); |
| void intel_sub_group_block_write8( image2d_t image, int2 byte_coord, uint8 data );</code></pre> |
| </div> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_modifications_to_the_opencl_api_specification"><a class="anchor" href="#_modifications_to_the_opencl_api_specification"></a>Modifications to the OpenCL API Specification</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="_modifications_to_section_2_glossary"><a class="anchor" href="#_modifications_to_section_2_glossary"></a>Modifications to Section 2 - "Glossary"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Add memory_scope_sub_group to the description of Memory Scopes: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Memory Scopes </dt> |
| <dd> |
| <p>Memory scopes define a hierarchy of visibilities when analyzing the ordering constraints of memory operations. |
| They are defined by the values of the <code>memory_scope</code> enumeration constant. |
| Current values are <code>memory_scope_work_item</code> (memory constraints only apply to a single work item and in practice only apply to image operations), <code>memory_scope_sub_group</code> (memory-ordering constraints only apply to work items executing in a sub-group), <code>memory_scope_work_group</code> …​</p> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">Add memory_scope_sub_group to the description of Scope inclusion: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Scope inclusion </dt> |
| <dd> |
| <p>Two actions <strong>A</strong> and <strong>B</strong> are defined to have an inclusive scope if they have the same scope <strong>P</strong> such that: (1) if <strong>P</strong> is <code>memory_scope_sub_group</code>, and <strong>A</strong> and <strong>B</strong> are executed by work items within the same sub-group, or (2) if <strong>P</strong> is <code>memory_scope_work_group</code>, and <strong>A</strong> and <strong>B</strong> are executed by work items within the same work-group …​</p> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">Change the description for Sub-groups to: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Sub-group </dt> |
| <dd> |
| <p>Sub-groups are an implementation-dependent grouping of work items within a |
| work-group. |
| The size and number of sub-groups is implementation-defined and not exposed in the core OpenCL 2.0 feature set. |
| Sub-groups execute concurrently within a work-group, but are not guaranteed to make independent forward progress. |
| Sub-groups may synchronize internally using sub-group barrier operations without synchronizing with other sub-groups.</p> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_section_3_2_1_execution_model_mapping_work_items_onto_an_nd_range"><a class="anchor" href="#_modifications_to_section_3_2_1_execution_model_mapping_work_items_onto_an_nd_range"></a>Modifications to Section 3.2.1 - "Execution Model: Mapping Work Items Onto an ND-range"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Change the paragraph describing sub-groups to: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>An implementation of OpenCL may divide each work-group into one or more sub-groups. |
| The size and number of sub-groups is implementation-defined and not exposed in the |
| core OpenCL 2.0 feature set.</p> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_section_3_2_2_execution_model_execution_of_kernel_instances"><a class="anchor" href="#_modifications_to_section_3_2_2_execution_model_execution_of_kernel_instances"></a>Modifications to Section 3.2.2 - "Execution Model: Execution of Kernel Instances"</h3> |
| <div class="paragraph"> |
| <p>Remove the last paragraph describing sub-groups and independent forward progress.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_3_2_execution_model"><a class="anchor" href="#_additions_to_section_3_2_execution_model"></a>Additions to Section 3.2 - "Execution Model"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">This text is largely the same as the text in the Khronos sub-groups extension. Only the sentence about independent forward progress has been modified: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>Within a work-group, work items may be divided into sub-groups in an implementation- |
| defined fashion. The mapping of work items to sub-groups is implementation-defined |
| and may be queried at runtime. While sub-groups may be used in multi-dimensional |
| work-groups, each sub-group is 1-dimensional and any given work item may query which |
| sub-group it is a member of.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Work items are mapped into sub-groups through a combination of compile-time decisions |
| and the parameters of the dispatch. The mapping to sub-groups is invariant for the |
| duration of a kernel’s execution, across dispatches of a given kernel with the same |
| launch parameters, and from one work-group to another within the dispatch (excluding |
| the trailing edge work-groups in the presence of non-uniform work-group sizes). In |
| addition, all sub-groups within a work-group will be the same size, apart from the |
| sub-group with the maximum index, which may be smaller if the size of the work-group |
| is not evenly divisible by the size of the sub-groups.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Sub-groups execute concurrently within a given work-group. Similar to work items |
| within a work-group, sub-groups executing within a work-group are not guaranteed to make |
| independent forward progress. Work items in a sub-group can internally synchronize |
| using sub-group barrier operations without synchronizing with other sub-groups.</p> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_3_3_4_memory_model_memory_consistency_model"><a class="anchor" href="#_additions_to_section_3_3_4_memory_model_memory_consistency_model"></a>Additions to Section 3.3.4 - "Memory Model: Memory Consistency Model"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Add memory_scope_sub_group to the bulleted descriptions of memory scopes: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>memory_scope_sub_group</code>: memory-ordering constraints only apply to work items executing within a single sub-group.</p> |
| </li> |
| <li> |
| <p><code>memory_scope_work_group</code>: …​</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">In the paragraph after the bulleted descriptions of memory scopes, include memory_scope_sub_group as a valid memory scope for local memory: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>... For local memory, <code>memory_scope_sub_group</code> and <code>memory_scope_work_group</code> are valid, and may constrain visibility to the sub-group or work-group.</p> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_3_3_5_memory_model_overview_of_atomic_and_fence_operations"><a class="anchor" href="#_additions_to_section_3_3_5_memory_model_overview_of_atomic_and_fence_operations"></a>Additions to Section 3.3.5 - "Memory Model: Overview of atomic and fence operations"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Add memory_scope_sub_group to the definition of inclusive scope: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><strong>P</strong> is <code>memory_scope_sub_group</code> and <strong>A</strong> and <strong>B</strong> are executed by work items within the same sub-group.</p> |
| </li> |
| <li> |
| <p><strong>P</strong> is <code>memory_scope_work_group</code> …​</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_5_9_3_kernel_object_queries"><a class="anchor" href="#_additions_to_section_5_9_3_kernel_object_queries"></a>Additions to Section 5.9.3 - "Kernel Object Queries"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">This addition is copied unchanged from the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The function</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">cl_int clGetKernelSubGroupInfoKHR(cl_kernel kernel, |
| cl_device_id device, |
| cl_kernel_sub_group_info param_name, |
| size_t input_value_size, |
| const void *input_value, |
| size_t param_value_size, |
| void *param_value, |
| size_t *param_value_size_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>returns information about the kernel object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>kernel</em> specifies the kernel object being queried.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>device</em> identifies a specific device in the list of devices associated with |
| <em>kernel</em>. |
| The list of devices is the list of devices in the OpenCL context that is |
| associated with <em>kernel</em>. |
| If the list of devices associated with <em>kernel</em> is a single device, <em>device</em> |
| can be a <code>NULL</code> value.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_name</em> specifies the information to query. |
| The list of supported <em>param_name</em> types and the information returned in |
| <em>param_value</em> by <strong>clGetKernelSubGroupInfoKHR</strong> is described in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>input_value_size</em> is used to specify the size in bytes of memory pointed to |
| by <em>input_value</em>. |
| This size must be equal to the size of the input type as described in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>input_value</em> is a pointer to memory where the appropriate parameterization |
| of the query is passed from. |
| If <em>input_value</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value</em> is a pointer to memory where the appropriate result being |
| queried is returned. |
| If <em>param_value</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size</em> is used to specify the size in bytes of memory pointed to |
| by <em>param_value</em>. |
| This size must be greater than or equal to the size of the return type as described in the |
| table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size_ret</em> returns the actual size in bytes of data being |
| queried by <em>param_name</em>. |
| If <em>param_value_size_ret</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <table id="cl_khr_subgroups-kernel-sub-group-info-table" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 1. <strong>clGetKernelSubGroupInfoKHR</strong> parameter queries</caption> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_kernel_sub_group_info</strong></th> |
| <th class="tableblock halign-left valign-top">Input Type</th> |
| <th class="tableblock halign-left valign-top">Return Type</th> |
| <th class="tableblock halign-left valign-top">Info. returned in <em>param_value</em></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_​KERNEL_​MAX_​SUB_​GROUP_​SIZE_​FOR_​NDRANGE_​KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the maximum sub-group size for this kernel. |
| All sub-groups must be the same size, while the last sub-group in |
| any work-group (i.e. the sub-group with the maximum index) could |
| be the same or smaller size.</p> |
| <p class="tableblock"> The <em>input_value</em> must be an array of size_t values |
| corresponding to the local work size parameter of the intended |
| dispatch. |
| The number of dimensions in the ND-range will be inferred from |
| the value specified for <em>input_value_size</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_​KERNEL_​SUB_​GROUP_​COUNT_​FOR_​NDRANGE_​KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of sub-groups that will be present in each |
| work-group for a given local work size. |
| All work-groups, apart from the last work-group in each dimension |
| in the presence of non-uniform work-group sizes, will have the |
| same number of sub-groups.</p> |
| <p class="tableblock"> The <em>input_value</em> must be an array of size_t values |
| corresponding to the local work size parameter of the intended |
| dispatch. |
| The number of dimensions in the ND-range will be inferred from |
| the value specified for <em>input_value_size</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><strong>clGetKernelSubGroupInfoKHR</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_INVALID_DEVICE</code> if <em>device</em> is not in the list of devices associated |
| with <em>kernel</em> or if <em>device</em> is <code>NULL</code> but there is more than one device |
| associated with <em>kernel</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_VALUE</code> if <em>param_name</em> is not valid, or if the size in bytes |
| specified by <em>param_value_size</em> is less than the size of the return type as described in |
| the table above and <em>param_value</em> is not <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_VALUE</code> if <em>param_name</em> is |
| <code>CL_​KERNEL_​MAX_​SUB_​GROUP_​SIZE_​FOR_​NDRANGE_​KHR</code> and the size in bytes specified by |
| <em>input_value_size</em> is not valid or if <em>input_value</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_KERNEL</code> if <em>kernel</em> is not a valid kernel object.</p> |
| </li> |
| <li> |
| <p><code>CL_OUT_OF_RESOURCES</code> if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p><code>CL_OUT_OF_HOST_MEMORY</code> if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_modifications_to_the_opencl_c_specification"><a class="anchor" href="#_modifications_to_the_opencl_c_specification"></a>Modifications to the OpenCL C Specification</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_6_13_1_work_item_functions"><a class="anchor" href="#_additions_to_section_6_13_1_work_item_functions"></a>Additions to Section 6.13.1 - "Work Item Functions"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These additions are copied unchanged from the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint get_sub_group_size( void )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of work items in the sub-group. |
| This value is no more than the maximum sub-group size and is implementation-defined based on a combination of the compiled kernel and the dispatch dimensions. |
| This will be a constant value for the lifetime of the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint get_max_sub_group_size( void )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the maximum size of a sub-group within the dispatch. |
| This value will be invariant for a given set of dispatch dimensions and a kernel object compiled for a given device.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint get_num_sub_groups( void )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of sub-groups that the current work-group is divided into.</p> |
| <p class="tableblock">This number will be constant for the duration of a work-group’s execution. |
| If the kernel is executed with a non-uniform work-group size in any dimension, calls to this built-in may return different values for some work-groups than for other work-groups.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint get_sub_group_id( void )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the sub-group ID, which is a number from zero to <strong>get_num_sub_groups</strong> - 1.</p> |
| <p class="tableblock">For <strong>clEnqueueTask</strong>, this returns 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint get_sub_group_local_id( void )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the unique work item ID within the current sub-group. |
| The mapping from <strong>get_local_id</strong> to <strong>get_sub_group_local_id</strong> will be invariant for the lifetime of the work-group.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>If OpenCL 2.0 is supported:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint get_enqueued_num_sub_groups( void )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the same value as that returned by <strong>get_num_sub_groups</strong> if the kernel is executed with a uniform work-group size. This value will be constant for the entire ND-range.</p> |
| <p class="tableblock">If the kernel is executed with a non-uniform work-group size, returns the number of sub-groups in a work-group that makes up the uniform region of the global ND-range.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_6_13_8_synchronization_functions"><a class="anchor" href="#_additions_to_section_6_13_8_synchronization_functions"></a>Additions to Section 6.13.8 - "Synchronization Functions"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These additions are mostly unchanged from the Khronos sub-groups extension, with only minor edits for clarity: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">void sub_group_barrier( |
| cl_mem_fence_flags flags )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">All work items in a sub-group executing the kernel on a processor must execute this function before any are allowed to continue execution beyond the sub-group barrier. |
| This function must be encountered by all work items in a sub-group executing the kernel. |
| These rules apply to ND-ranges implemented with uniform and non-uniform work-groups.</p> |
| <p class="tableblock">If <strong>sub_group_barrier</strong> is inside a conditional statement then all work items within the sub-group must enter the conditional if any work item in the sub-group enters the conditional statement and executes the <strong>sub_group_barrier</strong>.</p> |
| <p class="tableblock">If <strong>sub_group_barrier</strong> is inside a loop, all work items within the sub-group must execute the <strong>sub_group_barrier</strong> for each iteration of the loop before any are allowed to continue execution beyond the <strong>sub_group_barrier</strong>.</p> |
| <p class="tableblock">The <strong>sub_group_barrier</strong> function also queues a memory fence (reads and writes) to ensure correct ordering of memory operations to local or global memory.</p> |
| <p class="tableblock">The flags argument specifies the memory address space and can be set to a combination of the following values:</p> |
| <p class="tableblock"><code>CLK_LOCAL_MEM_FENCE</code> - The <strong>sub_group_barrier</strong> function will either flush any variables stored in local memory or queue a memory fence to ensure correct ordering of memory operations to local memory.</p> |
| <p class="tableblock"><code>CLK_GLOBAL_MEM_FENCE</code> - The <strong>sub_group_barrier</strong> function will queue a memory fence to ensure correct ordering of memory operations to global memory. |
| This can be useful when work items, for example, write to buffer objects and then want to read the updated data from these buffer objects.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>If OpenCL 2.0 is supported, add the following to the table above:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">void sub_group_barrier( |
| cl_mem_fence_flags flags, |
| memory_scope scope )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">…​</p> |
| <p class="tableblock">The <strong>sub_group_barrier</strong> function also supports a variant that specifies the memory scope. |
| For the sub_group_barrier variant that does not take a memory scope, the scope is <code>memory_scope_sub_group</code>.</p> |
| <p class="tableblock">The scope argument specifies whether the memory accesses of work items in the sub-group to memory address space(s) identified by flags become visible to all work items in the sub-group, the work-group, the device, or all SVM devices.</p> |
| <p class="tableblock">…​</p> |
| <p class="tableblock"><code>CLK_IMAGE_MEM_FENCE</code> - The <strong>sub_group_barrier</strong> function will queue a memory fence to ensure correct ordering of memory operations to image objects. This can be useful when work items, for example, write to image objects and then want to read the updated data from these image objects.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_6_13_11_atomic_functions"><a class="anchor" href="#_additions_to_section_6_13_11_atomic_functions"></a>Additions to Section 6.13.11 - "Atomic Functions"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Modify the bullet describing behavior for functions that do not have a memory_scope argument to say: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The sub-group functions that do not have a <em>memory_scope</em> argument have the same semantics as the corresponding functions with the <em>memory_scope</em> argument set to <code>memory_scope_sub_group</code>. |
| Other functions that do not have a <em>memory_scope</em> argument have the same semantics as the corresponding functions with the <em>memory_scope</em> argument set to <code>memory_scope_device</code>.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">The following addition is copied unchanged from the Khronos sub-groups extension: </dt> |
| <dt class="hdlist1">Add the following new value to the enumerated type memory_scope defined in Section 6.13.11.4: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>memory_scope_sub_group</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The <code>memory_scope_sub_group</code> specifies that the memory ordering constraints |
| given by <code>memory_order</code> apply to work items in a sub-group. |
| This memory scope can be used when performing atomic operations to global or |
| local memory.</p> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_section_6_13_15_work_group_functions"><a class="anchor" href="#_additions_to_section_6_13_15_work_group_functions"></a>Additions to Section 6.13.15 - "Work-group Functions"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These additions are copied from the Khronos sub-groups extension: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The OpenCL C programming language implements the following built-in |
| functions that operate on a sub-group level. |
| These built-in functions must be encountered by all work items in a sub-group |
| executing the kernel. |
| We use the generic type name <code>gentype</code> to indicate the built-in data types |
| <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, or <code>float</code> as the type for the arguments.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <code>cl_khr_fp16</code> is supported, <code>gentype</code> also includes <code>half</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <code>cl_khr_fp64</code> or doubles are supported, <code>gentype</code> also includes <code>double</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 66.6666%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">int sub_group_all( int predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Evaluates <em>predicate</em> for all work items in the sub-group and returns a |
| non-zero value if <em>predicate</em> evaluates to non-zero for all work items in |
| the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">int sub_group_any( int predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Evaluates <em>predicate</em> for all work items in the sub-group and returns a |
| non-zero value if <em>predicate</em> evaluates to non-zero for any work items in |
| the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype sub_group_broadcast( |
| gentype x, |
| uint sub_group_local_id )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Broadcasts the value of <em>x</em> for work item identified by <em>sub_group_local_id</em> (value returned by <strong>get_sub_group_local_id</strong>) to all work items in the sub-group. |
| <em>sub_group_local_id</em> must be the same value for all work items in the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype sub_group_reduce_add( gentype x ) |
| gentype sub_group_reduce_min( gentype x ) |
| gentype sub_group_reduce_max( gentype x )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of the specified reduction operation for all values of <em>x</em> specified by work items in a sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype sub_group_scan_exclusive_add( gentype x ) |
| gentype sub_group_scan_exclusive_min( gentype x ) |
| gentype sub_group_scan_exclusive_max( gentype x )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Performs the specified exclusive scan operation of all values <em>x</em> specified by work items in a sub-group. |
| The scan results are returned for each work item.</p> |
| <p class="tableblock">The scan order is defined by increasing sub-group local ID within the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype sub_group_scan_inclusive_add( gentype x ) |
| gentype sub_group_scan_inclusive_min( gentype x ) |
| gentype sub_group_scan_inclusive_max( gentype x )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Performs the specified inclusive scan operation of all values <em>x</em> specified by work items in a sub-group. |
| The scan results are returned for each work item.</p> |
| <p class="tableblock">The scan order is defined by increasing sub-group local ID within the sub-group.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_add_a_new_section_6_13_x_sub_group_shuffle_functions"><a class="anchor" href="#_add_a_new_section_6_13_x_sub_group_shuffle_functions"></a>Add a new Section 6.13.X - "Sub-group Shuffle Functions"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These are new functions: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The OpenCL C programming language implements the following built-in functions to allow data to be exchanged among work items in a sub-group. |
| These built-in functions need not be encountered by all work items in a sub-group executing the kernel; however, data may only be shuffled among work items encountering the sub-group shuffle function. |
| Shuffling data from a work item that does not encounter the sub-group shuffle function will produce undefined results. |
| For these functions, <code>gentype</code> is <code>float</code>, <code>float2</code>, <code>float3</code>, <code>float4</code>, <code>float8</code>, <code>float16</code>, <code>int</code>, <code>int2</code>, <code>int3</code>, <code>int4</code>, <code>int8</code>, <code>int16</code>, <code>uint</code>, <code>uint2</code>, <code>uint3</code>, <code>uint4</code>, <code>uint8</code>, <code>uint16</code>, <code>long</code>, or <code>ulong</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <code>cl_khr_fp16</code> is supported, <code>gentype</code> also includes <code>half</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <code>cl_khr_fp64</code> or doubles are supported, <code>gentype</code> also includes <code>double</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype intel_sub_group_shuffle( |
| gentype data, |
| uint sub_group_local_id )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Allows data to be arbitrarily transferred between work items in a sub-group. |
| The data that is returned for this work item is the value of <em>data</em> for the work item identified by <em>sub_group_local_id</em>.</p> |
| <p class="tableblock"><em>sub_group_local_id</em> need not be the same value for all work items in the sub-group. |
| There is no defined behavior for out-of-range <em>sub_group_local_ids</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype intel_sub_group_shuffle_down( |
| gentype current, |
| gentype next, |
| uint delta )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Allows data to be transferred from a work item in the sub-group with a higher sub_group_local_id down to a work item in the sub-group with a lower sub_group_local_id.</p> |
| <p class="tableblock">There are two data sources to this built-in function: <em>current</em> and <em>next</em>. |
| To determine the result of this built-in function, first let the unsigned shuffle index be equivalent to the sum of this work item’s sub_group_local_id plus the specified <em>delta</em>:</p> |
| <p class="tableblock">If the shuffle index is less than the max_sub_group_size, the result of this built-in function is the value of the <em>current</em> data source for the work item with sub_group_local_id equal to the shuffle index.</p> |
| <p class="tableblock">If the shuffle index is greater than or equal to the max_sub_group_size but less than twice the max_sub_group_size, the result of this built-in function is the value of the <em>next</em> data source for the work item with sub_group_local_id equal to the shuffle index minus the max_sub_group_size.</p> |
| <p class="tableblock">All other values of the shuffle index are considered to be out-of-range. |
| There is no defined behavior for out-of-range indices.</p> |
| <p class="tableblock"><em>delta</em> need not be the same value for all work items in the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype intel_sub_group_shuffle_up( |
| gentype previous, |
| gentype current, |
| uint delta )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Allows data to be transferred from a work item in the sub-group with a lower sub_group_local_id up to a work item in the sub-group with a higher sub_group_local_id.</p> |
| <p class="tableblock">There are two data sources to this built-in function: <em>previous</em> and <em>current</em>. |
| To determine the result of this built-in function, first let the signed shuffle index be equivalent to this work item’s sub_group_local_id minus the specified <em>delta</em>:</p> |
| <p class="tableblock">If the shuffle index is greater than or equal to zero and less than the max_sub_group_size, the result of this built-in function is the value of the <em>current</em> data source for the work item with sub_group_local_id equal to the shuffle index.</p> |
| <p class="tableblock">If the shuffle index is less than zero but greater than or equal to the negative max_sub_group_size, the result of this built-in function is the value of the <em>previous</em> data source for the work item with sub_group_local_id equal to the shuffle index plus the max_sub_group_size.</p> |
| <p class="tableblock">All other values of the shuffle index are considered to be out-of-range. |
| There is no defined behavior for out-of-range indices.</p> |
| <p class="tableblock"><em>delta</em> need not be the same value for all work items in the sub-group.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">gentype intel_sub_group_shuffle_xor( |
| gentype data, |
| uint value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Allows data to be transferred between work items in a sub-group as a function of the work item’s sub_group_local_id. |
| The data that is returned for this work item is the value of <em>data</em> for the work item with sub_group_local_id equal to this work item’s sub_group_local_id XOR’d with the specified <em>value</em>. |
| If the result of the XOR is greater than or equal to max_sub_group_size then it is considered out-of-range.</p> |
| <p class="tableblock"><em>value</em> need not be the same for all work items in the sub-group. |
| There is no defined behavior for out-of-range indices.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_add_a_new_section_6_13_x_sub_group_read_and_write_functions"><a class="anchor" href="#_add_a_new_section_6_13_x_sub_group_read_and_write_functions"></a>Add a new Section 6.13.X - "Sub-group Read and Write Functions"</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">These are new functions: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The OpenCL C programming language implements the following built-in functions to allow data to be read or written as a block by all work items in a sub-group. |
| These built-in functions must be encountered by all work items in a sub-group executing the kernel. |
| Furthermore, since these are block operations, the <em>pointer</em>, <em>image</em>, and <em>coordinate</em> arguments to these built-in functions must be the same for all work items in the sub-group (when applicable, only the <em>data</em> argument may be different).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 55.5555%;"> |
| <col style="width: 44.4445%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint intel_sub_group_block_read( |
| const __global uint* p ) |
| uint2 intel_sub_group_block_read2( |
| const __global uint* p ) |
| uint4 intel_sub_group_block_read4( |
| const __global uint* p ) |
| uint8 intel_sub_group_block_read8( |
| const __global uint* p )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Reads 1, 2, 4, or 8 uints of data for each work item in the sub-group from the specified pointer as a block operation. |
| The data is read strided, so the first value read is:</p> |
| <p class="tableblock"><code>p[ sub_group_local_id ]</code></p> |
| <p class="tableblock">and the second value read is:</p> |
| <p class="tableblock"><code>p[ sub_group_local_id + max_sub_group_size ]</code></p> |
| <p class="tableblock">etc.</p> |
| <p class="tableblock"><em>p</em> must be aligned to a 32-bit (4-byte) boundary.</p> |
| <p class="tableblock">There is no defined out-of-range behavior for these functions.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">uint intel_sub_group_block_read( |
| image2d_t image, |
| int2 byte_coord ) |
| uint2 intel_sub_group_block_read2( |
| image2d_t image, |
| int2 byte_coord ) |
| uint4 intel_sub_group_block_read4( |
| image2d_t image, |
| int2 byte_coord ) |
| uint8 intel_sub_group_block_read8( |
| image2d_t image, |
| int2 byte_coord )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Reads 1, 2, 4, or 8 uints of data for each work item in the sub-group from the specified <em>image</em> at the specified coordinate as a block operation. |
| Note that the coordinate is a byte coordinate, not an image element coordinate. |
| Also note that the image data is read without format conversion, so each work item may read multiple image elements |
| (for images with element size smaller than 32-bits).</p> |
| <p class="tableblock">The data is read row-by-row, so the first value is read from the row specified by the y-component of the provided <em>byte_coord</em>, the second value is read from the row specified by the y-component of the provided <em>byte_coord</em> plus one, etc.</p> |
| <p class="tableblock">Please see the note below describing out-of-bounds behavior for these functions.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">void intel_sub_group_block_write( |
| __global uint* p, uint data ) |
| void intel_sub_group_block_write2( |
| __global uint* p, uint2 data ) |
| void intel_sub_group_block_write4( |
| __global uint* p, uint4 data ) |
| void intel_sub_group_block_write8( |
| __global uint* p, uint8 data )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Writes 1, 2, 4, or 8 uints of data for each work item in the sub-group to the specified pointer as a block operation. |
| The data is written strided, so the first value is written to:</p> |
| <p class="tableblock"><code>p[ sub_group_local_id ]</code></p> |
| <p class="tableblock">and the second value is written to:</p> |
| <p class="tableblock"><code>p[ sub_group_local_id + max_sub_group_size ]</code></p> |
| <p class="tableblock">etc.</p> |
| <p class="tableblock"><em>p</em> must be aligned to a 128-bit (16-byte) boundary.</p> |
| <p class="tableblock">There is no defined out-of-range behavior for these functions.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="highlight"><code class="language-c" data-lang="c">void intel_sub_group_block_write( |
| image2d_t image, |
| int2 byte_coord, uint data ) |
| void intel_sub_group_block_write2( |
| image2d_t image, |
| int2 byte_coord, uint2 data ) |
| void intel_sub_group_block_write4( |
| image2d_t image, |
| int2 byte_coord, uint4 data ) |
| void intel_sub_group_block_write8( |
| image2d_t image, |
| int2 byte_coord, uint8 data )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Writes 1, 2, 4, or 8 uints of data for each work item in the sub-group to the specified <em>image</em> at the specified coordinate as a block operation. |
| Note that the coordinate is a byte coordinate, not an image element coordinate. |
| Unlike the image block read function, which may read from any arbitrary byte offset, the x-component of the byte coordinate for the image block write functions must be a multiple of four; |
| in other words, the write must begin at a 32-bit boundary. |
| There is no restriction on the y-component of the coordinate. |
| Also, note that the image <em>data</em> is written without format conversion, so each work item may write multiple image elements (for images with element size smaller than 32-bits).</p> |
| <p class="tableblock">The data is written row-by-row, so the first value is written to the row specified by the y-component of the provided <em>byte_coord</em>, the second value is written to the row specified by the y-component of the provided <em>byte_coord</em> plus one, etc.</p> |
| <p class="tableblock">Please see the note below describing out-of-bounds behavior for these functions.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: The sub-group image block read and write built-ins do support bounds checking; however, these built-ins bounds-check to the image width in units of uints, not in units of image elements. |
| This means:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>If the image has an element size equal to the size of a uint (four bytes, for example <code>CL_RGBA</code> + <code>CL_UNORM_INT8</code>), the image will be correctly bounds-checked. |
| In this case, out-of-bounds reads will return the edge image element (the equivalent of <code>CLK_ADDRESS_CLAMP_TO_EDGE</code>), and out-of-bounds writes will be ignored.</p> |
| </li> |
| <li> |
| <p>If the image has element size less than the size of a uint (such as <code>CL_R</code> + <code>CL_UNSIGNED_INT8</code>), the entire image is addressable; however, bounds checking will occur too late. |
| For this reason, extra care should be taken to avoid out-of-bounds reads and writes, since out-of-bounds reads may return invalid data and out-of-bounds writes may corrupt other images or buffers unpredictably.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">Add a new sub-section 6.13.X.1 - Restrictions: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The following restrictions apply to the sub-group buffer block read and write functions:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The pointer <em>p</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.</p> |
| </li> |
| <li> |
| <p>If the pointer <em>p</em> is computed from a kernel argument that is a cl_mem that was created with <code>CL_MEM_USE_HOST_PTR</code>, then the <em>host_ptr</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.</p> |
| </li> |
| <li> |
| <p>If the pointer <em>p</em> is computed from a kernel argument that is a cl_mem that is a sub-buffer, then the <em>origin</em> defining the sub-buffer offset into the <em>buffer</em> must be a multiple of 4 bytes for reads, and must be a multiple of 16 bytes for writes, in addition to the <code>CL_DEVICE_MEM_BASE_ADDR_ALIGN</code> requirements. |
| Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <code>CL_MEM_USE_HOST_PTR</code>, then the <em>host_ptr</em> for the <em>buffer</em> must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.</p> |
| </li> |
| <li> |
| <p>If the pointer <em>p</em> is computed from an SVM pointer kernel argument, then the SVM pointer kernel argument must be 32-bit (4-byte) aligned for reads, and must be 128-bit (16-byte) aligned for writes.</p> |
| </li> |
| <li> |
| <p>Behavior is undefined if the sub-group size is smaller than the maximum sub-group size; in other words, if this is a partial sub-group.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The following restrictions apply to the sub-group image block read and write functions:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The behavior of the sub-group image block read and write built-ins is undefined for images with an element size greater than four bytes (such as <code>CL_RGBA</code> + <code>CL_FLOAT</code>).</p> |
| </li> |
| <li> |
| <p>When reading or writing a 2D image created from a buffer with the sub-group block read and write built-ins, the image row pitch is required to be a multiple of 64-bytes, in addition to the <code>CL_DEVICE_IMAGE_PITCH_ALIGNMENT</code> requirements.</p> |
| </li> |
| <li> |
| <p>When reading or writing a 2D image created from a buffer with the sub-group block read and write built-ins, if the buffer is a cl_mem that was created with <code>CL_MEM_USE_HOST_PTR</code>, then the <em>host_ptr</em> must be 256-bit (32-byte) aligned.</p> |
| </li> |
| <li> |
| <p>When reading or writing a 2D image created from a buffer with the sub-group block read and write built-ins, if the buffer is a cl_mem that is a sub-buffer, then the <em>origin</em> must be a multiple of 32-bytes. |
| Additionally, if the <em>buffer</em> that the sub-buffer is created from was created with <code>CL_MEM_USE_HOST_PTR</code>, then the <em>host_ptr</em> for the <em>buffer</em> must be 256-bit (32-byte) aligned.</p> |
| </li> |
| <li> |
| <p>Behavior is undefined if the sub-group size is smaller than the maximum sub-group size; in other words, if this is a partial sub-group.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_issues"><a class="anchor" href="#_issues"></a>Issues</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>None.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_revision_history"><a class="anchor" href="#_revision_history"></a>Revision History</h2> |
| <div class="sectionbody"> |
| <table class="tableblock frame-all grid-rows stretch"> |
| <colgroup> |
| <col style="width: 4.7619%;"> |
| <col style="width: 14.2857%;"> |
| <col style="width: 14.2857%;"> |
| <col style="width: 66.6667%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top">Rev</th> |
| <th class="tableblock halign-left valign-top">Date</th> |
| <th class="tableblock halign-left valign-top">Author</th> |
| <th class="tableblock halign-left valign-top">Changes</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2014-12-01</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>First public revision.</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2015-03-12</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Fixed minor formatting errors, added restriction for sub-group image block read and write built-ins with large image formats.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">3</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2016-02-12</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Fixed a small bug in the shuffle up and shuffle down descriptions.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">4</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2016-08-28</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Added additional restrictions and programming notes for the sub-group shuffle and block read built-ins.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">5</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2018-11-15</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Converted to asciidoc.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">6</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2018-12-02</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Added back a section that was inadvertently removed during conversion to asciidoc.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">7</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2019-01-15</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Fixed a typo in the summary section of new built-in functions.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2019-09-17</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Ben Ashbaugh</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Added vec3 types for shuffles, restriction for block reads and writes and partial sub-groups, and asciidoctor formatting fixes.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div id="footer"> |
| <div id="footer-text"> |
| Version v3.0.14-10-gff88d06<br> |
| Last updated 2023-06-12 16:00:00 -0700 |
| </div> |
| </div> |
| |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.9/MathJax.js?config=TeX-MML-AM_HTMLorMML"></script> |
| </body> |
| </html> |