| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <!--[if IE]><meta http-equiv="X-UA-Compatible" content="IE=edge"><![endif]--> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <meta name="generator" content="Asciidoctor 1.5.8"> |
| <meta name="author" content="Khronos® OpenCL Working Group"> |
| <title>The OpenCL™ Extension Specification</title> |
| <style> |
| /*! normalize.css v2.1.2 | MIT License | git.io/normalize */ |
| /* ========================================================================== HTML5 display definitions ========================================================================== */ |
| /** Correct `block` display not defined in IE 8/9. */ |
| article, aside, details, figcaption, figure, footer, header, hgroup, main, nav, section, summary { display: block; } |
| |
| /** Correct `inline-block` display not defined in IE 8/9. */ |
| audio, canvas, video { display: inline-block; } |
| |
| /** Prevent modern browsers from displaying `audio` without controls. Remove excess height in iOS 5 devices. */ |
| audio:not([controls]) { display: none; height: 0; } |
| |
| /** Address `[hidden]` styling not present in IE 8/9. Hide the `template` element in IE, Safari, and Firefox < 22. */ |
| [hidden], template { display: none; } |
| |
| /** Never render `script` elements; `!important` keeps ordinary (non-`!important`) rules from overriding this. */ |
| script { display: none !important; } |
| |
| /* ========================================================================== Base ========================================================================== */ |
| /** 1. Set default font family to sans-serif. 2. Prevent iOS text size adjust after orientation change, without disabling user zoom. */ |
| html { font-family: sans-serif; /* 1 */ -ms-text-size-adjust: 100%; /* 2 */ -webkit-text-size-adjust: 100%; /* 2 */ } |
| |
| /** Remove default margin. */ |
| body { margin: 0; } |
| |
| /* ========================================================================== Links ========================================================================== */ |
| /** Remove the gray background color from active links in IE 10. */ |
| a { background: transparent; } |
| |
| /** Address `outline` inconsistency between Chrome and other browsers. */ |
| a:focus { outline: thin dotted; } |
| |
| /** Improve readability when focused and also mouse hovered in all browsers. */ |
| a:active, a:hover { outline: 0; } |
| |
| /* ========================================================================== Typography ========================================================================== */ |
| /** Address variable `h1` font-size and margin within `section` and `article` contexts in Firefox 4+, Safari 5, and Chrome. */ |
| h1 { font-size: 2em; margin: 0.67em 0; } |
| |
| /** Address styling not present in IE 8/9, Safari 5, and Chrome. */ |
| abbr[title] { border-bottom: 1px dotted; } |
| |
| /** Address style set to `bolder` in Firefox 4+, Safari 5, and Chrome. */ |
| b, strong { font-weight: bold; } |
| |
| /** Address styling not present in Safari 5 and Chrome. */ |
| dfn { font-style: italic; } |
| |
| /** Address differences between Firefox and other browsers. */ |
| hr { -moz-box-sizing: content-box; box-sizing: content-box; height: 0; } |
| |
| /** Address styling not present in IE 8/9. */ |
| mark { background: #ff0; color: #000; } |
| |
| /** Correct font family set oddly in Safari 5 and Chrome. */ |
| code, kbd, pre, samp { font-family: monospace, serif; font-size: 1em; } |
| |
| /** Improve readability of pre-formatted text in all browsers. */ |
| pre { white-space: pre-wrap; } |
| |
| /** Set consistent quote types. */ |
| q { quotes: "\201C" "\201D" "\2018" "\2019"; } |
| |
| /** Address inconsistent and variable font size in all browsers. */ |
| small { font-size: 80%; } |
| |
| /** Prevent `sub` and `sup` affecting `line-height` in all browsers. */ |
| sub, sup { font-size: 75%; line-height: 0; position: relative; vertical-align: baseline; } |
| |
| /** Shift superscripts upward; the offset applies because `sub, sup` are relatively positioned above. */ |
| sup { top: -0.5em; } |
| |
| /** Shift subscripts downward; the offset applies because `sub, sup` are relatively positioned above. */ |
| sub { bottom: -0.25em; } |
| |
| /* ========================================================================== Embedded content ========================================================================== */ |
| /** Remove border when inside `a` element in IE 8/9. */ |
| img { border: 0; } |
| |
| /** Correct overflow displayed oddly in IE 9. */ |
| svg:not(:root) { overflow: hidden; } |
| |
| /* ========================================================================== Figures ========================================================================== */ |
| /** Address margin not present in IE 8/9 and Safari 5. */ |
| figure { margin: 0; } |
| |
| /* ========================================================================== Forms ========================================================================== */ |
| /** Define consistent border, margin, and padding. */ |
| fieldset { border: 1px solid #c0c0c0; margin: 0 2px; padding: 0.35em 0.625em 0.75em; } |
| |
| /** 1. Correct `color` not being inherited in IE 8/9. 2. Remove padding so people aren't caught out if they zero out fieldsets. */ |
| legend { border: 0; /* 1 */ padding: 0; /* 2 */ } |
| |
| /** 1. Correct font family not being inherited in all browsers. 2. Correct font size not being inherited in all browsers. 3. Address margins set differently in Firefox 4+, Safari 5, and Chrome. */ |
| button, input, select, textarea { font-family: inherit; /* 1 */ font-size: 100%; /* 2 */ margin: 0; /* 3 */ } |
| |
| /** Address Firefox 4+ setting `line-height` on `input` using `!important` in the UA stylesheet. */ |
| button, input { line-height: normal; } |
| |
| /** Address inconsistent `text-transform` inheritance for `button` and `select`. All other form control elements do not inherit `text-transform` values. Correct `button` style inheritance in Chrome, Safari 5+, and IE 8+. Correct `select` style inheritance in Firefox 4+ and Opera. */ |
| button, select { text-transform: none; } |
| |
| /** 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio` and `video` controls. 2. Correct inability to style clickable `input` types in iOS. 3. Improve usability and consistency of cursor style between image-type `input` and others. */ |
| button, html input[type="button"], input[type="reset"], input[type="submit"] { -webkit-appearance: button; /* 2 */ cursor: pointer; /* 3 */ } |
| |
| /** Re-set default cursor for disabled elements. */ |
| button[disabled], html input[disabled] { cursor: default; } |
| |
| /** 1. Address box sizing set to `content-box` in IE 8/9. 2. Remove excess padding in IE 8/9. */ |
| input[type="checkbox"], input[type="radio"] { box-sizing: border-box; /* 1 */ padding: 0; /* 2 */ } |
| |
| /** 1. Address `appearance` set to `searchfield` in Safari 5 and Chrome. 2. Address `box-sizing` set to `border-box` in Safari 5 and Chrome (include `-moz` to future-proof). */ |
| input[type="search"] { -webkit-appearance: textfield; /* 1 */ -moz-box-sizing: content-box; -webkit-box-sizing: content-box; /* 2 */ box-sizing: content-box; } |
| |
| /** Remove inner padding and search cancel button in Safari 5 and Chrome on OS X. */ |
| input[type="search"]::-webkit-search-cancel-button, input[type="search"]::-webkit-search-decoration { -webkit-appearance: none; } |
| |
| /** Remove inner padding and border in Firefox 4+. */ |
| button::-moz-focus-inner, input::-moz-focus-inner { border: 0; padding: 0; } |
| |
| /** 1. Remove default vertical scrollbar in IE 8/9. 2. Improve readability and alignment in all browsers. */ |
| textarea { overflow: auto; /* 1 */ vertical-align: top; /* 2 */ } |
| |
| /* ========================================================================== Tables ========================================================================== */ |
| /** Remove most spacing between table cells. */ |
| table { border-collapse: collapse; border-spacing: 0; } |
| |
| /* Breakpoint markers: each rule stores a media-query string in the font-family of a meta element and the matching pixel value in its width (presumably read back by script; TODO confirm against the consumer). */ |
| meta.foundation-mq-small { font-family: "only screen and (min-width: 768px)"; width: 768px; } |
| |
| meta.foundation-mq-medium { font-family: "only screen and (min-width:1280px)"; width: 1280px; } |
| |
| meta.foundation-mq-large { font-family: "only screen and (min-width:1440px)"; width: 1440px; } |
| |
| /* Use border-box sizing for every element and its :before/:after pseudo-elements. */ |
| *, *:before, *:after { -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box; } |
| |
| html, body { font-size: 100%; } |
| |
| /* Page-wide defaults: colours, font stack, and a line-height of 1 that text elements override later. */ |
| body { background: white; color: #222222; padding: 0; margin: 0; font-family: "Helvetica Neue", "Helvetica", Helvetica, Arial, sans-serif; font-weight: normal; font-style: normal; line-height: 1; position: relative; cursor: auto; } |
| |
| a:hover { cursor: pointer; } |
| |
| /* Cap embedded media at the width of its container. */ |
| img, object, embed { max-width: 100%; height: auto; } |
| |
| object, embed { height: 100%; } |
| |
| img { -ms-interpolation-mode: bicubic; } |
| |
| /* Lift the max-width cap above for media inside map canvases. */ |
| #map_canvas img, #map_canvas embed, #map_canvas object, .map_canvas img, .map_canvas embed, .map_canvas object { max-width: none !important; } |
| |
| /* Float and text-alignment utility classes. */ |
| .left { float: left !important; } |
| |
| .right { float: right !important; } |
| |
| .text-left { text-align: left !important; } |
| |
| .text-right { text-align: right !important; } |
| |
| .text-center { text-align: center !important; } |
| |
| .text-justify { text-align: justify !important; } |
| |
| .hide { display: none; } |
| |
| .antialiased { -webkit-font-smoothing: antialiased; } |
| |
| img { display: inline-block; vertical-align: middle; } |
| |
| textarea { height: auto; min-height: 50px; } |
| |
| select { width: 100%; } |
| |
| object, svg { display: inline-block; vertical-align: middle; } |
| |
| /* Horizontal centring and full-width helper classes. */ |
| .center { margin-left: auto; margin-right: auto; } |
| |
| .spread { width: 100%; } |
| |
| /* Lead paragraphs (explicit .lead and the first paragraph of the preamble) get larger text. */ |
| p.lead, .paragraph.lead > p, #preamble > .sectionbody > .paragraph:first-of-type p { font-size: 1.21875em; line-height: 1.6; } |
| |
| /* Sub-headers and the titles attached to content blocks share one light-weight style. */ |
| .subheader, .admonitionblock td.content > .title, .audioblock > .title, .exampleblock > .title, .imageblock > .title, .listingblock > .title, .literalblock > .title, .stemblock > .title, .openblock > .title, .paragraph > .title, .quoteblock > .title, table.tableblock > .title, .verseblock > .title, .videoblock > .title, .dlist > .title, .olist > .title, .ulist > .title, .qlist > .title, .hdlist > .title { line-height: 1.4; color: black; font-weight: 300; margin-top: 0.2em; margin-bottom: 0.5em; } |
| |
| /* Typography resets */ |
| div, dl, dt, dd, ul, ol, li, h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6, pre, form, p, blockquote, th, td { margin: 0; padding: 0; direction: ltr; } |
| |
| /* Default Link Styles */ |
| a { color: #0068b0; text-decoration: none; line-height: inherit; } |
| a:hover, a:focus { color: #333333; } |
| a img { border: none; } |
| |
| /* Default paragraph styles */ |
| p { font-family: Noto, sans-serif; font-weight: normal; font-size: 1em; line-height: 1.6; margin-bottom: 0.75em; text-rendering: optimizeLegibility; } |
| p aside { font-size: 0.875em; line-height: 1.35; font-style: italic; } |
| |
| /* Default header styles */ |
| h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { font-family: Noto, sans-serif; font-weight: normal; font-style: normal; color: black; text-rendering: optimizeLegibility; margin-top: 0.5em; margin-bottom: 0.5em; line-height: 1.2125em; } |
| h1 small, h2 small, h3 small, #toctitle small, .sidebarblock > .content > .title small, h4 small, h5 small, h6 small { font-size: 60%; color: #4d4d4d; line-height: 0; } |
| |
| /* Heading sizes for the narrowest layout; h1-h4 are enlarged again inside a 768px media query further down the stylesheet. */ |
| h1 { font-size: 2.125em; } |
| |
| h2 { font-size: 1.6875em; } |
| |
| h3, #toctitle, .sidebarblock > .content > .title { font-size: 1.375em; } |
| |
| h4 { font-size: 1.125em; } |
| |
| h5 { font-size: 1.125em; } |
| |
| h6 { font-size: 1em; } |
| |
| /* Horizontal rule drawn as a single top border. */ |
| hr { border: solid #dddddd; border-width: 1px 0 0; clear: both; margin: 1.25em 0 1.1875em; height: 0; } |
| |
| /* Helpful Typography Defaults */ |
| em, i { font-style: italic; line-height: inherit; } |
| |
| strong, b { font-weight: bold; line-height: inherit; } |
| |
| small { font-size: 60%; line-height: inherit; } |
| |
| code { font-family: Consolas, "Liberation Mono", Courier, monospace; font-weight: normal; color: #264357; } |
| |
| /* Lists */ |
| ul, ol, dl { font-size: 1em; line-height: 1.6; margin-bottom: 0.75em; list-style-position: outside; font-family: Noto, sans-serif; } |
| |
| ul, ol { margin-left: 1.5em; } |
| ul.no-bullet, ol.no-bullet { margin-left: 1.5em; } |
| |
| /* Unordered Lists */ |
| ul li ul, ul li ol { margin-left: 1.25em; margin-bottom: 0; font-size: 1em; /* Override nested font-size change */ } |
| ul.square li ul, ul.circle li ul, ul.disc li ul { list-style: inherit; } |
| ul.square { list-style-type: square; } |
| ul.circle { list-style-type: circle; } |
| ul.disc { list-style-type: disc; } |
| ul.no-bullet { list-style: none; } |
| |
| /* Ordered Lists */ |
| ol li ul, ol li ol { margin-left: 1.25em; margin-bottom: 0; } |
| |
| /* Definition Lists */ |
| dl dt { margin-bottom: 0.3em; font-weight: bold; } |
| dl dd { margin-bottom: 0.75em; } |
| |
| /* Abbreviations */ |
| abbr, acronym { text-transform: uppercase; font-size: 90%; color: black; border-bottom: 1px dotted #dddddd; cursor: help; } |
| |
| /* Undo the uppercase transform for abbr, leaving it on acronym only. */ |
| abbr { text-transform: none; } |
| |
| /* Blockquotes */ |
| blockquote { margin: 0 0 0.75em; padding: 0.5625em 1.25em 0 1.1875em; border-left: 1px solid #dddddd; } |
| blockquote cite { display: block; font-size: 0.8125em; color: #5e93b8; } |
| blockquote cite:before { content: "\2014 \0020"; } |
| blockquote cite a, blockquote cite a:visited { color: #5e93b8; } |
| |
| blockquote, blockquote p { line-height: 1.6; color: #333333; } |
| |
| /* Microformats */ |
| .vcard { display: inline-block; margin: 0 0 1.25em 0; border: 1px solid #dddddd; padding: 0.625em 0.75em; } |
| .vcard li { margin: 0; display: block; } |
| .vcard .fn { font-weight: bold; font-size: 0.9375em; } |
| |
| .vevent .summary { font-weight: bold; } |
| .vevent abbr { cursor: auto; text-decoration: none; font-weight: bold; border: none; padding: 0 0.0625em; } |
| |
| /* On screens at least 768px wide: larger h1-h4 sizes and a line-height of 1.4 for all headings. */ |
| @media only screen and (min-width: 768px) { h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { line-height: 1.4; } |
| h1 { font-size: 2.75em; } |
| h2 { font-size: 2.3125em; } |
| h3, #toctitle, .sidebarblock > .content > .title { font-size: 1.6875em; } |
| h4 { font-size: 1.4375em; } } |
| /* Tables */ |
| table { background: white; margin-bottom: 1.25em; border: solid 1px #d8d8ce; } |
| /* Header and footer rows use white text (next rule), so the background must never fall through to the white table: declare a solid fallback first, then the legacy -webkit- gradient, then the standard syntax (`to bottom` is the same direction as legacy `top`). */ |
| table thead, table tfoot { background: #90b66a; background: -webkit-linear-gradient(top, #add386, #90b66a); background: linear-gradient(to bottom, #add386, #90b66a); font-weight: bold; } |
| table thead tr th, table thead tr td, table tfoot tr th, table tfoot tr td { padding: 0.5em 0.625em 0.625em; font-size: inherit; color: white; text-align: left; } |
| table tr th, table tr td { padding: 0.5625em 0.625em; font-size: inherit; color: #6d6e71; } |
| /* Zebra striping for even rows. */ |
| table tr.even, table tr.alt, table tr:nth-of-type(even) { background: #edf2f2; } |
| table thead tr th, table tfoot tr th, table tbody tr td, table tr td, table tfoot tr td { display: table-cell; line-height: 1.4; } |
| |
| /* Font smoothing and tab width for the whole page. */ |
| body { -moz-osx-font-smoothing: grayscale; -webkit-font-smoothing: antialiased; tab-size: 4; } |
| |
| h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { line-height: 1.4; } |
| |
| a:hover, a:focus { text-decoration: underline; } |
| |
| /* Clearfix: generated table-display pseudo-elements, with :after clearing floats. */ |
| .clearfix:before, .clearfix:after, .float-group:before, .float-group:after { content: " "; display: table; } |
| .clearfix:after, .float-group:after { clear: both; } |
| |
| /* Inline code (any code element that is not a direct child of pre). */ |
| *:not(pre) > code { font-size: inherit; font-style: normal !important; letter-spacing: 0; padding: 0; background-color: white; -webkit-border-radius: 0; border-radius: 0; line-height: inherit; word-wrap: break-word; } |
| *:not(pre) > code.nobreak { word-wrap: normal; } |
| *:not(pre) > code.nowrap { white-space: nowrap; } |
| |
| /* Preformatted blocks and the code inside them. */ |
| pre, pre > code { line-height: 1.6; color: #264357; font-family: Consolas, "Liberation Mono", Courier, monospace; font-weight: normal; } |
| |
| /* Nested emphasis flips back to the normal style/weight. */ |
| em em { font-style: normal; } |
| |
| strong strong { font-weight: normal; } |
| |
| /* Keyboard key sequences. */ |
| .keyseq { color: #333333; } |
| |
| kbd { font-family: Consolas, "Liberation Mono", Courier, monospace; display: inline-block; color: black; font-size: 0.65em; line-height: 1.45; background-color: #f7f7f7; border: 1px solid #ccc; -webkit-border-radius: 3px; border-radius: 3px; -webkit-box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; margin: 0 0.15em; padding: 0.2em 0.5em; vertical-align: middle; position: relative; top: -0.1em; white-space: nowrap; } |
| |
| .keyseq kbd:first-child { margin-left: 0; } |
| |
| .keyseq kbd:last-child { margin-right: 0; } |
| |
| /* Menu sequences and menu references. */ |
| .menuseq, .menuref { color: #000; } |
| |
| .menuseq b:not(.caret), .menuref { font-weight: inherit; } |
| |
| .menuseq { word-spacing: -0.02em; } |
| .menuseq b.caret { font-size: 1.25em; line-height: 0.8; } |
| .menuseq i.caret { font-weight: bold; text-align: center; width: 0.45em; } |
| |
| /* b.button is wrapped in generated square brackets. */ |
| b.button:before, b.button:after { position: relative; top: -1px; font-weight: normal; } |
| |
| b.button:before { content: "["; padding: 0 3px 0 2px; } |
| |
| b.button:after { content: "]"; padding: 0 2px 0 3px; } |
| |
| /* Top-level page containers: centred, capped at 62.5em, with side padding. `*zoom: 1` is a star-hack declaration that standards parsers discard (presumably an old-IE layout trigger; confirm before removing). */ |
| #header, #content, #footnotes, #footer { width: 100%; margin-left: auto; margin-right: auto; margin-top: 0; margin-bottom: 0; max-width: 62.5em; *zoom: 1; position: relative; padding-left: 1.5em; padding-right: 1.5em; } |
| /* Clearfix pseudo-elements for the same containers. */ |
| #header:before, #header:after, #content:before, #content:after, #footnotes:before, #footnotes:after, #footer:before, #footer:after { content: " "; display: table; } |
| #header:after, #content:after, #footnotes:after, #footer:after { clear: both; } |
| |
| #content { margin-top: 1.25em; } |
| |
| /* Cancel the :before clearfix pseudo-element on #content only. */ |
| #content:before { content: none; } |
| |
| /* Document title and the author/revision details line in the header. */ |
| #header > h1:first-child { color: black; margin-top: 2.25rem; margin-bottom: 0; } |
| #header > h1:first-child + #toc { margin-top: 8px; border-top: 1px solid #dddddd; } |
| #header > h1:only-child, body.toc2 #header > h1:nth-last-child(2) { border-bottom: 1px solid #dddddd; padding-bottom: 8px; } |
| #header .details { border-bottom: 1px solid #dddddd; line-height: 1.45; padding-top: 0.25em; padding-bottom: 0.25em; padding-left: 0.25em; color: #5e93b8; display: -ms-flexbox; display: -webkit-flex; display: flex; -ms-flex-flow: row wrap; -webkit-flex-flow: row wrap; flex-flow: row wrap; } |
| #header .details span:first-child { margin-left: -0.125em; } |
| #header .details span.email a { color: #333333; } |
| /* Line breaks in the details line are hidden; the span following each one gets a generated separator instead. */ |
| #header .details br { display: none; } |
| #header .details br + span:before { content: "\00a0\2013\00a0"; } |
| #header .details br + span.author:before { content: "\00a0\22c5\00a0"; color: #333333; } |
| #header .details br + span#revremark:before { content: "\00a0|\00a0"; } |
| #header #revnumber { text-transform: capitalize; } |
| #header #revnumber:after { content: "\00a0"; } |
| |
| /* A class-less h1 that is the first child of #content is styled like a document title. */ |
| #content > h1:first-child:not([class]) { color: black; border-bottom: 1px solid #dddddd; padding-bottom: 8px; margin-top: 0; padding-top: 1rem; margin-bottom: 1.25rem; } |
| |
| /* Table of contents: base styles shared by every placement. */ |
| #toc { border-bottom: 0 solid #dddddd; padding-bottom: 0.5em; } |
| #toc > ul { margin-left: 0.125em; } |
| #toc ul.sectlevel0 > li > a { font-style: italic; } |
| #toc ul.sectlevel0 ul.sectlevel1 { margin: 0.5em 0; } |
| #toc ul { font-family: Noto, sans-serif; list-style-type: none; } |
| #toc li { line-height: 1.3334; margin-top: 0.3334em; } |
| #toc a { text-decoration: none; } |
| #toc a:active { text-decoration: underline; } |
| |
| #toctitle { color: black; font-size: 1.2em; } |
| |
| /* From 768px: a .toc2 table of contents becomes a fixed 15em column (left by default, right with .toc-right) and the body is padded to make room for it. */ |
| @media only screen and (min-width: 768px) { #toctitle { font-size: 1.375em; } |
| body.toc2 { padding-left: 15em; padding-right: 0; } |
| #toc.toc2 { margin-top: 0 !important; background-color: white; position: fixed; width: 15em; left: 0; top: 0; border-right: 1px solid #dddddd; border-top-width: 0 !important; border-bottom-width: 0 !important; z-index: 1000; padding: 1.25em 1em; height: 100%; overflow: auto; } |
| #toc.toc2 #toctitle { margin-top: 0; margin-bottom: 0.8rem; font-size: 1.2em; } |
| #toc.toc2 > ul { font-size: 0.9em; margin-bottom: 0; } |
| #toc.toc2 ul ul { margin-left: 0; padding-left: 1em; } |
| #toc.toc2 ul.sectlevel0 ul.sectlevel1 { padding-left: 0; margin-top: 0.5em; margin-bottom: 0.5em; } |
| body.toc2.toc-right { padding-left: 0; padding-right: 15em; } |
| body.toc2.toc-right #toc.toc2 { border-right-width: 0; border-left: 1px solid #dddddd; left: auto; right: 0; } } |
| /* From 1280px: the fixed column and the matching body padding widen to 20em. */ |
| @media only screen and (min-width: 1280px) { body.toc2 { padding-left: 20em; padding-right: 0; } |
| #toc.toc2 { width: 20em; } |
| #toc.toc2 #toctitle { font-size: 1.375em; } |
| #toc.toc2 > ul { font-size: 0.95em; } |
| #toc.toc2 ul ul { padding-left: 1.25em; } |
| body.toc2.toc-right { padding-left: 0; padding-right: 20em; } } |
| /* A table of contents placed inside #content is drawn as a bordered box. */ |
| #content #toc { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: white; -webkit-border-radius: 0; border-radius: 0; } |
| #content #toc > :first-child { margin-top: 0; } |
| #content #toc > :last-child { margin-bottom: 0; } |
| |
| /* Footer spans the full width with no fill. `none` is not a valid colour, so the earlier `background-color: none` was discarded by the parser; `transparent` states the same result with a valid value. */ |
| #footer { max-width: 100%; background-color: transparent; padding: 1.25em; } |
| |
| #footer-text { color: black; line-height: 1.44; } |
| |
| /* Bottom spacing for the content area and level-1 sections; both values double from 768px. */ |
| #content { margin-bottom: 0.625em; } |
| |
| .sect1 { padding-bottom: 0.625em; } |
| |
| @media only screen and (min-width: 768px) { #content { margin-bottom: 1.25em; } |
| .sect1 { padding-bottom: 1.25em; } } |
| .sect1:last-child { padding-bottom: 0; } |
| |
| .sect1 + .sect1 { border-top: 0 solid #dddddd; } |
| |
| /* Heading anchors: a hidden section-sign link in the left margin that becomes visible when the heading or the anchor is hovered. Note that only the first selector of each list below is scoped to #content. */ |
| #content h1 > a.anchor, h2 > a.anchor, h3 > a.anchor, #toctitle > a.anchor, .sidebarblock > .content > .title > a.anchor, h4 > a.anchor, h5 > a.anchor, h6 > a.anchor { position: absolute; z-index: 1001; width: 1.5ex; margin-left: -1.5ex; display: block; text-decoration: none !important; visibility: hidden; text-align: center; font-weight: normal; } |
| #content h1 > a.anchor:before, h2 > a.anchor:before, h3 > a.anchor:before, #toctitle > a.anchor:before, .sidebarblock > .content > .title > a.anchor:before, h4 > a.anchor:before, h5 > a.anchor:before, h6 > a.anchor:before { content: "\00A7"; font-size: 0.85em; display: block; padding-top: 0.1em; } |
| #content h1:hover > a.anchor, #content h1 > a.anchor:hover, h2:hover > a.anchor, h2 > a.anchor:hover, h3:hover > a.anchor, #toctitle:hover > a.anchor, .sidebarblock > .content > .title:hover > a.anchor, h3 > a.anchor:hover, #toctitle > a.anchor:hover, .sidebarblock > .content > .title > a.anchor:hover, h4:hover > a.anchor, h4 > a.anchor:hover, h5:hover > a.anchor, h5 > a.anchor:hover, h6:hover > a.anchor, h6 > a.anchor:hover { visibility: visible; } |
| /* Headings that are themselves links keep the heading colour, including on hover. */ |
| #content h1 > a.link, h2 > a.link, h3 > a.link, #toctitle > a.link, .sidebarblock > .content > .title > a.link, h4 > a.link, h5 > a.link, h6 > a.link { color: black; text-decoration: none; } |
| #content h1 > a.link:hover, h2 > a.link:hover, h3 > a.link:hover, #toctitle > a.link:hover, .sidebarblock > .content > .title > a.link:hover, h4 > a.link:hover, h5 > a.link:hover, h6 > a.link:hover { color: black; } |
| |
| /* Bottom margin shared by media, literal, listing and stem blocks. */ |
| .audioblock, .imageblock, .literalblock, .listingblock, .stemblock, .videoblock { margin-bottom: 1.25em; } |
| |
| /* Block titles are left-aligned. */ |
| .admonitionblock td.content > .title, .audioblock > .title, .exampleblock > .title, .imageblock > .title, .listingblock > .title, .literalblock > .title, .stemblock > .title, .openblock > .title, .paragraph > .title, .quoteblock > .title, table.tableblock > .title, .verseblock > .title, .videoblock > .title, .dlist > .title, .olist > .title, .ulist > .title, .qlist > .title, .hdlist > .title { text-rendering: optimizeLegibility; text-align: left; } |
| |
| /* Table captions stay on one line and may overflow a zero max-width box (presumably so the caption does not influence the table's width; confirm). */ |
| table.tableblock > caption.title { white-space: nowrap; overflow: visible; max-width: 0; } |
| |
| .paragraph.lead > p, #preamble > .sectionbody > .paragraph:first-of-type p { color: black; } |
| |
| /* NOTE(review): this selector requires #preamble to be a descendant of table.tableblock; verify that it can ever match the generated markup. */ |
| table.tableblock #preamble > .sectionbody > .paragraph:first-of-type p { font-size: inherit; } |
| |
| /* Admonition blocks are laid out as a borderless two-cell table: an icon cell and a content cell. */ |
| .admonitionblock > table { border-collapse: separate; border: 0; background: none; width: 100%; } |
| .admonitionblock > table td.icon { text-align: center; width: 80px; } |
| /* Lift the global `img { max-width: 100% }` cap for admonition icons. `none` is the initial value of max-width, so it matches the former `initial` while also working in browsers that lack the `initial` keyword (e.g. the IE versions this stylesheet otherwise caters for). */ |
| .admonitionblock > table td.icon img { max-width: none; } |
| .admonitionblock > table td.icon .title { font-weight: bold; font-family: Noto, sans-serif; text-transform: uppercase; } |
| .admonitionblock > table td.content { padding-left: 1.125em; padding-right: 1.25em; border-left: 1px solid #dddddd; color: #5e93b8; } |
| .admonitionblock > table td.content > :last-child > :last-child { margin-bottom: 0; } |
| |
| /* Example blocks: bordered white box around the content. */ |
| .exampleblock > .content { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: white; -webkit-border-radius: 0; border-radius: 0; } |
| .exampleblock > .content > :first-child { margin-top: 0; } |
| .exampleblock > .content > :last-child { margin-bottom: 0; } |
| |
| /* Sidebar blocks: the same box treatment applied to the block itself. */ |
| .sidebarblock { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: white; -webkit-border-radius: 0; border-radius: 0; } |
| .sidebarblock > :first-child { margin-top: 0; } |
| .sidebarblock > :last-child { margin-bottom: 0; } |
| .sidebarblock > .content > .title { color: black; margin-top: 0; } |
| |
| /* Drop the bottom margin of the last nested element so it does not add to the box padding. */ |
| .exampleblock > .content > :last-child > :last-child, .exampleblock > .content .olist > ol > li:last-child > :last-child, .exampleblock > .content .ulist > ul > li:last-child > :last-child, .exampleblock > .content .qlist > ol > li:last-child > :last-child, .sidebarblock > .content > :last-child > :last-child, .sidebarblock > .content .olist > ol > li:last-child > :last-child, .sidebarblock > .content .ulist > ul > li:last-child > :last-child, .sidebarblock > .content .qlist > ol > li:last-child > :last-child { margin-bottom: 0; } |
| |
| /* Background for literal and listing blocks; a slightly different grey is used inside sidebars. */ |
| .literalblock pre, .listingblock pre:not(.highlight), .listingblock pre[class="highlight"], .listingblock pre[class^="highlight "], .listingblock pre.CodeRay, .listingblock pre.prettyprint { background: #eeeeee; } |
| .sidebarblock .literalblock pre, .sidebarblock .listingblock pre:not(.highlight), .sidebarblock .listingblock pre[class="highlight"], .sidebarblock .listingblock pre[class^="highlight "], .sidebarblock .listingblock pre.CodeRay, .sidebarblock .listingblock pre.prettyprint { background: #f2f1f1; } |
| |
| /* Padding, wrapping and font size of the pre element; the font size steps up at 768px and 1280px. */ |
| .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { border: 1px hidden #666666; -webkit-border-radius: 0; border-radius: 0; word-wrap: break-word; padding: 1.25em 1.5625em 1.125em 1.5625em; font-size: 0.8125em; } |
| /* .nowrap switches from wrapping to horizontal scrolling. */ |
| .literalblock pre.nowrap, .literalblock pre[class].nowrap, .listingblock pre.nowrap, .listingblock pre[class].nowrap { overflow-x: auto; white-space: pre; word-wrap: normal; } |
| @media only screen and (min-width: 768px) { .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { font-size: 0.90625em; } } |
| @media only screen and (min-width: 1280px) { .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { font-size: 1em; } } |
| |
| /* Literal blocks marked .output use light text on a dark background. */ |
| .literalblock.output pre { color: #eeeeee; background-color: #264357; } |
| |
| /* For highlight.js listings the padding moves from the pre to the inner code element. */ |
| .listingblock pre.highlightjs { padding: 0; } |
| .listingblock pre.highlightjs > code { padding: 1.25em 1.5625em 1.125em 1.5625em; -webkit-border-radius: 0; border-radius: 0; } |
| |
| .listingblock > .content { position: relative; } |
| |
| /* Language label generated from data-lang, positioned top-right and shown only while the listing is hovered. */ |
| .listingblock code[data-lang]:before { display: none; content: attr(data-lang); position: absolute; font-size: 0.75em; top: 0.425rem; right: 0.5rem; line-height: 1; text-transform: uppercase; color: #999; } |
| |
| .listingblock:hover code[data-lang]:before { display: block; } |
| |
| /* Terminal listings: each .command is prefixed with its data-prompt value, or "$" when the attribute is absent. */ |
| .listingblock.terminal pre .command:before { content: attr(data-prompt); padding-right: 0.5em; color: #999; } |
| |
| .listingblock.terminal pre .command:not([data-prompt]):before { content: "$"; } |
| |
| /* Layout table with class pyhltable: strip the generic table border, margin and background. */ |
| table.pyhltable { border-collapse: separate; border: 0; margin-bottom: 0; background: none; } |
| |
| table.pyhltable td { vertical-align: top; padding-top: 0; padding-bottom: 0; line-height: 1.6; } |
| |
| table.pyhltable td.code { padding-left: .75em; padding-right: 0; } |
| |
| /* Line-number gutter: grey text separated from the code by a right border. */ |
| pre.pygments .lineno, table.pyhltable td:not(.code) { color: #999; padding-left: 0; padding-right: .5em; border-right: 1px solid #dddddd; } |
| |
| pre.pygments .lineno { display: inline-block; margin-right: .25em; } |
| |
| table.pyhltable .linenodiv { background: none !important; padding-right: 0 !important; } |
| |
| /* Quote blocks: italic, justified text with a large generated opening quotation mark. */ |
| .quoteblock { margin: 0 1em 0.75em 1.5em; display: table; } |
| .quoteblock > .title { margin-left: -1.5em; margin-bottom: 0.75em; } |
| .quoteblock blockquote, .quoteblock blockquote p { color: #333333; font-size: 1.15rem; line-height: 1.75; word-spacing: 0.1em; letter-spacing: 0; font-style: italic; text-align: justify; } |
| .quoteblock blockquote { margin: 0; padding: 0; border: 0; } |
| .quoteblock blockquote:before { content: "\201c"; float: left; font-size: 2.75em; font-weight: bold; line-height: 0.6em; margin-left: -0.6em; color: black; text-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); } |
| .quoteblock blockquote > .paragraph:last-child p { margin-bottom: 0; } |
| .quoteblock .attribution { margin-top: 0.5em; margin-right: 0.5ex; text-align: right; } |
| /* Nested quote blocks use a left border in place of the quotation mark. */ |
| .quoteblock .quoteblock { margin-left: 0; margin-right: 0; padding: 0.5em 0; border-left: 3px solid #5e93b8; } |
| .quoteblock .quoteblock blockquote { padding: 0 0 0 0.75em; } |
| .quoteblock .quoteblock blockquote:before { display: none; } |
| |
| /* Verse blocks: preformatted text set in a proportional sans-serif face instead of the monospace used for other pre elements. */ |
| .verseblock { margin: 0 1em 0.75em 1em; } |
| /* The final fallback must be the generic keyword `sans-serif`; the former unquoted `sans` is not a generic family and was treated as a font named "sans". */ |
| .verseblock pre { font-family: "Open Sans", "DejaVu Sans", sans-serif; font-size: 1.15rem; color: #333333; font-weight: 300; text-rendering: optimizeLegibility; } |
| .verseblock pre strong { font-weight: 400; } |
| .verseblock .attribution { margin-top: 1.25rem; margin-left: 0.5ex; } |
| |
| /* Attribution lines shared by quote and verse blocks. */ |
| .quoteblock .attribution, .verseblock .attribution { font-size: 0.8125em; line-height: 1.45; font-style: italic; } |
| .quoteblock .attribution br, .verseblock .attribution br { display: none; } |
| .quoteblock .attribution cite, .verseblock .attribution cite { display: block; letter-spacing: -0.025em; color: #5e93b8; } |
| |
| /* Abstracts reuse the quote block but are left-aligned, full width, and without the generated quotation mark. */ |
| .quoteblock.abstract { margin: 0 0 0.75em 0; display: block; } |
| .quoteblock.abstract blockquote, .quoteblock.abstract blockquote p { text-align: left; word-spacing: 0; } |
| .quoteblock.abstract blockquote:before, .quoteblock.abstract blockquote p:first-of-type:before { display: none; } |
| |
| /* Tables carrying the tableblock class. */ |
| table.tableblock { max-width: 100%; border-collapse: separate; } |
| /* NOTE(review): the first selector below needs a `p` that is a direct child of another `p`; confirm it can match the generated markup. */ |
| table.tableblock td > .paragraph:last-child p > p:last-child, table.tableblock th > p:last-child, table.tableblock td > p:last-child { margin-bottom: 0; } |
| |
| /* Borders start at zero width; the grid-* and frame-* classes below switch individual sides on. */ |
| table.tableblock, th.tableblock, td.tableblock { border: 0 solid #d8d8ce; } |
| |
| /* grid-all: right and bottom borders on head/body cells, top and right on footer cells. */ |
| table.grid-all > thead > tr > .tableblock, table.grid-all > tbody > tr > .tableblock { border-width: 0 1px 1px 0; } |
| |
| table.grid-all > tfoot > tr > .tableblock { border-width: 1px 1px 0 0; } |
| |
| /* grid-cols: vertical separators only. */ |
| table.grid-cols > * > tr > .tableblock { border-width: 0 1px 0 0; } |
| |
| /* grid-rows: horizontal separators only. */ |
| table.grid-rows > thead > tr > .tableblock, table.grid-rows > tbody > tr > .tableblock { border-width: 0 0 1px 0; } |
| |
| table.grid-rows > tfoot > tr > .tableblock { border-width: 1px 0 0 0; } |
| |
| /* Remove the trailing right border of the last cell in a row and the bottom border of the last row. */ |
| table.grid-all > * > tr > .tableblock:last-child, table.grid-cols > * > tr > .tableblock:last-child { border-right-width: 0; } |
| |
| table.grid-all > tbody > tr:last-child > .tableblock, table.grid-all > thead:last-child > tr > .tableblock, table.grid-rows > tbody > tr:last-child > .tableblock, table.grid-rows > thead:last-child > tr > .tableblock { border-bottom-width: 0; } |
| |
| /* Outer frame of the table: all sides, left/right only, or top/bottom only. */ |
| table.frame-all { border-width: 1px; } |
| |
| table.frame-sides { border-width: 0 1px; } |
| |
| table.frame-topbot { border-width: 1px 0; } |
| |
| /* Horizontal and vertical cell alignment classes. */ |
| th.halign-left, td.halign-left { text-align: left; } |
| |
| th.halign-right, td.halign-right { text-align: right; } |
| |
| th.halign-center, td.halign-center { text-align: center; } |
| |
| th.valign-top, td.valign-top { vertical-align: top; } |
| |
| th.valign-bottom, td.valign-bottom { vertical-align: bottom; } |
| |
| th.valign-middle, td.valign-middle { vertical-align: middle; } |
| |
| table thead th, table tfoot th { font-weight: bold; } |
| |
| /* Header cells inside tbody: solid colour fallback first, then the legacy WebKit gradient, then the standard gradient syntax (the last supported declaration wins). */ tbody tr th { display: table-cell; line-height: 1.4; background: #90b66a; background: -webkit-linear-gradient(top, #add386, #90b66a); background: linear-gradient(to bottom, #add386, #90b66a); } |
| |
| tbody tr th, tbody tr th p, tfoot tr th, tfoot tr th p { color: white; font-weight: bold; } |
| |
| p.tableblock > code:only-child { background: none; padding: 0; } |
| |
| p.tableblock { font-size: 1em; } |
| |
| td > div.verse { white-space: pre; } |
| |
| ol { margin-left: 1.75em; } |
| |
| ul li ol { margin-left: 1.5em; } |
| |
| dl dd { margin-left: 1.125em; } |
| |
| dl dd:last-child, dl dd:last-child > :last-child { margin-bottom: 0; } |
| |
| ol > li p, ul > li p, ul dd, ol dd, .olist .olist, .ulist .ulist, .ulist .olist, .olist .ulist { margin-bottom: 0.375em; } |
| |
| ul.checklist, ul.none, ol.none, ul.no-bullet, ol.no-bullet, ol.unnumbered, ul.unstyled, ol.unstyled { list-style-type: none; } |
| |
| ul.no-bullet, ol.no-bullet, ol.unnumbered { margin-left: 0.625em; } |
| |
| ul.unstyled, ol.unstyled { margin-left: 0; } |
| |
| ul.checklist { margin-left: 0.625em; } |
| |
| ul.checklist li > p:first-child > .fa-square-o:first-child, ul.checklist li > p:first-child > .fa-check-square-o:first-child { width: 1.25em; font-size: 0.8em; position: relative; bottom: 0.125em; } |
| |
| ul.checklist li > p:first-child > input[type="checkbox"]:first-child { margin-right: 0.25em; } |
| |
| ul.inline { display: -ms-flexbox; display: -webkit-box; display: flex; -ms-flex-flow: row wrap; -webkit-flex-flow: row wrap; flex-flow: row wrap; list-style: none; margin: 0 0 0.375em -0.75em; } |
| |
| ul.inline > li { margin-left: 0.75em; } |
| |
| .unstyled dl dt { font-weight: normal; font-style: normal; } |
| |
| ol.arabic { list-style-type: decimal; } |
| |
| ol.decimal { list-style-type: decimal-leading-zero; } |
| |
| ol.loweralpha { list-style-type: lower-alpha; } |
| |
| ol.upperalpha { list-style-type: upper-alpha; } |
| |
| ol.lowerroman { list-style-type: lower-roman; } |
| |
| ol.upperroman { list-style-type: upper-roman; } |
| |
| ol.lowergreek { list-style-type: lower-greek; } |
| |
| .hdlist > table, .colist > table { border: 0; background: none; } |
| .hdlist > table > tbody > tr, .colist > table > tbody > tr { background: none; } |
| |
| td.hdlist1, td.hdlist2 { vertical-align: top; padding: 0 0.625em; } |
| |
| td.hdlist1 { font-weight: bold; padding-bottom: 0.75em; } |
| |
| .literalblock + .colist, .listingblock + .colist { margin-top: -0.5em; } |
| |
| .colist > table tr > td:first-of-type { padding: 0.4em 0.75em 0 0.75em; line-height: 1; vertical-align: top; } |
| .colist > table tr > td:first-of-type img { max-width: initial; } |
| .colist > table tr > td:last-of-type { padding: 0.25em 0; } |
| |
| .thumb, .th { line-height: 0; display: inline-block; border: solid 4px white; -webkit-box-shadow: 0 0 0 1px #dddddd; box-shadow: 0 0 0 1px #dddddd; } |
| |
| .imageblock.left, .imageblock[style*="float: left"] { margin: 0.25em 0.625em 1.25em 0; } |
| .imageblock.right, .imageblock[style*="float: right"] { margin: 0.25em 0 1.25em 0.625em; } |
| .imageblock > .title { margin-bottom: 0; } |
| .imageblock.thumb, .imageblock.th { border-width: 6px; } |
| .imageblock.thumb > .title, .imageblock.th > .title { padding: 0 0.125em; } |
| |
| .image.left, .image.right { margin-top: 0.25em; margin-bottom: 0.25em; display: inline-block; line-height: 0; } |
| .image.left { margin-right: 0.625em; } |
| .image.right { margin-left: 0.625em; } |
| |
| a.image { text-decoration: none; display: inline-block; } |
| a.image object { pointer-events: none; } |
| |
| sup.footnote, sup.footnoteref { font-size: 0.875em; position: static; vertical-align: super; } |
| sup.footnote a, sup.footnoteref a { text-decoration: none; } |
| sup.footnote a:active, sup.footnoteref a:active { text-decoration: underline; } |
| |
| #footnotes { padding-top: 0.75em; padding-bottom: 0.75em; margin-bottom: 0.625em; } |
| #footnotes hr { width: 20%; min-width: 6.25em; margin: -0.25em 0 0.75em 0; border-width: 1px 0 0 0; } |
| #footnotes .footnote { padding: 0 0.375em 0 0.225em; line-height: 1.3334; font-size: 0.875em; margin-left: 1.2em; margin-bottom: 0.2em; } |
| #footnotes .footnote a:first-of-type { font-weight: bold; text-decoration: none; margin-left: -1.05em; } |
| #footnotes .footnote:last-of-type { margin-bottom: 0; } |
| #content #footnotes { margin-top: -0.625em; margin-bottom: 0; padding: 0.75em 0; } |
| |
| .gist .file-data > table { border: 0; background: #fff; width: 100%; margin-bottom: 0; } |
| .gist .file-data > table td.line-data { width: 99%; } |
| |
| div.unbreakable { page-break-inside: avoid; } |
| |
| .big { font-size: larger; } |
| |
| .small { font-size: smaller; } |
| |
| .underline { text-decoration: underline; } |
| |
| .overline { text-decoration: overline; } |
| |
| .line-through { text-decoration: line-through; } |
| |
| .aqua { color: #00bfbf; } |
| |
| .aqua-background { background-color: #00fafa; } |
| |
| .black { color: black; } |
| |
| .black-background { background-color: black; } |
| |
| .blue { color: #0000bf; } |
| |
| .blue-background { background-color: #0000fa; } |
| |
| .fuchsia { color: #bf00bf; } |
| |
| .fuchsia-background { background-color: #fa00fa; } |
| |
| .gray { color: #606060; } |
| |
| .gray-background { background-color: #7d7d7d; } |
| |
| .green { color: #006000; } |
| |
| .green-background { background-color: #007d00; } |
| |
| .lime { color: #00bf00; } |
| |
| .lime-background { background-color: #00fa00; } |
| |
| .maroon { color: #600000; } |
| |
| .maroon-background { background-color: #7d0000; } |
| |
| .navy { color: #000060; } |
| |
| .navy-background { background-color: #00007d; } |
| |
| .olive { color: #606000; } |
| |
| .olive-background { background-color: #7d7d00; } |
| |
| .purple { color: #600060; } |
| |
| .purple-background { background-color: #7d007d; } |
| |
| .red { color: #bf0000; } |
| |
| .red-background { background-color: #fa0000; } |
| |
| .silver { color: #909090; } |
| |
| .silver-background { background-color: #bcbcbc; } |
| |
| .teal { color: #006060; } |
| |
| .teal-background { background-color: #007d7d; } |
| |
| .white { color: #bfbfbf; } |
| |
| .white-background { background-color: #fafafa; } |
| |
| .yellow { color: #bfbf00; } |
| |
| .yellow-background { background-color: #fafa00; } |
| |
| span.icon > .fa { cursor: default; } |
| a span.icon > .fa { cursor: inherit; } |
| |
| .admonitionblock td.icon [class^="fa icon-"] { font-size: 2.5em; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5); cursor: default; } |
| .admonitionblock td.icon .icon-note:before { content: "\f05a"; color: #29475c; } |
| .admonitionblock td.icon .icon-tip:before { content: "\f0eb"; text-shadow: 1px 1px 2px rgba(155, 155, 0, 0.8); color: #111; } |
| .admonitionblock td.icon .icon-warning:before { content: "\f071"; color: #bf6900; } |
| .admonitionblock td.icon .icon-caution:before { content: "\f06d"; color: #bf3400; } |
| .admonitionblock td.icon .icon-important:before { content: "\f06a"; color: #bf0000; } |
| |
| .conum[data-value] { display: inline-block; color: #fff !important; background-color: black; -webkit-border-radius: 100px; border-radius: 100px; text-align: center; font-size: 0.75em; width: 1.67em; height: 1.67em; line-height: 1.67em; font-family: "Open Sans", "DejaVu Sans", sans-serif; font-style: normal; font-weight: bold; } |
| .conum[data-value] * { color: #fff !important; } |
| .conum[data-value] + b { display: none; } |
| .conum[data-value]:after { content: attr(data-value); } |
| pre .conum[data-value] { position: relative; top: -0.125em; } |
| |
| b.conum * { color: inherit !important; } |
| |
| .conum:not([data-value]):empty { display: none; } |
| |
| h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { border-bottom: 1px solid #dddddd; } |
| |
| .sect1 { padding-bottom: 0; } |
| |
| #toctitle { color: #00406F; font-weight: normal; margin-top: 1.5em; } |
| |
| .sidebarblock { border-color: #aaa; } |
| |
| code { -webkit-border-radius: 4px; border-radius: 4px; } |
| |
| p.tableblock.header { color: #6d6e71; } |
| |
| .literalblock pre, .listingblock pre { background: #eeeeee; } |
| |
| </style> |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css"> |
| <style> |
| /* Stylesheet for CodeRay to match GitHub theme | MIT License | http://foundation.zurb.com */ |
| /*pre.CodeRay {background-color:#f7f7f8;}*/ |
| .CodeRay .line-numbers{border-right:1px solid #d8d8d8;padding:0 0.5em 0 .25em} /* line-number gutter: thin rule on its right edge */ |
| .CodeRay span.line-numbers{display:inline-block;margin-right:.5em;color:rgba(0,0,0,.3)} /* span-based line numbers: dimmed and spaced from the code */ |
| .CodeRay .line-numbers strong{color:rgba(0,0,0,.4)} /* <strong> line numbers are slightly darker than the rest */ |
| table.CodeRay{border-collapse:separate;border-spacing:0;margin-bottom:0;border:0;background:none} /* table wrapper: no border, cell spacing, bottom margin or background of its own */ |
| table.CodeRay td{vertical-align: top;line-height:1.45} /* cells are top-aligned and share one line height */ |
| table.CodeRay td.line-numbers{text-align:right} /* numbers are right-aligned in their cell */ |
| table.CodeRay td.line-numbers>pre{padding:0;color:rgba(0,0,0,.3)} /* number column <pre>: no padding, same dimmed colour as span line numbers */ |
| table.CodeRay td.code{padding:0 0 0 .5em} /* code cell: left padding only */ |
| table.CodeRay td.code>pre{padding:0} /* code column <pre>: no padding */ |
| .CodeRay .debug{color:#fff !important;background:#000080 !important} |
| .CodeRay .annotation{color:#007} |
| .CodeRay .attribute-name{color:#000080} |
| .CodeRay .attribute-value{color:#700} |
| .CodeRay .binary{color:#509} |
| .CodeRay .comment{color:#998;font-style:italic} |
| .CodeRay .char{color:#04d} |
| .CodeRay .char .content{color:#04d} |
| .CodeRay .char .delimiter{color:#039} |
| .CodeRay .class{color:#458;font-weight:bold} |
| .CodeRay .complex{color:#a08} |
| .CodeRay .constant,.CodeRay .predefined-constant{color:#008080} |
| .CodeRay .color{color:#099} |
| .CodeRay .class-variable{color:#369} |
| .CodeRay .decorator{color:#b0b} |
| .CodeRay .definition{color:#099} |
| .CodeRay .delimiter{color:#000} |
| .CodeRay .doc{color:#970} |
| .CodeRay .doctype{color:#34b} |
| .CodeRay .doc-string{color:#d42} |
| .CodeRay .escape{color:#666} |
| .CodeRay .entity{color:#800} |
| .CodeRay .error{color:#808} |
| .CodeRay .exception{color:inherit} |
| .CodeRay .filename{color:#099} |
| .CodeRay .function{color:#900;font-weight:bold} |
| .CodeRay .global-variable{color:#008080} |
| .CodeRay .hex{color:#058} |
| .CodeRay .integer,.CodeRay .float{color:#099} |
| .CodeRay .include{color:#555} |
| .CodeRay .inline{color:#000} |
| .CodeRay .inline .inline{background:#ccc} |
| .CodeRay .inline .inline .inline{background:#bbb} |
| .CodeRay .inline .inline-delimiter{color:#d14} |
| .CodeRay .inline-delimiter{color:#d14} |
| .CodeRay .important{color:#555;font-weight:bold} |
| .CodeRay .interpreted{color:#b2b} |
| .CodeRay .instance-variable{color:#008080} |
| .CodeRay .label{color:#970} |
| .CodeRay .local-variable{color:#963} |
| .CodeRay .octal{color:#40e} |
| .CodeRay .predefined{color:#369} |
| .CodeRay .preprocessor{color:#579} |
| .CodeRay .pseudo-class{color:#555} |
| .CodeRay .directive{font-weight:bold} |
| .CodeRay .type{font-weight:bold} |
| .CodeRay .predefined-type{color:inherit} |
| .CodeRay .reserved,.CodeRay .keyword {color:#000;font-weight:bold} |
| .CodeRay .key{color:#808} |
| .CodeRay .key .delimiter{color:#606} |
| .CodeRay .key .char{color:#80f} |
| .CodeRay .value{color:#088} |
| .CodeRay .regexp .delimiter{color:#808} |
| .CodeRay .regexp .content{color:#808} |
| .CodeRay .regexp .modifier{color:#808} |
| .CodeRay .regexp .char{color:#d14} |
| .CodeRay .regexp .function{color:#404;font-weight:bold} |
| .CodeRay .string{color:#d20} |
| .CodeRay .string .string .string{background:#ffd0d0} |
| .CodeRay .string .content{color:#d14} |
| .CodeRay .string .char{color:#d14} |
| .CodeRay .string .delimiter{color:#d14} |
| .CodeRay .shell{color:#d14} |
| .CodeRay .shell .delimiter{color:#d14} |
| .CodeRay .symbol{color:#990073} |
| .CodeRay .symbol .content{color:#a60} |
| .CodeRay .symbol .delimiter{color:#630} |
| .CodeRay .tag{color:#008080} |
| .CodeRay .tag-special{color:#d70} |
| .CodeRay .variable{color:#036} |
| .CodeRay .insert{background:#afa} |
| .CodeRay .delete{background:#faa} |
| .CodeRay .change{color:#aaf;background:#007} |
| .CodeRay .head{color:#f8f;background:#505} |
| .CodeRay .insert .insert{color:#080} |
| .CodeRay .delete .delete{color:#800} |
| .CodeRay .change .change{color:#66f} |
| .CodeRay .head .head{color:#f4f} |
| </style> |
| <link rel="stylesheet" href="../katex/katex.min.css"> |
| <!-- KaTeX scripts are deferred so they do not block HTML parsing; deferred scripts run in document order before DOMContentLoaded fires. --> |
| <script src="../katex/katex.min.js" defer></script> |
| <script src="../katex/contrib/auto-render.min.js" defer></script> |
| <!-- Use KaTeX to render math once document is loaded, see |
| https://github.com/Khan/KaTeX/tree/master/contrib/auto-render --> |
| <script> |
| // Typeset math with KaTeX once the DOM has been parsed. |
| // Display math is delimited by $$...$$ and \[...\]; inline math by $...$ and \(...\). |
| document.addEventListener("DOMContentLoaded", function () { |
| // renderMathInElement is expected to be defined by the auto-render script included above. |
| // If that script did not load, leave the raw math text in place instead of throwing a ReferenceError. |
| if (typeof renderMathInElement !== "function") { |
| return; |
| } |
| renderMathInElement( |
| document.body, |
| { |
| delimiters: [ |
| { left: "$$", right: "$$", display: true}, |
| { left: "\\[", right: "\\]", display: true}, |
| { left: "$", right: "$", display: false}, |
| { left: "\\(", right: "\\)", display: false} |
| ] |
| } |
| ); |
| }); |
| </script></head> |
| <body class="book toc2 toc-left" style="max-width: 100%;"> |
| <div id="header"> |
| <h1>The OpenCL<sup>™</sup> Extension Specification</h1> |
| <div class="details"> |
| <span id="author" class="author">Khronos<sup>®</sup> OpenCL Working Group</span><br> |
| <span id="revnumber">version v3.0.7,</span> |
| <span id="revdate">Fri, 23 Apr 2021 20:00:00 +0000</span> |
| <br><span id="revremark">from git branch: master commit: 133503b85911ca8fd6642721d440e89460d491a3</span> |
| </div> |
| <div id="toc" class="toc2"> |
| <div id="toctitle">Table of Contents</div> |
| <ul class="sectlevel1"> |
| <li><a href="#extensions-overview">1. Extensions Overview</a></li> |
| <li><a href="#cl_khr_icd-opencl">2. Installable Client Drivers</a></li> |
| <li><a href="#cl_khr_byte_addressable_store">3. Byte Addressable Stores</a></li> |
| <li><a href="#cl_khr_3d_image_writes">4. Writing to 3D Image Objects</a></li> |
| <li><a href="#cl_khr_fp16">5. Half Precision Floating-Point</a></li> |
| <li><a href="#cl_khr_fp64">6. Double Precision Floating-Point</a></li> |
| <li><a href="#cl_khr_int32_atomics">7. 32-bit Atomics</a></li> |
| <li><a href="#cl_khr_int64_atomics">8. 64-bit Atomics</a></li> |
| <li><a href="#cl_khr_select_fprounding_mode">9. Selecting the Rounding Mode <strong>(DEPRECATED)</strong></a></li> |
| <li><a href="#cl_khr_gl_sharing">10. Creating an OpenCL Context from an OpenGL Context or Share Group</a></li> |
| <li><a href="#cl_khr_gl_sharing__memobjs">11. Creating OpenCL Memory Objects from OpenGL Objects</a></li> |
| <li><a href="#cl_khr_gl_event">12. Creating OpenCL Event Objects from OpenGL Sync Objects</a></li> |
| <li><a href="#cl_khr_d3d10_sharing">13. Creating OpenCL Memory Objects from Direct3D 10 Buffers and Textures</a></li> |
| <li><a href="#cl_khr_d3d11_sharing">14. Creating OpenCL Memory Objects from Direct3D 11 Buffers and Textures</a></li> |
| <li><a href="#cl_khr_dx9_media_sharing">15. Creating OpenCL Memory Objects from DirectX 9 Media Surfaces</a></li> |
| <li><a href="#cl_khr_depth_images">16. Depth Images</a></li> |
| <li><a href="#cl_khr_gl_depth_images">17. Sharing OpenGL and OpenGL ES Depth and Depth-Stencil Images</a></li> |
| <li><a href="#cl_khr_gl_msaa_sharing">18. Creating OpenCL Memory Objects from OpenGL MSAA Textures</a></li> |
| <li><a href="#cl_khr_egl_event">19. Creating OpenCL Event Objects from EGL Sync Objects</a></li> |
| <li><a href="#cl_khr_egl_image">20. Creating OpenCL Memory Objects from EGL Images</a></li> |
| <li><a href="#cl_khr_image2d_from_buffer">21. Creating a 2D Image From A Buffer</a></li> |
| <li><a href="#cl_khr_initialize_memory">22. Local and Private Memory Initialization</a></li> |
| <li><a href="#cl_khr_terminate_context">23. Terminating OpenCL contexts</a></li> |
| <li><a href="#cl_khr_spir">24. Standard Portable Intermediate Representation Binaries</a></li> |
| <li><a href="#cl_khr_il_program">25. Intermediate Language Programs</a></li> |
| <li><a href="#cl_khr_create_command_queue">26. Creating Command Queues with Properties</a></li> |
| <li><a href="#cl_khr_device_enqueue_local_arg_types">27. Device Enqueue Local Argument Types</a></li> |
| <li><a href="#cl_khr_subgroups">28. Subgroups</a></li> |
| <li><a href="#cl_khr_mipmap_image">29. Mipmaps</a></li> |
| <li><a href="#cl_khr_srgb_image_writes">30. sRGB Image Writes</a></li> |
| <li><a href="#cl_khr_priority_hints">31. Priority Hints</a></li> |
| <li><a href="#cl_khr_throttle_hints">32. Throttle Hints</a></li> |
| <li><a href="#cl_khr_subgroup_named_barrier">33. Named Barriers for Subgroups</a></li> |
| <li><a href="#cl_khr_extended_async_copies">34. Extended Async Copies (Provisional)</a></li> |
| <li><a href="#cl_khr_async_work_group_copy_fence">35. Async Work Group Copy Fence (Provisional)</a></li> |
| <li><a href="#cl_khr_device_uuid">36. Unique Device Identifiers</a></li> |
| <li><a href="#cl_khr_extended_versioning">37. Extended versioning</a></li> |
| <li><a href="#_extended_subgroup_functions">38. Extended Subgroup Functions</a></li> |
| <li><a href="#cl_khr_pci_bus_info">39. PCI Bus Information Query</a></li> |
| <li><a href="#cl_khr_extended_bit_ops">40. Extended Bit Operations</a></li> |
| <li><a href="#cl_khr_suggested_local_work_size">41. Suggested Local Work Size Query</a></li> |
| <li><a href="#spirv_extensions">42. Extensions to the OpenCL SPIR-V Environment</a></li> |
| <li><a href="#_extensions_promoted_to_core_features">Appendix A: Extensions Promoted to Core Features</a></li> |
| <li><a href="#_deprecated_extensions">Appendix B: Deprecated Extensions</a></li> |
| <li><a href="#_quick_reference">Appendix C: Quick Reference</a></li> |
| </ul> |
| </div> |
| </div> |
| <div id="content"> |
| <div id="preamble"> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Copyright 2008-2020 The Khronos Group.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This specification is protected by copyright laws and contains material proprietary |
| to the Khronos Group, Inc. Except as described by these terms, it or any components |
| may not be reproduced, republished, distributed, transmitted, displayed, broadcast |
| or otherwise exploited in any manner without the express prior written permission |
| of Khronos Group.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Khronos Group grants a conditional copyright license to use and reproduce the |
| unmodified specification for any purpose, without fee or royalty, EXCEPT no licenses |
| to any patent, trademark or other intellectual property rights are granted under |
| these terms. Parties desiring to implement the specification and make use of |
| Khronos trademarks in relation to that implementation, and receive reciprocal patent |
| license protection under the Khronos IP Policy must become Adopters and confirm the |
| implementation as conformant under the process defined by Khronos for this |
| specification; see <a href="https://www.khronos.org/adopters" class="bare">https://www.khronos.org/adopters</a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Khronos Group makes no, and expressly disclaims any, representations or warranties, |
| express or implied, regarding this specification, including, without limitation: |
| merchantability, fitness for a particular purpose, non-infringement of any |
| intellectual property, correctness, accuracy, completeness, timeliness, and |
| reliability. Under no circumstances will the Khronos Group, or any of its Promoters, |
| Contributors or Members, or their respective partners, officers, directors, |
| employees, agents or representatives be liable for any damages, whether direct, |
| indirect, special or consequential damages for lost revenues, lost profits, or |
| otherwise, arising from or in connection with these materials.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Vulkan and Khronos are registered trademarks, and OpenXR, SPIR, SPIR-V, SYCL, WebGL, |
| WebCL, OpenVX, OpenVG, EGL, COLLADA, glTF, NNEF, OpenKODE, OpenKCAM, StreamInput, |
| OpenWF, OpenSL ES, OpenMAX, OpenMAX AL, OpenMAX IL, OpenMAX DL, OpenML and DevU are |
| trademarks of the Khronos Group Inc. ASTC is a trademark of ARM Holdings PLC, |
| OpenCL is a trademark of Apple Inc. and OpenGL and OpenML are registered trademarks |
| and the OpenGL ES and OpenGL SC logos are trademarks of Silicon Graphics |
| International used under license by Khronos. All other product names, trademarks, |
| and/or company names are used solely for identification and belong to their |
| respective owners.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="extensions-overview"><a class="anchor" href="#extensions-overview"></a>1. Extensions Overview</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This document describes the list of optional features supported by OpenCL. |
| Optional extensions are not required to be supported by a conformant OpenCL |
| implementation, but are expected to be widely available, and in some cases may define |
| functionality that is likely to be required in a future revision of the |
| OpenCL specification.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This document describes all extensions that have been approved by the OpenCL |
| working group. |
| It is a <em>unified</em> specification, meaning that the extensions described in this |
| document are not tied to any particular core OpenCL specification version.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL extensions approved by the OpenCL working group may be <em>promoted</em> to |
| core features in later revisions of OpenCL. |
| When this occurs, the feature described by the extension specification |
| is merged into the core OpenCL specification. |
| The extension will continue to be documented in this specification, both for |
| backwards compatibility and for devices that wish to support the feature |
| but that are unable to support the newer core OpenCL version.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="naming-convention-for-optional-extensions"><a class="anchor" href="#naming-convention-for-optional-extensions"></a>1.1. Naming Convention for Optional Extensions</h3> |
| <div class="paragraph"> |
| <p>OpenCL extensions approved by the OpenCL working group use the following |
| naming convention:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>A unique <em>name string</em> of the form <code>"<strong>cl_khr_<<em>name</em>></strong>"</code> is associated |
| with each extension. |
| If the extension is supported by an implementation, this string will be |
| present in the implementation’s <code>CL_PLATFORM_EXTENSIONS</code> string or |
| <code>CL_DEVICE_EXTENSIONS</code> string.</p> |
| </li> |
| <li> |
| <p>All API functions defined by the extension will have names of the form |
| <strong>cl<<em>function_name</em>>KHR</strong>.</p> |
| </li> |
| <li> |
| <p>All enumerants defined by the extension will have names of the form |
| <strong>CL_<<em>enum_name</em>>_KHR.</strong></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Functions and enumerants defined by extensions that are promoted to |
| core features will have their <strong>KHR</strong> affix removed. |
| OpenCL implementations of such later revisions must also export the name |
| strings of promoted extensions in the <code>CL_PLATFORM_EXTENSIONS</code> or |
| <code>CL_DEVICE_EXTENSIONS</code> string, and support the <strong>KHR</strong>-affixed versions of |
| functions and enumerants as a transition aid.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Vendor extensions are strongly encouraged to follow a similar naming |
| convention:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>A unique <em>name string</em> of the form <code>"<strong>cl_<<em>vendor_name</em>>_<<em>name</em>></strong>"</code> |
| is associated with each extension. |
| If the extension is supported by an implementation, this string will be |
| present in the implementation’s <code>CL_PLATFORM_EXTENSIONS</code> string or |
| <code>CL_DEVICE_EXTENSIONS</code> string.</p> |
| </li> |
| <li> |
| <p>All API functions defined by the vendor extension will have names of the |
| form <strong>cl<<em>function_name</em>><<em>vendor_name</em>></strong>.</p> |
| </li> |
| <li> |
| <p>All enumerants defined by the vendor extension will have names of the |
| form <strong>CL_<<em>enum_name</em>>_<<em>vendor_name</em>>.</strong></p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="compiler-directives-for-optional-extensions"><a class="anchor" href="#compiler-directives-for-optional-extensions"></a>1.2. Compiler Directives for Optional Extensions</h3> |
| <div class="paragraph"> |
| <p>The <strong>#pragma OPENCL EXTENSION</strong> directive controls the behavior of the OpenCL |
| compiler with respect to extensions. |
| The <strong>#pragma OPENCL EXTENSION</strong> directive is defined as:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#pragma</span> OPENCL EXTENSION <extension_name> : <behavior> |
| <span class="preprocessor">#pragma</span> OPENCL EXTENSION all : <behavior></code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>where <em>extension_name</em> is the name of the extension. |
| The <em>extension_name</em> will have names of the form <strong>cl_khr_<<em>name</em>></strong> for an |
| extension approved by the OpenCL working group and will have names of the |
| form <strong>cl_<<em>vendor_name</em>>_<<em>name</em>></strong> for vendor extensions. |
| The token <strong>all</strong> means that the behavior applies to all extensions supported |
| by the compiler. |
| The <em>behavior</em> can be set to one of the following values given by the table |
| below.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 75%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>behavior</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>enable</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Behave as specified by the extension <em>extension_name</em>.</p> |
| <p class="tableblock"> Report an error on the <strong><code>#pragma OPENCL EXTENSION</code></strong> if the |
| <em>extension_name</em> is not supported, or if <strong>all</strong> is specified.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>disable</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Behave (including issuing errors and warnings) as if the extension |
| <em>extension_name</em> is not part of the language definition.</p> |
| <p class="tableblock"> If <strong>all</strong> is specified, then behavior must revert back to that of the |
| non-extended core version of the language being compiled to.</p> |
| <p class="tableblock"> Warn on the <strong><code>#pragma OPENCL EXTENSION</code></strong> if the extension <em>extension_name</em> |
| is not supported.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The <strong><code>#pragma OPENCL EXTENSION</code></strong> directive is a simple, low-level mechanism |
| to set the behavior for each extension. |
| It does not define policies such as which combinations are appropriate; |
| those must be defined elsewhere. |
| The order of directives matters in setting the behavior for each extension. |
| Directives that occur later override those seen earlier. |
| The <strong>all</strong> variant sets the behavior for all extensions, overriding all |
| previously issued extension directives, but only if the <em>behavior</em> is set to |
| <strong>disable</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The initial state of the compiler is as if the directive</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#pragma</span> OPENCL EXTENSION all : disable</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>was issued, telling the compiler that all error and warning reporting must |
| be done according to this specification, ignoring any extensions.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Every extension which affects the OpenCL language semantics, syntax or adds |
| built-in functions to the language must create a preprocessor <code>#define</code> that |
| matches the extension name string. |
| This <code>#define</code> would be available in the language if and only if the |
| extension is supported on a given implementation.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Example</strong>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>An extension which adds the extension string <code>"cl_khr_3d_image_writes"</code> |
| should also add a preprocessor <code>#define</code> called <strong><code>cl_khr_3d_image_writes</code></strong>. |
| A kernel can now use this preprocessor <code>#define</code> to do something like:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#ifdef</span> cl_khr_3d_image_writes |
| <span class="comment">// do something using the extension</span> |
| <span class="preprocessor">#else</span> |
| <span class="comment">// do something else or #error!</span> |
| <span class="preprocessor">#endif</span></code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="getting-opencl-api-extension-function-pointers"><a class="anchor" href="#getting-opencl-api-extension-function-pointers"></a>1.3. Getting OpenCL API Extension Function Pointers</h3> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span>* clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, |
| <span class="directive">const</span> <span class="predefined-type">char</span> *funcname)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>returns the address of the extension function named by <em>funcname</em> for a |
| given <em>platform</em>. The pointer returned should be cast to a function pointer |
| type matching the extension function’s definition defined in the appropriate |
| extension specification and header file. |
| A return value of <code>NULL</code> indicates that the specified function does not |
| exist for the implementation or <em>platform</em> is not a valid platform. |
| A non-<code>NULL</code> return value for <strong>clGetExtensionFunctionAddressForPlatform</strong> |
| does not guarantee that an extension function is actually supported by the |
| platform. |
| The application must also make a corresponding query using |
| <strong>clGetPlatformInfo</strong>(platform, CL_PLATFORM_EXTENSIONS, …​) or |
| <strong>clGetDeviceInfo</strong>(device, CL_DEVICE_EXTENSIONS, …​) to determine if an |
| extension is supported by the OpenCL implementation.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Since there is no way to qualify the query with a |
| device, the function pointer returned must work for all implementations of |
| that extension on different devices for a platform. |
| The behavior of calling a device extension function on a device not |
| supporting that extension is undefined.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetExtensionFunctionAddressForPlatform</strong> may not be used to query for core |
| (non-extension) functions in OpenCL. |
| For extension functions that may be queried using |
| <strong>clGetExtensionFunctionAddressForPlatform</strong>, implementations may also choose to |
| export those functions statically from the object libraries |
| implementing those functions; however, portable applications cannot rely on |
| this behavior.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Function pointer typedefs must be declared for all extensions that add API |
| entrypoints. |
| These typedefs are a required part of the extension interface, to be |
| provided in an appropriate header (such as cl_ext.h if the extension is an |
| OpenCL extension, or cl_gl_ext.h if the extension is an OpenCL / OpenGL |
| sharing extension).</p> |
| </div> |
| <div class="paragraph"> |
| <p>The following convention must be followed for all extensions affecting the |
| host API:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#ifndef</span> extension_name |
| <span class="preprocessor">#define</span> extension_name <span class="integer">1</span> |
| |
| <span class="comment">// all data typedefs, token #defines, prototypes, and</span> |
| <span class="comment">// function pointer typedefs for this extension</span> |
| |
| <span class="comment">// function pointer typedefs must use the</span> |
| <span class="comment">// following naming convention</span> |
| |
| <span class="keyword">typedef</span> return_type |
| (CL_API_CALL *clExtensionFunctionNameTAG_fn)(...); |
| |
| <span class="preprocessor">#endif</span> <span class="comment">// extension_name</span></code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>where <code>TAG</code> can be <code>KHR</code>, <code>EXT</code> or <code>vendor-specific</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Consider, for example, the <strong>cl_khr_gl_sharing</strong> extension. |
| This extension would add the following to cl_gl_ext.h:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#ifndef</span> cl_khr_gl_sharing |
| <span class="preprocessor">#define</span> cl_khr_gl_sharing <span class="integer">1</span> |
| |
| <span class="comment">// all data typedefs, token #defines, prototypes, and</span> |
| <span class="comment">// function pointer typedefs for this extension</span> |
| <span class="preprocessor">#define</span> CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -<span class="integer">1000</span> |
| <span class="preprocessor">#define</span> CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR <span class="hex">0x2006</span> |
| <span class="preprocessor">#define</span> CL_DEVICES_FOR_GL_CONTEXT_KHR <span class="hex">0x2007</span> |
| <span class="preprocessor">#define</span> CL_GL_CONTEXT_KHR <span class="hex">0x2008</span> |
| <span class="preprocessor">#define</span> CL_EGL_DISPLAY_KHR <span class="hex">0x2009</span> |
| <span class="preprocessor">#define</span> CL_GLX_DISPLAY_KHR <span class="hex">0x200A</span> |
| <span class="preprocessor">#define</span> CL_WGL_HDC_KHR <span class="hex">0x200B</span> |
| <span class="preprocessor">#define</span> CL_CGL_SHAREGROUP_KHR <span class="hex">0x200C</span> |
| |
| <span class="comment">// function pointer typedefs must use the</span> |
| <span class="comment">// following naming convention</span> |
| <span class="keyword">typedef</span> cl_int |
| (CL_API_CALL *clGetGLContextInfoKHR_fn)( |
| <span class="directive">const</span> cl_context_properties * <span class="comment">/* properties */</span>, |
| cl_gl_context_info <span class="comment">/* param_name */</span>, |
| size_t <span class="comment">/* param_value_size */</span>, |
| <span class="directive">void</span> * <span class="comment">/* param_value */</span>, |
| size_t * <span class="comment">/*param_value_size_ret*/</span>); |
| |
| <span class="preprocessor">#endif</span> <span class="comment">// cl_khr_gl_sharing</span></code></pre> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_icd-opencl"><a class="anchor" href="#cl_khr_icd-opencl"></a>2. Installable Client Drivers</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-overview"><a class="anchor" href="#cl_khr_icd-overview"></a>2.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes a platform extension which defines a simple mechanism |
| through which the Khronos OpenCL installable client driver loader (ICD |
| Loader) may expose multiple separate vendor installable client drivers |
| (Vendor ICDs) for OpenCL. |
| An application written against the ICD Loader will be able to access all |
| cl_platform_ids exposed by all vendor implementations with the ICD Loader |
| acting as a demultiplexor.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This is a platform extension, so if this extension is supported by an |
| implementation, the string <strong>cl_khr_icd</strong> will be present in the |
| <code>CL_PLATFORM_EXTENSIONS</code> string.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information"><a class="anchor" href="#_general_information"></a>2.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history"><a class="anchor" href="#_version_history"></a>2.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-inferring-vendors-from-function-call-arguments"><a class="anchor" href="#cl_khr_icd-inferring-vendors-from-function-call-arguments"></a>2.3. Inferring Vendors from Function Call Arguments</h3> |
| <div class="paragraph"> |
| <p>At every OpenCL function call, the ICD Loader infers the vendor ICD function |
| to call from the arguments to the function. |
| An object is said to be ICD compatible if it is of the following structure:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="keyword">struct</span> _cl_&lt;object&gt; |
| { |
| <span class="keyword">struct</span> _cl_icd_dispatch *dispatch; |
| <span class="comment">// ... remainder of internal data</span> |
| };</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>&lt;object&gt; is one of platform_id, device_id, context, command_queue, mem, |
| program, kernel, event, or sampler.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The structure <code>_cl_icd_dispatch</code> is a function pointer dispatch table which |
| is used to direct calls to a particular vendor implementation. |
| All objects created from ICD compatible objects must be ICD compatible.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The definition for <code>_cl_icd_dispatch</code> is provided along with the OpenCL |
| headers. Existing members can never be removed from that structure but new |
| members can be appended.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Functions which do not have an argument from which the vendor implementation |
| may be inferred have been deprecated and may be ignored.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-icd-data"><a class="anchor" href="#cl_khr_icd-icd-data"></a>2.4. ICD Data</h3> |
| <div class="paragraph"> |
| <p>A Vendor ICD is defined by two pieces of data:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The Vendor ICD library specifies a library which contains the OpenCL |
| entry points for the vendor’s OpenCL implementation. |
| The vendor ICD’s library file name should include the vendor name, or a |
| vendor-specific implementation identifier.</p> |
| </li> |
| <li> |
| <p>The Vendor ICD extension suffix is a short string which specifies the |
| default suffix for extensions implemented only by that vendor. |
| The vendor suffix string is optional.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-icd-loader-vendor-enumeration-on-windows"><a class="anchor" href="#cl_khr_icd-icd-loader-vendor-enumeration-on-windows"></a>2.5. ICD Loader Vendor Enumeration on Windows</h3> |
| <div class="paragraph"> |
| <p>To enumerate Vendor ICDs on Windows, the ICD Loader will first |
| scan for REG_SZ string values in the "Display Adapter" and |
| "Software Components" HKR registry keys. The exact registry |
| keys to scan should be obtained via PnP Configuration Manager |
| APIs, but will look like:</p> |
| </div> |
| <div class="paragraph"> |
| <p>For 64-bit ICDs:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>HKLM\SYSTEM\CurrentControlSet\Control\Class\ |
| {Display Adapter GUID}\{Instance ID}\OpenCLDriverName, or |
| |
| HKLM\SYSTEM\CurrentControlSet\Control\Class\ |
| {Software Component GUID}\{Instance ID}\OpenCLDriverName</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>For 32-bit ICDs:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>HKLM\SYSTEM\CurrentControlSet\Control\Class\ |
| {Display Adapter GUID}\{Instance ID}\OpenCLDriverNameWoW, or |
| |
| HKLM\SYSTEM\CurrentControlSet\Control\Class\ |
| {Software Component GUID}\{Instance ID}\OpenCLDriverNameWoW</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>These registry values contain the path to the Vendor ICD library. |
| For example, if the registry contains the value:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>[HKLM\SYSTEM\CurrentControlSet\Control\Class\{GUID}\{Instance}] |
| "OpenCLDriverName"="c:\\vendor a\\vndra_ocl.dll"</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Then the ICD Loader will open the Vendor ICD library:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>c:\vendor a\vndra_ocl.dll</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The ICD Loader will also scan for REG_DWORD values in the registry |
| key:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>HKLM\SOFTWARE\Khronos\OpenCL\Vendors</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>For each registry value in this key which has data set to 0, the |
| ICD Loader will open the Vendor ICD library specified by the name |
| of the registry value.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For example, if the registry contains the value:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>[HKLM\SOFTWARE\Khronos\OpenCL\Vendors] |
| "c:\\vendor a\\vndra_ocl.dll"=dword:00000000</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Then the ICD Loader will open the Vendor ICD library:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>c:\vendor a\vndra_ocl.dll</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-icd-loader-vendor-enumeration-on-linux"><a class="anchor" href="#cl_khr_icd-icd-loader-vendor-enumeration-on-linux"></a>2.6. ICD Loader Vendor Enumeration on Linux</h3> |
| <div class="paragraph"> |
| <p>To enumerate vendor ICDs on Linux, the ICD Loader scans the files in the |
| path <code>/etc/OpenCL/vendors</code>. |
| For each file in this path, the ICD Loader opens the file as a text file. |
| The expected format for the file is a single line of text which specifies |
| the Vendor ICD’s library. |
| The ICD Loader will attempt to open that file as a shared object using |
| dlopen(). |
| Note that the library specified may be an absolute path or just a file name.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For example, if the following file exists</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>/etc/OpenCL/vendors/VendorA.icd</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>and contains the text</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>libVendorAOpenCL.so</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>then the ICD Loader will load the library <code>libVendorAOpenCL.so</code>.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-icd-loader-vendor-enumeration-on-android"><a class="anchor" href="#cl_khr_icd-icd-loader-vendor-enumeration-on-android"></a>2.7. ICD Loader Vendor Enumeration on Android</h3> |
| <div class="paragraph"> |
| <p>To enumerate vendor ICDs on Android, the ICD Loader scans the files in the |
| path <code>/system/vendor/Khronos/OpenCL/vendors</code>. |
| For each file in this path, the ICD Loader opens the file as a text file. |
| The expected format for the file is a single line of text which specifies |
| the Vendor ICD’s library. |
| The ICD Loader will attempt to open that file as a shared object using |
| dlopen(). |
| Note that the library specified may be an absolute path or just a file name.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For example, if the following file exists</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>/system/vendor/Khronos/OpenCL/vendors/VendorA.icd</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>and contains the text</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>libVendorAOpenCL.so</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>then the ICD Loader will load the library <code>libVendorAOpenCL.so</code>.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-adding-a-vendor-library"><a class="anchor" href="#cl_khr_icd-adding-a-vendor-library"></a>2.8. Adding a Vendor Library</h3> |
| <div class="paragraph"> |
| <p>Upon successfully loading a Vendor ICD’s library, the ICD Loader queries the |
| following functions from the library: <strong>clIcdGetPlatformIDsKHR</strong>, |
| <strong>clGetPlatformInfo</strong>, and <strong>clGetExtensionFunctionAddress</strong> (note: |
| <strong>clGetExtensionFunctionAddress</strong> has been deprecated, but is still required |
| for the ICD loader). |
| If any of these functions are not present then the ICD Loader will close and |
| ignore the library.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Next the ICD Loader queries available ICD-enabled platforms in the library |
| using <strong>clIcdGetPlatformIDsKHR</strong>. |
| For each of these platforms, the ICD Loader queries the platform’s extension |
| string to verify that <strong>cl_khr_icd</strong> is supported, then queries the platform’s |
| Vendor ICD extension suffix using <strong>clGetPlatformInfo</strong> with the value |
| CL_PLATFORM_ICD_SUFFIX_KHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If any of these steps fail, the ICD Loader will ignore the Vendor ICD and |
| continue on to the next.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-new-procedures-and-functions"><a class="anchor" href="#cl_khr_icd-new-procedures-and-functions"></a>2.9. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, |
| cl_platform_id *platforms, |
| cl_uint *num_platforms);</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-new-tokens"><a class="anchor" href="#cl_khr_icd-new-tokens"></a>2.10. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Accepted as <em>param_name</em> to the function <strong>clGetPlatformInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_PLATFORM_ICD_SUFFIX_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clGetPlatformIDs</strong> when no platforms are found:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_PLATFORM_NOT_FOUND_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-additions-to-chapter-4"><a class="anchor" href="#cl_khr_icd-additions-to-chapter-4"></a>2.11. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>In <em>section 4.1</em>, replace the description of the return values of |
| <strong>clGetPlatformIDs</strong> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"<strong>clGetPlatformIDs</strong> returns CL_SUCCESS if the function is executed |
| successfully and there are a non zero number of platforms available. |
| It returns CL_PLATFORM_NOT_FOUND_KHR if zero platforms are available. |
| It returns CL_INVALID_VALUE if <em>num_entries</em> is equal to zero and |
| <em>platforms</em> is not <code>NULL</code> or if both <em>num_platforms</em> and <em>platforms</em> are |
| <code>NULL</code>."</p> |
| </div> |
| <div class="paragraph"> |
| <p>In <em>section 4.1</em>, add the following after the description of |
| <strong>clGetPlatformIDs</strong>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"The list of platforms accessible through the Khronos ICD Loader can be |
| obtained using the following function: |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, |
| cl_platform_id *platforms, |
| cl_uint *num_platforms);</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_entries</em> is the number of cl_platform_id entries that can be added to |
| <em>platforms</em>. |
| If <em>platforms</em> is not <code>NULL</code>, then <em>num_entries</em> must be greater than zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>platforms</em> returns a list of OpenCL platforms available for access through |
| the Khronos ICD Loader. |
| The cl_platform_id values returned in <em>platforms</em> are ICD compatible and can |
| be used to identify a specific OpenCL platform. |
| If the <em>platforms</em> argument is <code>NULL</code>, then this argument is ignored. |
| The number of OpenCL platforms returned is the minimum of the value |
| specified by <em>num_entries</em> or the number of OpenCL platforms available.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_platforms</em> returns the number of OpenCL platforms available. |
| If <em>num_platforms</em> is <code>NULL</code>, then this argument is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clIcdGetPlatformIDsKHR</strong> returns CL_SUCCESS if the function is executed |
| successfully and there are a non zero number of platforms available. |
| It returns CL_PLATFORM_NOT_FOUND_KHR if zero platforms are available. |
| It returns CL_INVALID_VALUE if <em>num_entries</em> is equal to zero and |
| <em>platforms</em> is not <code>NULL</code> or if both <em>num_platforms</em> and <em>platforms</em> are |
| <code>NULL</code>."</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following to <em>table 4.1</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_platform_info enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_PLATFORM_ICD_SUFFIX_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">char[]</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The function name suffix used to identify extension functions to be |
| directed to this platform by the ICD Loader.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-source-code"><a class="anchor" href="#cl_khr_icd-source-code"></a>2.12. Source Code</h3> |
| <div class="paragraph"> |
| <p>The official source for the ICD loader is available on GitHub, at:</p> |
| </div> |
| <div class="paragraph"> |
| <p><a href="https://github.com/KhronosGroup/OpenCL-ICD-Loader" class="bare">https://github.com/KhronosGroup/OpenCL-ICD-Loader</a></p> |
| </div> |
| <div class="paragraph"> |
| <p>The complete <code>_cl_icd_dispatch</code> structure is defined in the header |
| <strong>cl_icd.h</strong>, which is available as a part of the OpenCL headers.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_icd-issues"><a class="anchor" href="#cl_khr_icd-issues"></a>2.13. Issues</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>Some OpenCL functions do not take an object argument from which their |
| vendor library may be identified (e.g., clUnloadCompiler); how will they |
| be handled?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: Such functions will be a noop for all calls through the ICD.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>How are OpenCL extensions to be handled?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: OpenCL extension functions may be added to the ICD as soon as they |
| are implemented by any vendor. |
| The suffix mechanism provides access for vendor extensions which are not yet |
| added to the ICD.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>How will the ICD handle a <code>NULL</code> cl_platform_id?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: The ICD will by default choose the first enumerated platform as |
| the <code>NULL</code> platform. |
| The user can override this default by setting an environment variable |
| OPENCL_ICD_DEFAULT_PLATFORM to the desired platform index. |
| The API calls that deal with platforms will return CL_INVALID_PLATFORM if |
| the index is not between zero and (number of platforms - 1), both inclusive.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>There exists no mechanism to unload the ICD, should there be one?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: As there is no standard mechanism for unloading a vendor |
| implementation, do not add one for the ICD.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>How will the ICD loader handle <code>NULL</code> objects passed to the OpenCL |
| functions?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: The ICD loader will check for <code>NULL</code> objects passed to the OpenCL |
| functions without trying to dereference the <code>NULL</code> objects for obtaining the |
| ICD dispatch table. |
| On detecting a <code>NULL</code> object it will return one of the CL_INVALID_* error |
| values corresponding to the object in question.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_byte_addressable_store"><a class="anchor" href="#cl_khr_byte_addressable_store"></a>3. Byte Addressable Stores</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_byte_addressable_store</strong> extension. |
| This extension relaxes restrictions on pointers to <code>char</code>, <code>uchar</code>, <code>char2</code>, <code>uchar2</code>, <code>short</code>, <code>ushort</code> and <code>half</code> that were present in <em>Section 6.8m: Restrictions</em> of the OpenCL 1.0 specification. |
| With this extension, applications are able to read from and write to pointers to these types.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension became a core feature in OpenCL 1.1.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_2"><a class="anchor" href="#_general_information_2"></a>3.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_2"><a class="anchor" href="#_version_history_2"></a>3.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_3d_image_writes"><a class="anchor" href="#cl_khr_3d_image_writes"></a>4. Writing to 3D Image Objects</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_3d_image_writes</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds built-in functions that allow a kernel to write to 3D image objects in addition to 2D image objects.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension became a core feature in OpenCL 2.0.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_3"><a class="anchor" href="#_general_information_3"></a>4.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_3"><a class="anchor" href="#_version_history_3"></a>4.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The new built-in functions are described in the table below:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 1. <em>3D Image Built-in Image Write Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imagef</strong> (<br> |
| image3d_t <em>image</em>,<br> |
| int4 <em>coord</em>,<br> |
| float4 <em>color</em>)<br> |
| <br> |
| void <strong>write_imagei</strong> (<br> |
| image3d_t <em>image</em>,<br> |
| int4 <em>coord</em>,<br> |
| int4 <em>color</em>)<br> |
| <br> |
| void <strong>write_imageui</strong> (<br> |
| image3d_t <em>image</em>,<br> |
| int4 <em>coord</em>,<br> |
| uint4 <em>color</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to the location specified by coordinate (<em>x</em>, <em>y</em>, <em>z</em>) in the 3D image specified by <em>image</em>. |
| The appropriate data format conversion to the specified image format is done before writing the color value. |
| <em>coord.x</em>, <em>coord.y</em>, and <em>coord.z</em> are considered to be unnormalized coordinates and must be in the range 0 …​ image width - 1, 0 …​ image height - 1, and 0 …​ image depth - 1.<br> |
| <br> |
| <strong>write_imagef</strong> can only be used with image objects created with <em>image_channel_data_type</em> set to one of the pre-defined packed formats or set to CL_SNORM_INT8, CL_UNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16, CL_HALF_FLOAT, or CL_FLOAT. Appropriate data format conversion will be done to convert the channel data from a floating-point value to the actual data format in which the channels are stored.<br> |
| <br> |
| <strong>write_imagei</strong> can only be used with image objects created with <em>image_channel_data_type</em> set to one of the following values:<br> |
| CL_SIGNED_INT8,<br> |
| CL_SIGNED_INT16, or<br> |
| CL_SIGNED_INT32.<br> |
| <br> |
| <strong>write_imageui</strong> can only be used with image objects created with <em>image_channel_data_type</em> set to one of the following values:<br> |
| CL_UNSIGNED_INT8,<br> |
| CL_UNSIGNED_INT16, or<br> |
| CL_UNSIGNED_INT32.<br> |
| <br> |
| The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong>, and <strong>write_imageui</strong> for image objects created with <em>image_channel_data_type</em> values not specified in the description above, or with (<em>x</em>, <em>y</em>, <em>z</em>) coordinate values that are not in the range (0 …​ image width - 1, 0 …​ image height - 1, 0 …​ image depth - 1) respectively, is undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_fp16"><a class="anchor" href="#cl_khr_fp16"></a>5. Half Precision Floating-Point</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_fp16</strong> extension. |
| This extension adds support for half scalar and vector types as built-in |
| types that can be used for arithmetic operations, conversions etc.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_4"><a class="anchor" href="#_general_information_4"></a>5.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_4"><a class="anchor" href="#_version_history_4"></a>5.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_fp16-additions-to-chapter-6-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_fp16-additions-to-chapter-6-of-the-opencl-2.0-specification"></a>5.2. Additions to Chapter 6 of the OpenCL 2.0 C Specification</h3> |
| <div class="paragraph"> |
| <p>The lists of built-in scalar and vector data types defined in <em>tables 6.1</em> |
| and <em>6.2</em> are extended to include the following:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 75%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2-component half-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half3</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 3-component half-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half4</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 4-component half-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half8</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">An 8-component half-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half16</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 16-component half-precision floating-point vector.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The built-in vector data types for <code>halfn</code> are also declared as appropriate |
| types in the OpenCL API (and header files) that can be used by an |
| application. |
| The following table describes the built-in vector data types for <code>halfn</code> as |
| defined in the OpenCL C programming language and the corresponding data type |
| available to the application:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Type in OpenCL Language</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>API type for application</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_half2</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half3</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_half3</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half4</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_half4</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half8</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_half8</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>half16</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_half16</strong></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The relational, equality, logical and logical unary operators described in |
| <em>section 6.3</em> can be used with <code>half</code> scalar and <code>halfn</code> vector types and |
| shall produce a scalar <code>int</code> and vector <code>shortn</code> result respectively.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The OpenCL compiler accepts an <code>h</code> or <code>H</code> suffix on floating-point literals, |
| indicating that the literal has type <code>half</code>.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-conversions"><a class="anchor" href="#cl_khr_fp16-conversions"></a>5.2.1. Conversions</h4> |
| <div class="paragraph"> |
| <p>The implicit conversion rules specified in <em>section 6.2.1</em> now include the |
| <code>half</code> scalar and <code>halfn</code> vector data types.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The explicit casts described in <em>section 6.2.2</em> are extended to take a |
| <code>half</code> scalar data type and a <code>halfn</code> vector data type.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The explicit conversion functions described in <em>section 6.2.3</em> are extended |
| to take a <code>half</code> scalar data type and a <code>halfn</code> vector data type.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <code>as_typen()</code> function for re-interpreting types as described in <em>section |
| 6.2.4.2</em> is extended to allow conversion-free casts between <code>shortn</code>, |
| <code>ushortn</code>, and <code>halfn</code> scalar and vector data types.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-math-functions"><a class="anchor" href="#cl_khr_fp16-math-functions"></a>5.2.2. Math Functions</h4> |
| <div class="paragraph"> |
| <p>The built-in math functions defined in <em>table 6.8</em> (also listed below) are |
| extended to include appropriate versions of functions that take <code>half</code> and |
| <code>half{2|3|4|8|16}</code> as arguments and return values. |
| <code>gentype</code> now also includes <code>half</code>, <code>half2</code>, <code>half3</code>, <code>half4</code>, <code>half8</code>, and |
| <code>half16</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For any specific use of a function, the actual type has to be the same for |
| all arguments and the return type.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 2. <em>Half Precision Built-in Math Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>acos</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc cosine function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>acosh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Inverse hyperbolic cosine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>acospi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>acos</strong> (<em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>asin</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc sine function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>asinh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Inverse hyperbolic sine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>asinpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>asin</strong> (<em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atan</strong> (gentype <em>y_over_x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc tangent function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atan2</strong> (gentype <em>y</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc tangent of <em>y</em> / <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atanh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Hyperbolic arc tangent.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atanpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>atan</strong> (<em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atan2pi</strong> (gentype <em>y</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>atan2</strong> (<em>y</em>, <em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cbrt</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute cube-root.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>ceil</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value using the round to positive infinity rounding |
| mode.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>copysign</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>x</em> with its sign changed to match the sign of <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cos</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute cosine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cosh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute hyperbolic cosine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cospi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>cos</strong> (π <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>erfc</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Complementary error function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>erf</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Error function encountered in integrating the normal distribution.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>exp</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the base-e exponential of <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>exp2</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Exponential base 2 function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>exp10</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Exponential base 10 function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>expm1</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>e<sup>x</sup></em> - 1.0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fabs</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute absolute value of a floating-point number.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fdim</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><em>x</em> - <em>y</em> if <em>x</em> &gt; <em>y</em>, +0 if <em>x</em> is less than or equal to <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>floor</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value using the round to negative infinity rounding |
| mode.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fma</strong> (gentype <em>a</em>, gentype <em>b</em>, gentype <em>c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the correctly rounded floating-point representation of the sum of |
| <em>c</em> with the infinitely precise product of <em>a</em> and <em>b</em>. |
| Rounding of intermediate products shall not occur. |
| Edge case behavior is per the IEEE 754-2008 standard.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fmax</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>fmax</strong> (gentype <em>x</em>, half <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>x</em> &lt; <em>y</em>, otherwise it returns <em>x</em>. |
| If one argument is a NaN, <strong>fmax()</strong> returns the other argument. |
| If both arguments are NaNs, <strong>fmax()</strong> returns a NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fmin</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>fmin</strong> (gentype <em>x</em>, half <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>y</em> &lt; <em>x</em>, otherwise it returns <em>x</em>. |
| If one argument is a NaN, <strong>fmin()</strong> returns the other argument. |
| If both arguments are NaNs, <strong>fmin()</strong> returns a NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fmod</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Modulus. |
| Returns <em>x</em> - <em>y</em> * <strong>trunc</strong> (<em>x</em>/<em>y</em>) .</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fract</strong> (gentype <em>x</em>, __global gentype *<em>iptr</em>)<br> |
| gentype <strong>fract</strong> (gentype <em>x</em>, __local gentype *<em>iptr</em>)<br> |
| gentype <strong>fract</strong> (gentype <em>x</em>, __private gentype *<em>iptr</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> gentype <strong>fract</strong> (gentype <em>x</em>, gentype *<em>iptr</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <strong>fmin</strong>( <em>x</em> - <strong>floor</strong> (<em>x</em>), 0x1.ffcp-1f ).</p> |
| <p class="tableblock"> <strong>floor</strong>(x) is returned in <em>iptr</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>frexp</strong> (half<em>n x</em>, __global int<em>n</em> *exp)<br> |
| half <strong>frexp</strong> (half <em>x</em>, __global int *exp)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>frexp</strong> (half<em>n x</em>, __local int<em>n</em> *exp)<br> |
| half <strong>frexp</strong> (half <em>x</em>, __local int *exp)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>frexp</strong> (half<em>n x</em>, __private int<em>n</em> *exp)<br> |
| half <strong>frexp</strong> (half <em>x</em>, __private int *exp)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>frexp</strong> (half<em>n</em> <em>x</em>, int<em>n</em> *exp)<br> |
| half <strong>frexp</strong> (half <em>x</em>, int *exp)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extract mantissa and exponent from <em>x</em>. |
| For each component the mantissa returned is a half with magnitude in the |
| interval [1/2, 1) or 0. |
| Each component of <em>x</em> equals mantissa returned * 2<em><sup>exp</sup></em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>hypot</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the value of the square root of <em>x</em><sup>2</sup>+ <em>y</em><sup>2</sup> without undue |
| overflow or underflow.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int<em>n</em> <strong>ilogb</strong> (half<em>n</em> <em>x</em>)<br> |
| int <strong>ilogb</strong> (half <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the exponent as an integer value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>ldexp</strong> (half<em>n</em> <em>x</em>, int<em>n</em> <em>k</em>)<br> |
| half<em>n</em> <strong>ldexp</strong> (half<em>n</em> <em>x</em>, int <em>k</em>)<br> |
| half <strong>ldexp</strong> (half <em>x</em>, int <em>k</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Multiply <em>x</em> by 2 to the power <em>k</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>lgamma</strong> (gentype <em>x</em>)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>lgamma_r</strong> (half<em>n</em> <em>x</em>, __global int<em>n</em> *<em>signp</em>)<br> |
| half <strong>lgamma_r</strong> (half <em>x</em>, __global int *<em>signp</em>)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>lgamma_r</strong> (half<em>n</em> <em>x</em>, __local int<em>n</em> *<em>signp</em>)<br> |
| half <strong>lgamma_r</strong> (half <em>x</em>, __local int *<em>signp</em>)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>lgamma_r</strong> (half<em>n</em> <em>x</em>, __private int<em>n</em> *<em>signp</em>)<br> |
| half <strong>lgamma_r</strong> (half <em>x</em>, __private int *<em>signp</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>lgamma_r</strong> (half<em>n</em> <em>x</em>, int<em>n</em> *<em>signp</em>)<br> |
| half <strong>lgamma_r</strong> (half <em>x</em>, int *<em>signp</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Log gamma function. |
| Returns the natural logarithm of the absolute value of the gamma function. |
| The sign of the gamma function is returned in the <em>signp</em> argument of |
| <strong>lgamma_r</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute natural logarithm.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log2</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute a base 2 logarithm.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log10</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute a base 10 logarithm.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log1p</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute log<sub>e</sub>(1.0 + <em>x</em>) .</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>logb</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the exponent of <em>x</em>, which is the integral part of |
| log<em><sub>r</sub></em>|<em>x</em>|.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>mad</strong> (gentype <em>a</em>, gentype <em>b</em>, gentype <em>c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>mad</strong> computes <em>a</em> * <em>b</em> + <em>c</em>. |
| The function may compute <em>a</em> * <em>b</em> + <em>c</em> with reduced accuracy |
| in the embedded profile. See the OpenCL SPIR-V Environment Specification |
| for details. On some hardware the mad instruction may provide better |
| performance than expanded computation of <em>a</em> * <em>b</em> + <em>c</em>.</p> |
| <p class="tableblock"> Note: For some usages, e.g. <strong>mad</strong>(a, b, -a*b), the half precision |
| definition of <strong>mad</strong>() is loose enough that almost any result is allowed |
| from <strong>mad</strong>() for some values of a and b.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>maxmag</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>x</em> if |<em>x</em>| > |<em>y</em>|, <em>y</em> if |<em>y</em>| > |<em>x</em>|, otherwise |
| <strong>fmax</strong>(<em>x</em>, <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>minmag</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>x</em> if |<em>x</em>| &lt; |<em>y</em>|, <em>y</em> if |<em>y</em>| &lt; |<em>x</em>|, otherwise |
| <strong>fmin</strong>(<em>x</em>, <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>modf</strong> (gentype <em>x</em>, __global gentype *<em>iptr</em>)<br> |
| gentype <strong>modf</strong> (gentype <em>x</em>, __local gentype *<em>iptr</em>)<br> |
| gentype <strong>modf</strong> (gentype <em>x</em>, __private gentype *<em>iptr</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> gentype <strong>modf</strong> (gentype <em>x</em>, gentype *<em>iptr</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Decompose a floating-point number. |
| The <strong>modf</strong> function breaks the argument <em>x</em> into integral and fractional |
| parts, each of which has the same sign as the argument. |
| It stores the integral part in the object pointed to by <em>iptr</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>nan</strong> (ushort<em>n</em> <em>nancode</em>)<br> |
| half <strong>nan</strong> (ushort <em>nancode</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a quiet NaN. |
| The <em>nancode</em> may be placed in the significand of the resulting NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>nextafter</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Computes the next representable half-precision floating-point value |
| following <em>x</em> in the direction of <em>y</em>. |
| Thus, if <em>y</em> is less than <em>x</em>, <strong>nextafter</strong>() returns the largest |
| representable floating-point number less than <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>pow</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>pown</strong> (half<em>n</em> <em>x</em>, int<em>n</em> <em>y</em>)<br> |
| half <strong>pown</strong> (half <em>x</em>, int <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power <em>y</em>, where <em>y</em> is an integer.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>powr</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power <em>y</em>, where <em>x</em> is >= 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>remainder</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the value <em>r</em> such that <em>r</em> = <em>x</em> - <em>n</em>*<em>y</em>, where <em>n</em> is the |
| integer nearest the exact value of <em>x</em>/<em>y</em>. |
| If there are two integers closest to <em>x</em>/<em>y</em>, <em>n</em> shall be the even one. |
| If <em>r</em> is zero, it is given the same sign as <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>remquo</strong> (half<em>n</em> <em>x</em>, half<em>n</em> <em>y</em>, __global int<em>n</em> *<em>quo</em>)<br> |
| half <strong>remquo</strong> (half <em>x</em>, half <em>y</em>, __global int *<em>quo</em>)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>remquo</strong> (half<em>n</em> <em>x</em>, half<em>n</em> <em>y</em>, __local int<em>n</em> *<em>quo</em>)<br> |
| half <strong>remquo</strong> (half <em>x</em>, half <em>y</em>, __local int *<em>quo</em>)<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>remquo</strong> (half<em>n</em> <em>x</em>, half<em>n</em> <em>y</em>, __private int<em>n</em> *<em>quo</em>)<br> |
| half <strong>remquo</strong> (half <em>x</em>, half <em>y</em>, __private int *<em>quo</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> half<em>n</em> <strong>remquo</strong> (half<em>n</em> <em>x</em>, half<em>n</em> <em>y</em>, int<em>n</em> *<em>quo</em>)<br> |
| half <strong>remquo</strong> (half <em>x</em>, half <em>y</em>, int *<em>quo</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The <strong>remquo</strong> function computes the value <em>r</em> such that <em>r</em> = <em>x</em> - <em>k</em>*<em>y</em>, |
| where <em>k</em> is the integer nearest the exact value of <em>x</em>/<em>y</em>. |
| If there are two integers closest to <em>x</em>/<em>y</em>, <em>k</em> shall be the even one. |
| If <em>r</em> is zero, it is given the same sign as <em>x</em>. |
| This is the same value that is returned by the <strong>remainder</strong> function. |
| <strong>remquo</strong> also calculates the lower seven bits of the integral quotient |
| <em>x</em>/<em>y</em>, and gives that value the same sign as <em>x</em>/<em>y</em>. |
| It stores this signed value in the object pointed to by <em>quo</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>rint</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value (using round to nearest even rounding mode) in |
| floating-point format. |
| Refer to section 7.1 for a description of rounding modes.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>rootn</strong> (half<em>n</em> <em>x</em>, int<em>n</em> <em>y</em>)<br> |
| half <strong>rootn</strong> (half <em>x</em>, int <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power 1/<em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>round</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the integral value nearest to <em>x</em> rounding halfway cases away from |
| zero, regardless of the current rounding direction.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>rsqrt</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute inverse square root.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sin</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute sine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sincos</strong> (gentype <em>x</em>, __global gentype *<em>cosval</em>)<br> |
| gentype <strong>sincos</strong> (gentype <em>x</em>, __local gentype *<em>cosval</em>)<br> |
| gentype <strong>sincos</strong> (gentype <em>x</em>, __private gentype *<em>cosval</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> gentype <strong>sincos</strong> (gentype <em>x</em>, gentype *<em>cosval</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute sine and cosine of <em>x</em>. |
| The computed sine is the return value and computed cosine is returned in |
| <em>cosval</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sinh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute hyperbolic sine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sinpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>sin</strong> (π <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sqrt</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute square root.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tan</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute tangent.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tanh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute hyperbolic tangent.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tanpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>tan</strong> (π <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tgamma</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the gamma function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>trunc</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value using the round to zero rounding mode.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The <strong>FP_FAST_FMA_HALF</strong> macro indicates whether the <strong>fma()</strong> family of |
| functions are fast compared with direct code for half precision |
| floating-point. |
| If defined, the <strong>FP_FAST_FMA_HALF</strong> macro shall indicate that the <strong>fma()</strong> |
| function generally executes about as fast as, or faster than, a multiply and |
| an add of <strong>half</strong> operands.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The macro names given in the following list must use the values specified. |
| These constant expressions are suitable for use in #if preprocessing |
| directives.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#define</span> HALF_DIG <span class="integer">3</span> |
| <span class="preprocessor">#define</span> HALF_MANT_DIG <span class="integer">11</span> |
| <span class="preprocessor">#define</span> HALF_MAX_10_EXP +<span class="integer">4</span> |
| <span class="preprocessor">#define</span> HALF_MAX_EXP +<span class="integer">16</span> |
| <span class="preprocessor">#define</span> HALF_MIN_10_EXP -<span class="integer">4</span> |
| <span class="preprocessor">#define</span> HALF_MIN_EXP -<span class="integer">13</span> |
| <span class="preprocessor">#define</span> HALF_RADIX <span class="integer">2</span> |
| <span class="preprocessor">#define</span> HALF_MAX <span class="hex">0x1</span>.ffcp15h |
| <span class="preprocessor">#define</span> HALF_MIN <span class="hex">0x1</span><span class="float">.0</span>p-<span class="integer">14</span>h |
| <span class="preprocessor">#define</span> HALF_EPSILON <span class="hex">0x1</span><span class="float">.0</span>p-<span class="integer">10</span>h</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The following table describes the built-in macro names given above in the |
| OpenCL C programming language and the corresponding macro names available to |
| the application.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Macro in OpenCL Language</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Macro for application</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_DIG</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_DIG</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MANT_DIG</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MANT_DIG</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MAX_10_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MAX_10_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MAX_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MAX_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MIN_10_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MIN_10_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MIN_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MIN_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_RADIX</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_RADIX</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MAX</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MAX</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_MIN</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_MIN</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>HALF_EPSILON</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_HALF_EPSILON</strong></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The following constants are also available. |
| They are of type <code>half</code> and are accurate within the precision of the <code>half</code> |
| type.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Constant</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_E_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of e</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LOG2E_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>2</sub>e</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LOG10E_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>10</sub>e</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LN2_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>e</sub>2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LN10_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>e</sub>10</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_PI_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_PI_2_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of π / 2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_PI_4_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of π / 4</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_1_PI_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 1 / π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_2_PI_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 2 / π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_2_SQRTPI_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 2 / √π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_SQRT2_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of √2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_SQRT1_2_H</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 1 / √2</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-common-functions"><a class="anchor" href="#cl_khr_fp16-common-functions"></a>5.2.3. Common Functions</h4> |
| <div class="paragraph"> |
| <p>The built-in common functions defined in <em>table 6.12</em> (also listed below) |
| are extended to include appropriate versions of functions that take <code>half</code> |
| and <code>half{2|3|4|8|16}</code> as arguments and return values. |
| gentype now also includes <code>half</code>, <code>half2</code>, <code>half3</code>, <code>half4</code>, <code>half8</code> and |
| <code>half16</code>. |
| These are described below.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 3. <em>Half Precision Built-in Common Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>clamp</strong> (<br> |
| gentype <em>x</em>, gentype <em>minval</em>, gentype <em>maxval</em>)</p> |
| <p class="tableblock"> gentype <strong>clamp</strong> (<br> |
| gentype <em>x</em>, half <em>minval</em>, half <em>maxval</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <strong>fmin</strong>(<strong>fmax</strong>(<em>x</em>, <em>minval</em>), <em>maxval</em>).</p> |
| <p class="tableblock"> Results are undefined if <em>minval</em> > <em>maxval</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>degrees</strong> (gentype <em>radians</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Converts <em>radians</em> to degrees,<br> |
| i.e. (180 / π) * <em>radians</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>max</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>max</strong> (gentype <em>x</em>, half <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>x</em> &lt; <em>y</em>, otherwise it returns <em>x</em>. |
| If <em>x</em> or <em>y</em> is infinite or NaN, the return values are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>min</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>min</strong> (gentype <em>x</em>, half <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>y</em> &lt; <em>x</em>, otherwise it returns <em>x</em>. |
| If <em>x</em> or <em>y</em> is infinite or NaN, the return values are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>mix</strong> (gentype <em>x</em>, gentype <em>y</em>, gentype <em>a</em>)<br> |
| gentype <strong>mix</strong> (gentype <em>x</em>, gentype <em>y</em>, half <em>a</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the linear blend of <em>x</em> and <em>y</em> implemented as:</p> |
| <p class="tableblock"> <em>x</em> + (<em>y</em> - <em>x</em>) * <em>a</em></p> |
| <p class="tableblock"> <em>a</em> must be a value in the range 0.0 …​ 1.0. |
| If <em>a</em> is not in the range 0.0 …​ 1.0, the return values are undefined.</p> |
| <p class="tableblock"> Note: The half precision <strong>mix</strong> function can be implemented using contractions such as <strong>mad</strong> or <strong>fma</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>radians</strong> (gentype <em>degrees</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Converts <em>degrees</em> to radians, i.e. (π / 180) * <em>degrees</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>step</strong> (gentype <em>edge</em>, gentype <em>x</em>)<br> |
| gentype <strong>step</strong> (half <em>edge</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns 0.0 if <em>x</em> &lt; <em>edge</em>, otherwise it returns 1.0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>smoothstep</strong> (<br> |
| gentype <em>edge0</em>, gentype <em>edge1</em>, gentype <em>x</em>)</p> |
| <p class="tableblock"> gentype <strong>smoothstep</strong> (<br> |
| half <em>edge0</em>, half <em>edge1</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns 0.0 if <em>x</em> &lt;= <em>edge0</em> and 1.0 if <em>x</em> &gt;= <em>edge1</em> and performs |
| smooth Hermite interpolation between 0 and 1 when <em>edge0</em> &lt; <em>x</em> &lt; <em>edge1</em>. |
| This is useful in cases where you would want a threshold function with a |
| smooth transition.</p> |
| <p class="tableblock"> This is equivalent to:</p> |
| <p class="tableblock"> gentype <em>t</em>;<br> |
| <em>t</em> = clamp ((<em>x</em> - <em>edge0</em>) / (<em>edge1</em> - <em>edge0</em>), 0, 1);<br> |
| return <em>t</em> * <em>t</em> * (3 - 2 * <em>t</em>);<br></p> |
| <p class="tableblock"> Results are undefined if <em>edge0</em> >= <em>edge1</em>.</p> |
| <p class="tableblock"> Note: The half precision <strong>smoothstep</strong> function can be implemented using contractions such as <strong>mad</strong> or <strong>fma</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sign</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns 1.0 if <em>x</em> &gt; 0, -0.0 if <em>x</em> = -0.0, +0.0 if <em>x</em> = +0.0, or -1.0 if |
| <em>x</em> &lt; 0. |
| Returns 0.0 if <em>x</em> is a NaN.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-geometric-functions"><a class="anchor" href="#cl_khr_fp16-geometric-functions"></a>5.2.4. Geometric Functions</h4> |
| <div class="paragraph"> |
| <p>The built-in geometric functions defined in <em>table 6.13</em> (also listed below) |
| are extended to include appropriate versions of functions that take <code>half</code> |
| and <code>half{2|3|4}</code> as arguments and return values. |
| gentype now also includes <code>half</code>, <code>half2</code>, <code>half3</code> and <code>half4</code>. |
| These are described below.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note: The half precision geometric functions can be implemented using |
| contractions such as <strong>mad</strong> or <strong>fma</strong>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 4. <em>Half Precision Built-in Geometric Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>cross</strong> (half4 <em>p0</em>, half4 <em>p1</em>)<br> |
| half3 <strong>cross</strong> (half3 <em>p0</em>, half3 <em>p1</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the cross product of <em>p0.xyz</em> and <em>p1.xyz</em>. |
| The <em>w</em> component of the result will be 0.0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half <strong>dot</strong> (gentype <em>p0</em>, gentype <em>p1</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the dot product of <em>p0</em> and <em>p1</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half <strong>distance</strong> (gentype <em>p0</em>, gentype <em>p1</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the distance between <em>p0</em> and <em>p1</em>. |
| This is calculated as <strong>length</strong>(<em>p0</em> - <em>p1</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half <strong>length</strong> (gentype <em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the length of vector <em>p</em>, i.e.,<br> |
| sqrt( <em>p.x</em><sup>2</sup> + <em>p.y</em><sup>2</sup> + …​ )</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>normalize</strong> (gentype <em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a vector in the same direction as <em>p</em> but with a length of 1.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-relational-functions"><a class="anchor" href="#cl_khr_fp16-relational-functions"></a>5.2.5. Relational Functions</h4> |
| <div class="paragraph"> |
| <p>The scalar and vector relational functions described in <em>table 6.14</em> are |
| extended to include versions that take <code>half</code>, <code>half2</code>, <code>half3</code>, <code>half4</code>, |
| <code>half8</code> and <code>half16</code> as arguments.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The relational and equality operators (&lt;, &lt;=, &gt;, &gt;=, !=, ==) can be used |
| with <code>halfn</code> vector types and shall produce a vector <code>shortn</code> result as |
| described in <em>section 6.3</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The functions <strong>isequal</strong>, <strong>isnotequal</strong>, <strong>isgreater</strong>, <strong>isgreaterequal</strong>, |
| <strong>isless</strong>, <strong>islessequal</strong>, <strong>islessgreater</strong>, <strong>isfinite</strong>, <strong>isinf</strong>, <strong>isnan</strong>, |
| <strong>isnormal</strong>, <strong>isordered</strong>, <strong>isunordered</strong> and <strong>signbit</strong> shall return a 0 if the |
| specified relation is <em>false</em> and a 1 if the specified relation is <em>true</em> for |
| scalar argument types. |
| These functions shall return a 0 if the specified relation is <em>false</em> and a |
| -1 (i.e. all bits set) if the specified relation is <em>true</em> for vector |
| argument types.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The relational functions <strong>isequal</strong>, <strong>isgreater</strong>, <strong>isgreaterequal</strong>, <strong>isless</strong>, |
| <strong>islessequal</strong>, and <strong>islessgreater</strong> always return 0 if either argument is not |
| a number (NaN). |
| <strong>isnotequal</strong> returns 1 if one or both arguments are not a number (NaN) and |
| the argument type is a scalar and returns -1 if one or both arguments are |
| not a number (NaN) and the argument type is a vector.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The functions described in <em>table 6.14</em> are extended to include the <code>halfn</code> |
| vector types.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 5. <em>Half Precision Relational Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isequal</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isequal</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> == <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isnotequal</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isnotequal</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> != <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isgreater</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isgreater</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> > <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isgreaterequal</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isgreaterequal</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> >= <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isless</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isless</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> &lt; <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>islessequal</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>islessequal</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> &lt;= <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>islessgreater</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>islessgreater</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of (<em>x</em> &lt; <em>y</em>) || (<em>x</em> &gt; <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isfinite</strong> (half)<br> |
| short<em>n</em> <strong>isfinite</strong> (half<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for finite value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isinf</strong> (half)<br> |
| short<em>n</em> <strong>isinf</strong> (half<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for infinity value (positive or negative).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isnan</strong> (half)<br> |
| short<em>n</em> <strong>isnan</strong> (half<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for a NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isnormal</strong> (half)<br> |
| short<em>n</em> <strong>isnormal</strong> (half<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for a normal value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isordered</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isordered</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test if arguments are ordered. |
| <strong>isordered</strong>() takes arguments <em>x</em> and <em>y</em>, and returns the result |
| <strong>isequal</strong>(<em>x</em>, <em>x</em>) && <strong>isequal</strong>(<em>y</em>, <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isunordered</strong> (half <em>x</em>, half <em>y</em>)<br> |
| short<em>n</em> <strong>isunordered</strong> (half<em>n x</em>, half<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test if arguments are unordered. |
| <strong>isunordered</strong>() takes arguments <em>x</em> and <em>y</em>, returning non-zero if <em>x</em> or |
| <em>y</em> is a NaN, and zero otherwise.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>signbit</strong> (half)<br> |
| short<em>n</em> <strong>signbit</strong> (half<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for sign bit. |
| The scalar version of the function returns a 1 if the sign bit in the half |
| is set else returns 0. |
| The vector version of the function returns the following for each |
| component in half<em>n</em>: -1 (i.e. all bits set) if the sign bit in the half |
| is set else returns 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>bitselect</strong> (half<em>n a</em>, half<em>n b</em>, half<em>n c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Each bit of the result is the corresponding bit of <em>a</em> if the |
| corresponding bit of <em>c</em> is 0. |
| Otherwise it is the corresponding bit of <em>b</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half<em>n</em> <strong>select</strong> (half<em>n a</em>, half<em>n b</em>, short<em>n</em> <em>c</em>)<br> |
| half<em>n</em> <strong>select</strong> (half<em>n a</em>, half<em>n b</em>, ushort<em>n</em> <em>c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">For each component,<br> |
| <em>result[i]</em> = if MSB of <em>c[i]</em> is set ? <em>b[i]</em> : <em>a[i]</em>.<br></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-vector-data-load-and-store-functions"><a class="anchor" href="#cl_khr_fp16-vector-data-load-and-store-functions"></a>5.2.6. Vector Data Load and Store Functions</h4> |
| <div class="paragraph"> |
| <p>The vector data load (<strong>vload<em>n</em></strong>) and store (<strong>vstore<em>n</em></strong>) functions |
| described in <em>table 6.15</em> (also listed below) are extended to include |
| versions that read or write half vector values. |
| The generic type <code>gentype</code> is extended to include <code>half</code>. |
| The generic type <code>gentypen</code> is extended to include <code>half2</code>, <code>half3</code>, |
| <code>half4</code>, <code>half8</code>, and <code>half16</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note: <strong>vload3</strong> reads <em>x</em>, <em>y</em>, <em>z</em> components from address |
| (<em>p</em> + (<em>offset</em> * 3)) into a 3-component vector and <strong>vstore3</strong> writes <em>x</em>, <em>y</em>, <em>z</em> |
| components from a 3-component vector to address (<em>p</em> + (<em>offset</em> * 3)).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 6. <em>Half Precision Vector Data Load and Store Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __global gentype *<em>p</em>)<br> |
| gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __local gentype *<em>p</em>)<br> |
| gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __constant gentype *<em>p</em>)<br> |
| gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __private gentype *<em>p</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const gentype *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return sizeof (gentype<em>n</em>) bytes of data read from address |
| (<em>p</em> + (<em>offset * n</em>)). |
| If gentype is half, the read address computed as (<em>p</em> + (<em>offset * n</em>)) |
| must be 16-bit aligned.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, __global gentype *<em>p</em>)<br> |
| void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, __local gentype *<em>p</em>)<br> |
| void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, __private gentype *<em>p</em>)<br></p> |
| <p class="tableblock"> For OpenCL C 2.0 or with the <code>__opencl_c_generic_address_space</code> |
| feature macro:<br></p> |
| <p class="tableblock"> void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, gentype *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write sizeof (gentype<em>n</em>) bytes given by <em>data</em> to address |
| (<em>p</em> + (<em>offset * n</em>)). |
| If gentype is half, the write address computed as (<em>p</em> + (<em>offset * n</em>)) |
| must be 16-bit aligned.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch"><a class="anchor" href="#cl_khr_fp16-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch"></a>5.2.7. Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch</h4> |
| <div class="paragraph"> |
| <p>The OpenCL C programming language implements the following functions that |
| provide asynchronous copies between global and local memory and a prefetch |
| from global memory.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The generic type <code>gentype</code> is extended to include <code>half</code>, <code>half2</code>, <code>half3</code>, |
| <code>half4</code>, <code>half8</code>, and <code>half16</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 7. <em>Half Precision Built-in Async Copy and Prefetch Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">event_t <strong>async_work_group_copy</strong> (<br> |
| __local gentype *<em>dst</em>,<br> |
| const __global gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>, event_t <em>event</em>)</p> |
| <p class="tableblock"> event_t <strong>async_work_group_copy</strong> (<br> |
| __global gentype *<em>dst</em>,<br> |
| const __local gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>, event_t <em>event</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Perform an async copy of <em>num_gentypes</em> gentype elements from <em>src</em> to |
| <em>dst</em>. |
| The async copy is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; otherwise |
| the results are undefined.</p> |
| <p class="tableblock"> Returns an event object that can be used by <strong>wait_group_events</strong> to wait |
| for the async copy to finish. |
| The <em>event</em> argument can also be used to associate the |
| <strong>async_work_group_copy</strong> with a previous async copy allowing an event to be |
| shared by multiple async copies; otherwise <em>event</em> should be zero.</p> |
| <p class="tableblock"> If the <em>event</em> argument is not zero, the event object supplied in the |
| <em>event</em> argument will be returned.</p> |
| <p class="tableblock"> This function does not perform any implicit synchronization of source data |
| such as using a <strong>barrier</strong> before performing the copy.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">event_t <strong>async_work_group_strided_copy</strong> (<br> |
| __local gentype *<em>dst</em>,<br> |
| const __global gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>,<br> |
| size_t <em>src_stride</em>, event_t <em>event</em>)</p> |
| <p class="tableblock"> event_t <strong>async_work_group_strided_copy</strong> (<br> |
| __global gentype *<em>dst</em>,<br> |
| const __local gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>,<br> |
| size_t <em>dst_stride</em>, event_t <em>event</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Perform an async gather of <em>num_gentypes</em> gentype elements from <em>src</em> to |
| <em>dst</em>. |
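| The <em>src_stride</em> is the stride in elements for each gentype element read |
| from <em>src</em>. |
| The <em>dst_stride</em> is the stride in elements for each gentype element |
| written to <em>dst</em>. |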
| The async gather is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; otherwise |
| the results are undefined.</p> |
| <p class="tableblock"> Returns an event object that can be used by <strong>wait_group_events</strong> to wait |
| for the async copy to finish. |
| The <em>event</em> argument can also be used to associate the |
| <strong>async_work_group_strided_copy</strong> with a previous async copy allowing an |
| event to be shared by multiple async copies; otherwise <em>event</em> should be |
| zero.</p> |
| <p class="tableblock"> If the <em>event</em> argument is not zero, the event object supplied in the |
| <em>event</em> argument will be returned.</p> |
| <p class="tableblock"> This function does not perform any implicit synchronization of source data |
| such as using a <strong>barrier</strong> before performing the copy.</p> |
| <p class="tableblock"> The behavior of <strong>async_work_group_strided_copy</strong> is undefined if |
| <em>src_stride</em> or <em>dst_stride</em> is 0, or if the <em>src_stride</em> or <em>dst_stride</em> |
| values cause the <em>src</em> or <em>dst</em> pointers to exceed the upper bounds of the |
| address space during the copy.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>wait_group_events</strong> (<br> |
| int <em>num_events</em>, event_t *<em>event_list</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Wait for events that identify the <strong>async_work_group_copy</strong> operations to |
| complete. |
| The event objects specified in <em>event_list</em> will be released after the |
| wait is performed.</p> |
| <p class="tableblock"> This function must be encountered by all work-items in a work-group |
| executing the kernel with the same <em>num_events</em> and event objects |
| specified in <em>event_list</em>; otherwise the results are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>prefetch</strong> (<br> |
| const __global gentype *<em>p</em>, size_t <em>num_gentypes</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Prefetch <em>num_gentypes</em> * sizeof(gentype) bytes into the global cache. |
| The prefetch instruction is applied to a work-item in a work-group and |
| does not affect the functional behavior of the kernel.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-image-read-and-write-functions"><a class="anchor" href="#cl_khr_fp16-image-read-and-write-functions"></a>5.2.8. Image Read and Write Functions</h4> |
| <div class="paragraph"> |
| <p>The image read and write functions defined in <em>tables 6.23</em>, <em>6.24</em> and |
| <em>6.25</em> are extended to support image color values that are a <code>half</code> type.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_built_in_image_read_functions"><a class="anchor" href="#_built_in_image_read_functions"></a>5.2.9. Built-in Image Read Functions</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 8. <em>Half Precision Built-in Image Read Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| read_only image2d_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| int2 <em>coord</em>)</p> |
| <p class="tableblock"> half4 <strong>read_imageh</strong> (<br> |
| read_only image2d_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| float2 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>(coord.x, coord.y)</em> to do an element lookup in the 2D |
| image object specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats, CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The <strong>read_imageh</strong> calls that take integer coordinates must use a sampler |
| with filter mode set to CLK_FILTER_NEAREST, normalized coordinates set to |
| CLK_NORMALIZED_COORDS_FALSE and addressing mode set to |
| CLK_ADDRESS_CLAMP_TO_EDGE, CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; |
| otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above |
| are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| read_only image3d_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| int4 <em>coord</em> )</p> |
| <p class="tableblock"> half4 <strong>read_imageh</strong> (<br> |
| read_only image3d_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| float4 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>(coord.x</em>, <em>coord.y</em>, <em>coord.z)</em> to do an |
| element lookup in the 3D image object specified by <em>image</em>. <em>coord.w</em> is |
| ignored.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The <strong>read_imageh</strong> calls that take integer coordinates must use a sampler |
| with filter mode set to CLK_FILTER_NEAREST, normalized coordinates set to |
| CLK_NORMALIZED_COORDS_FALSE and addressing mode set to |
| CLK_ADDRESS_CLAMP_TO_EDGE, CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; |
| otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description are |
| undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| read_only image2d_array_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| int4 <em>coord</em>)</p> |
| <p class="tableblock"> half4 <strong>read_imageh</strong> (<br> |
| read_only image2d_array_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| float4 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord.xy</em> to do an element lookup in the 2D image identified by |
| <em>coord.z</em> in the 2D image array specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with image_channel_data_type set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with image_channel_data_type set |
| to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with image_channel_data_type set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The <strong>read_imageh</strong> calls that take integer coordinates must use a sampler |
| with filter mode set to CLK_FILTER_NEAREST, normalized coordinates set to |
| CLK_NORMALIZED_COORDS_FALSE and addressing mode set to |
| CLK_ADDRESS_CLAMP_TO_EDGE, CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; |
| otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| image_channel_data_type values not specified in the description above are |
| undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| read_only image1d_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| int <em>coord</em>)</p> |
| <p class="tableblock"> half4 <strong>read_imageh</strong> (<br> |
| read_only image1d_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| float <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord</em> to do an element lookup in the 1D image object specified by |
| <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The <strong>read_imageh</strong> calls that take integer coordinates must use a sampler |
| with filter mode set to CLK_FILTER_NEAREST, normalized coordinates set to |
| CLK_NORMALIZED_COORDS_FALSE and addressing mode set to |
| CLK_ADDRESS_CLAMP_TO_EDGE, CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; |
| otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above |
| are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| read_only image1d_array_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| int2 <em>coord</em>)</p> |
| <p class="tableblock"> half4 <strong>read_imageh</strong> (<br> |
| read_only image1d_array_t <em>image</em>,<br> |
| sampler_t <em>sampler</em>,<br> |
| float2 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord.x</em> to do an element lookup in the 1D image identified by |
| <em>coord.y</em> in the 1D image array specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with image_channel_data_type set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with image_channel_data_type set |
| to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with image_channel_data_type set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The <strong>read_imageh</strong> calls that take integer coordinates must use a sampler |
| with filter mode set to CLK_FILTER_NEAREST, normalized coordinates set to |
| CLK_NORMALIZED_COORDS_FALSE and addressing mode set to |
| CLK_ADDRESS_CLAMP_TO_EDGE, CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE; |
| otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| image_channel_data_type values not specified in the description above are |
| undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_built_in_image_sampler_less_read_functions"><a class="anchor" href="#_built_in_image_sampler_less_read_functions"></a>5.2.10. Built-in Image Sampler-less Read Functions</h4> |
| <div class="paragraph"> |
| <p><em>aQual</em> in Table 6.24 refers to one of the access qualifiers. |
| For sampler-less read functions this may be <em>read_only</em> or <em>read_write</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 9. <em>Half Precision Built-in Image Sampler-less Read Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| <em>aQual</em> image2d_t <em>image</em>,<br> |
| int2 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>(coord.x, coord.y)</em> to do an element lookup in the 2D |
| image object specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above |
| are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| <em>aQual</em> image3d_t <em>image</em>,<br> |
| int4 <em>coord</em> )</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>(coord.x</em>, <em>coord.y</em>, <em>coord.z)</em> to do an element |
| lookup in the 3D image object specified by <em>image</em>. <em>coord.w</em> is ignored.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description are |
| undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| <em>aQual</em> image2d_array_t <em>image</em>,<br> |
| int4 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord.xy</em> to do an element lookup in the 2D image identified by |
| <em>coord.z</em> in the 2D image array specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above |
| are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| <em>aQual</em> image1d_t <em>image</em>,<br> |
| int <em>coord</em>)</p> |
| <p class="tableblock"> half4 <strong>read_imageh</strong> (<br> |
| <em>aQual</em> image1d_buffer_t <em>image</em>,<br> |
| int <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord</em> to do an element lookup in the 1D image or 1D image buffer |
| object specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above |
| are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">half4 <strong>read_imageh</strong> (<br> |
| <em>aQual</em> image1d_array_t <em>image</em>,<br> |
| int2 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord.x</em> to do an element lookup in the 1D image identified by |
| <em>coord.y</em> in the 1D image array specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [0.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> set |
| to one of the pre-defined packed formats or CL_UNORM_INT8, or |
| CL_UNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values in the range |
| [-1.0 …​ 1.0] for image objects created with <em>image_channel_data_type</em> |
| set to CL_SNORM_INT8, or CL_SNORM_INT16.</p> |
| <p class="tableblock"> <strong>read_imageh</strong> returns half precision floating-point values for image |
| objects created with <em>image_channel_data_type</em> set to CL_HALF_FLOAT.</p> |
| <p class="tableblock"> Values returned by <strong>read_imageh</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above |
| are undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_built_in_image_write_functions"><a class="anchor" href="#_built_in_image_write_functions"></a>5.2.11. Built-in Image Write Functions</h4> |
| <div class="paragraph"> |
| <p><em>aQual</em> in Table 6.25 refers to one of the access qualifiers. |
| For write functions this may be <em>write_only</em> or <em>read_write</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 10. <em>Half Precision Built-in Image Write Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imageh</strong> (<br> |
| <em>aQual</em> image2d_t <em>image</em>,<br> |
| int2 <em>coord</em>,<br> |
| half4 <em>color</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.xy</em> in the 2D image |
| specified by <em>image</em>.</p> |
| <p class="tableblock"> Appropriate data format conversion to the specified image format is done |
| before writing the color value. <em>x</em> and <em>y</em> are considered to be |
| unnormalized coordinates and must be in the range 0 …​ width - 1, and 0 |
| …​ height - 1.</p> |
| <p class="tableblock"> <strong>write_imageh</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the pre-defined packed formats or |
| set to CL_SNORM_INT8, CL_UNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16 or |
| CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The behavior of <strong>write_imageh</strong> for image objects created with |
| <em>image_channel_data_type</em> values not specified in the description above or |
| with (<em>x</em>, <em>y</em>) coordinate values that are not in the range (0 …​ width - |
| 1, 0 …​ height - 1) respectively, is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imageh</strong> (<br> |
| <em>aQual</em> image2d_array_t <em>image</em>,<br> |
| int4 <em>coord</em>,<br> |
| half4 <em>color</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.xy</em> in the 2D image |
| identified by <em>coord.z</em> in the 2D image array specified by <em>image</em>.</p> |
| <p class="tableblock"> Appropriate data format conversion to the specified image format is done |
| before writing the color value. <em>coord.x</em>, <em>coord.y</em> and <em>coord.z</em> are |
| considered to be unnormalized coordinates and must be in the range 0 …​ |
| image width - 1, 0 …​ image height - 1 and 0 …​ image number of layers - |
| 1.</p> |
| <p class="tableblock"> <strong>write_imageh</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the pre-defined packed formats or |
| set to CL_SNORM_INT8, CL_UNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16 or |
| CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The behavior of <strong>write_imageh</strong> for image objects created with |
| <em>image_channel_data_type</em> values not specified in the description above or |
| with (<em>x</em>, <em>y, z</em>) coordinate values that are not in the range (0 …​ |
| image width - 1, 0 …​ image height - 1, 0 …​ image number of layers - |
| 1), respectively, is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imageh</strong> (<br> |
| <em>aQual</em> image1d_t <em>image</em>,<br> |
| int <em>coord</em>,<br> |
| half4 <em>color</em>)</p> |
| <p class="tableblock"> void <strong>write_imageh</strong> (<br> |
| <em>aQual</em> image1d_buffer_t <em>image</em>,<br> |
| int <em>coord</em>,<br> |
| half4 <em>color</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord</em> in the 1D image or 1D |
| image buffer object specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done |
| before writing the color value. |
| <em>coord</em> is considered to be an unnormalized coordinate and must be in the |
| range 0 …​ image width - 1.</p> |
| <p class="tableblock"> <strong>write_imageh</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the pre-defined packed formats or |
| set to CL_SNORM_INT8, CL_UNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16 or |
| CL_HALF_FLOAT. |
| Appropriate data format conversion will be done to convert channel data |
| from a floating-point value to actual data format in which the channels |
| are stored.</p> |
| <p class="tableblock"> The behavior of <strong>write_imageh</strong> for image objects created with |
| <em>image_channel_data_type</em> values not specified in the description above or |
| with coordinate values that are not in the range (0 …​ image width - 1), |
| is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imageh</strong> (<br> |
| <em>aQual</em> image1d_array_t <em>image</em>,<br> |
| int2 <em>coord</em>,<br> |
| half4 <em>color</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.x</em> in the 1D image |
| identified by <em>coord.y</em> in the 1D image array specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done |
| before writing the color value. <em>coord.x</em> and <em>coord.y</em> are considered to |
| be unnormalized coordinates and must be in the range 0 …​ image width - 1 |
| and 0 …​ image number of layers - 1.</p> |
| <p class="tableblock"> <strong>write_imageh</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the pre-defined packed formats or |
| set to CL_SNORM_INT8, CL_UNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16 or |
| CL_HALF_FLOAT. |
| Appropriate data format conversion will be done to convert channel data |
| from a floating-point value to actual data format in which the channels |
| are stored.</p> |
| <p class="tableblock"> The behavior of <strong>write_imageh</strong> for image objects created with |
| <em>image_channel_data_type</em> values not specified in the description above or |
| with (<em>x</em>, <em>y</em>) coordinate values that are not in the range (0 …​ image |
| width - 1, 0 …​ image number of layers - 1), respectively, is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imageh</strong> (<br> |
| <em>aQual</em> image3d_t <em>image</em>,<br> |
| int4 <em>coord</em>,<br> |
| half4 <em>color</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.xyz</em> in the 3D image |
| object specified by <em>image</em>.</p> |
| <p class="tableblock"> Appropriate data format conversion to the specified image format is done |
| before writing the color value. |
| <em>coord.x</em>, <em>coord.y</em> and <em>coord.z</em> are considered to be unnormalized coordinates |
| and must be in the range 0 …​ image width - 1, 0 …​ image height - 1 and |
| 0 …​ image depth - 1.</p> |
| <p class="tableblock"> <strong>write_imageh</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the pre-defined packed formats or |
| set to CL_SNORM_INT8, CL_UNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16 or |
| CL_HALF_FLOAT.</p> |
| <p class="tableblock"> The behavior of <strong>write_imageh</strong> for image objects created with |
| <em>image_channel_data_type</em> values not specified in the description above or |
| with (<em>x</em>, <em>y</em>, <em>z</em>) coordinate values that are not in the range (0 …​ image |
| width - 1, 0 …​ image height - 1, 0 …​ image depth - 1), respectively, |
| is undefined.</p> |
| <p class="tableblock"> Note: This built-in function is only available if the |
| cl_khr_3d_image_writes extension is also supported by the device.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-ieee754-compliance"><a class="anchor" href="#cl_khr_fp16-ieee754-compliance"></a>5.2.12. IEEE754 Compliance</h4> |
| <div class="paragraph"> |
| <p>The following table entry describes the additions to <em>table 4.3,</em> which |
| allows applications to query the configuration information using |
| <strong>clGetDeviceInfo</strong> for an OpenCL device that supports half precision |
| floating-point.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Op-code</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_HALF_FP_CONFIG</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_device_fp_config</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Describes half precision floating-point capability of the OpenCL device. |
| This is a bit-field that describes one or more of the following values:</p> |
| <p class="tableblock"> CL_FP_DENORM — denorms are supported</p> |
| <p class="tableblock"> CL_FP_INF_NAN — INF and NaNs are supported</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_NEAREST — round to nearest even rounding mode supported</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_ZERO — round to zero rounding mode supported</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_INF — round to positive and negative infinity rounding |
| modes supported</p> |
| <p class="tableblock"> CL_FP_FMA — IEEE754-2008 fused multiply-add is supported</p> |
| <p class="tableblock"> CL_FP_SOFT_FLOAT — Basic floating-point operations (such as addition, |
| subtraction, multiplication) are implemented in software.</p> |
| <p class="tableblock"> The required minimum half precision floating-point capability as |
| implemented by this extension is:</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_ZERO, or CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-rounding-modes"><a class="anchor" href="#cl_khr_fp16-rounding-modes"></a>5.2.13. Rounding Modes</h4> |
| <div class="paragraph"> |
| <p>If CL_FP_ROUND_TO_NEAREST is supported, the default rounding mode for |
| half-precision floating-point operations will be round to nearest even; |
| otherwise the default rounding mode will be round to zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Conversions to half floating point format must be correctly rounded using |
| the indicated <code>convert</code> operator rounding mode or the default rounding mode |
| for half-precision floating-point operations if no rounding mode is |
| specified by the operator, or a C-style cast is used.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Conversions from half to integer format shall correctly round using the |
| indicated <code>convert</code> operator rounding mode, or towards zero if no rounding |
| mode is specified by the operator or a C-style cast is used. |
| All conversions from half to floating point formats are exact.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-relative-error-as-ulps"><a class="anchor" href="#cl_khr_fp16-relative-error-as-ulps"></a>5.2.14. Relative Error as ULPs</h4> |
| <div class="paragraph"> |
| <p>In this section we discuss the maximum relative error defined as <em>ulp</em> |
| (units in the last place).</p> |
| </div> |
| <div class="paragraph"> |
| <p>Addition, subtraction, multiplication and fused multiply-add operations on half |
| types are required to be correctly rounded using the default rounding mode |
| for half-precision floating-point operations.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The following table describes the minimum accuracy of half precision |
| floating-point arithmetic operations given as ULP values. |
| 0 ULP is used for math functions that do not require rounding. |
| The reference value used to compute the ULP value of an arithmetic operation |
| is the infinitely precise result.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 11. <em>ULP Values for Half Precision Floating-Point Arithmetic Operations</em></caption> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Min Accuracy - Full Profile</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Min Accuracy - Embedded Profile</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> + <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> - <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> * <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>1.0 / <em>x</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 1 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> / <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 1 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>acos</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>acosh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>acospi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>asin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>asinh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>asinpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atan</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atanh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atanpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atan2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atan2pi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cbrt</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>ceil</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clamp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>copysign</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cos</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cosh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cospi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cross</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">absolute error tolerance of 'max * max * (3 * HLF_EPSILON)' per vector component, where <em>max</em> is the maximum input operand magnitude</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>degrees</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>distance</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2n ulp, for gentype with vector width <em>n</em></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>dot</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">absolute error tolerance of 'max * max * (2n - 1) * HLF_EPSILON', for vector width <em>n</em> and maximum input operand magnitude <em>max</em> across all vector components</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>erfc</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>erf</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>exp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>exp2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>exp10</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>expm1</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fabs</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fdim</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>floor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fma</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fmax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fmin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fmod</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fract</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>frexp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>hypot</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>ilogb</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>ldexp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>length</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 0.25 + 0.5n ulp, for gentype with vector width <em>n</em></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log10</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log1p</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>logb</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>mad</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>max</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>maxmag</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>min</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>minmag</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>mix</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>modf</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>nan</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>nextafter</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>normalize</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 1 + n ulp, for gentype with vector width <em>n</em></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>pow(x, y)</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>pown(x, y)</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>powr(x, y)</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>radians</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>remainder</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>remquo</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp for the remainder, at least the lower 7 bits of the integral quotient</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp for the remainder, at least the lower 7 bits of the integral quotient</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>rint</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>rootn</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>round</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>rsqrt</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">&lt;= 1 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">&lt;= 1 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sign</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sincos</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp for sine and cosine values</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp for sine and cosine values</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sinh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sinpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>smoothstep</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sqrt</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 1 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>step</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tan</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tanh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tanpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tgamma</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>trunc</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: <em>Implementations may perform floating-point operations on</em> <code>half</code> |
| <em>scalar or vector data types by converting the</em> <code>half</code> <em>values to single |
| precision floating-point values and performing the operation in single |
| precision floating-point. |
| In this case, the implementation will use the</em> <code>half</code> <em>scalar or vector data |
| type as a storage only format</em>.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_fp16-additions-to-chapter-8-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_fp16-additions-to-chapter-8-of-the-opencl-2.0-specification"></a>5.3. Additions to Chapter 8 of the OpenCL 2.0 C Specification</h3> |
| <div class="paragraph"> |
| <p>Add new sub-sections to <em>section 8.3.1. Conversion rules for normalized integer channel data types</em>:</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-converting-normalized-integer-channel-data-types-to-floating-point-values"><a class="anchor" href="#cl_khr_fp16-converting-normalized-integer-channel-data-types-to-floating-point-values"></a>5.3.1. Converting normalized integer channel data types to half precision floating-point values</h4> |
| <div class="paragraph"> |
| <p>For images created with image channel data type of <code>CL_UNORM_INT8</code> and |
| <code>CL_UNORM_INT16</code>, <strong>read_imagef</strong> will convert the channel values from an |
| 8-bit or 16-bit unsigned integer to normalized half precision |
| floating-point values in the range [<code>0.0h</code>, <code>1.0h</code>].</p> |
| </div> |
| <div class="paragraph"> |
| <p>For images created with image channel data type of <code>CL_SNORM_INT8</code> and |
| <code>CL_SNORM_INT16</code>, <strong>read_imagef</strong> will convert the channel values from an |
| 8-bit or 16-bit signed integer to normalized half precision floating-point |
| values in the range [<code>-1.0h</code>, <code>1.0h</code>].</p> |
| </div> |
| <div class="paragraph"> |
| <p>These conversions are performed as follows:</p> |
| </div> |
| <div class="paragraph"> |
| <p><code>CL_UNORM_INT8</code> (8-bit unsigned integer) → <code>half</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>normalized <code>half</code> value = <code>round_to_half(c / 255)</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>CL_UNORM_INT_101010</code> (10-bit unsigned integer) → <code>half</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>normalized <code>half</code> value = <code>round_to_half(c / 1023)</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>CL_UNORM_INT16</code> (16-bit unsigned integer) → <code>half</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>normalized <code>half</code> value = <code>round_to_half(c / 65535)</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>CL_SNORM_INT8</code> (8-bit signed integer) → <code>half</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>normalized <code>half</code> value = <code>max(-1.0h, round_to_half(c / 127))</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>CL_SNORM_INT16</code> (16-bit signed integer) → <code>half</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>normalized <code>half</code> value = <code>max(-1.0h, round_to_half(c / 32767))</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The accuracy of the above conversions must be <= 1.5 ulp except for the |
| following cases.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For <code>CL_UNORM_INT8</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>0 must convert to <code>0.0h</code> and</p> |
| </li> |
| <li> |
| <p>255 must convert to <code>1.0h</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>For <code>CL_UNORM_INT_101010</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>0 must convert to <code>0.0h</code> and</p> |
| </li> |
| <li> |
| <p>1023 must convert to <code>1.0h</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>For <code>CL_UNORM_INT16</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>0 must convert to <code>0.0h</code> and</p> |
| </li> |
| <li> |
| <p>65535 must convert to <code>1.0h</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>For <code>CL_SNORM_INT8</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>-128 and -127 must convert to <code>-1.0h</code>,</p> |
| </li> |
| <li> |
| <p>0 must convert to <code>0.0h</code> and</p> |
| </li> |
| <li> |
| <p>127 must convert to <code>1.0h</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>For <code>CL_SNORM_INT16</code></p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>-32768 and -32767 must convert to <code>-1.0h</code>,</p> |
| </li> |
| <li> |
| <p>0 must convert to <code>0.0h</code> and</p> |
| </li> |
| <li> |
| <p>32767 must convert to <code>1.0h</code></p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp16-converting-floating-point-values-to-normalized-integer-channel-data-types"><a class="anchor" href="#cl_khr_fp16-converting-floating-point-values-to-normalized-integer-channel-data-types"></a>5.3.2. Converting half precision floating-point values to normalized integer channel data types</h4> |
| <div class="paragraph"> |
| <p>For images created with image channel data type of <code>CL_UNORM_INT8</code> and |
| <code>CL_UNORM_INT16</code>, <strong>write_imagef</strong> will convert the floating-point color value |
| to an 8-bit or 16-bit unsigned integer.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For images created with image channel data type of <code>CL_SNORM_INT8</code> and |
| <code>CL_SNORM_INT16</code>, <strong>write_imagef</strong> will convert the floating-point color value |
| to an 8-bit or 16-bit signed integer.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The preferred conversion uses the round to nearest even (<code>_rte</code>) rounding |
| mode, but OpenCL implementations may choose to approximate the rounding mode |
| used in the conversions described below. |
| When approximate rounding is used instead of the preferred rounding, |
| the result of the conversion must satisfy the bound given below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><code>half</code> → <code>CL_UNORM_INT8</code> (8-bit unsigned integer)</p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>Let f<sub>exact</sub> = <strong>max</strong>(<code>0</code>, <strong>min</strong>(<code>f * 255</code>, <code>255</code>))</p> |
| </li> |
| <li> |
| <p>Let f<sub>preferred</sub> = <strong>convert_uchar_sat_rte</strong>(<code>f * 255.0f</code>)</p> |
| </li> |
| <li> |
| <p>Let f<sub>approx</sub> = <strong>convert_uchar_sat_&lt;impl-rounding-mode&gt;</strong>(<code>f * 255.0f</code>)</p> |
| </li> |
| <li> |
| <p><strong>fabs</strong>(f<sub>exact</sub> - f<sub>approx</sub>) must be <= 0.6</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>half</code> → <code>CL_UNORM_INT_101010</code> (10-bit unsigned integer)</p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>Let f<sub>exact</sub> = <strong>max</strong>(<code>0</code>, <strong>min</strong>(<code>f * 1023</code>, <code>1023</code>))</p> |
| </li> |
| <li> |
| <p>Let f<sub>preferred</sub> = <strong>min</strong>(<strong>convert_ushort_sat_rte</strong>(<code>f * 1023.0f</code>), <code>1023</code>)</p> |
| </li> |
| <li> |
| <p>Let f<sub>approx</sub> = <strong>convert_ushort_sat_&lt;impl-rounding-mode&gt;</strong>(<code>f * 1023.0f</code>)</p> |
| </li> |
| <li> |
| <p><strong>fabs</strong>(f<sub>exact</sub> - f<sub>approx</sub>) must be <= 0.6</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>half</code> → <code>CL_UNORM_INT16</code> (16-bit unsigned integer)</p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>Let f<sub>exact</sub> = <strong>max</strong>(<code>0</code>, <strong>min</strong>(<code>f * 65535</code>, <code>65535</code>))</p> |
| </li> |
| <li> |
| <p>Let f<sub>preferred</sub> = <strong>convert_ushort_sat_rte</strong>(<code>f * 65535.0f</code>)</p> |
| </li> |
| <li> |
| <p>Let f<sub>approx</sub> = <strong>convert_ushort_sat_&lt;impl-rounding-mode&gt;</strong>(<code>f * 65535.0f</code>)</p> |
| </li> |
| <li> |
| <p><strong>fabs</strong>(f<sub>exact</sub> - f<sub>approx</sub>) must be <= 0.6</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>half</code> → <code>CL_SNORM_INT8</code> (8-bit signed integer)</p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>Let f<sub>exact</sub> = <strong>max</strong>(<code>-128</code>, <strong>min</strong>(<code>f * 127</code>, <code>127</code>))</p> |
| </li> |
| <li> |
| <p>Let f<sub>preferred</sub> = <strong>convert_char_sat_rte</strong>(<code>f * 127.0f</code>)</p> |
| </li> |
| <li> |
| <p>Let f<sub>approx</sub> = <strong>convert_char_sat_&lt;impl-rounding-mode&gt;</strong>(<code>f * 127.0f</code>)</p> |
| </li> |
| <li> |
| <p><strong>fabs</strong>(f<sub>exact</sub> - f<sub>approx</sub>) must be <= 0.6</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><code>half</code> → <code>CL_SNORM_INT16</code> (16-bit signed integer)</p> |
| </div> |
| <div class="ulist none"> |
| <ul class="none"> |
| <li> |
| <p>Let f<sub>exact</sub> = <strong>max</strong>(<code>-32768</code>, <strong>min</strong>(<code>f * 32767</code>, <code>32767</code>))</p> |
| </li> |
| <li> |
| <p>Let f<sub>preferred</sub> = <strong>convert_short_sat_rte</strong>(<code>f * 32767.0f</code>)</p> |
| </li> |
| <li> |
| <p>Let f<sub>approx</sub> = <strong>convert_short_sat_&lt;impl-rounding-mode&gt;</strong>(<code>f * 32767.0f</code>)</p> |
| </li> |
| <li> |
| <p><strong>fabs</strong>(f<sub>exact</sub> - f<sub>approx</sub>) must be <= 0.6</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_fp64"><a class="anchor" href="#cl_khr_fp64"></a>6. Double Precision Floating-Point</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_fp64</strong> extension. |
| This extension became an optional core feature in OpenCL 1.2.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_5"><a class="anchor" href="#_general_information_5"></a>6.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_5"><a class="anchor" href="#_version_history_5"></a>6.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_fp64-additions-to-chapter-6"><a class="anchor" href="#cl_khr_fp64-additions-to-chapter-6"></a>6.2. Additions to Chapter 6</h3> |
| <div class="paragraph"> |
| <p>The lists of built-in scalar and vector data types defined in <em>tables 6.1</em> |
| and <em>6.2</em> are extended to include the following:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 75%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A double precision float.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2-component double-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double3</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 3-component double-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double4</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 4-component double-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double8</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">An 8-component double-precision floating-point vector.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double16</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 16-component double-precision floating-point vector.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The built-in scalar and vector data types for <code>doublen</code> are also declared as appropriate |
| types in the OpenCL API (and header files) that can be used by an |
| application. |
| The following table describes the built-in scalar and vector data types for <code>doublen</code> as |
| defined in the OpenCL C programming language and the corresponding data type |
| available to the application:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Type in OpenCL Language</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>API type for application</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_double</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_double2</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double3</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_double3</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double4</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_double4</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double8</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_double8</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>double16</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_double16</strong></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The double data type must conform to the IEEE-754 double precision storage format.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The following text is added to <em>Section 6.1.1.1 The half data type</em>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>Conversions from double to half are correctly rounded. |
| Conversions from half to double are lossless.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-conversions"><a class="anchor" href="#cl_khr_fp64-conversions"></a>6.2.1. Conversions</h4> |
| <div class="paragraph"> |
| <p>The implicit conversion rules specified in <em>section 6.2.1</em> now include the |
| <code>double</code> scalar and <code>doublen</code> vector data types.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The explicit casts described in <em>section 6.2.2</em> are extended to take a |
| <code>double</code> scalar data type and a <code>doublen</code> vector data type.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The explicit conversion functions described in <em>section 6.2.3</em> are extended |
| to take a <code>double</code> scalar data type and a <code>doublen</code> vector data type.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <code>as_typen()</code> function for re-interpreting types as described in <em>section |
| 6.2.4.2</em> is extended to allow conversion-free casts between <code>longn</code>, |
| <code>ulongn</code> and <code>doublen</code> scalar and vector data types.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-math-functions"><a class="anchor" href="#cl_khr_fp64-math-functions"></a>6.2.2. Math Functions</h4> |
| <div class="paragraph"> |
| <p>The built-in math functions defined in <em>table 6.8</em> (also listed below) are |
| extended to include appropriate versions of functions that take <code>double</code> and |
| <code>double{2|3|4|8|16}</code> as arguments and return values. |
| <code>gentype</code> now also includes <code>double</code>, <code>double2</code>, <code>double3</code>, <code>double4</code>, <code>double8</code> and |
| <code>double16</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For any specific use of a function, the actual type has to be the same for |
| all arguments and the return type.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 12. <em>Double Precision Built-in Math Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>acos</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc cosine function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>acosh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Inverse hyperbolic cosine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>acospi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>acos</strong> (<em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>asin</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc sine function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>asinh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Inverse hyperbolic sine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>asinpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>asin</strong> (<em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atan</strong> (gentype <em>y_over_x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc tangent function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atan2</strong> (gentype <em>y</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Arc tangent of <em>y</em> / <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atanh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Hyperbolic arc tangent.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atanpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>atan</strong> (<em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>atan2pi</strong> (gentype <em>y</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>atan2</strong> (<em>y</em>, <em>x</em>) / π.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cbrt</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute cube-root.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>ceil</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value using the round to positive infinity rounding |
| mode.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>copysign</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>x</em> with its sign changed to match the sign of <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cos</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute cosine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cosh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute hyperbolic cosine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>cospi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>cos</strong> (π <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>erfc</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Complementary error function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>erf</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Error function encountered in integrating the normal distribution.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>exp</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the base-e exponential of <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>exp2</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Exponential base 2 function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>exp10</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Exponential base 10 function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>expm1</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>e<sup>x</sup></em> - 1.0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fabs</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute absolute value of a floating-point number.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fdim</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><em>x</em> - <em>y</em> if <em>x</em> &gt; <em>y</em>, +0 if <em>x</em> is less than or equal to <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>floor</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value using the round to negative infinity rounding |
| mode.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fma</strong> (gentype <em>a</em>, gentype <em>b</em>, gentype <em>c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the correctly rounded floating-point representation of the sum of |
| <em>c</em> with the infinitely precise product of <em>a</em> and <em>b</em>. |
| Rounding of intermediate products shall not occur. |
| Edge case behavior is per the IEEE 754-2008 standard.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fmax</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>fmax</strong> (gentype <em>x</em>, double <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>x</em> < <em>y</em>, otherwise it returns <em>x</em>. |
| If one argument is a NaN, <strong>fmax()</strong> returns the other argument. |
| If both arguments are NaNs, <strong>fmax()</strong> returns a NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fmin</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>fmin</strong> (gentype <em>x</em>, double <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>y</em> < <em>x</em>, otherwise it returns <em>x</em>. |
| If one argument is a NaN, <strong>fmin()</strong> returns the other argument. |
| If both arguments are NaNs, <strong>fmin()</strong> returns a NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fmod</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Modulus. |
| Returns <em>x</em> - <em>y</em> * <strong>trunc</strong> (<em>x</em>/<em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>fract</strong> (gentype <em>x</em>, __global gentype *<em>iptr</em>)<br> |
| gentype <strong>fract</strong> (gentype <em>x</em>, __local gentype *<em>iptr</em>)<br> |
| gentype <strong>fract</strong> (gentype <em>x</em>, __private gentype *<em>iptr</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <strong>fmin</strong>( <em>x</em> - <strong>floor</strong> (<em>x</em>), 0x1.fffffffffffffp-1 ).</p> |
| <p class="tableblock"> <strong>floor</strong>(<em>x</em>) is returned in <em>iptr</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>frexp</strong> (double<em>n x</em>, __global int<em>n</em> *exp)<br> |
| double<em>n</em> <strong>frexp</strong> (double<em>n x</em>, __local int<em>n</em> *exp)<br> |
| double<em>n</em> <strong>frexp</strong> (double<em>n x</em>, __private int<em>n</em> *exp)<br> |
| double <strong>frexp</strong> (double <em>x</em>, __global int *exp)<br> |
| double <strong>frexp</strong> (double <em>x</em>, __local int *exp)<br> |
| double <strong>frexp</strong> (double <em>x</em>, __private int *exp)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extract mantissa and exponent from <em>x</em>. |
| For each component the mantissa returned is a double with magnitude in the |
| interval [1/2, 1) or 0. |
| Each component of <em>x</em> equals mantissa returned * 2<em><sup>exp</sup></em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>hypot</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the value of the square root of <em>x</em><sup>2</sup> + <em>y</em><sup>2</sup> without undue |
| overflow or underflow.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int<em>n</em> <strong>ilogb</strong> (double<em>n</em> <em>x</em>)<br> |
| int <strong>ilogb</strong> (double <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the exponent as an integer value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>ldexp</strong> (double<em>n</em> <em>x</em>, int<em>n</em> <em>k</em>)<br> |
| double<em>n</em> <strong>ldexp</strong> (double<em>n</em> <em>x</em>, int <em>k</em>)<br> |
| double <strong>ldexp</strong> (double <em>x</em>, int <em>k</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Multiply <em>x</em> by 2 to the power <em>k</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>lgamma</strong> (gentype <em>x</em>)<br> |
| double<em>n</em> <strong>lgamma_r</strong> (double<em>n</em> <em>x</em>, __global int<em>n</em> *<em>signp</em>)<br> |
| double<em>n</em> <strong>lgamma_r</strong> (double<em>n</em> <em>x</em>, __local int<em>n</em> *<em>signp</em>)<br> |
| double<em>n</em> <strong>lgamma_r</strong> (double<em>n</em> <em>x</em>, __private int<em>n</em> *<em>signp</em>)<br> |
| double <strong>lgamma_r</strong> (double <em>x</em>, __global int *<em>signp</em>)<br> |
| double <strong>lgamma_r</strong> (double <em>x</em>, __local int *<em>signp</em>)<br> |
| double <strong>lgamma_r</strong> (double <em>x</em>, __private int *<em>signp</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Log gamma function. |
| Returns the natural logarithm of the absolute value of the gamma function. |
| The sign of the gamma function is returned in the <em>signp</em> argument of |
| <strong>lgamma_r</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute natural logarithm.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log2</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute a base 2 logarithm.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log10</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute a base 10 logarithm.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>log1p</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute log<sub>e</sub>(1.0 + <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>logb</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the exponent of <em>x</em>, which is the integral part of |
| log<em><sub>r</sub></em>|<em>x</em>|.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>mad</strong> (gentype <em>a</em>, gentype <em>b</em>, gentype <em>c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>mad</strong> computes <em>a</em> * <em>b</em> + <em>c</em>. |
| The function may compute <em>a</em> * <em>b</em> + <em>c</em> with reduced accuracy |
| in the embedded profile. See the OpenCL SPIR-V Environment Specification |
| for details. On some hardware the mad instruction may provide better |
| performance than expanded computation of <em>a</em> * <em>b</em> + <em>c</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>maxmag</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>x</em> if |<em>x</em>| > |<em>y</em>|, <em>y</em> if |<em>y</em>| > |<em>x</em>|, otherwise |
| <strong>fmax</strong>(<em>x</em>, <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>minmag</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>x</em> if |<em>x</em>| &lt; |<em>y</em>|, <em>y</em> if |<em>y</em>| &lt; |<em>x</em>|, otherwise |
| <strong>fmin</strong>(<em>x</em>, <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>modf</strong> (gentype <em>x</em>, __global gentype *<em>iptr</em>)<br> |
| gentype <strong>modf</strong> (gentype <em>x</em>, __local gentype *<em>iptr</em>)<br> |
| gentype <strong>modf</strong> (gentype <em>x</em>, __private gentype *<em>iptr</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Decompose a floating-point number. |
| The <strong>modf</strong> function breaks the argument <em>x</em> into integral and fractional |
| parts, each of which has the same sign as the argument. |
| It stores the integral part in the object pointed to by <em>iptr</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>nan</strong> (ulong<em>n nancode</em>)<br> |
| double <strong>nan</strong> (ulong <em>nancode</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a quiet NaN. |
| The <em>nancode</em> may be placed in the significand of the resulting NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>nextafter</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Computes the next representable double-precision floating-point value |
| following <em>x</em> in the direction of <em>y</em>. |
| Thus, if <em>y</em> is less than <em>x</em>, <strong>nextafter</strong>() returns the largest |
| representable floating-point number less than <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>pow</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>pown</strong> (double<em>n</em> <em>x</em>, int<em>n</em> <em>y</em>)<br> |
| double <strong>pown</strong> (double <em>x</em>, int <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power <em>y</em>, where <em>y</em> is an integer.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>powr</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power <em>y</em>, where <em>x</em> is >= 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>remainder</strong> (gentype <em>x</em>, gentype <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the value <em>r</em> such that <em>r</em> = <em>x</em> - <em>n</em>*<em>y</em>, where <em>n</em> is the |
| integer nearest the exact value of <em>x</em>/<em>y</em>. |
| If there are two integers closest to <em>x</em>/<em>y</em>, <em>n</em> shall be the even one. |
| If <em>r</em> is zero, it is given the same sign as <em>x</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>remquo</strong> (double<em>n</em> <em>x</em>, double<em>n</em> <em>y</em>, __global int<em>n</em> *<em>quo</em>)<br> |
| double<em>n</em> <strong>remquo</strong> (double<em>n</em> <em>x</em>, double<em>n</em> <em>y</em>, __local int<em>n</em> *<em>quo</em>)<br> |
| double<em>n</em> <strong>remquo</strong> (double<em>n</em> <em>x</em>, double<em>n</em> <em>y</em>, __private int<em>n</em> *<em>quo</em>)<br> |
| double <strong>remquo</strong> (double <em>x</em>, double <em>y</em>, __global int *<em>quo</em>)<br> |
| double <strong>remquo</strong> (double <em>x</em>, double <em>y</em>, __local int *<em>quo</em>)<br> |
| double <strong>remquo</strong> (double <em>x</em>, double <em>y</em>, __private int *<em>quo</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The <strong>remquo</strong> function computes the value <em>r</em> such that <em>r</em> = <em>x</em> - <em>k</em>*<em>y</em>, |
| where <em>k</em> is the integer nearest the exact value of <em>x</em>/<em>y</em>. |
| If there are two integers closest to <em>x</em>/<em>y</em>, <em>k</em> shall be the even one. |
| If <em>r</em> is zero, it is given the same sign as <em>x</em>. |
| This is the same value that is returned by the <strong>remainder</strong> function. |
| <strong>remquo</strong> also calculates the lower seven bits of the integral quotient |
| <em>x</em>/<em>y</em>, and gives that value the same sign as <em>x</em>/<em>y</em>. |
| It stores this signed value in the object pointed to by <em>quo</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>rint</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value (using round to nearest even rounding mode) in |
| floating-point format. |
| Refer to section 7.1 for description of rounding modes.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>rootn</strong> (double<em>n</em> <em>x</em>, int<em>n</em> <em>y</em>)<br> |
| double <strong>rootn</strong> (double <em>x</em>, int <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <em>x</em> to the power 1/<em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>round</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the integral value nearest to <em>x</em> rounding halfway cases away from |
| zero, regardless of the current rounding direction.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>rsqrt</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute inverse square root.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sin</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute sine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sincos</strong> (gentype <em>x</em>, __global gentype *<em>cosval</em>)<br> |
| gentype <strong>sincos</strong> (gentype <em>x</em>, __local gentype *<em>cosval</em>)<br> |
| gentype <strong>sincos</strong> (gentype <em>x</em>, __private gentype *<em>cosval</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute sine and cosine of <em>x</em>. |
| The computed sine is the return value and computed cosine is returned in |
| <em>cosval</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sinh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute hyperbolic sine.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sinpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>sin</strong> (π <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sqrt</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute square root.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tan</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute tangent.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tanh</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute hyperbolic tangent.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tanpi</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute <strong>tan</strong> (π <em>x</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>tgamma</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the gamma function.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>trunc</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Round to integral value using the round to zero rounding mode.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>In addition, the following symbolic constant will also be available:</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>HUGE_VAL</strong> - A positive double expression that evaluates to infinity. |
| Used as an error value returned by the built-in math functions.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <strong>FP_FAST_FMA</strong> macro indicates whether the <strong>fma()</strong> family of |
| functions are fast compared with direct code for double precision |
| floating-point. |
| If defined, the <strong>FP_FAST_FMA</strong> macro shall indicate that the <strong>fma()</strong> |
| function generally executes about as fast as, or faster than, a multiply and |
| an add of <strong>double</strong> operands.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The macro names given in the following list must use the values specified. |
| These constant expressions are suitable for use in #if preprocessing |
| directives.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#define</span> DBL_DIG <span class="integer">15</span> |
| <span class="preprocessor">#define</span> DBL_MANT_DIG <span class="integer">53</span> |
| <span class="preprocessor">#define</span> DBL_MAX_10_EXP +<span class="integer">308</span> |
| <span class="preprocessor">#define</span> DBL_MAX_EXP +<span class="integer">1024</span> |
| <span class="preprocessor">#define</span> DBL_MIN_10_EXP -<span class="integer">307</span> |
| <span class="preprocessor">#define</span> DBL_MIN_EXP -<span class="integer">1021</span> |
| <span class="preprocessor">#define</span> DBL_RADIX <span class="integer">2</span> |
| <span class="preprocessor">#define</span> DBL_MAX <span class="hex">0x1</span>.fffffffffffffp1023 |
| <span class="preprocessor">#define</span> DBL_MIN <span class="hex">0x1</span><span class="float">.0</span>p-<span class="integer">1022</span> |
| <span class="preprocessor">#define</span> DBL_EPSILON <span class="hex">0x1</span><span class="float">.0</span>p-<span class="integer">52</span></code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The following table describes the built-in macro names given above in the |
| OpenCL C programming language and the corresponding macro names available to |
| the application.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Macro in OpenCL Language</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Macro for application</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_DIG</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_DIG</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MANT_DIG</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MANT_DIG</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MAX_10_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MAX_10_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MAX_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MAX_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MIN_10_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MIN_10_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MIN_EXP</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MIN_EXP</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_RADIX</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_RADIX</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MAX</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MAX</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_MIN</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_MIN</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DBL_EPSILON</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DBL_EPSILON</strong></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The following constants are also available. |
| They are of type <code>double</code> and are accurate within the precision of the <code>double</code> |
| type.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Constant</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_E</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of e</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LOG2E</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>2</sub>e</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LOG10E</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>10</sub>e</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LN2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>e</sub>2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_LN10</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of log<sub>e</sub>10</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_PI</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_PI_2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of π / 2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_PI_4</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of π / 4</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_1_PI</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 1 / π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_2_PI</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 2 / π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_2_SQRTPI</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 2 / √π</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_SQRT2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of √2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>M_SQRT1_2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Value of 1 / √2</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-common-functions"><a class="anchor" href="#cl_khr_fp64-common-functions"></a>6.2.3. Common Functions</h4> |
| <div class="paragraph"> |
| <p>The built-in common functions defined in <em>table 6.12</em> (also listed below) |
| are extended to include appropriate versions of functions that take <code>double</code> |
| and <code>double{2|3|4|8|16}</code> as arguments and return values. |
| gentype now also includes <code>double</code>, <code>double2</code>, <code>double3</code>, <code>double4</code>, <code>double8</code> and |
| <code>double16</code>. |
| These are described below.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 13. Double Precision Built-in Common Functions</caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>clamp</strong> (<br> |
| gentype <em>x</em>, gentype <em>minval</em>, gentype <em>maxval</em>)</p> |
| <p class="tableblock"> gentype <strong>clamp</strong> (<br> |
| gentype <em>x</em>, double <em>minval</em>, double <em>maxval</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <strong>fmin</strong>(<strong>fmax</strong>(<em>x</em>, <em>minval</em>), <em>maxval</em>).</p> |
| <p class="tableblock"> Results are undefined if <em>minval</em> > <em>maxval</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>degrees</strong> (gentype <em>radians</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Converts <em>radians</em> to degrees,<br> |
| i.e. (180 / π) * <em>radians</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>max</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>max</strong> (gentype <em>x</em>, double <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>x</em> &lt; <em>y</em>, otherwise it returns <em>x</em>. |
| If <em>x</em> and <em>y</em> are infinite or NaN, the return values are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>min</strong> (gentype <em>x</em>, gentype <em>y</em>)<br> |
| gentype <strong>min</strong> (gentype <em>x</em>, double <em>y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>y</em> if <em>y</em> &lt; <em>x</em>, otherwise it returns <em>x</em>. |
| If <em>x</em> and <em>y</em> are infinite or NaN, the return values are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>mix</strong> (gentype <em>x</em>, gentype <em>y</em>, gentype <em>a</em>)<br> |
| gentype <strong>mix</strong> (gentype <em>x</em>, gentype <em>y</em>, double <em>a</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the linear blend of <em>x</em> and <em>y</em> implemented as:</p> |
| <p class="tableblock"> <em>x</em> + (<em>y</em> - <em>x</em>) * <em>a</em></p> |
| <p class="tableblock"> <em>a</em> must be a value in the range 0.0 …​ 1.0. |
| If <em>a</em> is not in the range 0.0 …​ 1.0, the return values are undefined.</p> |
| <p class="tableblock"> Note: The double precision <strong>mix</strong> function can be implemented using contractions such as <strong>mad</strong> or <strong>fma</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>radians</strong> (gentype <em>degrees</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Converts <em>degrees</em> to radians, i.e. (π / 180) * <em>degrees</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>step</strong> (gentype <em>edge</em>, gentype <em>x</em>)<br> |
| gentype <strong>step</strong> (double <em>edge</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns 0.0 if <em>x</em> &lt; <em>edge</em>, otherwise it returns 1.0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>smoothstep</strong> (<br> |
| gentype <em>edge0</em>, gentype <em>edge1</em>, gentype <em>x</em>)<br></p> |
| <p class="tableblock"> gentype <strong>smoothstep</strong> (<br> |
| double <em>edge0</em>, double <em>edge1</em>, gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns 0.0 if <em>x</em> &lt;= <em>edge0</em> and 1.0 if <em>x</em> >= <em>edge1</em> and performs |
| smooth Hermite interpolation between 0 and 1 when <em>edge0</em> &lt; <em>x</em> &lt; <em>edge1</em>. |
| This is useful in cases where you would want a threshold function with a |
| smooth transition.</p> |
| <p class="tableblock"> This is equivalent to:</p> |
| <p class="tableblock"> gentype <em>t</em>;<br> |
| <em>t</em> = clamp ((<em>x</em> - <em>edge0</em>) / (<em>edge1</em> - <em>edge0</em>), 0, 1);<br> |
| return <em>t</em> * <em>t</em> * (3 - 2 * <em>t</em>);<br></p> |
| <p class="tableblock"> Results are undefined if <em>edge0</em> >= <em>edge1</em>.</p> |
| <p class="tableblock"> Note: The double precision <strong>smoothstep</strong> function can be implemented using contractions such as <strong>mad</strong> or <strong>fma</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sign</strong> (gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns 1.0 if <em>x</em> > 0, -0.0 if <em>x</em> = -0.0, +0.0 if <em>x</em> = +0.0, or -1.0 if |
| <em>x</em> &lt; 0. |
| Returns 0.0 if <em>x</em> is a NaN.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-geometric-functions"><a class="anchor" href="#cl_khr_fp64-geometric-functions"></a>6.2.4. Geometric Functions</h4> |
| <div class="paragraph"> |
| <p>The built-in geometric functions defined in <em>table 6.13</em> (also listed below) |
| are extended to include appropriate versions of functions that take <code>double</code> |
| and <code>double{2|3|4}</code> as arguments and return values. |
| gentype now also includes <code>double</code>, <code>double2</code>, <code>double3</code> and <code>double4</code>. |
| These are described below.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note: The double precision geometric functions can be implemented using |
| contractions such as <strong>mad</strong> or <strong>fma</strong>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 14. <em>Double Precision Built-in Geometric Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double4 <strong>cross</strong> (double4 <em>p0</em>, double4 <em>p1</em>)<br> |
| double3 <strong>cross</strong> (double3 <em>p0</em>, double3 <em>p1</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the cross product of <em>p0.xyz</em> and <em>p1.xyz</em>. |
| The <em>w</em> component of the result will be 0.0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double <strong>dot</strong> (gentype <em>p0</em>, gentype <em>p1</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Compute the dot product of <em>p0</em> and <em>p1</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double <strong>distance</strong> (gentype <em>p0</em>, gentype <em>p1</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the distance between <em>p0</em> and <em>p1</em>. |
| This is calculated as <strong>length</strong>(<em>p0</em> - <em>p1</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double <strong>length</strong> (gentype <em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the length of vector <em>p</em>, i.e.,<br> |
| sqrt( <em>p.x</em><sup>2</sup> + <em>p.y</em><sup>2</sup> + …​ )</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>normalize</strong> (gentype <em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a vector in the same direction as <em>p</em> but with a length of 1.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-relational-functions"><a class="anchor" href="#cl_khr_fp64-relational-functions"></a>6.2.5. Relational Functions</h4> |
| <div class="paragraph"> |
| <p>The scalar and vector relational functions described in <em>table 6.14</em> are |
| extended to include versions that take <code>double</code>, <code>double2</code>, <code>double3</code>, <code>double4</code>, |
| <code>double8</code> and <code>double16</code> as arguments.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The relational and equality operators (&lt;, &lt;=, &gt;, &gt;=, !=, ==) can be used |
| with <code>doublen</code> vector types and shall produce a vector <code>longn</code> result as |
| described in <em>section 6.3</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The functions <strong>isequal</strong>, <strong>isnotequal</strong>, <strong>isgreater</strong>, <strong>isgreaterequal</strong>, |
| <strong>isless</strong>, <strong>islessequal</strong>, <strong>islessgreater</strong>, <strong>isfinite</strong>, <strong>isinf</strong>, <strong>isnan</strong>, |
| <strong>isnormal</strong>, <strong>isordered</strong>, <strong>isunordered</strong> and <strong>signbit</strong> shall return a 0 if the |
| specified relation is <em>false</em> and a 1 if the specified relation is <em>true</em> for |
| scalar argument types. |
| These functions shall return a 0 if the specified relation is <em>false</em> and a |
| -1 (i.e. all bits set) if the specified relation is <em>true</em> for vector |
| argument types.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The relational functions <strong>isequal</strong>, <strong>isgreater</strong>, <strong>isgreaterequal</strong>, <strong>isless</strong>, |
| <strong>islessequal</strong>, and <strong>islessgreater</strong> always return 0 if either argument is not |
| a number (NaN). |
| <strong>isnotequal</strong> returns 1 if one or both arguments are not a number (NaN) and |
| the argument type is a scalar and returns -1 if one or both arguments are |
| not a number (NaN) and the argument type is a vector.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The functions described in <em>table 6.14</em> are extended to include the <code>doublen</code> |
| vector types.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 15. <em>Double Precision Relational Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isequal</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isequal</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> == <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isnotequal</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isnotequal</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> != <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isgreater</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isgreater</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> > <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isgreaterequal</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isgreaterequal</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> >= <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isless</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isless</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> &lt; <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>islessequal</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>islessequal</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of <em>x</em> &lt;= <em>y</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>islessgreater</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>islessgreater</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the component-wise compare of (<em>x</em> &lt; <em>y</em>) || (<em>x</em> &gt; <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isfinite</strong> (double)<br> |
| long<em>n</em> <strong>isfinite</strong> (double<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for finite value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isinf</strong> (double)<br> |
| long<em>n</em> <strong>isinf</strong> (double<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for infinity value (positive or negative).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isnan</strong> (double)<br> |
| long<em>n</em> <strong>isnan</strong> (double<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for a NaN.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isnormal</strong> (double)<br> |
| long<em>n</em> <strong>isnormal</strong> (double<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for a normal value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isordered</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isordered</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test if arguments are ordered. |
| <strong>isordered</strong>() takes arguments <em>x</em> and <em>y</em>, and returns the result |
| <strong>isequal</strong>(<em>x</em>, <em>x</em>) && <strong>isequal</strong>(<em>y</em>, <em>y</em>).</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>isunordered</strong> (double <em>x</em>, double <em>y</em>)<br> |
| long<em>n</em> <strong>isunordered</strong> (double<em>n x</em>, double<em>n y</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test if arguments are unordered. |
| <strong>isunordered</strong>() takes arguments <em>x</em> and <em>y</em>, returning non-zero if <em>x</em> or |
| <em>y</em> is a NaN, and zero otherwise.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>signbit</strong> (double)<br> |
| long<em>n</em> <strong>signbit</strong> (double<em>n</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Test for sign bit. |
| The scalar version of the function returns a 1 if the sign bit in the double |
| is set else returns 0. |
| The vector version of the function returns the following for each |
| component in double<em>n</em>: -1 (i.e. all bits set) if the sign bit in the double |
| is set else returns 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>bitselect</strong> (double<em>n a</em>, double<em>n b</em>, double<em>n c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Each bit of the result is the corresponding bit of <em>a</em> if the |
| corresponding bit of <em>c</em> is 0. |
| Otherwise it is the corresponding bit of <em>b</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">double<em>n</em> <strong>select</strong> (double<em>n a</em>, double<em>n b</em>, long<em>n c</em>)<br> |
| double<em>n</em> <strong>select</strong> (double<em>n a</em>, double<em>n b</em>, ulong<em>n c</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">For each component,<br> |
| <em>result[i]</em> = if MSB of <em>c[i]</em> is set ? <em>b[i]</em> : <em>a[i]</em>.<br></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-vector-data-load-and-store-functions"><a class="anchor" href="#cl_khr_fp64-vector-data-load-and-store-functions"></a>6.2.6. Vector Data Load and Store Functions</h4> |
| <div class="paragraph"> |
| <p>The vector data load (<strong>vload<em>n</em></strong>) and store (<strong>vstore<em>n</em></strong>) functions |
| described in <em>table 6.13</em> (also listed below) are extended to include |
| versions that read from or write to double scalar or vector values. |
| The generic type <code>gentype</code> is extended to include <code>double</code>. |
| The generic type <code>gentypen</code> is extended to include <code>double2</code>, <code>double3</code>, |
| <code>double4</code>, <code>double8</code> and <code>double16</code>. |
| The <strong>vstore_half</strong>, <strong>vstore_half<em>n</em></strong> and <strong>vstorea_half<em>n</em></strong> |
| functions are extended to allow a double precision scalar or vector |
| value to be written to memory as half values.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note: <strong>vload3</strong> reads (<em>x</em>,<em>y</em>,<em>z</em>) components from address |
| <code>(<em>p</em> + (<em>offset</em> * 3))</code> into a 3-component vector. |
| <strong>vstore3</strong> and <strong>vstore_half3</strong> write (<em>x</em>,<em>y</em>,<em>z</em>) components from a |
| 3-component vector to address <code>(<em>p</em> + (<em>offset</em> * 3))</code>. |
| In addition, <strong>vloada_half3</strong> reads (<em>x</em>,<em>y</em>,<em>z</em>) components from address |
| <code>(<em>p</em> + (<em>offset</em> * 4))</code> into a 3-component vector and <strong>vstorea_half3</strong> |
| writes (<em>x</em>,<em>y</em>,<em>z</em>) components from a 3-component vector to address |
| <code>(<em>p</em> + (<em>offset</em> * 4))</code>. |
| Whether <strong>vloada_half3</strong> and <strong>vstorea_half3</strong> read/write padding data |
| between the third vector element and the next alignment boundary is |
| implementation defined. |
| <strong>vloada_</strong> and <strong>vstorea_</strong> variants are provided to access data that is |
| aligned to the size of the vector, and are intended to enable performance |
| on hardware that can take advantage of the increased alignment.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 16. <em>Double Precision Vector Data Load and Store Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __global gentype *<em>p</em>)</p> |
| <p class="tableblock"> gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __local gentype *<em>p</em>)</p> |
| <p class="tableblock"> gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __constant gentype *<em>p</em>)</p> |
| <p class="tableblock"> gentype<em>n</em> <strong>vload<em>n</em></strong>(size_t <em>offset</em>, const __private gentype *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return sizeof (gentype<em>n</em>) bytes of data read from address |
| (<em>p</em> + (<em>offset * n</em>)). |
| If gentype is double, the read address computed as (<em>p</em> + (<em>offset * n</em>)) |
| must be 64-bit aligned.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, __global gentype *<em>p</em>)</p> |
| <p class="tableblock"> void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, __local gentype *<em>p</em>)</p> |
| <p class="tableblock"> void <strong>vstore<em>n</em></strong>(gentype<em>n</em> <em>data</em>, size_t <em>offset</em>, __private gentype *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write sizeof (gentype<em>n</em>) bytes given by <em>data</em> to address |
| (<em>p</em> + (<em>offset * n</em>)). |
| If gentype is double, the write address computed as (<em>p</em> + (<em>offset * n</em>)) |
| must be 64-bit aligned.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>vstore_half</strong>(double <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half_rte</strong>(double <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half_rtz</strong>(double <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half_rtp</strong>(double <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half_rtn</strong>(double <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br></p> |
| <p class="tableblock"> void <strong>vstore_half</strong>(double <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half_rte</strong>(double <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half_rtz</strong>(double <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half_rtp</strong>(double <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half_rtn</strong>(double <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br></p> |
| <p class="tableblock"> void <strong>vstore_half</strong>(double <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half_rte</strong>(double <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half_rtz</strong>(double <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half_rtp</strong>(double <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half_rtn</strong>(double <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The double value given by <em>data</em> is first converted to a half value |
| using the appropriate rounding mode. |
| The half value is then written to the address computed as |
| (<em>p</em> + <em>offset</em>). |
| The address computed as (<em>p</em> + <em>offset</em>) must be 16-bit aligned.</p> |
| <p class="tableblock"> <strong>vstore_half</strong> uses the current rounding mode. |
| The default current rounding mode is round to nearest even.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>vstore_half<em>n</em></strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rte</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtz</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtp</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtn</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br></p> |
| <p class="tableblock"> void <strong>vstore_half<em>n</em></strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rte</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtz</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtp</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtn</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br></p> |
| <p class="tableblock"> void <strong>vstore_half<em>n</em></strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rte</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtz</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtp</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstore_half<em>n</em>_rtn</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The double<em>n</em> value given by <em>data</em> is converted to a half<em>n</em> value |
| using the appropriate rounding mode. |
| The half<em>n</em> value is then written to the address computed as |
| (<em>p</em> + (<em>offset * n</em>)). |
| The address computed as (<em>p</em> + (<em>offset * n</em>)) must be 16-bit |
| aligned.</p> |
| <p class="tableblock"> <strong>vstore_half<em>n</em></strong> uses the current rounding mode. |
| The default current rounding mode is round to nearest even.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>vstorea_half<em>n</em></strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rte</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtz</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtp</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtn</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __global half *<em>p</em>)<br></p> |
| <p class="tableblock"> void <strong>vstorea_half<em>n</em></strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rte</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtz</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtp</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtn</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __local half *<em>p</em>)<br></p> |
| <p class="tableblock"> void <strong>vstorea_half<em>n</em></strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rte</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtz</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtp</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)<br> |
| void <strong>vstorea_half<em>n</em>_rtn</strong>(double<em>n</em> <em>data</em>, size_t <em>offset</em>, __private half *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The double<em>n</em> value is converted to a half<em>n</em> value |
| using the appropriate rounding mode.</p> |
| <p class="tableblock"> For n = 1, 2, 4, 8 or 16, the half<em>n</em> value is written to the |
| address computed as |
| (<em>p</em> + (<em>offset * n</em>)). |
| The address computed as (<em>p</em> + (<em>offset * n</em>)) must be aligned to |
| sizeof (half<em>n</em>) bytes.</p> |
| <p class="tableblock"> For n = 3, the half<em>3</em> value is written to the address computed as |
| (<em>p</em> + (<em>offset * 4</em>)). |
| The address computed as (<em>p</em> + (<em>offset * 4</em>)) must be aligned to |
| sizeof (half) * 4 bytes.</p> |
| <p class="tableblock"> <strong>vstorea_half<em>n</em></strong> uses the current rounding mode. |
| The default current rounding mode is round to nearest even.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch"><a class="anchor" href="#cl_khr_fp64-async-copies-from-global-to-local-memory-local-to-global-memory-and-prefetch"></a>6.2.7. Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch</h4> |
| <div class="paragraph"> |
| <p>The OpenCL C programming language implements the following functions that |
| provide asynchronous copies between global and local memory and a prefetch |
| from global memory.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The generic type gentype is extended to include <code>double</code>, <code>double2</code>, <code>double3</code>, |
| <code>double4</code>, <code>double8</code> and <code>double16</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 17. <em>Double Precision Built-in Async Copy and Prefetch Functions</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">event_t <strong>async_work_group_copy</strong> (<br> |
| __local gentype *<em>dst</em>,<br> |
| const __global gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>, event_t <em>event</em>)</p> |
| <p class="tableblock"> event_t <strong>async_work_group_copy</strong> (<br> |
| __global gentype <em>*dst</em>,<br> |
| const __local gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>, event_t <em>event</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Perform an async copy of <em>num_gentypes</em> gentype elements from <em>src</em> to |
| <em>dst</em>. |
| The async copy is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; otherwise |
| the results are undefined.</p> |
| <p class="tableblock"> Returns an event object that can be used by <strong>wait_group_events</strong> to wait |
| for the async copy to finish. |
| The <em>event</em> argument can also be used to associate the |
| <strong>async_work_group_copy</strong> with a previous async copy allowing an event to be |
| shared by multiple async copies; otherwise <em>event</em> should be zero.</p> |
| <p class="tableblock"> If <em>event</em> argument is not zero, the event object supplied in <em>event</em> |
| argument will be returned.</p> |
| <p class="tableblock"> This function does not perform any implicit synchronization of source data |
| such as using a <strong>barrier</strong> before performing the copy.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">event_t <strong>async_work_group_strided_copy</strong> (<br> |
| __local gentype <em>*dst</em>,<br> |
| const __global gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>,<br> |
| size_t <em>src_stride</em>, event_t <em>event</em>)</p> |
| <p class="tableblock"> event_t <strong>async_work_group_strided_copy</strong> (<br> |
| __global gentype <em>*dst</em>,<br> |
| const __local gentype *<em>src</em>,<br> |
| size_t <em>num_gentypes</em>,<br> |
| size_t <em>dst_stride</em>, event_t <em>event</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Perform an async gather of <em>num_gentypes</em> gentype elements from <em>src</em> to |
| <em>dst</em>. |
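| The <em>src_stride</em> is the stride in elements for each gentype element read |
| from <em>src</em>. |
| The <em>dst_stride</em> is the stride in elements for each gentype element written |
| to <em>dst</em>. |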
| The async gather is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; otherwise |
| the results are undefined.</p> |
| <p class="tableblock"> Returns an event object that can be used by <strong>wait_group_events</strong> to wait |
| for the async copy to finish. |
| The <em>event</em> argument can also be used to associate the |
| <strong>async_work_group_strided_copy</strong> with a previous async copy allowing an |
| event to be shared by multiple async copies; otherwise <em>event</em> should be |
| zero.</p> |
| <p class="tableblock"> If <em>event</em> argument is not zero, the event object supplied in <em>event</em> |
| argument will be returned.</p> |
| <p class="tableblock"> This function does not perform any implicit synchronization of source data |
| such as using a <strong>barrier</strong> before performing the copy.</p> |
| <p class="tableblock"> The behavior of <strong>async_work_group_strided_copy</strong> is undefined if |
| <em>src_stride</em> or <em>dst_stride</em> is 0, or if the <em>src_stride</em> or <em>dst_stride</em> |
| values cause the <em>src</em> or <em>dst</em> pointers to exceed the upper bounds of the |
| address space during the copy.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>wait_group_events</strong> (<br> |
| int <em>num_events</em>, event_t *<em>event_list</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Wait for events that identify the <strong>async_work_group_copy</strong> operations to |
| complete. |
| The event objects specified in <em>event_list</em> will be released after the |
| wait is performed.</p> |
| <p class="tableblock"> This function must be encountered by all work-items in a work-group |
| executing the kernel with the same <em>num_events</em> and event objects |
| specified in <em>event_list</em>; otherwise the results are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>prefetch</strong> (<br> |
| const __global gentype *<em>p</em>, size_t <em>num_gentypes</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Prefetch <em>num_gentypes</em> * sizeof(gentype) bytes into the global cache. |
| The prefetch instruction is applied to a work-item in a work-group and |
| does not affect the functional behavior of the kernel.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-ieee754-compliance"><a class="anchor" href="#cl_khr_fp64-ieee754-compliance"></a>6.2.8. IEEE754 Compliance</h4> |
| <div class="paragraph"> |
| <p>The following table entry describes the additions to <em>table 4.3,</em> which |
| allows applications to query the configuration information using |
| <strong>clGetDeviceInfo</strong> for an OpenCL device that supports double precision |
| floating-point.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Op-code</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_DOUBLE_FP_CONFIG</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_device_fp_config</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Describes double precision floating-point capability of the OpenCL device. |
| This is a bit-field that describes one or more of the following values:</p> |
| <p class="tableblock"> CL_FP_DENORM — denorms are supported</p> |
| <p class="tableblock"> CL_FP_INF_NAN — INF and NaNs are supported</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_NEAREST — round to nearest even rounding mode supported</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_ZERO — round to zero rounding mode supported</p> |
| <p class="tableblock"> CL_FP_ROUND_TO_INF — round to positive and negative infinity rounding |
| modes supported</p> |
| <p class="tableblock"> CL_FP_FMA — IEEE754-2008 fused multiply-add is supported</p> |
| <p class="tableblock"> CL_FP_SOFT_FLOAT — Basic floating-point operations (such as addition, |
| subtraction, multiplication) are implemented in software.</p> |
| <p class="tableblock"> The required minimum double precision floating-point capability as |
| implemented by this extension is:</p> |
| <p class="tableblock"> CL_FP_FMA |<br> |
| CL_FP_ROUND_TO_NEAREST |<br> |
| CL_FP_ROUND_TO_ZERO |<br> |
| CL_FP_ROUND_TO_INF |<br> |
| CL_FP_INF_NAN |<br> |
| CL_FP_DENORM.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>IEEE754 fused multiply-add, denorms, INF and NaNs are required to be |
| supported for double precision floating-point numbers and operations |
| on double precision floating-point numbers.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_fp64-relative-error-as-ulps"><a class="anchor" href="#cl_khr_fp64-relative-error-as-ulps"></a>6.2.9. Relative Error as ULPs</h4> |
| <div class="paragraph"> |
| <p>In this section we discuss the maximum relative error defined as <em>ulp</em> |
| (units in the last place).</p> |
| </div> |
| <div class="paragraph"> |
| <p>Addition, subtraction, multiplication, fused multiply-add and conversions
| between integer and floating-point formats are IEEE 754 compliant and
| are therefore correctly rounded using the round-to-nearest even rounding mode.</p>
| </div> |
| <div class="paragraph"> |
| <p>The following table describes the minimum accuracy of double precision |
| floating-point arithmetic operations given as ULP values. |
| 0 ULP is used for math functions that do not require rounding. |
| The reference value used to compute the ULP value of an arithmetic operation |
| is the infinitely precise result.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 18. <em>ULP Values for Double Precision Floating-Point Arithmetic Operations</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Min Accuracy</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> + <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> - <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> * <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>1.0 / <em>x</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong><em>x</em> / <em>y</em></strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>acos</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>acosh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>acospi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>asin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>asinh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>asinpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atan</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atanh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atanpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atan2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 6 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>atan2pi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 6 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cbrt</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>ceil</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clamp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>copysign</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cos</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cosh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cospi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cross</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">absolute error tolerance of 'max * max * (3 * FLT_EPSILON)' per vector component, where <em>max</em> is the maximum input operand magnitude</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>degrees</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>distance</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5.5 + 2n ulp, for gentype with vector width <em>n</em></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>dot</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">absolute error tolerance of 'max * max * (2n - 1) * FLT_EPSILON', for vector width <em>n</em> and maximum input operand magnitude <em>max</em> across all vector components</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>erfc</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>erf</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>exp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>exp2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>exp10</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>expm1</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fabs</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fdim</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>floor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fma</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fmax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fmin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fmod</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>fract</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>frexp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>hypot</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>ilogb</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>ldexp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>length</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5.5 + n ulp, for gentype with vector width <em>n</em></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log2</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log10</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 3 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>log1p</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>logb</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>mad</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>max</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>maxmag</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>min</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>minmag</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>mix</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>modf</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>nan</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>nextafter</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>normalize</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4.5 + n ulp, for gentype with vector width <em>n</em></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>pow(x, y)</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>pown(x, y)</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>powr(x, y)</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>radians</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>remainder</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>remquo</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp for the remainder, at least the lower 7 bits of the integral quotient</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>rint</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>rootn</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>round</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>rsqrt</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 2 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sign</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sincos</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp for sine and cosine values</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sinh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sinpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 4 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>smoothstep</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Implementation-defined</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>sqrt</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>step</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tan</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tanh</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 5 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tanpi</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 6 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>tgamma</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><= 16 ulp</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>trunc</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Correctly rounded</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_int32_atomics"><a class="anchor" href="#cl_khr_int32_atomics"></a>7. 32-bit Atomics</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the extensions <strong>cl_khr_global_int32_base_atomics</strong>, <strong>cl_khr_global_int32_extended_atomics</strong>, <strong>cl_khr_local_int32_base_atomics</strong>, and <strong>cl_khr_local_int32_extended_atomics</strong>. |
| These extensions allow atomic operations to be performed on 32-bit signed and unsigned integers in global and local memory.</p> |
| </div> |
| <div class="paragraph"> |
| <p>These extensions became core features in OpenCL 1.1, except that the built-in atomic function names were changed to use the <strong>atomic_</strong> prefix instead of <strong>atom_</strong> and the volatile qualifier was added to the pointer parameter <em>p</em>.</p>
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_6"><a class="anchor" href="#_general_information_6"></a>7.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_6"><a class="anchor" href="#_version_history_6"></a>7.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_global_atomics_for_32_bit_integers"><a class="anchor" href="#_global_atomics_for_32_bit_integers"></a>7.2. Global Atomics for 32-bit Integers</h3> |
| <div class="sect3"> |
| <h4 id="_base_atomics"><a class="anchor" href="#_base_atomics"></a>7.2.1. Base Atomics</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 19. <em>Built-in Atomic Functions for</em> <strong>cl_khr_global_int32_base_atomics</strong></caption> |
| <colgroup> |
| <col style="width: 64.2857%;"> |
| <col style="width: 35.7143%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_add</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_add</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> + <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_sub</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_sub</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> - <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_xchg</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_xchg</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Swaps the <em>old</em> value stored at location <em>p</em> with the new value given by
| <em>val</em>. Returns the <em>old</em> value.</p></td>
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_inc</strong> (volatile __global int *<em>p</em>)<br> |
| uint <strong>atom_inc</strong> (volatile __global uint *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> + <em>1</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_dec</strong> (volatile __global int *<em>p</em>)<br> |
| uint <strong>atom_dec</strong> (volatile __global uint *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> - <em>1</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_cmpxchg</strong> (volatile __global int *<em>p</em>, int <em>cmp</em>, int <em>val</em>)<br> |
| uint <strong>atom_cmpxchg</strong> (volatile __global uint *<em>p</em>, uint <em>cmp</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> == <em>cmp</em>) ? <em>val</em> : <em>old</em> and store |
| result at location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_extended_atomics"><a class="anchor" href="#_extended_atomics"></a>7.2.2. Extended Atomics</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 20. <em>Built-in Atomic Functions for</em> <strong>cl_khr_global_int32_extended_atomics</strong></caption> |
| <colgroup> |
| <col style="width: 64.2857%;"> |
| <col style="width: 35.7143%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_min</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_min</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute <strong>min</strong>(<em>old</em>, <em>val</em>) and store minimum value at |
| location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_max</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_max</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute <strong>max</strong>(<em>old</em>, <em>val</em>) and store maximum value at |
| location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_and</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_and</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> &amp; <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_or</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_or</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> | <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_xor</strong> (volatile __global int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_xor</strong> (volatile __global uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> ^ <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_local_atomics_for_32_bit_integers"><a class="anchor" href="#_local_atomics_for_32_bit_integers"></a>7.3. Local Atomics for 32-bit Integers</h3> |
| <div class="sect3"> |
| <h4 id="_base_atomics_2"><a class="anchor" href="#_base_atomics_2"></a>7.3.1. Base Atomics</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 21. <em>Built-in Atomic Functions for</em> <strong>cl_khr_local_int32_base_atomics</strong></caption> |
| <colgroup> |
| <col style="width: 64.2857%;"> |
| <col style="width: 35.7143%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_add</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_add</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> + <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_sub</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_sub</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> - <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_xchg</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_xchg</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Swaps the <em>old</em> value stored at location <em>p</em> with new value given by |
| <em>val</em>. Returns <em>old</em> value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_inc</strong> (volatile __local int *<em>p</em>)<br> |
| uint <strong>atom_inc</strong> (volatile __local uint *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> + <em>1</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_dec</strong> (volatile __local int *<em>p</em>)<br> |
| uint <strong>atom_dec</strong> (volatile __local uint *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> - <em>1</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_cmpxchg</strong> (volatile __local int *<em>p</em>, int <em>cmp</em>, int <em>val</em>)<br> |
| uint <strong>atom_cmpxchg</strong> (volatile __local uint *<em>p</em>, uint <em>cmp</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> == <em>cmp</em>) ? <em>val</em> : <em>old</em> and store |
| result at location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_extended_atomics_2"><a class="anchor" href="#_extended_atomics_2"></a>7.3.2. Extended Atomics</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 22. <em>Built-in Atomic Functions for</em> <strong>cl_khr_local_int32_extended_atomics</strong></caption> |
| <colgroup> |
| <col style="width: 64.2857%;"> |
| <col style="width: 35.7143%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_min</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_min</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute <strong>min</strong>(<em>old</em>, <em>val</em>) and store minimum value at |
| location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_max</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_max</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute <strong>max</strong>(<em>old</em>, <em>val</em>) and store maximum value at |
| location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_and</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_and</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> &amp; <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_or</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_or</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> | <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>atom_xor</strong> (volatile __local int *<em>p</em>, int <em>val</em>)<br> |
| uint <strong>atom_xor</strong> (volatile __local uint *<em>p</em>, uint <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 32-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> ^ <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_int64_atomics"><a class="anchor" href="#cl_khr_int64_atomics"></a>8. 64-bit Atomics</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_int64_base_atomics</strong> and <strong>cl_khr_int64_extended_atomics</strong> extensions. These extensions allow atomic operations to be performed on 64-bit signed and unsigned integers in global and local memory.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_7"><a class="anchor" href="#_general_information_7"></a>8.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_7"><a class="anchor" href="#_version_history_7"></a>8.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 23. <em>Built-in Atomic Functions for</em> <strong>cl_khr_int64_base_atomics</strong></caption> |
| <colgroup> |
| <col style="width: 64.2857%;"> |
| <col style="width: 35.7143%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_add</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_add</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_add</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_add</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> + <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_sub</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_sub</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_sub</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_sub</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> - <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_xchg</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_xchg</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_xchg</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_xchg</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Swaps the <em>old</em> value stored at location <em>p</em> with new value given by |
| <em>val</em>. Returns <em>old</em> value.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_inc</strong> (volatile __global long *<em>p</em>)<br> |
| long <strong>atom_inc</strong> (volatile __local long *<em>p</em>)<br> |
| <br> |
| ulong <strong>atom_inc</strong> (volatile __global ulong *<em>p</em>)<br> |
| ulong <strong>atom_inc</strong> (volatile __local ulong *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> + <em>1</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_dec</strong> (volatile __global long *<em>p</em>)<br> |
| long <strong>atom_dec</strong> (volatile __local long *<em>p</em>)<br> |
| <br> |
| ulong <strong>atom_dec</strong> (volatile __global ulong *<em>p</em>)<br> |
| ulong <strong>atom_dec</strong> (volatile __local ulong *<em>p</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> - <em>1</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_cmpxchg</strong> (volatile __global long *<em>p</em>, long <em>cmp</em>, long <em>val</em>)<br> |
| long <strong>atom_cmpxchg</strong> (volatile __local long *<em>p</em>, long <em>cmp</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_cmpxchg</strong> (volatile __global ulong *<em>p</em>, ulong <em>cmp</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_cmpxchg</strong> (volatile __local ulong *<em>p</em>, ulong <em>cmp</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> == <em>cmp</em>) ? <em>val</em> : <em>old</em> and store |
| result at location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 24. <em>Built-in Atomic Functions for</em> <strong>cl_khr_int64_extended_atomics</strong></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_min</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_min</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_min</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_min</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute <strong>min</strong>(<em>old</em>, <em>val</em>) and store minimum value at |
| location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_max</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_max</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_max</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_max</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute <strong>max</strong>(<em>old</em>, <em>val</em>) and store maximum value at |
| location pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_and</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_and</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_and</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_and</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> &amp; <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_or</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_or</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_or</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_or</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> | <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">long <strong>atom_xor</strong> (volatile __global long *<em>p</em>, long <em>val</em>)<br> |
| long <strong>atom_xor</strong> (volatile __local long *<em>p</em>, long <em>val</em>)<br> |
| <br> |
| ulong <strong>atom_xor</strong> (volatile __global ulong *<em>p</em>, ulong <em>val</em>)<br> |
| ulong <strong>atom_xor</strong> (volatile __local ulong *<em>p</em>, ulong <em>val</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read the 64-bit value (referred to as <em>old</em>) stored at location |
| pointed by <em>p</em>. Compute (<em>old</em> ^ <em>val</em>) and store result at location |
| pointed by <em>p</em>. The function returns <em>old</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: Atomic operations on 64-bit integers and 32-bit integers (and |
| float) are also atomic with respect to each other.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_select_fprounding_mode"><a class="anchor" href="#cl_khr_select_fprounding_mode"></a>9. Selecting the Rounding Mode <strong>(DEPRECATED)</strong></h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_select_fprounding_mode</strong> extension. |
| It allows an application to specify the rounding mode for an instruction or group of instructions in the program source.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>This extension was deprecated in OpenCL 1.1 and its use is not recommended.</strong></p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_8"><a class="anchor" href="#_general_information_8"></a>9.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_8"><a class="anchor" href="#_version_history_8"></a>9.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_changes_to_opencl_c_specification"><a class="anchor" href="#_changes_to_opencl_c_specification"></a>9.2. Changes to OpenCL C specification</h3> |
| <div class="paragraph"> |
| <p>With this extension, the rounding mode may be specified using the following <strong>#pragma</strong> in the OpenCL program source:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#pragma</span> OPENCL SELECT_ROUNDING_MODE &lt;rounding-mode&gt;</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The <em>&lt;rounding-mode&gt;</em> may be one of the following values:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><strong>rte</strong> - round to nearest even</p> |
| </li> |
| <li> |
| <p><strong>rtz</strong> - round to zero</p> |
| </li> |
| <li> |
| <p><strong>rtp</strong> - round to positive infinity</p> |
| </li> |
| <li> |
| <p><strong>rtn</strong> - round to negative infinity</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If this extension is supported then the OpenCL implementation must support all four rounding modes for single precision floating-point.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <strong>#pragma</strong> sets the rounding mode for all instructions that operate on floating-point types (scalar or vector types) or produce floating-point values that follow this pragma in the program source until the next <strong>#pragma</strong>. |
| Note that the rounding mode specified for a block of code is known at compile time. |
| When inside a compound statement, the pragma takes effect from its occurrence until another <strong>#pragma</strong> is encountered (including within a nested compound statement), or until the end of the compound statement; at the end of a compound statement the state for the pragma is restored to its condition just before the compound statement. |
| Except where otherwise documented, the callee functions do not inherit the rounding mode of the caller function.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If this extension is enabled, the <code>__ROUNDING_MODE__</code> preprocessor symbol shall be defined to be one of the following according to the current rounding mode:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#define</span> __ROUNDING_MODE__ rte |
| <span class="preprocessor">#define</span> __ROUNDING_MODE__ rtz |
| <span class="preprocessor">#define</span> __ROUNDING_MODE__ rtp |
| <span class="preprocessor">#define</span> __ROUNDING_MODE__ rtn</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>This is intended to enable remapping <code>foo()</code> to <code>foo_rte()</code> by the preprocessor by using:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#define</span> foo foo <span class="preprocessor">#</span><span class="preprocessor"># __ROUNDING_MODE__</span></code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The default rounding mode is round to nearest even. |
| The built-in math functions described in <em>section 6.11.2</em>, the common functions described in <em>section 6.11.4</em> and the geometric functions described in <em>section 6.11.5</em> are implemented with the round to nearest even rounding mode. |
| Various built-in conversions and the <strong>vstore_half</strong> and <strong>vstorea_half</strong> built-in functions that do not specify a rounding mode inherit the current rounding mode. |
| Conversions from floating-point to integer type always use <code>rtz</code> mode, except where the user specifically asks for another rounding mode.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_gl_sharing"><a class="anchor" href="#cl_khr_gl_sharing"></a>10. Creating an OpenCL Context from an OpenGL Context or Share Group</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing-overview"><a class="anchor" href="#cl_khr_gl_sharing-overview"></a>10.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality in the <strong>cl_khr_gl_sharing</strong> extension |
| to associate an OpenCL context with an OpenGL context or share group object. |
| Once an OpenCL context is associated with an OpenGL context or share group |
| object, the functionality described in the section |
| <a href="#cl_khr_gl_sharing__memobjs">Creating OpenCL Memory Objects from OpenGL Objects</a> |
| may be used to share OpenGL buffer, texture, and renderbuffer objects with the OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>An OpenGL implementation supporting buffer objects and sharing of texture |
| and buffer object images with OpenCL is required by this extension.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_9"><a class="anchor" href="#_general_information_9"></a>10.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_9"><a class="anchor" href="#_version_history_9"></a>10.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing-new-procedures-and-functions"><a class="anchor" href="#cl_khr_gl_sharing-new-procedures-and-functions"></a>10.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetGLContextInfoKHR(<span class="directive">const</span> cl_context_properties *properties, |
| cl_gl_context_info param_name, |
| size_t param_value_size, |
| <span class="directive">void</span> *param_value, |
| size_t *param_value_size_ret);</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing-new-tokens"><a class="anchor" href="#cl_khr_gl_sharing-new-tokens"></a>10.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateContext</strong>, <strong>clCreateContextFromType</strong>, and |
| <strong>clGetGLContextInfoKHR</strong> when an invalid OpenGL context or share group object |
| handle is specified in <em>properties</em>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the <em>param_name</em> argument of <strong>clGetGLContextInfoKHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR |
| CL_DEVICES_FOR_GL_CONTEXT_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as an attribute name in the <em>properties</em> argument of |
| <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_GL_CONTEXT_KHR |
| CL_EGL_DISPLAY_KHR |
| CL_GLX_DISPLAY_KHR |
| CL_WGL_HDC_KHR |
| CL_CGL_SHAREGROUP_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing-additions-to-chapter-4"><a class="anchor" href="#cl_khr_gl_sharing-additions-to-chapter-4"></a>10.5. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>In <em>section 4.4</em>, replace the description of <em>properties</em> under |
| <strong>clCreateContext</strong> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"<em>properties</em> points to an attribute list, which is an array of ordered |
| &lt;attribute name, value&gt; pairs terminated with zero. |
| If an attribute is not specified in <em>properties</em>, then its default value |
| (listed in <em>table 4.5</em>) is used (it is said to be specified implicitly). |
| If <em>properties</em> is <code>NULL</code> or empty (points to a list whose first value is |
| zero), all attributes take on their default values.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Attributes control sharing of OpenCL memory objects with OpenGL buffer, |
| texture, and renderbuffer objects. |
| Depending on the platform-specific API used to bind OpenGL contexts to the |
| window system, the following attributes may be set to identify an OpenGL |
| context:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>When the CGL binding API is supported, the attribute |
| CL_CGL_SHAREGROUP_KHR should be set to a CGLShareGroup handle to a CGL |
| share group object.</p> |
| </li> |
| <li> |
| <p>When the EGL binding API is supported, the attribute CL_GL_CONTEXT_KHR |
| should be set to an EGLContext handle to an OpenGL ES or OpenGL context, |
| and the attribute CL_EGL_DISPLAY_KHR should be set to the EGLDisplay |
| handle of the display used to create the OpenGL ES or OpenGL context.</p> |
| </li> |
| <li> |
| <p>When the GLX binding API is supported, the attribute CL_GL_CONTEXT_KHR |
| should be set to a GLXContext handle to an OpenGL context, and the |
| attribute CL_GLX_DISPLAY_KHR should be set to the Display handle of the |
| X Window System display used to create the OpenGL context.</p> |
| </li> |
| <li> |
| <p>When the WGL binding API is supported, the attribute CL_GL_CONTEXT_KHR |
| should be set to an HGLRC handle to an OpenGL context, and the attribute |
| CL_WGL_HDC_KHR should be set to the HDC handle of the display used to |
| create the OpenGL context.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Memory objects created in the context so specified may be shared with the |
| specified OpenGL or OpenGL ES context (as well as with any other OpenGL |
| contexts on the share list of that context, according to the description of |
| sharing in the GLX 1.4 and EGL 1.4 specifications, and the WGL documentation |
| for OpenGL implementations on Microsoft Windows), or with the explicitly |
| identified OpenGL share group for CGL. |
| If no OpenGL or OpenGL ES context or share group is specified in the |
| attribute list, then memory objects may not be shared, and calling any of |
| the commands described in <a href="#cl_khr_gl_sharing__memobjs">Creating OpenCL |
| Memory Objects from OpenGL Objects</a> will result in a |
| CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR error.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL / OpenGL sharing does not support the CL_CONTEXT_INTEROP_USER_SYNC |
| property defined in <em>table 4.5</em>. |
| Specifying this property when creating a context with OpenCL / OpenGL |
| sharing will return an appropriate error.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add to <em>table 4.5</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 25. <em>OpenGL Sharing Context Creation Attributes</em></caption> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Attribute Name</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Allowed Values</strong> |
| |
| <strong>(Default value is in bold)</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_GL_CONTEXT_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>0</strong>, OpenGL context handle</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">OpenGL context to associate the OpenCL context with</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_CGL_SHAREGROUP_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>0</strong>, CGL share group handle</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CGL share group to associate the OpenCL context with</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_EGL_DISPLAY_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>EGL_NO_DISPLAY</strong>, EGLDisplay handle</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">EGLDisplay an OpenGL context was created with respect to</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_GLX_DISPLAY_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>None</strong>, X handle</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">X Display an OpenGL context was created with respect to</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_WGL_HDC_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>0</strong>, HDC handle</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">HDC an OpenGL context was created with respect to</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Replace the first error in the list for <strong>clCreateContext</strong> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<em>errcode_ret</em> returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a context |
| was specified by any of the following means:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>A context was specified for an EGL-based OpenGL ES or OpenGL |
| implementation by setting the attributes CL_GL_CONTEXT_KHR and |
| CL_EGL_DISPLAY_KHR.</p> |
| </li> |
| <li> |
| <p>A context was specified for a GLX-based OpenGL implementation by setting |
| the attributes CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR.</p> |
| </li> |
| <li> |
| <p>A context was specified for a WGL-based OpenGL implementation by setting |
| the attributes CL_GL_CONTEXT_KHR and CL_WGL_HDC_KHR.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>and any of the following conditions hold:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The specified display and context attributes do not identify a valid |
| OpenGL or OpenGL ES context.</p> |
| </li> |
| <li> |
| <p>The specified context does not support buffer and renderbuffer objects.</p> |
| </li> |
| <li> |
| <p>The specified context is not compatible with the OpenCL context being |
| created (for example, it exists in a physically distinct address space, |
| such as another hardware device; or it does not support sharing data |
| with OpenCL due to implementation restrictions).</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a share |
| group was specified for a CGL-based OpenGL implementation by setting the |
| attribute CL_CGL_SHAREGROUP_KHR, and the specified share group does not |
| identify a valid CGL share group object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> returns CL_INVALID_OPERATION if a context was specified as |
| described above and any of the following conditions hold:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>A context or share group object was specified for one of CGL, EGL, GLX, |
| or WGL and the OpenGL implementation does not support that window-system |
| binding API.</p> |
| </li> |
| <li> |
| <p>More than one of the attributes CL_CGL_SHAREGROUP_KHR, |
| CL_EGL_DISPLAY_KHR, CL_GLX_DISPLAY_KHR, and CL_WGL_HDC_KHR is set to a |
| non-default value.</p> |
| </li> |
| <li> |
| <p>Both of the attributes CL_CGL_SHAREGROUP_KHR and CL_GL_CONTEXT_KHR are |
| set to non-default values.</p> |
| </li> |
| <li> |
| <p>Any of the devices specified in the <em>devices</em> argument cannot support |
| OpenCL objects which share the data store of an OpenGL object.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> returns CL_INVALID_PROPERTY if an attribute name other than |
| those specified in <em>table 4.5</em>, or CL_CONTEXT_INTEROP_USER_SYNC, is |
| specified in <em>properties</em>.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Replace the description of <em>properties</em> under <strong>clCreateContextFromType</strong> |
| with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<em>properties</em> points to an attribute list whose format and valid contents |
| are identical to the <em>properties</em> argument of <strong>clCreateContext</strong>.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Replace the first error in the list for <strong>clCreateContextFromType</strong> with the |
| same two new errors described above for <strong>clCreateContext</strong>.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing-additions-to-chapter-5"><a class="anchor" href="#cl_khr_gl_sharing-additions-to-chapter-5"></a>10.6. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add a new section to describe the new API for querying OpenCL devices that |
| support sharing with OpenGL:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“OpenCL device(s) corresponding to an OpenGL context may be queried. |
| Such a device may not always exist (for example, if an OpenGL context is |
| specified on a GPU not supporting OpenCL command queues, but which does |
| support shared CL/GL objects), and if it does exist, may change over time. |
| When such a device does exist, acquiring and releasing shared CL/GL objects |
| may be faster on a command queue corresponding to this device than on |
| command queues corresponding to other devices available to an OpenCL |
| context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>To query the currently corresponding device, use the function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetGLContextInfoKHR(<span class="directive">const</span> cl_context_properties *properties, |
| cl_gl_context_info param_name, |
| size_t param_value_size, |
| <span class="directive">void</span> *param_value, |
| size_t *param_value_size_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p><em>properties</em> points to an attribute list whose format and valid contents are |
| identical to the <em>properties</em> argument of <strong>clCreateContext</strong>. |
| <em>properties</em> must identify a single valid GL context or GL share group |
| object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_name</em> is a constant that specifies the device types to query, and |
| must be one of the values shown in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value</em> is a pointer to memory where the result of the query is |
| returned as described in the table below. |
| If <em>param_value</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size</em> specifies the size in bytes of memory pointed to by |
| <em>param_value</em>. |
| This size must be greater than or equal to the size of the return type |
| described in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size_ret</em> returns the actual size in bytes of data being |
| queried by <em>param_value</em>. |
| If <em>param_value_size_ret</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <table id="cl_khr_gl_sharing-clGetGLContextInfoKHR-table" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 26. <em>Supported Device Types for</em> <strong>clGetGLContextInfoKHR</strong></caption> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>param_name</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Information returned in param_value</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_device_id</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the OpenCL device currently associated with the specified OpenGL |
| context.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEVICES_FOR_GL_CONTEXT_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_device_id[]</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return all OpenCL devices which may be associated with the specified |
| OpenGL context.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><strong>clGetGLContextInfoKHR</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| If no device(s) exist corresponding to <em>param_name</em>, the call will not fail, |
| but the value of <em>param_value_size_ret</em> will be zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetGLContextInfoKHR</strong> returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a |
| context was specified by any of the following means:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>A context was specified for an EGL-based OpenGL ES or OpenGL |
| implementation by setting the attributes CL_GL_CONTEXT_KHR and |
| CL_EGL_DISPLAY_KHR.</p> |
| </li> |
| <li> |
| <p>A context was specified for a GLX-based OpenGL implementation by setting |
| the attributes CL_GL_CONTEXT_KHR and CL_GLX_DISPLAY_KHR.</p> |
| </li> |
| <li> |
| <p>A context was specified for a WGL-based OpenGL implementation by setting |
| the attributes CL_GL_CONTEXT_KHR and CL_WGL_HDC_KHR.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>and any of the following conditions hold:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The specified display and context attributes do not identify a valid |
| OpenGL or OpenGL ES context.</p> |
| </li> |
| <li> |
| <p>The specified context does not support buffer and renderbuffer objects.</p> |
| </li> |
| <li> |
| <p>The specified context is not compatible with the OpenCL context being |
| created (for example, it exists in a physically distinct address space, |
| such as another hardware device; or it does not support sharing data |
| with OpenCL due to implementation restrictions).</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetGLContextInfoKHR</strong> returns CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if a |
| share group was specified for a CGL-based OpenGL implementation by setting |
| the attribute CL_CGL_SHAREGROUP_KHR, and the specified share group does not |
| identify a valid CGL share group object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetGLContextInfoKHR</strong> returns CL_INVALID_OPERATION if a context was |
| specified as described above and any of the following conditions hold:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>A context or share group object was specified for one of CGL, EGL, GLX, |
| or WGL and the OpenGL implementation does not support that window-system |
| binding API.</p> |
| </li> |
| <li> |
| <p>More than one of the attributes CL_CGL_SHAREGROUP_KHR, |
| CL_EGL_DISPLAY_KHR, CL_GLX_DISPLAY_KHR, and CL_WGL_HDC_KHR is set to a |
| non-default value.</p> |
| </li> |
| <li> |
| <p>Both of the attributes CL_CGL_SHAREGROUP_KHR and CL_GL_CONTEXT_KHR are |
| set to non-default values.</p> |
| </li> |
| <li> |
| <p>Any of the devices specified in the <em>devices</em> argument cannot support |
| OpenCL objects which share the data store of an OpenGL object.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetGLContextInfoKHR</strong> returns CL_INVALID_VALUE if an attribute name other |
| than those specified in <em>table 4.5</em> is specified in <em>properties</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Additionally, <strong>clGetGLContextInfoKHR</strong> returns CL_INVALID_VALUE if |
| <em>param_name</em> is not one of the values listed in the table |
| <a href="#cl_khr_gl_sharing-clGetGLContextInfoKHR-table"><em>GL context information that |
| can be queried with</em> <strong>clGetGLContextInfoKHR</strong></a>, or if the size in bytes |
| specified by <em>param_value_size</em> is less than the size of the return type |
| shown in the table and <em>param_value</em> is not a <code>NULL</code> value; |
| CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by |
| the OpenCL implementation on the device; or CL_OUT_OF_HOST_MEMORY if there |
| is a failure to allocate resources required by the OpenCL implementation on |
| the host.”</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing-issues"><a class="anchor" href="#cl_khr_gl_sharing-issues"></a>10.7. Issues</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>How should the OpenGL context be identified when creating an associated |
| OpenCL context?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: by using a (display,context handle) attribute pair to identify an |
| arbitrary OpenGL or OpenGL ES context with respect to one of the |
| window-system binding layers EGL, GLX, or WGL, or a share group handle to |
| identify a CGL share group. |
| If a context is specified, it need not be current to the thread calling |
| clCreateContext*.</p> |
| </div> |
| <div class="paragraph"> |
| <p>A previously suggested approach would use a single boolean attribute |
| CL_USE_GL_CONTEXT_KHR to allow creating a context associated with the |
| currently bound OpenGL context. |
| This may still be implemented as a separate extension, and might allow more |
| efficient acquire/release behavior in the special case where they are being |
| executed in the same thread as the bound GL context used to create the CL |
| context.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What should the format of an attribute list be?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>After considerable discussion, we think we can live with a list of |
| &lt;attribute name,value&gt; pairs terminated by zero. |
| The list is passed as 'cl_context_properties *<em>properties</em>', where |
| cl_context_properties is typedefed to be 'intptr_t' in cl.h.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This effectively allows encoding all scalar integer, pointer, and handle |
| values in the host API into the argument list and is analogous to the |
| structure and type of EGL attribute lists. |
| <code>NULL</code> attribute lists are also allowed. |
| Again as for EGL, any attributes not explicitly passed in the list will take |
| on a defined default value that does something reasonable.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Experience with EGL, GLX, and WGL has shown attribute lists to be a |
| sufficiently flexible and general mechanism to serve the needs of management |
| calls such as context creation. |
| It is not completely general (encoding floating-point and non-scalar |
| attribute values is not straightforward), and other approaches were |
| suggested such as opaque attribute lists with getter/setter methods, or |
| arrays of variadic structures.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What’s the behavior of an associated OpenGL or OpenCL context when using |
| resources defined by the other associated context, and that context is |
| destroyed?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: OpenCL objects place a reference on the data store underlying the |
| corresponding GL object when they’re created. |
| The GL name corresponding to that data store may be deleted, but the data |
| store itself remains so long as any CL object has a reference to it. |
| However, destroying all GL contexts in the share group corresponding to a CL |
| context results in implementation-dependent behavior when using a |
| corresponding CL object, up to and including program termination.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>How about sharing with D3D?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>Sharing between D3D and OpenCL should use the same attribute list mechanism, |
| though obviously with different parameters, and be exposed as a similar |
| parallel OpenCL extension. |
| There may be an interaction between that extension and this one since it’s |
| not yet clear if it will be possible to create a CL context simultaneously |
| sharing GL and D3D objects.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Under what conditions will context creation fail due to sharing?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: Several cross-platform failure conditions are described (GL |
| context or CGL share group doesn’t exist, GL context doesn’t support types |
| of GL objects, GL context implementation doesn’t allow sharing), but |
| additional failures may result due to implementation-dependent reasons and |
| should be added to this extension as such failures are discovered. |
| Sharing between OpenCL and OpenGL requires integration at the driver |
| internals level.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What command queues can <strong>clEnqueueAcquire/ReleaseGLObjects</strong> be placed |
| on?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: All command queues. |
| This restriction is enforced at context creation time. |
| If any device passed to context creation cannot support shared CL/GL |
| objects, context creation will fail with a CL_INVALID_OPERATION error.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>How can applications determine which command queue to place an |
| Acquire/Release on?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: The <strong>clGetGLContextInfoKHR</strong> function returns either the CL device currently |
| corresponding to a specified GL context (typically the display it’s running |
| on), or a list of all the CL devices the specified context might run on |
| (potentially useful in multiheaded / “virtual screen” environments). |
| This command is not simply placed in <a href="#cl_khr_gl_sharing__memobjs">Creating |
| OpenCL Memory Objects from OpenGL Objects</a> because it relies on the same |
| property-list method of specifying a GL context introduced by this |
| extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If no devices are returned, it means that the GL context exists on an older |
| GPU not capable of running OpenCL, but still capable of sharing objects |
| between GL running on that GPU and CL running elsewhere.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What is the meaning of the CL_DEVICES_FOR_GL_CONTEXT_KHR query?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: The list of all CL devices that may ever be associated with a |
| specific GL context. |
| On platforms such as MacOS X, the “virtual screen” concept allows multiple |
| GPUs to back a single virtual display. |
| Similar functionality might be implemented on other windowing systems, such |
| as a transparent heterogeneous multiheaded X server. |
| Therefore the exact meaning of this query is interpreted relative to the |
| binding layer API in use.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_gl_sharing__memobjs"><a class="anchor" href="#cl_khr_gl_sharing__memobjs"></a>11. Creating OpenCL Memory Objects from OpenGL Objects</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes functionality in the <strong>cl_khr_gl_sharing</strong> extension |
| to use OpenGL buffer, texture, and renderbuffer objects as OpenCL memory objects. |
| OpenCL memory objects may be created from OpenGL objects if and only if the |
| OpenCL context is associated with an OpenGL context or share group object. |
| The section <a href="#cl_khr_gl_sharing">Creating an OpenCL Context from an OpenGL Context or Share Group</a> |
| describes how to create an OpenCL context associated with an OpenGL context or share group object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>An OpenCL image object may be created from an OpenGL texture or renderbuffer object. |
| An OpenCL buffer object may be created from an OpenGL buffer object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Any supported OpenGL object defined within the associated OpenGL context |
| or share group object may be shared, with the exception of the default |
| OpenGL objects (i.e. objects named zero), which may not be shared.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_10"><a class="anchor" href="#_general_information_10"></a>11.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_10"><a class="anchor" href="#_version_history_10"></a>11.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing__memobjs-lifetime-of-shared-objects"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-lifetime-of-shared-objects"></a>11.2. Lifetime of Shared Objects</h3> |
| <div class="paragraph"> |
| <p>An OpenCL memory object created from an OpenGL object (hereinafter referred |
| to as a “shared CL/GL object”) remains valid as long as the corresponding |
| GL object has not been deleted. |
| If the GL object is deleted through the GL API (e.g. <strong>glDeleteBuffers</strong>, |
| <strong>glDeleteTextures</strong>, or <strong>glDeleteRenderbuffers</strong>), subsequent use of the CL |
| buffer or image object will result in undefined behavior, including but not |
| limited to possible CL errors and data corruption, but may not result in |
| program termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The CL context and corresponding command-queues are dependent on the |
| existence of the GL share group object, or the share group associated with |
| the GL context from which the CL context is created. |
| If the GL share group object or all GL contexts in the share group are |
| destroyed, any use of the CL context or command-queue(s) will result in |
| undefined behavior, which may include program termination. |
| Applications should destroy the CL command-queue(s) and CL context before |
| destroying the corresponding GL share group or contexts.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing__memobjs-cl-buffer-objects-from-gl-buffer-objects"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-cl-buffer-objects-from-gl-buffer-objects"></a>11.3. OpenCL Buffer Objects from OpenGL Buffer Objects</h3> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromGLBuffer(cl_context context, |
| cl_mem_flags flags, |
| GLuint bufobj, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL buffer object from an OpenGL buffer object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from an OpenGL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>bufobj</em> is the name of a GL buffer object. |
| The data store of the GL buffer object must have been previously |
| created by calling <strong>glBufferData</strong>, although its contents need not be |
| initialized. |
| The size of the data store will be used to determine the size of the CL |
| buffer object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code as described below. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromGLBuffer</strong> returns a valid non-zero OpenCL buffer object and |
| <em>errcode_ret</em> is set to CL_SUCCESS if the buffer object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context or was not |
| created from a GL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if <em>bufobj</em> is not a GL buffer object or is a GL |
| buffer object but does not have an existing data store or the size of |
| the buffer is 0.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The size of the GL buffer object data store at the time |
| <strong>clCreateFromGLBuffer</strong> is called will be used as the size of buffer object |
| returned by <strong>clCreateFromGLBuffer</strong>. |
| If the state of a GL buffer object is modified through the GL API (e.g. |
| <strong>glBufferData</strong>) while there exists a corresponding CL buffer object, |
| subsequent use of the CL buffer object will result in undefined behavior.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <strong>clRetainMemObject</strong> and <strong>clReleaseMemObject</strong> functions can be used to |
| retain and release the buffer object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The CL buffer object created using clCreateFromGLBuffer can also be used to |
| create a CL 1D image buffer object.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-textures"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-textures"></a>11.4. OpenCL Image Objects from OpenGL Textures</h3> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromGLTexture(cl_context context, |
| cl_mem_flags flags, |
| GLenum texture_target, |
| GLint miplevel, |
| GLuint texture, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates the following:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>an OpenCL 2D image object from an OpenGL 2D texture object or a single |
| face of an OpenGL cubemap texture object,</p> |
| </li> |
| <li> |
| <p>an OpenCL 2D image array object from an OpenGL 2D texture array object,</p> |
| </li> |
| <li> |
| <p>an OpenCL 1D image object from an OpenGL 1D texture object,</p> |
| </li> |
| <li> |
| <p>an OpenCL 1D image buffer object from an OpenGL texture buffer object,</p> |
| </li> |
| <li> |
| <p>an OpenCL 1D image array object from an OpenGL 1D texture array object,</p> |
| </li> |
| <li> |
| <p>an OpenCL 3D image object from an OpenGL 3D texture object.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from an OpenGL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> may be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>texture_target</em> must be one of GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, |
| GL_TEXTURE_BUFFER, GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, |
| GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, |
| GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, |
| GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, or |
| GL_TEXTURE_RECTANGLE (Note: GL_TEXTURE_RECTANGLE requires OpenGL 3.1. |
| Alternatively, GL_TEXTURE_RECTANGLE_ARB may be specified if the OpenGL |
| extension <strong>GL_ARB_texture_rectangle</strong> is supported.). |
| <em>texture_target</em> is used only to define the image type of <em>texture</em>. |
| No reference to a bound GL texture object is made or implied by this |
| parameter.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>miplevel</em> is the mipmap level to be used. |
| If <em>texture_target</em> is GL_TEXTURE_BUFFER, <em>miplevel</em> must be 0. |
| Note: Implementations may return CL_INVALID_OPERATION for miplevel |
| values > 0.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>texture</em> is the name of a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, |
| rectangle or buffer texture object. |
| The texture object must be a complete texture as per OpenGL rules on texture |
| completeness. |
| The <em>texture</em> format and dimensions defined by OpenGL for the specified |
| <em>miplevel</em> of the texture will be used to create the OpenCL image memory |
| object. |
| Only GL texture objects with an internal format that maps to appropriate |
| image channel order and data type specified in <em>tables 5.5</em> and <em>5.6</em> may be |
| used to create the OpenCL image memory object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code as described below. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromGLTexture</strong> returns a valid non-zero OpenCL image object and |
| <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context or was not |
| created from a GL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid or if |
| value specified in <em>texture_target</em> is not one of the values specified |
| in the description of <em>texture_target</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MIP_LEVEL if <em>miplevel</em> is less than the value of |
| <em>level<sub>base</sub></em> (for OpenGL implementations) or zero (for OpenGL ES |
| implementations); or greater than the value of <em>q</em> (for both OpenGL and |
| OpenGL ES). |
| <em>level<sub>base</sub></em> and <em>q</em> are defined for the texture in <em>section 3.8.10</em> |
| (Texture Completeness) of the OpenGL 2.1 specification and <em>section |
| 3.7.10</em> of the OpenGL ES 2.0 specification.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MIP_LEVEL if <em>miplevel</em> is greater than zero and the OpenGL |
| implementation does not support creating from non-zero mipmap levels.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if <em>texture</em> is not a GL texture object whose type |
| matches <em>texture_target</em>, if the specified <em>miplevel</em> of <em>texture</em> is |
| not defined, or if the width or height of the specified <em>miplevel</em> is |
| zero or if the GL texture object is incomplete.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture internal format |
| does not map to a supported OpenCL image format.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>texture</em> is a GL texture object created with a |
| border width value greater than zero.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If the state of a GL texture object is modified through the GL API (e.g. |
| <strong>glTexImage2D</strong>, <strong>glTexImage3D</strong> or the values of the texture parameters |
| GL_TEXTURE_BASE_LEVEL or GL_TEXTURE_MAX_LEVEL are modified) while there |
| exists a corresponding CL image object, subsequent use of the CL image |
| object will result in undefined behavior.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <strong>clRetainMemObject</strong> and <strong>clReleaseMemObject</strong> functions can be used to |
| retain and release the image objects.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_gl_sharing__memobjs-list-of-opengl-and-corresponding-opencl-image-formats"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-list-of-opengl-and-corresponding-opencl-image-formats"></a>11.4.1. List of OpenGL and corresponding OpenCL Image Formats</h4> |
| <div class="paragraph"> |
| <p>The table below describes the list of OpenGL texture internal formats and |
| the corresponding OpenCL image formats. |
| If an OpenGL texture object with an internal format from the table below is |
| successfully created by OpenGL, then there is guaranteed to be a mapping to |
| one of the corresponding OpenCL image format(s) in that table. |
| Texture objects created with other OpenGL internal formats may (but are not |
| guaranteed to) have a mapping to an OpenCL image format; if such mappings |
| exist, they are guaranteed to preserve all color components, data types, and |
| at least the number of bits/component actually allocated by OpenGL for that |
| format.</p> |
| </div> |
| <table id="cl_khr_gl_sharing__memobjs-mapping-of-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 27. <em>OpenGL internal formats and corresponding OpenCL internal formats</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>GL internal format</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>CL image format</strong> |
| |
| <strong>(channel order, channel data type)</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT8 or</p> |
| <p class="tableblock">CL_BGRA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_SRGB8_ALPHA8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_sRGBA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_BGRA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA8I, GL_RGBA8I_EXT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA16I, GL_RGBA16I_EXT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA32I, GL_RGBA32I_EXT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA8UI, GL_RGBA8UI_EXT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA16UI, GL_RGBA16UI_EXT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA32UI, GL_RGBA32UI_EXT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA16F, GL_RGBA16F_ARB</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RGBA32F, GL_RGBA32F_ARB</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R16F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R32F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R8I</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R16I</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R32I</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R8UI</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R16UI</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_R32UI</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG16F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG32F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG8I</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG16I</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG32I</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG8UI</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG16UI</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_RG32UI</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-renderbuffers"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-cl-image-objects-from-gl-renderbuffers"></a>11.5. OpenCL Image Objects from OpenGL Renderbuffers</h3> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromGLRenderbuffer(cl_context context, |
| cl_mem_flags flags, |
| GLuint renderbuffer, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL 2D image object from an OpenGL renderbuffer object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from an OpenGL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>renderbuffer</em> is the name of a GL renderbuffer object. |
| The renderbuffer storage must be specified before the image object can be |
| created. |
| The <em>renderbuffer</em> format and dimensions defined by OpenGL will be used to |
| create the 2D image object. |
| Only GL renderbuffers with internal formats that map to appropriate image |
| channel order and data type specified in <em>tables 5.5</em> and <em>5.6</em> can be used |
| to create the 2D image object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code as described below. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromGLRenderbuffer</strong> returns a valid non-zero OpenCL image object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context or was not |
| created from a GL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if <em>renderbuffer</em> is not a GL renderbuffer object |
| or if the width or height of <em>renderbuffer</em> is zero.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer internal |
| format does not map to a supported OpenCL image format.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>renderbuffer</em> is a multi-sample GL renderbuffer |
| object.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If the state of a GL renderbuffer object is modified through the GL API |
| (i.e. changes to the dimensions or format used to represent pixels of the GL |
| renderbuffer using appropriate GL API calls such as <strong>glRenderbufferStorage</strong>) |
| while there exists a corresponding CL image object, subsequent use of the CL |
| image object will result in undefined behavior.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <strong>clRetainMemObject</strong> and <strong>clReleaseMemObject</strong> functions can be used to |
| retain and release the image objects.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The table <a href="#cl_khr_gl_sharing__memobjs-mapping-of-image-formats"><em>OpenGL |
| internal formats and corresponding OpenCL internal formats</em></a> describes the |
| list of OpenGL renderbuffer internal formats and the corresponding OpenCL |
| image formats. |
| If an OpenGL renderbuffer object with an internal format from the table is |
| successfully created by OpenGL, then there is guaranteed to be a mapping to |
| one of the corresponding OpenCL image format(s) in that table. |
| Renderbuffer objects created with other OpenGL internal formats may (but are |
| not guaranteed to) have a mapping to an OpenCL image format; if such |
| mappings exist, they are guaranteed to preserve all color components, data |
| types, and at least the number of bits/component actually allocated by |
| OpenGL for that format.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing__memobjs-querying-gl-object-information-from-a-cl-memory-object"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-querying-gl-object-information-from-a-cl-memory-object"></a>11.6. Querying OpenGL object information from an OpenCL memory object</h3> |
| <div class="paragraph"> |
| <p>The OpenGL object used to create the OpenCL memory object and information |
| about the object type, i.e. whether it is a texture, renderbuffer or buffer |
| object, can be queried using the following function. |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetGLObjectInfo(cl_mem memobj, |
| cl_gl_object_type *gl_object_type, |
| GLuint *gl_object_name)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p><em>gl_object_type</em> returns the type of GL object attached to <em>memobj</em> and can |
| be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D, CL_GL_OBJECT_TEXTURE3D, |
| CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_GL_OBJECT_TEXTURE1D, |
| CL_GL_OBJECT_TEXTURE1D_ARRAY, CL_GL_OBJECT_TEXTURE_BUFFER, or |
| CL_GL_OBJECT_RENDERBUFFER. |
| If <em>gl_object_type</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>gl_object_name</em> returns the GL object name used to create <em>memobj</em>. |
| If <em>gl_object_name</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetGLObjectInfo</strong> returns CL_SUCCESS if the call was executed |
| successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if <em>memobj</em> is not a valid OpenCL memory object.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if there is no GL object associated with <em>memobj</em>.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetGLTextureInfo(cl_mem memobj, |
| cl_gl_texture_info param_name, |
| size_t param_value_size, |
| <span class="directive">void</span> *param_value, |
| size_t *param_value_size_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>returns additional information about the GL texture object associated with |
| <em>memobj</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_name</em> specifies what additional information about the GL texture |
| object associated with <em>memobj</em> to query. |
| The list of supported <em>param_name</em> types and the information returned in |
| <em>param_value</em> by <strong>clGetGLTextureInfo</strong> is described in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value</em> is a pointer to memory where the result being queried is |
| returned. |
| If <em>param_value</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size</em> is used to specify the size in bytes of memory pointed to |
| by <em>param_value</em>. |
| This size must be >= size of return type as described in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size_ret</em> returns the actual size in bytes of data copied to |
| <em>param_value</em>. |
| If <em>param_value_size_ret</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <table id="cl_khr_gl_sharing__memobjs-clGetGLTextureInfo-queries" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 28. <em>OpenGL texture info that may be queried with</em> <strong>clGetGLTextureInfo</strong></caption> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_gl_texture_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_GL_TEXTURE_TARGET</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GLenum</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The <em>texture_target</em> argument specified in <strong>clCreateFromGLTexture</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_GL_MIPMAP_LEVEL</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GLint</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The <em>miplevel</em> argument specified in <strong>clCreateFromGLTexture</strong>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><strong>clGetGLTextureInfo</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if <em>memobj</em> is not a valid OpenCL memory object.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if there is no GL texture object associated with |
| <em>memobj</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>param_name</em> is not valid, or if size in bytes |
| specified by <em>param_value_size</em> is less than the size of the return type |
| as described in the table above and <em>param_value</em> is not <code>NULL</code>, or if |
| <em>param_value</em> and <em>param_value_size_ret</em> are <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_sharing__memobjs-sharing-memory-objects-that-map-to-gl-objects-between-gl-and-cl-contexts"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-sharing-memory-objects-that-map-to-gl-objects-between-gl-and-cl-contexts"></a>11.7. Sharing memory objects that map to GL objects between GL and CL contexts</h3> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueAcquireGLObjects(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to acquire OpenCL memory objects that have been created from OpenGL |
| objects. |
| These objects need to be acquired before they can be used by any OpenCL |
| commands queued to a command-queue. |
| The OpenGL objects are acquired by the OpenCL context associated with |
| <em>command_queue</em> and can therefore be used by all command-queues associated |
| with the OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>command_queue</em> is a valid command-queue. |
| All devices used to create the OpenCL context associated with |
| <em>command_queue</em> must support acquiring shared CL/GL objects. |
| This constraint is enforced at context creation time.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be acquired in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of CL memory objects that correspond to |
| GL objects.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in |
| <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command |
| and can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueAcquireGLObjects</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> the function does nothing |
| and returns CL_SUCCESS. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from an OpenGL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if memory objects in <em>mem_objects</em> have not been |
| created from a GL object(s).</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueReleaseGLObjects(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to release OpenCL memory objects that have been created from OpenGL |
| objects. |
| These objects need to be released before they can be used by OpenGL. |
| The OpenGL objects are released by the OpenCL context associated with |
| <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be released in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of CL memory objects that correspond to |
| GL objects.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command |
| and can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueReleaseGLObjects</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> the function does nothing |
| and returns CL_SUCCESS. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from an OpenGL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if memory objects in <em>mem_objects</em> have not been |
| created from a GL object(s).</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_gl_sharing__memobjs-synchronizing-opencl-and-opengl-access-to-shared-objects"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-synchronizing-opencl-and-opengl-access-to-shared-objects"></a>11.7.1. Synchronizing OpenCL and OpenGL Access to Shared Objects</h4> |
| <div class="paragraph"> |
| <p>In order to ensure data integrity, the application is responsible for |
| synchronizing access to shared CL/GL objects by their respective APIs. |
| Failure to provide such synchronization may result in race conditions and |
| other undefined behavior including non-portability between implementations.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Prior to calling <strong>clEnqueueAcquireGLObjects</strong>, the application must ensure |
| that any pending GL operations which access the objects specified in |
| <em>mem_objects</em> have completed. |
| This may be accomplished portably by issuing and waiting for completion of a |
| <strong>glFinish</strong> command on all GL contexts with pending references to these |
| objects. |
| Implementations may offer more efficient synchronization methods; for |
| example on some platforms calling <strong>glFlush</strong> may be sufficient, or |
| synchronization may be implicit within a thread, or there may be |
| vendor-specific extensions that enable placing a fence in the GL command |
| stream and waiting for completion of that fence in the CL command queue. |
| Note that no synchronization methods other than <strong>glFinish</strong> are portable |
| between OpenGL implementations at this time.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Similarly, after calling <strong>clEnqueueReleaseGLObjects</strong>, the application is |
| responsible for ensuring that any pending OpenCL operations which access the |
| objects specified in <em>mem_objects</em> have completed prior to executing |
| subsequent GL commands which reference these objects. |
| This may be accomplished portably by calling <strong>clWaitForEvents</strong> with the |
| event object returned by <strong>clEnqueueReleaseGLObjects</strong>, or by calling |
| <strong>clFinish</strong>. |
| As above, some implementations may offer more efficient methods.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The application is responsible for maintaining the proper order of |
| operations if the CL and GL contexts are in separate threads.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If a GL context is bound to a thread other than the one in which |
| <strong>clEnqueueReleaseGLObjects</strong> is called, changes to any of the objects in |
| <em>mem_objects</em> may not be visible to that context without additional steps |
| being taken by the application. |
| For an OpenGL 3.1 (or later) context, the requirements are described in |
| Appendix D (“Shared Objects and Multiple Contexts”) of the OpenGL 3.1 |
| Specification. |
| For prior versions of OpenGL, the requirements are implementation-dependent.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Attempting to access the data store of an OpenGL object after it has been |
| acquired by OpenCL and before it has been released will result in undefined |
| behavior. |
| Similarly, attempting to access a shared CL/GL object from OpenCL before it |
| has been acquired by the OpenCL command queue, or after it has been |
| released, will result in undefined behavior.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_gl_sharing__memobjs-event-command-types"><a class="anchor" href="#cl_khr_gl_sharing__memobjs-event-command-types"></a>11.7.2. Event Command Types for Sharing memory objects that map to GL objects</h4> |
| <div class="paragraph"> |
| <p>The following table describes the event command types for the OpenCL commands |
| to acquire and release OpenCL memory objects that have been created from |
| OpenGL objects:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 29. List of supported event command types</caption> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Events Created By</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Event Command Type</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueAcquireGLObjects</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>ACQUIRE_<wbr>GL_<wbr>OBJECTS</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueReleaseGLObjects</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>RELEASE_<wbr>GL_<wbr>OBJECTS</code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_gl_event"><a class="anchor" href="#cl_khr_gl_event"></a>12. Creating OpenCL Event Objects from OpenGL Sync Objects</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_event-overview"><a class="anchor" href="#cl_khr_gl_event-overview"></a>12.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_gl_event</strong> extension. |
| This extension allows creating OpenCL event objects linked to OpenGL fence |
| sync objects, potentially improving efficiency of sharing images and buffers |
| between the two APIs. |
| The companion <strong>GL_ARB_cl_event</strong> extension provides the complementary |
| functionality of creating an OpenGL sync object from an OpenCL event object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>In addition, this extension modifies the behavior of |
| <strong>clEnqueueAcquireGLObjects</strong> and <strong>clEnqueueReleaseGLObjects</strong> to implicitly |
| guarantee synchronization with an OpenGL context bound in the same thread as |
| the OpenCL context.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_11"><a class="anchor" href="#_general_information_11"></a>12.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_11"><a class="anchor" href="#_version_history_11"></a>12.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_event-new-procedures-and-functions"><a class="anchor" href="#cl_khr_gl_event-new-procedures-and-functions"></a>12.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_event clCreateEventFromGLsyncKHR(cl_context context, |
| GLsync sync, |
| cl_int *errcode_ret);</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_event-new-tokens"><a class="anchor" href="#cl_khr_gl_event-new-tokens"></a>12.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Returned by <strong>clGetEventInfo</strong> when <em>param_name</em> is CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_event-additions-to-chapter-5"><a class="anchor" href="#cl_khr_gl_event-additions-to-chapter-5"></a>12.5. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add the following to the fourth paragraph of <em>section 5.11</em> (prior to the |
| description of <strong>clWaitForEvents</strong>):</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Event objects can also be used to reflect the status of an OpenGL sync |
| object. |
| The sync object in turn refers to a fence command executing in an OpenGL |
| command stream. |
| This provides another method of coordinating sharing of buffers and images |
| between OpenGL and OpenCL.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR to the valid <em>param_value</em> values |
| returned by <strong>clGetEventInfo</strong> for <em>param_name</em> CL_EVENT_COMMAND_TYPE (in the |
| third row and third column of <em>table 5.22</em>).</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add new <em>subsection 5.11.1</em>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<strong>5.11.1 Linking Event Objects to OpenGL Synchronization Objects</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>An event object may be created by linking to an OpenGL <strong>sync object</strong>. |
| Completion of such an event object is equivalent to waiting for completion |
| of the fence command associated with the linked GL sync object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_event clCreateEventFromGLsyncKHR(cl_context context, |
| GLsync sync, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates a linked event object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from an OpenGL context or share |
| group, using the <strong>cl_khr_gl_sharing</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>sync</em> is the name of a sync object in the GL share group associated with |
| <em>context</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateEventFromGLsyncKHR</strong> returns a valid OpenCL event object and |
| <em>errcode_ret</em> is set to CL_SUCCESS if the event object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context, or was not |
| created from a GL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_GL_OBJECT if <em>sync</em> is not the name of a sync object in the |
| GL share group associated with <em>context</em>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The parameters of an event object linked to a GL sync object will return the |
| following values when queried with <strong>clGetEventInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The CL_EVENT_COMMAND_QUEUE of a linked event is <code>NULL</code>, because the |
| event is not associated with any OpenCL command queue.</p> |
| </li> |
| <li> |
| <p>The CL_EVENT_COMMAND_TYPE of a linked event is |
| CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, indicating that the event is |
| associated with a GL sync object, rather than an OpenCL command.</p> |
| </li> |
| <li> |
| <p>The CL_EVENT_COMMAND_EXECUTION_STATUS of a linked event is either |
| CL_SUBMITTED, indicating that the fence command associated with the sync |
| object has not yet completed, or CL_COMPLETE, indicating that the fence |
| command has completed.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateEventFromGLsyncKHR</strong> performs an implicit <strong>clRetainEvent</strong> on the |
| returned event object. |
| Creating a linked event object also places a reference on the linked GL sync |
| object. |
| When the event object is deleted, the reference will be removed from the GL |
| sync object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Events returned from <strong>clCreateEventFromGLsyncKHR</strong> can be used in the |
| <em>event_wait_list</em> argument to <strong>clEnqueueAcquireGLObjects</strong> and CL APIs that |
| take a cl_event as an argument but do not enqueue commands. |
| Passing such events to any other CL API that enqueues commands will generate |
| a CL_INVALID_EVENT error.”</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_event-additions-to-extension-specification"><a class="anchor" href="#cl_khr_gl_event-additions-to-extension-specification"></a>12.6. Additions to the OpenCL Extension Specification</h3> |
| <div class="paragraph"> |
| <p>Add the following after the paragraph describing parameter <em>event</em> to |
| <strong>clEnqueueAcquireGLObjects</strong>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“If an OpenGL context is bound to the current thread, then any OpenGL |
| commands which</p> |
| </div> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>affect or access the contents of a memory object listed in the |
| <em>mem_objects</em> list, and</p> |
| </li> |
| <li> |
| <p>were issued on that OpenGL context prior to the call to |
| <strong>clEnqueueAcquireGLObjects</strong></p> |
| </li> |
| </ol> |
| </div> |
| <div class="paragraph"> |
| <p>will complete before execution of any OpenCL commands following the |
| <strong>clEnqueueAcquireGLObjects</strong> which affect or access any of those memory |
| objects. |
| If a non-<code>NULL</code> <em>event</em> object is returned, it will report completion only |
| after completion of such OpenGL commands.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following after the paragraph describing parameter <em>event</em> to |
| <strong>clEnqueueReleaseGLObjects</strong>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“If an OpenGL context is bound to the current thread, then any OpenGL |
| commands which</p> |
| </div> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>affect or access the contents of the memory objects listed in the |
| <em>mem_objects</em> list, and</p> |
| </li> |
| <li> |
| <p>are issued on that context after the call to <strong>clEnqueueReleaseGLObjects</strong></p> |
| </li> |
| </ol> |
| </div> |
| <div class="paragraph"> |
| <p>will not execute until after execution of any OpenCL commands preceding the |
| <strong>clEnqueueReleaseGLObjects</strong> which affect or access any of those memory |
| objects. |
| If a non-<code>NULL</code> <em>event</em> object is returned, it will report completion before |
| execution of such OpenGL commands.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Replace the second paragraph of |
| <a href="#cl_khr_gl_sharing__memobjs-synchronizing-opencl-and-opengl-access-to-shared-objects">Synchronizing OpenCL and OpenGL Access to Shared Objects</a> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Prior to calling <strong>clEnqueueAcquireGLObjects</strong>, the application must ensure |
| that any pending OpenGL operations which access the objects specified in |
| <em>mem_objects</em> have completed.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the <strong>cl_khr_gl_event</strong> extension is supported, then the OpenCL |
| implementation will ensure that any such pending OpenGL operations are |
| complete for an OpenGL context bound to the same thread as the OpenCL |
| context. |
| This is referred to as <em>implicit synchronization</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the <strong>cl_khr_gl_event</strong> extension is supported and the OpenGL context in |
| question supports fence sync objects, completion of OpenGL commands may also |
| be determined by placing a GL fence command after those commands using |
| <strong>glFenceSync</strong>, creating an event from the resulting GL sync object using |
| <strong>clCreateEventFromGLsyncKHR</strong>, and determining completion of that event |
| object via <strong>clEnqueueAcquireGLObjects</strong>. |
| This method may be considerably more efficient than calling <strong>glFinish</strong>, and |
| is referred to as <em>explicit synchronization</em>. |
| Explicit synchronization is most useful when an OpenGL context bound to |
| another thread is accessing the memory objects.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the <strong>cl_khr_gl_event</strong> extension is not supported, completion of OpenGL |
| commands may be determined by issuing and waiting for completion of a |
| <strong>glFinish</strong> command on all OpenGL contexts with pending references to these |
| objects. |
| Some implementations may offer other efficient synchronization methods. |
| If such methods exist they will be described in platform-specific |
| documentation.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note that no synchronization method other than <strong>glFinish</strong> is portable |
| between all OpenGL implementations and all OpenCL implementations. |
| While this is the only way to ensure completion that is portable to all |
| platforms, <strong>glFinish</strong> is an expensive operation and its use should be |
| avoided if the <strong>cl_khr_gl_event</strong> extension is supported on a platform.”</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_event-issues"><a class="anchor" href="#cl_khr_gl_event-issues"></a>12.7. Issues</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>How are references between CL events and GL syncs handled?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>PROPOSED: The linked CL event places a single reference on the GL sync |
| object. |
| That reference is removed when the CL event is deleted. |
| A more expensive alternative would be to reflect changes in the CL event |
| reference count through to the GL sync.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>How are linkages to synchronization primitives in other APIs handled?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>UNRESOLVED. |
| We will at least want to have a way to link events to EGL sync objects. |
| There is probably no analogous DX concept. |
| There would be an entry point for each type of synchronization primitive to |
| be linked to, such as clCreateEventFromEGLSyncKHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p>An alternative is a generic clCreateEventFromExternalEvent taking an |
| attribute list. |
| The attribute list would include information defining the type of the |
| external primitive and additional information (GL sync object handle, EGL |
| display and sync object handle, etc.) specific to that type. |
| This allows a single entry point to be reused.</p> |
| </div> |
| <div class="paragraph"> |
| <p>These will probably be separate extensions following the API proposed here.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should the CL_EVENT_COMMAND_TYPE correspond to the type of command |
| (fence) or the type of the linked sync object?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>PROPOSED: To the type of the linked sync object.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should we support both explicit and implicit synchronization?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>PROPOSED: Yes. |
| Implicit synchronization is suitable when GL and CL are executing in the |
| same application thread. |
| Explicit synchronization is suitable when they are executing in different |
| threads but the expense of glFinish is too high.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should this be a platform or device extension?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>PROPOSED: Platform extension. |
| However, this may result in considerable under-the-hood work to implement the |
| sync→event semantics using only the public GL API when multiple |
| drivers and devices with different GL support levels coexist in the same |
| runtime.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Where can events generated from GL syncs be used?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>PROPOSED: Only with clEnqueueAcquireGLObjects, and attempting to use such an |
| event elsewhere will generate an error. |
| There is no apparent use case for using such events elsewhere, and possibly |
| some cost to supporting it, balanced by the cost of checking the source of |
| events in all other commands accepting them as parameters.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_d3d10_sharing"><a class="anchor" href="#cl_khr_d3d10_sharing"></a>13. Creating OpenCL Memory Objects from Direct3D 10 Buffers and Textures</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-overview"><a class="anchor" href="#cl_khr_d3d10_sharing-overview"></a>13.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_d3d10_sharing</strong> extension. |
| The goal of this extension is to provide interoperability between OpenCL and |
| Direct3D 10.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_12"><a class="anchor" href="#_general_information_12"></a>13.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_12"><a class="anchor" href="#_version_history_12"></a>13.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-new-procedures-and-functions"><a class="anchor" href="#cl_khr_d3d10_sharing-new-procedures-and-functions"></a>13.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, |
| cl_d3d10_device_source_khr d3d_device_source, |
| <span class="directive">void</span> *d3d_object, |
| cl_d3d10_device_set_khr d3d_device_set, |
| cl_uint num_entries, |
| cl_device_id *devices, |
| cl_uint *num_devices) |
| |
| cl_mem clCreateFromD3D10BufferKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Buffer *resource, |
| cl_int *errcode_ret) |
| |
| cl_mem clCreateFromD3D10Texture2DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Texture2D *resource, |
| UINT subresource, |
| cl_int *errcode_ret) |
| |
| cl_mem clCreateFromD3D10Texture3DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Texture3D *resource, |
| UINT subresource, |
| cl_int *errcode_ret) |
| |
| cl_int clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event) |
| |
| cl_int clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-new-tokens"><a class="anchor" href="#cl_khr_d3d10_sharing-new-tokens"></a>13.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Accepted as a Direct3D 10 device source in the <em>d3d_device_source</em> parameter |
| of <strong>clGetDeviceIDsFromD3D10KHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_D3D10_DEVICE_KHR |
| CL_D3D10_DXGI_ADAPTER_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a set of Direct3D 10 devices in the <em>d3d_device_set</em> parameter |
| of <strong>clGetDeviceIDsFromD3D10KHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_PREFERRED_DEVICES_FOR_D3D10_KHR |
| CL_ALL_DEVICES_FOR_D3D10_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a property name in the <em>properties</em> parameter of |
| <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_CONTEXT_D3D10_DEVICE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a property name in the <em>param_name</em> parameter of |
| <strong>clGetContextInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the property being queried in the <em>param_name</em> parameter of |
| <strong>clGetMemObjectInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_MEM_D3D10_RESOURCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the property being queried in the <em>param_name</em> parameter of |
| <strong>clGetImageInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_IMAGE_D3D10_SUBRESOURCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned in the <em>param_value</em> parameter of <strong>clGetEventInfo</strong> when |
| <em>param_name</em> is CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR |
| CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong> if the Direct3D |
| 10 device specified for interoperability is not compatible with the devices |
| against which the context is to be created:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_D3D10_DEVICE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateFromD3D10BufferKHR</strong> when <em>resource</em> is not a Direct3D |
| 10 buffer object, and by <strong>clCreateFromD3D10Texture2DKHR</strong> and |
| <strong>clCreateFromD3D10Texture3DKHR</strong> when <em>resource</em> is not a Direct3D 10 texture |
| object:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_D3D10_RESOURCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clEnqueueAcquireD3D10ObjectsKHR</strong> when any of <em>mem_objects</em> are |
| currently acquired by OpenCL:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clEnqueueReleaseD3D10ObjectsKHR</strong> when any of <em>mem_objects</em> are |
| not currently acquired by OpenCL:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-additions-to-chapter-4"><a class="anchor" href="#cl_khr_d3d10_sharing-additions-to-chapter-4"></a>13.5. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>In <em>section 4.4</em>, replace the description of <em>properties</em> under |
| <strong>clCreateContext</strong> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<em>properties</em> specifies a list of context property names and their |
| corresponding values. |
| Each property is followed immediately by the corresponding desired value. |
| The list is terminated with zero. |
| If a property is not specified in <em>properties</em>, then its default value |
| (listed in <em>table 4.5</em>) is used (it is said to be specified implicitly). |
| If <em>properties</em> is <code>NULL</code> or empty (points to a list whose first value is |
| zero), all attributes take on their default values.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following to <em>table 4.5</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_properties enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_D3D10_DEVICE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">ID3D10Device *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the ID3D10Device * to use for Direct3D 10 interoperability. |
| The default value is <code>NULL</code>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clCreateContext</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_D3D10_DEVICE_KHR if the value of the property |
| CL_CONTEXT_D3D10_DEVICE_KHR is non-<code>NULL</code> and does not specify a valid |
| Direct3D 10 device with which the <em>cl_device_ids</em> against which this |
| context is to be created may interoperate.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if Direct3D 10 interoperability is specified by |
| setting CL_CONTEXT_D3D10_DEVICE_KHR to a non-<code>NULL</code> value, and |
| interoperability with another graphics API is also specified.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clCreateContextFromType</strong> the same new errors |
| described above for <strong>clCreateContext</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following row to <em>table 4.6</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Information returned in param_value</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_bool</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns CL_TRUE if Direct3D 10 resources created as shared by setting |
| <em>MiscFlags</em> to include D3D10_RESOURCE_MISC_SHARED will perform faster when |
| shared with OpenCL, compared with resources which have not set this flag. |
| Otherwise returns CL_FALSE.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-additions-to-chapter-5"><a class="anchor" href="#cl_khr_d3d10_sharing-additions-to-chapter-5"></a>13.6. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clGetMemObjectInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_D3D10_RESOURCE_KHR if <em>param_name</em> is |
| CL_MEM_D3D10_RESOURCE_KHR and <em>memobj</em> was not created by the function |
| <strong>clCreateFromD3D10BufferKHR</strong>, <strong>clCreateFromD3D10Texture2DKHR</strong>, or |
| <strong>clCreateFromD3D10Texture3DKHR</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Extend <em>table 5.12</em> to include the following entry.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_mem_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_MEM_D3D10_RESOURCE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">ID3D10Resource *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">If <em>memobj</em> was created using <strong>clCreateFromD3D10BufferKHR</strong>, |
| <strong>clCreateFromD3D10Texture2DKHR</strong>, or <strong>clCreateFromD3D10Texture3DKHR</strong>, |
| returns the <em>resource</em> argument specified when <em>memobj</em> was created.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clGetImageInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_D3D10_RESOURCE_KHR if <em>param_name</em> is |
| CL_IMAGE_D3D10_SUBRESOURCE_KHR and <em>image</em> was not created by the function |
| <strong>clCreateFromD3D10Texture2DKHR</strong>, or <strong>clCreateFromD3D10Texture3DKHR</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Extend <em>table 5.9</em> to include the following entry.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_image_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_IMAGE_D3D10_SUBRESOURCE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">If <em>image</em> was created using <strong>clCreateFromD3D10Texture2DKHR</strong>, or |
| <strong>clCreateFromD3D10Texture3DKHR</strong>, returns the <em>subresource</em> argument |
| specified when <em>image</em> was created.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to <em>table 5.22</em> in the <strong>Info returned in <em>param_value</em></strong> column for |
| <em>cl_event_info</em> = CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR |
| CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-sharing-memory-objects-with-direct3d-10-resources"><a class="anchor" href="#cl_khr_d3d10_sharing-sharing-memory-objects-with-direct3d-10-resources"></a>13.7. Sharing Memory Objects with Direct3D 10 Resources</h3> |
| <div class="paragraph"> |
| <p>This section discusses OpenCL functions that allow applications to use |
| Direct3D 10 resources as OpenCL memory objects. |
| This allows efficient sharing of data between OpenCL and Direct3D 10. |
| The OpenCL API may be used to execute kernels that read and/or write memory |
| objects that are also Direct3D 10 resources. |
| An OpenCL image object may be created from a Direct3D 10 texture resource. |
| An OpenCL buffer object may be created from a Direct3D 10 buffer resource. |
| OpenCL memory objects may be created from Direct3D 10 objects if and only if |
| the OpenCL context has been created from a Direct3D 10 device.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-querying-opencl-devices-corresponding-to-direct3d-10-devices"><a class="anchor" href="#cl_khr_d3d10_sharing-querying-opencl-devices-corresponding-to-direct3d-10-devices"></a>13.7.1. Querying OpenCL Devices Corresponding to Direct3D 10 Devices</h4> |
| <div class="paragraph"> |
| <p>The OpenCL devices corresponding to a Direct3D 10 device may be queried. |
| The OpenCL devices corresponding to a DXGI adapter may also be queried. |
| The OpenCL devices corresponding to a Direct3D 10 device will be a subset of |
| the OpenCL devices corresponding to the DXGI adapter against which the |
| Direct3D 10 device was created.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The OpenCL devices corresponding to a Direct3D 10 device or a DXGI adapter |
| may be queried using the function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, |
| cl_d3d10_device_source_khr d3d_device_source, |
| <span class="directive">void</span> *d3d_object, |
| cl_d3d10_device_set_khr d3d_device_set, |
| cl_uint num_entries, |
| cl_device_id *devices, |
| cl_uint *num_devices)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p><em>platform</em> refers to the platform ID returned by <strong>clGetPlatformIDs</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>d3d_device_source</em> specifies the type of <em>d3d_object</em>, and must be one of |
| the values shown in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>d3d_object</em> specifies the object whose corresponding OpenCL devices are |
| being queried. |
| The type of <em>d3d_object</em> must be as specified in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>d3d_device_set</em> specifies the set of devices to return, and must be one of |
| the values shown in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_entries</em> is the number of cl_device_id entries that can be added to |
| <em>devices</em>. |
| If <em>devices</em> is not <code>NULL</code> then <em>num_entries</em> must be greater than zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>devices</em> returns a list of OpenCL devices found. |
| The cl_device_id values returned in <em>devices</em> can be used to identify a |
| specific OpenCL device. |
| If <em>devices</em> is <code>NULL</code>, this argument is ignored. |
| The number of OpenCL devices returned is the minimum of the value specified |
| by <em>num_entries</em> and the number of OpenCL devices corresponding to |
| <em>d3d_object</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_devices</em> returns the number of OpenCL devices available that correspond |
| to <em>d3d_object</em>. |
| If <em>num_devices</em> is <code>NULL</code>, this argument is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetDeviceIDsFromD3D10KHR</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| Otherwise it may return</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_PLATFORM if <em>platform</em> is not a valid platform.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>d3d_device_source</em> is not a valid value, |
| <em>d3d_device_set</em> is not a valid value, <em>num_entries</em> is equal to zero |
| and <em>devices</em> is not <code>NULL</code>, or if both <em>num_devices</em> and <em>devices</em> are |
| <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_DEVICE_NOT_FOUND if no OpenCL devices that correspond to <em>d3d_object</em> |
| were found.</p> |
| </li> |
| </ul> |
| </div> |
| <table id="cl_khr_d3d10_sharing-clGetDeviceIDsFromD3D10KHR-object-type" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 30. <em>Direct3D 10 object types that may be used by</em> <strong>clGetDeviceIDsFromD3D10KHR</strong></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_d3d10_device_source_khr</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Type of <em>d3d_object</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_D3D10_DEVICE_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">ID3D10Device *</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_D3D10_DXGI_ADAPTER_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDXGIAdapter *</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table id="cl_khr_d3d10_sharing-clGetDeviceIDsFromD3D10KHR-devices" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 31. <em>Sets of devices queryable using</em> <strong>clGetDeviceIDsFromD3D10KHR</strong></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_d3d10_device_set_khr</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Devices returned in <em>devices</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_PREFERRED_DEVICES_FOR_D3D10_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The preferred OpenCL devices associated with the specified Direct3D |
| object.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_ALL_DEVICES_FOR_D3D10_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">All OpenCL devices which may interoperate with the specified Direct3D |
| object. |
| Performance of sharing data on these devices may be considerably less than |
| on the preferred devices.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-lifetime-of-shared-objects"><a class="anchor" href="#cl_khr_d3d10_sharing-lifetime-of-shared-objects"></a>13.7.2. Lifetime of Shared Objects</h4> |
| <div class="paragraph"> |
| <p>An OpenCL memory object created from a Direct3D 10 resource remains valid as |
| long as the corresponding Direct3D 10 resource has not been deleted. |
| If the Direct3D 10 resource is deleted through the Direct3D 10 API, |
| subsequent use of the OpenCL memory object will result in undefined |
| behavior, including but not limited to possible OpenCL errors, data |
| corruption, and program termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The successful creation of a cl_context against a Direct3D 10 device |
| specified via the context create parameter CL_CONTEXT_D3D10_DEVICE_KHR will |
| increment the internal Direct3D reference count on the specified Direct3D 10 |
| device. |
| The internal Direct3D reference count on that Direct3D 10 device will be |
| decremented when the OpenCL reference count on the returned OpenCL context |
| drops to zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The OpenCL context and corresponding command-queues are dependent on the |
| existence of the Direct3D 10 device from which the OpenCL context was |
| created. |
| If the Direct3D 10 device is deleted through the Direct3D 10 API, subsequent |
| use of the OpenCL context will result in undefined behavior, including but |
| not limited to possible OpenCL errors, data corruption, and program |
| termination.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-sharing-direct3d-10-buffer-resources-as-opencl-buffer-objects"><a class="anchor" href="#cl_khr_d3d10_sharing-sharing-direct3d-10-buffer-resources-as-opencl-buffer-objects"></a>13.7.3. Sharing Direct3D 10 Buffer Resources as OpenCL Buffer Objects</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D10BufferKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Buffer *resource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL buffer object from a Direct3D 10 buffer.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a Direct3D 10 device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>resource</em> is a pointer to the Direct3D 10 buffer to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromD3D10BufferKHR</strong> returns a valid non-zero OpenCL buffer object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the buffer object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_D3D10_RESOURCE_KHR if <em>resource</em> is not a Direct3D 10 buffer |
| resource, if <em>resource</em> was created with the D3D10_USAGE flag |
| D3D10_USAGE_IMMUTABLE, if a cl_mem from <em>resource</em> has already been |
| created using <strong>clCreateFromD3D10BufferKHR</strong>, or if <em>context</em> was not |
| created against the same Direct3D 10 device from which <em>resource</em> was |
| created.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The size of the returned OpenCL buffer object is the same as the size of |
| <em>resource</em>. |
| This call will increment the internal Direct3D reference count on |
| <em>resource</em>. |
| The internal Direct3D reference count on <em>resource</em> will be decremented when |
| the OpenCL reference count on the returned OpenCL memory object drops to |
| zero.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-sharing-direct3d-10-texture-and-resources-as-opencl-image-objects"><a class="anchor" href="#cl_khr_d3d10_sharing-sharing-direct3d-10-texture-and-resources-as-opencl-image-objects"></a>13.7.4. Sharing Direct3D 10 Texture and Resources as OpenCL Image Objects</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D10Texture2DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Texture2D *resource, |
| UINT subresource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL 2D image object from a subresource of a Direct3D 10 2D |
| texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a Direct3D 10 device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>resource</em> is a pointer to the Direct3D 10 2D texture to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>subresource</em> is the subresource of <em>resource</em> to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromD3D10Texture2DKHR</strong> returns a valid non-zero OpenCL image object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid or if |
| <em>subresource</em> is not a valid subresource index for <em>resource</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_D3D10_RESOURCE_KHR if <em>resource</em> is not a Direct3D 10 texture |
| resource, if <em>resource</em> was created with the D3D10_USAGE flag |
| D3D10_USAGE_IMMUTABLE, if <em>resource</em> is a multisampled texture, if a |
| cl_mem from subresource <em>subresource</em> of <em>resource</em> has already been |
| created using <strong>clCreateFromD3D10Texture2DKHR</strong>, or if <em>context</em> was not |
| created against the same Direct3D 10 device from which <em>resource</em> was |
| created.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the Direct3D 10 texture format of |
| <em>resource</em> is not listed in the table |
| <a href="#cl_khr_d3d10_sharing-mapping-of-image-formats"><em>Direct3D 10 formats and |
| corresponding OpenCL image formats</em></a> or if the Direct3D 10 texture |
| format of <em>resource</em> does not map to a supported OpenCL image format.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The width and height of the returned OpenCL 2D image object are determined |
| by the width and height of subresource <em>subresource</em> of <em>resource</em>. |
| The channel type and order of the returned OpenCL 2D image object are |
| determined by the format of <em>resource</em> according to the table |
| <a href="#cl_khr_d3d10_sharing-mapping-of-image-formats"><em>Direct3D 10 formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This call will increment the internal Direct3D reference count on |
| <em>resource</em>. |
| The internal Direct3D reference count on <em>resource</em> will be decremented when |
| the OpenCL reference count on the returned OpenCL memory object drops to |
| zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D10Texture3DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Texture3D *resource, |
| UINT subresource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL 3D image object from a subresource of a Direct3D 10 3D |
| texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a Direct3D 10 device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>resource</em> is a pointer to the Direct3D 10 3D texture to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>subresource</em> is the subresource of <em>resource</em> to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromD3D10Texture3DKHR</strong> returns a valid non-zero OpenCL image object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid or if |
| <em>subresource</em> is not a valid subresource index for <em>resource</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_D3D10_RESOURCE_KHR if <em>resource</em> is not a Direct3D 10 texture |
| resource, if <em>resource</em> was created with the D3D10_USAGE flag |
| D3D10_USAGE_IMMUTABLE, if <em>resource</em> is a multisampled texture, if a |
| cl_mem from subresource <em>subresource</em> of <em>resource</em> has already been |
| created using <strong>clCreateFromD3D10Texture3DKHR</strong>, or if <em>context</em> was not |
| created against the same Direct3D 10 device from which <em>resource</em> was |
| created.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the Direct3D 10 texture format of |
| <em>resource</em> is not listed in the table |
| <a href="#cl_khr_d3d10_sharing-mapping-of-image-formats"><em>Direct3D 10 formats and |
| corresponding OpenCL image formats</em></a> or if the Direct3D 10 texture |
| format of <em>resource</em> does not map to a supported OpenCL image format.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The width, height and depth of the returned OpenCL 3D image object are |
| determined by the width, height and depth of subresource <em>subresource</em> of |
| <em>resource</em>. |
| The channel type and order of the returned OpenCL 3D image object are |
| determined by the format of <em>resource</em> according to the table |
| <a href="#cl_khr_d3d10_sharing-mapping-of-image-formats"><em>Direct3D 10 formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This call will increment the internal Direct3D reference count on |
| <em>resource</em>. |
| The internal Direct3D reference count on <em>resource</em> will be decremented when |
| the OpenCL reference count on the returned OpenCL memory object drops to |
| zero.</p> |
| </div> |
| <table id="cl_khr_d3d10_sharing-mapping-of-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 32. <em>Direct3D 10 formats and corresponding OpenCL image formats</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>DXGI format</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>CL image format</strong> |
| |
| <strong>(channel order, channel data type)</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32B32A32_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32B32A32_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32B32A32_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_B8G8R8A8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_BGRA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT8</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-10-resources"><a class="anchor" href="#cl_khr_d3d10_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-10-resources"></a>13.7.5. Querying Direct3D properties of memory objects created from Direct3D 10 resources</h4> |
| <div class="paragraph"> |
| <p>Properties of Direct3D 10 objects may be queried using <strong>clGetMemObjectInfo</strong> |
| and <strong>clGetImageInfo</strong> with <em>param_name</em> CL_MEM_D3D10_RESOURCE_KHR and |
| CL_IMAGE_D3D10_SUBRESOURCE_KHR respectively as described in <em>sections 5.4.3</em> |
| and <em>5.3.6</em>.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-sharing-memory-objects-created-from-direct3d-10-resources-between-direct3d-10-and-opencl-contexts"><a class="anchor" href="#cl_khr_d3d10_sharing-sharing-memory-objects-created-from-direct3d-10-resources-between-direct3d-10-and-opencl-contexts"></a>13.7.6. Sharing memory objects created from Direct3D 10 resources between Direct3D 10 and OpenCL contexts</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to acquire OpenCL memory objects that have been created from |
| Direct3D 10 resources. |
| The Direct3D 10 objects are acquired by the OpenCL context associated with |
| <em>command_queue</em> and can therefore be used by all command-queues associated |
| with the OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from Direct3D 10 resources must be acquired |
| before they can be used by any OpenCL commands queued to a command-queue. |
| If an OpenCL memory object created from a Direct3D 10 resource is used while |
| it is not currently acquired by OpenCL, the call attempting to use that |
| OpenCL memory object will return CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If CL_CONTEXT_INTEROP_USER_SYNC is not specified as CL_TRUE during context |
| creation, <strong>clEnqueueAcquireD3D10ObjectsKHR</strong> provides the synchronization |
| guarantee that any Direct3D 10 calls involving the interop device(s) used in |
| the OpenCL context made before <strong>clEnqueueAcquireD3D10ObjectsKHR</strong> is called |
| will complete executing before <em>event</em> reports completion and before the |
| execution of any subsequent OpenCL work issued in <em>command_queue</em> begins. |
| If the context was created with properties specifying |
| CL_CONTEXT_INTEROP_USER_SYNC as CL_TRUE, the user is responsible for |
| guaranteeing that any Direct3D 10 calls involving the interop device(s) used |
| in the OpenCL context made before <strong>clEnqueueAcquireD3D10ObjectsKHR</strong> is |
| called have completed before calling <strong>clEnqueueAcquireD3D10ObjectsKHR</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>command_queue</em> is a valid command-queue.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be acquired in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from Direct3D 10 resources.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueAcquireD3D10ObjectsKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> then the function does |
| nothing and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects or if memory objects in <em>mem_objects</em> have not |
| been created from Direct3D 10 resources.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from a Direct3D 10 context.</p> |
| </li> |
| <li> |
| <p>CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR if memory objects in |
| <em>mem_objects</em> have previously been acquired using |
| <strong>clEnqueueAcquireD3D10ObjectsKHR</strong> but have not been released using |
| <strong>clEnqueueReleaseD3D10ObjectsKHR</strong>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to release OpenCL memory objects that have been created from |
| Direct3D 10 resources. |
| The Direct3D 10 objects are released by the OpenCL context associated with |
| <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from Direct3D 10 resources which have been |
| acquired by OpenCL must be released by OpenCL before they may be accessed by |
| Direct3D 10. |
| Accessing a Direct3D 10 resource while its corresponding OpenCL memory |
| object is acquired is in error and will result in undefined behavior, |
| including but not limited to possible OpenCL errors, data corruption, and |
| program termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If CL_CONTEXT_INTEROP_USER_SYNC is not specified as CL_TRUE during context |
| creation, <strong>clEnqueueReleaseD3D10ObjectsKHR</strong> provides the synchronization |
| guarantee that any Direct3D 10 calls involving the interop |
| device(s) used in the OpenCL context made after the call to |
| <strong>clEnqueueReleaseD3D10ObjectsKHR</strong> will not start executing until after all |
| events in <em>event_wait_list</em> are complete and all work already submitted to |
| <em>command_queue</em> completes execution. |
| If the context was created with properties specifying |
| CL_CONTEXT_INTEROP_USER_SYNC as CL_TRUE, the user is responsible for |
| guaranteeing that any Direct3D 10 calls involving the interop device(s) used |
| in the OpenCL context made after <strong>clEnqueueReleaseD3D10ObjectsKHR</strong> will not |
| start executing until after the event returned by |
| <strong>clEnqueueReleaseD3D10ObjectsKHR</strong> reports completion.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be released in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from Direct3D 10 resources.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueReleaseD3D10ObjectsKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> the function does nothing |
| and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects or if memory objects in <em>mem_objects</em> have not |
| been created from Direct3D 10 resources.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from a Direct3D 10 device.</p> |
| </li> |
| <li> |
| <p>CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR if memory objects in <em>mem_objects</em> |
| have not previously been acquired using |
| <strong>clEnqueueAcquireD3D10ObjectsKHR</strong>, or have been released using |
| <strong>clEnqueueReleaseD3D10ObjectsKHR</strong> since the last time that they were |
| acquired.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in |
| <em>event_wait_list</em> are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d10_sharing-event-command-types"><a class="anchor" href="#cl_khr_d3d10_sharing-event-command-types"></a>13.7.7. Event Command Types for Sharing memory objects that map to Direct3D 10 objects</h4> |
| <div class="paragraph"> |
| <p>The following table describes the event command types for the OpenCL commands |
| to acquire and release OpenCL memory objects that have been created from |
| Direct3D 10 objects:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 33. List of supported event command types</caption> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Events Created By</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Event Command Type</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueAcquireD3D10ObjectsKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>ACQUIRE_<wbr>D3D10_<wbr>OBJECTS_<wbr>KHR</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueReleaseD3D10ObjectsKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>RELEASE_<wbr>D3D10_<wbr>OBJECTS_<wbr>KHR</code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d10_sharing-issues"><a class="anchor" href="#cl_khr_d3d10_sharing-issues"></a>13.8. Issues</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>Should this extension be KHR or EXT?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>PROPOSED: KHR. |
| If this extension is to be approved by Khronos then it should be KHR, |
| otherwise EXT. |
| Not all platforms can support this extension, but that is also true of |
| OpenGL interop.</p> |
| </div> |
| <div class="paragraph"> |
| <p>RESOLVED: KHR.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Requiring SharedHandle on ID3D10Resource</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>Requiring this can largely simplify things at the DDI level and make some |
| implementations faster. |
| However, the DirectX spec only defines the shared handle for a subset of the |
| resources we would like to support:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>D3D10_RESOURCE_MISC_SHARED - Enables the sharing of resource data between |
| two or more Direct3D devices. |
| The only resources that can be shared are 2D non-mipmapped textures.</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>PROPOSED A: Add wording to the spec about some implementations needing the |
| resource set up as shared:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Some implementations may require the resource to be shared on the D3D10 |
| side of the API”</p> |
| </div> |
| <div class="paragraph"> |
| <p>If we do that, do we need another enum to describe this failure case?</p> |
| </div> |
| <div class="paragraph"> |
| <p>PROPOSED B: Require that all implementations support both shared and |
| non-shared resources. |
| The restrictions prohibiting multisample textures and the flag |
| D3D10_USAGE_IMMUTABLE guarantee software access to all shareable resources.</p> |
| </div> |
| <div class="paragraph"> |
| <p>RESOLVED: Require that implementations support both |
| D3D10_RESOURCE_MISC_SHARED being set and not set. |
| Add the query for CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR to determine |
| on a per-context basis which method will be faster.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Texture1D support</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>There is not a matching CL type, so do we want to support this and map to |
| buffer or Texture2D? If so the command might correspond to the 2D / 3D |
| versions:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D10Texture1D(cl_context context, |
| cl_mem_flags flags, |
| ID3D10Texture1D *resource, |
| UINT subresource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>RESOLVED: We will not add support for ID3D10Texture1D objects unless a |
| corresponding OpenCL 1D Image type is created.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>CL/D3D10 queries</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The GL interop has clGetGLObjectInfo and clGetGLTextureInfo. |
| It is unclear if these are needed on the D3D10 interop side since the D3D10 |
| spec makes these queries trivial on the D3D10 object itself. |
| Also, not all of the semantics of the GL call map across.</p> |
| </div> |
| <div class="paragraph"> |
| <p>PROPOSED: Add the <strong>clGetMemObjectInfo</strong> and <strong>clGetImageInfo</strong> parameter names |
| CL_MEM_D3D10_RESOURCE_KHR and CL_IMAGE_D3D10_SUBRESOURCE_KHR to query the |
| D3D10 resource from which a cl_mem was created. |
| From this data, any D3D10 side information may be queried using the D3D10 |
| API.</p> |
| </div> |
| <div class="paragraph"> |
| <p>RESOLVED: We will use <strong>clGetMemObjectInfo</strong> and <strong>clGetImageInfo</strong> to access |
| this information.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_d3d11_sharing"><a class="anchor" href="#cl_khr_d3d11_sharing"></a>14. Creating OpenCL Memory Objects from Direct3D 11 Buffers and Textures</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d11_sharing-overview"><a class="anchor" href="#cl_khr_d3d11_sharing-overview"></a>14.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_d3d11_sharing</strong> extension. |
| The goal of this extension is to provide interoperability between OpenCL and |
| Direct3D 11.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_13"><a class="anchor" href="#_general_information_13"></a>14.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_13"><a class="anchor" href="#_version_history_13"></a>14.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d11_sharing-new-procedures-and-functions"><a class="anchor" href="#cl_khr_d3d11_sharing-new-procedures-and-functions"></a>14.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, |
| cl_d3d11_device_source_khr d3d_device_source, |
| <span class="directive">void</span> *d3d_object, |
| cl_d3d11_device_set_khr d3d_device_set, |
| cl_uint num_entries, |
| cl_device_id *devices, |
| cl_uint *num_devices) |
| |
| cl_mem clCreateFromD3D11BufferKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D11Buffer *resource, |
| cl_int *errcode_ret) |
| |
| cl_mem clCreateFromD3D11Texture2DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D11Texture2D *resource, |
| UINT subresource, |
| cl_int *errcode_ret) |
| |
| cl_mem clCreateFromD3D11Texture3DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D11Texture3D *resource, |
| UINT subresource, |
| cl_int *errcode_ret) |
| |
| cl_int clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event) |
| |
| cl_int clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d11_sharing-new-tokens"><a class="anchor" href="#cl_khr_d3d11_sharing-new-tokens"></a>14.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Accepted as a Direct3D 11 device source in the <em>d3d_device_source</em> parameter |
| of <strong>clGetDeviceIDsFromD3D11KHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_D3D11_DEVICE_KHR |
| CL_D3D11_DXGI_ADAPTER_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a set of Direct3D 11 devices in the <em>d3d_device_set</em> parameter of |
| <strong>clGetDeviceIDsFromD3D11KHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_PREFERRED_DEVICES_FOR_D3D11_KHR |
| CL_ALL_DEVICES_FOR_D3D11_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a property name in the <em>properties</em> parameter of |
| <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_CONTEXT_D3D11_DEVICE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a property name in the <em>param_name</em> parameter of |
| <strong>clGetContextInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the property being queried in the <em>param_name</em> parameter of |
| <strong>clGetMemObjectInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_MEM_D3D11_RESOURCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the property being queried in the <em>param_name</em> parameter of |
| <strong>clGetImageInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_IMAGE_D3D11_SUBRESOURCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned in the <em>param_value</em> parameter of <strong>clGetEventInfo</strong> when |
| <em>param_name</em> is CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR |
| CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong> if the Direct3D |
| 11 device specified for interoperability is not compatible with the devices |
| against which the context is to be created:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_D3D11_DEVICE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateFromD3D11BufferKHR</strong> when <em>resource</em> is not a Direct3D |
| 11 buffer object, and by <strong>clCreateFromD3D11Texture2DKHR</strong> and |
| <strong>clCreateFromD3D11Texture3DKHR</strong> when <em>resource</em> is not a Direct3D 11 texture |
| object.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_D3D11_RESOURCE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clEnqueueAcquireD3D11ObjectsKHR</strong> when any of <em>mem_objects</em> are |
| currently acquired by OpenCL:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clEnqueueReleaseD3D11ObjectsKHR</strong> when any of <em>mem_objects</em> are |
| not currently acquired by OpenCL:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d11_sharing-additions-to-chapter-4"><a class="anchor" href="#cl_khr_d3d11_sharing-additions-to-chapter-4"></a>14.5. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>In <em>section 4.4</em>, replace the description of <em>properties</em> under |
| <strong>clCreateContext</strong> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<em>properties</em> specifies a list of context property names and their |
| corresponding values. |
| Each property is followed immediately by the corresponding desired value. |
| The list is terminated with zero. |
| If a property is not specified in <em>properties</em>, then its default value |
| (listed in <em>table 4.5</em>) is used (it is said to be specified implicitly). |
| If <em>properties</em> is <code>NULL</code> or empty (points to a list whose first value is |
| zero), all attributes take on their default values.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following to <em>table 4.5</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_properties enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_D3D11_DEVICE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">ID3D11Device *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the ID3D11Device * to use for Direct3D 11 interoperability.</p> |
| <p class="tableblock"> The default value is <code>NULL</code>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clCreateContext</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_D3D11_DEVICE_KHR if the value of the property |
| CL_CONTEXT_D3D11_DEVICE_KHR is non-<code>NULL</code> and does not specify a valid |
| Direct3D 11 device with which the <em>cl_device_ids</em> against which this |
| context is to be created may interoperate.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if Direct3D 11 interoperability is specified by |
| setting CL_CONTEXT_D3D11_DEVICE_KHR to a non-<code>NULL</code> value, and |
| interoperability with another graphics API is also specified.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clCreateContextFromType</strong> the same new errors |
| described above for <strong>clCreateContext</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following row to <em>table 4.6</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Information returned in param_value</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_bool</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns CL_TRUE if Direct3D 11 resources created as shared by setting |
| <em>MiscFlags</em> to include D3D11_RESOURCE_MISC_SHARED will perform faster when |
| shared with OpenCL, compared with resources which have not set this flag. |
| Otherwise returns CL_FALSE.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d11_sharing-additions-to-chapter-5"><a class="anchor" href="#cl_khr_d3d11_sharing-additions-to-chapter-5"></a>14.6. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clGetMemObjectInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_D3D11_RESOURCE_KHR if <em>param_name</em> is |
| CL_MEM_D3D11_RESOURCE_KHR and <em>memobj</em> was not created by the function |
| <strong>clCreateFromD3D11BufferKHR</strong>, <strong>clCreateFromD3D11Texture2DKHR</strong>, or |
| <strong>clCreateFromD3D11Texture3DKHR</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Extend <em>table 5.12</em> to include the following entry.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_mem_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_MEM_D3D11_RESOURCE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">ID3D11Resource *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">If <em>memobj</em> was created using <strong>clCreateFromD3D11BufferKHR</strong>, |
| <strong>clCreateFromD3D11Texture2DKHR</strong>, or <strong>clCreateFromD3D11Texture3DKHR</strong>, |
| returns the <em>resource</em> argument specified when <em>memobj</em> was created.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clGetImageInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_D3D11_RESOURCE_KHR if <em>param_name</em> is |
| CL_IMAGE_D3D11_SUBRESOURCE_KHR and <em>image</em> was not created by the function |
| <strong>clCreateFromD3D11Texture2DKHR</strong>, or <strong>clCreateFromD3D11Texture3DKHR</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Extend <em>table 5.9</em> to include the following entry.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_image_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_IMAGE_D3D11_SUBRESOURCE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">If <em>image</em> was created using <strong>clCreateFromD3D11Texture2DKHR</strong>, or |
| <strong>clCreateFromD3D11Texture3DKHR</strong>, returns the <em>subresource</em> argument |
| specified when <em>image</em> was created.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to <em>table 5.22</em> in the <strong>Info returned in param_value</strong> column for |
| <em>cl_event_info</em> = CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR |
| CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_d3d11_sharing-sharing-memory-objects-with-direct3d-11-resources"><a class="anchor" href="#cl_khr_d3d11_sharing-sharing-memory-objects-with-direct3d-11-resources"></a>14.7. Sharing Memory Objects with Direct3D 11 Resources</h3> |
| <div class="paragraph"> |
| <p>This section discusses OpenCL functions that allow applications to use |
| Direct3D 11 resources as OpenCL memory objects. |
| This allows efficient sharing of data between OpenCL and Direct3D 11. |
| The OpenCL API may be used to execute kernels that read and/or write memory |
| objects that are also Direct3D 11 resources. |
| An OpenCL image object may be created from a Direct3D 11 texture resource. |
| An OpenCL buffer object may be created from a Direct3D 11 buffer resource. |
| OpenCL memory objects may be created from Direct3D 11 objects if and only if |
| the OpenCL context has been created from a Direct3D 11 device.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-querying-opencl-devices-corresponding-to-direct3d-11-devices"><a class="anchor" href="#cl_khr_d3d11_sharing-querying-opencl-devices-corresponding-to-direct3d-11-devices"></a>14.7.1. Querying OpenCL Devices Corresponding to Direct3D 11 Devices</h4> |
| <div class="paragraph"> |
| <p>The OpenCL devices corresponding to a Direct3D 11 device may be queried. |
| The OpenCL devices corresponding to a DXGI adapter may also be queried. |
| The OpenCL devices corresponding to a Direct3D 11 device will be a subset of |
| the OpenCL devices corresponding to the DXGI adapter against which the |
| Direct3D 11 device was created.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The OpenCL devices corresponding to a Direct3D 11 device or a DXGI adapter |
| may be queried using the function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, |
| cl_d3d11_device_source_khr d3d_device_source, |
| <span class="directive">void</span> *d3d_object, |
| cl_d3d11_device_set_khr d3d_device_set, |
| cl_uint num_entries, |
| cl_device_id *devices, |
| cl_uint *num_devices)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p><em>platform</em> refers to the platform ID returned by <strong>clGetPlatformIDs</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>d3d_device_source</em> specifies the type of <em>d3d_object</em>, and must be one of |
| the values shown in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>d3d_object</em> specifies the object whose corresponding OpenCL devices are |
| being queried. |
| The type of <em>d3d_object</em> must be as specified in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>d3d_device_set</em> specifies the set of devices to return, and must be one of |
| the values shown in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_entries</em> is the number of cl_device_id entries that can be added to |
| <em>devices</em>. |
| If <em>devices</em> is not <code>NULL</code> then <em>num_entries</em> must be greater than zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>devices</em> returns a list of OpenCL devices found. |
| The cl_device_id values returned in <em>devices</em> can be used to identify a |
| specific OpenCL device. |
| If <em>devices</em> is <code>NULL</code>, this argument is ignored. |
| The number of OpenCL devices returned is the minimum of the value specified |
| by <em>num_entries</em> and the number of OpenCL devices corresponding to |
| <em>d3d_object</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_devices</em> returns the number of OpenCL devices available that correspond |
| to <em>d3d_object</em>. |
| If <em>num_devices</em> is <code>NULL</code>, this argument is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetDeviceIDsFromD3D11KHR</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| Otherwise it may return</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_PLATFORM if <em>platform</em> is not a valid platform.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>d3d_device_source</em> is not a valid value, |
| <em>d3d_device_set</em> is not a valid value, <em>num_entries</em> is equal to zero |
| and <em>devices</em> is not <code>NULL</code>, or if both <em>num_devices</em> and <em>devices</em> are |
| <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_DEVICE_NOT_FOUND if no OpenCL devices that correspond to <em>d3d_object</em> |
| were found.</p> |
| </li> |
| </ul> |
| </div> |
| <table id="cl_khr_d3d11_sharing-clGetDeviceIDsFromD3D11KHR-object-type" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 34. <em>Direct3D 11 object types that may be used by</em> <strong>clGetDeviceIDsFromD3D11KHR</strong></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_d3d11_device_source_khr</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Type of <em>d3d_object</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_D3D11_DEVICE_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">ID3D11Device *</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_D3D11_DXGI_ADAPTER_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDXGIAdapter *</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table id="cl_khr_d3d11_sharing-clGetDeviceIDsFromD3D10KHR-devices" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 35. <em>Sets of devices queriable using</em> <strong>clGetDeviceIDsFromD3D11KHR</strong></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_d3d11_device_set_khr</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Devices returned in <em>devices</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_PREFERRED_DEVICES_FOR_D3D11_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The preferred OpenCL devices associated with the specified Direct3D |
| object.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_ALL_DEVICES_FOR_D3D11_KHR</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">All OpenCL devices which may interoperate with the specified Direct3D |
| object. |
| Performance of sharing data on these devices may be considerably less than |
| on the preferred devices.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-lifetime-of-shared-objects"><a class="anchor" href="#cl_khr_d3d11_sharing-lifetime-of-shared-objects"></a>14.7.2. Lifetime of Shared Objects</h4> |
| <div class="paragraph"> |
| <p>An OpenCL memory object created from a Direct3D 11 resource remains valid as |
| long as the corresponding Direct3D 11 resource has not been deleted. |
| If the Direct3D 11 resource is deleted through the Direct3D 11 API, |
| subsequent use of the OpenCL memory object will result in undefined |
| behavior, including but not limited to possible OpenCL errors, data |
| corruption, and program termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The successful creation of a cl_context against a Direct3D 11 device |
| specified via the context create parameter CL_CONTEXT_D3D11_DEVICE_KHR will |
| increment the internal Direct3D reference count on the specified Direct3D 11 |
| device. |
| The internal Direct3D reference count on that Direct3D 11 device will be |
| decremented when the OpenCL reference count on the returned OpenCL context |
| drops to zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The OpenCL context and corresponding command-queues are dependent on the |
| existence of the Direct3D 11 device from which the OpenCL context was |
| created. |
| If the Direct3D 11 device is deleted through the Direct3D 11 API, subsequent |
| use of the OpenCL context will result in undefined behavior, including but |
| not limited to possible OpenCL errors, data corruption, and program |
| termination.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-sharing-direct3d-11-buffer-resources-as-opencl-buffer-objects"><a class="anchor" href="#cl_khr_d3d11_sharing-sharing-direct3d-11-buffer-resources-as-opencl-buffer-objects"></a>14.7.3. Sharing Direct3D 11 Buffer Resources as OpenCL Buffer Objects</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D11BufferKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D11Buffer *resource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL buffer object from a Direct3D 11 buffer.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a Direct3D 11 device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>resource</em> is a pointer to the Direct3D 11 buffer to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromD3D11BufferKHR</strong> returns a valid non-zero OpenCL buffer object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the buffer object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_D3D11_RESOURCE_KHR if <em>resource</em> is not a Direct3D 11 buffer |
| resource, if <em>resource</em> was created with the D3D11_USAGE flag |
| D3D11_USAGE_IMMUTABLE, if a cl_mem from <em>resource</em> has already been |
| created using <strong>clCreateFromD3D11BufferKHR</strong>, or if <em>context</em> was not |
| created against the same Direct3D 11 device from which <em>resource</em> was |
| created.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The size of the returned OpenCL buffer object is the same as the size of |
| <em>resource</em>. |
| This call will increment the internal Direct3D reference count on |
| <em>resource</em>. |
| The internal Direct3D reference count on <em>resource</em> will be decremented when |
| the OpenCL reference count on the returned OpenCL memory object drops to |
| zero.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-sharing-direct3d-11-texture-and-resources-as-opencl-image-objects"><a class="anchor" href="#cl_khr_d3d11_sharing-sharing-direct3d-11-texture-and-resources-as-opencl-image-objects"></a>14.7.4. Sharing Direct3D 11 Texture and Resources as OpenCL Image Objects</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D11Texture2DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D11Texture2D *resource, |
| UINT subresource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL 2D image object from a subresource of a Direct3D 11 2D |
| texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a Direct3D 11 device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>resource</em> is a pointer to the Direct3D 11 2D texture to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>subresource</em> is the subresource of <em>resource</em> to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromD3D11Texture2DKHR</strong> returns a valid non-zero OpenCL image object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid or if |
| <em>subresource</em> is not a valid subresource index for <em>resource</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_D3D11_RESOURCE_KHR if <em>resource</em> is not a Direct3D 11 texture |
| resource, if <em>resource</em> was created with the D3D11_USAGE flag |
| D3D11_USAGE_IMMUTABLE, if <em>resource</em> is a multisampled texture, if a |
| cl_mem from subresource <em>subresource</em> of <em>resource</em> has already been |
| created using <strong>clCreateFromD3D11Texture2DKHR</strong>, or if <em>context</em> was not |
| created against the same Direct3D 11 device from which <em>resource</em> was |
| created.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the Direct3D 11 texture format of |
| <em>resource</em> is not listed in the table |
| <a href="#cl_khr_d3d11_sharing-mapping-of-image-formats"><em>Direct3D 11 formats and |
| corresponding OpenCL image formats</em></a> or if the Direct3D 11 texture |
| format of <em>resource</em> does not map to a supported OpenCL image format.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The width and height of the returned OpenCL 2D image object are determined |
| by the width and height of subresource <em>subresource</em> of <em>resource</em>. |
| The channel type and order of the returned OpenCL 2D image object is |
| determined by the format of <em>resource</em> by the table |
| <a href="#cl_khr_d3d11_sharing-mapping-of-image-formats"><em>Direct3D 11 formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This call will increment the internal Direct3D reference count on |
| <em>resource</em>. |
| The internal Direct3D reference count on <em>resource</em> will be decremented when |
| the OpenCL reference count on the returned OpenCL memory object drops to |
| zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromD3D11Texture3DKHR(cl_context context, |
| cl_mem_flags flags, |
| ID3D11Texture3D *resource, |
| UINT subresource, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL 3D image object from a subresource of a Direct3D 11 3D |
| texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a Direct3D 11 device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of <em>flags</em>. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>resource</em> is a pointer to the Direct3D 11 3D texture to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>subresource</em> is the subresource of <em>resource</em> to share.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromD3D11Texture3DKHR</strong> returns a valid non-zero OpenCL image object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid or if |
| <em>subresource</em> is not a valid subresource index for <em>resource</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_D3D11_RESOURCE_KHR if <em>resource</em> is not a Direct3D 11 texture |
| resource, if <em>resource</em> was created with the D3D11_USAGE flag |
| D3D11_USAGE_IMMUTABLE, if <em>resource</em> is a multisampled texture, if a |
| cl_mem from subresource <em>subresource</em> of <em>resource</em> has already been |
| created using <strong>clCreateFromD3D11Texture3DKHR</strong>, or if <em>context</em> was not |
| created against the same Direct3D 11 device from which <em>resource</em> was |
| created.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the Direct3D 11 texture format of |
| <em>resource</em> is not listed in the table |
| <a href="#cl_khr_d3d11_sharing-mapping-of-image-formats"><em>Direct3D 11 formats and |
| corresponding OpenCL image formats</em></a> or if the Direct3D 11 texture |
| format of <em>resource</em> does not map to a supported OpenCL image format.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The width, height and depth of the returned OpenCL 3D image object are |
| determined by the width, height and depth of subresource <em>subresource</em> of |
| <em>resource</em>. |
| The channel type and order of the returned OpenCL 3D image object is |
| determined by the format of <em>resource</em> by the table |
| <a href="#cl_khr_d3d11_sharing-mapping-of-image-formats"><em>Direct3D 11 formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This call will increment the internal Direct3D reference count on |
| <em>resource</em>. |
| The internal Direct3D reference count on <em>resource</em> will be decremented when |
| the OpenCL reference count on the returned OpenCL memory object drops to |
| zero.</p> |
| </div> |
| <table id="cl_khr_d3d11_sharing-mapping-of-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 36. <em>Direct3D 11 formats and corresponding OpenCL image formats</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>DXGI format</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>CL image format</strong> |
| |
| <strong>(channel order, channel data type)</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32B32A32_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32B32A32_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32B32A32_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16B16A16_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_B8G8R8A8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_BGRA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8B8A8_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32G32_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16G16_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8G8_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_SIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R32_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT32</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R16_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_UNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_UINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNSIGNED_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_SNORM</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DXGI_FORMAT_R8_SINT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_SIGNED_INT8</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-11-resources"><a class="anchor" href="#cl_khr_d3d11_sharing-querying-direct3d-properties-of-memory-objects-created-from-direct3d-11-resources"></a>14.7.5. Querying Direct3D properties of memory objects created from Direct3D 11 resources</h4> |
| <div class="paragraph"> |
| <p>Properties of Direct3D 11 objects may be queried using <strong>clGetMemObjectInfo</strong> |
| and <strong>clGetImageInfo</strong> with <em>param_name</em> CL_MEM_D3D11_RESOURCE_KHR and |
| CL_IMAGE_D3D11_SUBRESOURCE_KHR respectively as described in <em>sections 5.4.3</em> |
| and <em>5.3.6</em>.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-sharing-memory-objects-created-from-direct3d-11-resources-between-direct3d-11-and-opencl-contexts"><a class="anchor" href="#cl_khr_d3d11_sharing-sharing-memory-objects-created-from-direct3d-11-resources-between-direct3d-11-and-opencl-contexts"></a>14.7.6. Sharing memory objects created from Direct3D 11 resources between Direct3D 11 and OpenCL contexts</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to acquire OpenCL memory objects that have been created from |
| Direct3D 11 resources. |
| The Direct3D 11 objects are acquired by the OpenCL context associated with |
| <em>command_queue</em> and can therefore be used by all command-queues associated |
| with the OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from Direct3D 11 resources must be acquired |
| before they can be used by any OpenCL commands queued to a command-queue. |
| If an OpenCL memory object created from a Direct3D 11 resource is used while |
| it is not currently acquired by OpenCL, the call attempting to use that |
| OpenCL memory object will return CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If CL_CONTEXT_INTEROP_USER_SYNC is not specified as CL_TRUE during context |
| creation, <strong>clEnqueueAcquireD3D11ObjectsKHR</strong> provides the synchronization |
| guarantee that any Direct3D 11 calls involving the interop device(s) used in |
| the OpenCL context made before <strong>clEnqueueAcquireD3D11ObjectsKHR</strong> is called |
| will complete executing before <em>event</em> reports completion and before the |
| execution of any subsequent OpenCL work issued in <em>command_queue</em> begins. |
| If the context was created with properties specifying |
| CL_CONTEXT_INTEROP_USER_SYNC as CL_TRUE, the user is responsible for |
| guaranteeing that any Direct3D 11 calls involving the interop device(s) used |
| in the OpenCL context made before <strong>clEnqueueAcquireD3D11ObjectsKHR</strong> is |
| called have completed before calling <strong>clEnqueueAcquireD3D11ObjectsKHR</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>command_queue</em> is a valid command-queue.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be acquired in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from Direct3D 11 resources.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueAcquireD3D11ObjectsKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> then the function does |
| nothing and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects or if memory objects in <em>mem_objects</em> have not |
| been created from Direct3D 11 resources.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from a Direct3D 11 device.</p> |
| </li> |
| <li> |
| <p>CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR if memory objects in |
| <em>mem_objects</em> have previously been acquired using |
| <strong>clEnqueueAcquireD3D11ObjectsKHR</strong> but have not been released using |
| <strong>clEnqueueReleaseD3D11ObjectsKHR</strong>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to release OpenCL memory objects that have been created from |
| Direct3D 11 resources. |
| The Direct3D 11 objects are released by the OpenCL context associated with |
| <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from Direct3D 11 resources which have been |
| acquired by OpenCL must be released by OpenCL before they may be accessed by |
| Direct3D 11. |
| Accessing a Direct3D 11 resource while its corresponding OpenCL memory |
| object is acquired is in error and will result in undefined behavior, |
| including but not limited to possible OpenCL errors, data corruption, and |
| program termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If CL_CONTEXT_INTEROP_USER_SYNC is not specified as CL_TRUE during context |
| creation, <strong>clEnqueueReleaseD3D11ObjectsKHR</strong> provides the synchronization |
| guarantee that any Direct3D 11 calls involving the interop |
| device(s) used in the OpenCL context made after the call to |
| <strong>clEnqueueReleaseD3D11ObjectsKHR</strong> will not start executing until after all |
| events in <em>event_wait_list</em> are complete and all work already submitted to |
| <em>command_queue</em> completes execution. |
| If the context was created with properties specifying |
| CL_CONTEXT_INTEROP_USER_SYNC as CL_TRUE, the user is responsible for |
| guaranteeing that any Direct3D 11 calls involving the interop device(s) used |
| in the OpenCL context made after <strong>clEnqueueReleaseD3D11ObjectsKHR</strong> will not |
| start executing until after the event returned by |
| <strong>clEnqueueReleaseD3D11ObjectsKHR</strong> reports completion.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be released in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from Direct3D 11 resources.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueReleaseD3D11ObjectsKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> the function does nothing |
| and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects or if memory objects in <em>mem_objects</em> have not |
| been created from Direct3D 11 resources.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from a Direct3D 11 device.</p> |
| </li> |
| <li> |
| <p>CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR if memory objects in <em>mem_objects</em> |
| have not previously been acquired using |
| <strong>clEnqueueAcquireD3D11ObjectsKHR</strong>, or have been released using |
| <strong>clEnqueueReleaseD3D11ObjectsKHR</strong> since the last time that they were |
| acquired.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in |
| <em>event_wait_list</em> are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_d3d11_sharing-event-command-types"><a class="anchor" href="#cl_khr_d3d11_sharing-event-command-types"></a>14.7.7. Event Command Types for Sharing memory objects that map to Direct3D 11 objects</h4> |
| <div class="paragraph"> |
| <p>The following table describes the event command types for the OpenCL commands |
| to acquire and release OpenCL memory objects that have been created from |
| Direct3D 11 objects:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 37. List of supported event command types</caption> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Events Created By</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Event Command Type</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueAcquireD3D11ObjectsKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>ACQUIRE_<wbr>D3D11_<wbr>OBJECTS_<wbr>KHR</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueReleaseD3D11ObjectsKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>RELEASE_<wbr>D3D11_<wbr>OBJECTS_<wbr>KHR</code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_dx9_media_sharing"><a class="anchor" href="#cl_khr_dx9_media_sharing"></a>15. Creating OpenCL Memory Objects from DirectX 9 Media Surfaces</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_dx9_media_sharing-overview"><a class="anchor" href="#cl_khr_dx9_media_sharing-overview"></a>15.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_dx9_media_sharing</strong> extension. |
| The goal of this extension is to allow applications to use media surfaces as |
| OpenCL memory objects. |
| This allows efficient sharing of data between OpenCL and selected adapter |
| APIs (only DX9 for now). |
| If this extension is supported, an OpenCL image object can be created from a |
| media surface and the OpenCL API can be used to execute kernels that read |
| and/or write memory objects that are media surfaces. |
| Note that OpenCL memory objects may be created from the adapter media |
| surface if and only if the OpenCL context has been created from that |
| adapter.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_14"><a class="anchor" href="#_general_information_14"></a>15.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_14"><a class="anchor" href="#_version_history_14"></a>15.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_dx9_media_sharing-new-procedures-and-functions"><a class="anchor" href="#cl_khr_dx9_media_sharing-new-procedures-and-functions"></a>15.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetDeviceIDsFromDX9MediaAdapterKHR( |
| cl_platform_id platform, |
| cl_uint num_media_adapters, |
| cl_dx9_media_adapter_type_khr *media_adapters_type, |
| <span class="directive">void</span> *media_adapters, |
| cl_dx9_media_adapter_set_khr media_adapter_set, |
| cl_uint num_entries, |
| cl_device_id *devices, |
| cl_int *num_devices) |
| |
| cl_mem clCreateFromDX9MediaSurfaceKHR(cl_context context, |
| cl_mem_flags flags, |
| cl_dx9_media_adapter_type_khr adapter_type, |
| <span class="directive">void</span> *surface_info, |
| cl_uint plane, |
| cl_int *errcode_ret) |
| |
| cl_int clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event) |
| |
| cl_int clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_dx9_media_sharing-new-tokens"><a class="anchor" href="#cl_khr_dx9_media_sharing-new-tokens"></a>15.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Accepted by the <em>media_adapters_type</em> parameter of |
| <strong>clGetDeviceIDsFromDX9MediaAdapterKHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_ADAPTER_D3D9_KHR |
| CL_ADAPTER_D3D9EX_KHR |
| CL_ADAPTER_DXVA_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted by the <em>media_adapter_set</em> parameter of |
| <strong>clGetDeviceIDsFromDX9MediaAdapterKHR</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR |
| CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a property name in the <em>properties</em> parameter of |
| <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_CONTEXT_ADAPTER_D3D9_KHR |
| CL_CONTEXT_ADAPTER_D3D9EX_KHR |
| CL_CONTEXT_ADAPTER_DXVA_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the property being queried in the <em>param_name</em> parameter of |
| <strong>clGetMemObjectInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR |
| CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as the property being queried in the <em>param_name</em> parameter of |
| <strong>clGetImageInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_IMAGE_DX9_MEDIA_PLANE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned in the <em>param_value</em> parameter of <strong>clGetEventInfo</strong> when |
| <em>param_name</em> is CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR |
| CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateContext</strong> and <strong>clCreateContextFromType</strong> if the media |
| adapter specified for interoperability is not compatible with the devices |
| against which the context is to be created:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_DX9_MEDIA_ADAPTER_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clCreateFromDX9MediaSurfaceKHR</strong> when <em>adapter_type</em> is set to a |
| media adapter and the <em>surface_info</em> does not reference a media surface of |
| the required type, or if <em>adapter_type</em> is set to a media adapter type and |
| <em>surface_info</em> does not contain a valid reference to a media surface on that |
| adapter, by <strong>clGetMemObjectInfo</strong> when <em>param_name</em> is a surface or handle |
| when the image was not created from an appropriate media surface, and from |
| <strong>clGetImageInfo</strong> when <em>param_name</em> is CL_IMAGE_DX9_MEDIA_PLANE_KHR and image |
| was not created from an appropriate media surface.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_DX9_MEDIA_SURFACE_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong> when any of <em>mem_objects</em> |
| are currently acquired by OpenCL:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> when any of <em>mem_objects</em> |
| are not currently acquired by OpenCL:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_dx9_media_sharing-additions-to-chapter-4"><a class="anchor" href="#cl_khr_dx9_media_sharing-additions-to-chapter-4"></a>15.5. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>In <em>section 4.4</em>, replace the description of <em>properties</em> under |
| <strong>clCreateContext</strong> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<em>properties</em> specifies a list of context property names and their |
| corresponding values. |
| Each property is followed immediately by the corresponding desired value. |
| The list is terminated with zero. |
| If a property is not specified in <em>properties</em>, then its default value |
| (listed in <em>table 4.5</em>) is used (it is said to be specified implicitly). |
| If <em>properties</em> is <code>NULL</code> or empty (points to a list whose first value is |
| zero), all attributes take on their default values.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following to <em>table 4.5</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_properties enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_ADAPTER_D3D9_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDirect3DDevice9 *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Specifies an IDirect3DDevice9 to use for D3D9 interop.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_ADAPTER_D3D9EX_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDirect3DDevice9Ex *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Specifies an IDirect3DDevice9Ex to use for D3D9 interop.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_ADAPTER_DXVA_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDXVAHD_Device *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Specifies an IDXVAHD_Device to use for DXVA interop.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clCreateContext</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_ADAPTER_KHR if any of the values of the properties |
| CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_D3D9EX_KHR or |
| CL_CONTEXT_ADAPTER_DXVA_KHR is non-<code>NULL</code> and does not specify a valid |
| media adapter with which the <em>cl_device_ids</em> against which this context |
| is to be created may interoperate.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clCreateContextFromType</strong> the same new errors |
| described above for <strong>clCreateContext</strong>.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_dx9_media_sharing-additions-to-chapter-5"><a class="anchor" href="#cl_khr_dx9_media_sharing-additions-to-chapter-5"></a>15.6. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clGetMemObjectInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_DX9_MEDIA_SURFACE_KHR if <em>param_name</em> is |
| CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR and <em>memobj</em> was not created by the |
| function <strong>clCreateFromDX9MediaSurfaceKHR</strong> from a Direct3D9 surface.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Extend <em>table 5.12</em> to include the following entries:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_mem_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_dx9_media_adapter_type_khr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the <em>cl_dx9_media_adapter_type_khr</em> argument value specified when |
| <em>memobj</em> is created using <strong>clCreateFromDX9MediaSurfaceKHR</strong>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_dx9_surface_info_khr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the <em>cl_dx9_surface_info_khr</em> argument value specified when |
| <em>memobj</em> is created using <strong>clCreateFromDX9MediaSurfaceKHR</strong>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to the list of errors for <strong>clGetImageInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_DX9_MEDIA_SURFACE_KHR if <em>param_name</em> is |
| CL_IMAGE_DX9_MEDIA_PLANE_KHR and <em>image</em> was not created by the function |
| <strong>clCreateFromDX9MediaSurfaceKHR</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Extend <em>table 5.9</em> to include the following entry.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_image_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_IMAGE_DX9_MEDIA_PLANE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_uint</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the <em>plane</em> argument value specified when <em>memobj</em> is created |
| using <strong>clCreateFromDX9MediaSurfaceKHR</strong>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add to <em>table 5.22</em> in the <strong>Info returned in param_value</strong> column for |
| <em>cl_event_info</em> = CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR |
| CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_dx9_media_sharing-sharing-media-surfaces-with-opencl"><a class="anchor" href="#cl_khr_dx9_media_sharing-sharing-media-surfaces-with-opencl"></a>15.7. Sharing Media Surfaces with OpenCL</h3> |
| <div class="paragraph"> |
| <p>This section discusses OpenCL functions that allow applications to use media |
| surfaces as OpenCL memory objects. |
| This allows efficient sharing of data between OpenCL and media surface APIs. |
| The OpenCL API may be used to execute kernels that read and/or write memory |
| objects that are also media surfaces. |
| An OpenCL image object may be created from a media surface. |
| OpenCL memory objects may be created from media surfaces if and only if the |
| OpenCL context has been created from a media adapter.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_dx9_media_sharing-querying-opencl-devices-corresponding-to-media-adapters"><a class="anchor" href="#cl_khr_dx9_media_sharing-querying-opencl-devices-corresponding-to-media-adapters"></a>15.7.1. Querying OpenCL Devices corresponding to Media Adapters</h4> |
| <div class="paragraph"> |
| <p>Media adapters are an abstraction associated with devices that provide media |
| capabilities.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetDeviceIDsFromDX9MediaAdapterKHR( |
| cl_platform_id platform, |
| cl_uint num_media_adapters, |
| cl_dx9_media_adapter_type_khr *media_adapters_type, |
| <span class="directive">void</span> *media_adapters, |
| cl_dx9_media_adapter_set_khr media_adapter_set, |
| cl_uint num_entries, |
| cl_device_id *devices, |
| cl_int *num_devices)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>queries a media adapter for any associated OpenCL devices. |
| Adapters with associated OpenCL devices can enable media surface sharing |
| between the two.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>platform</em> refers to the platform ID returned by <strong>clGetPlatformIDs</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_media_adapters</em> specifies the number of media adapters.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>media_adapters_type</em> is an array of <em>num_media_adapters</em> entries. |
| Each entry specifies the type of media adapter and must be one of the values |
| described in the table below.</p> |
| </div> |
| <table id="cl_khr_dx9_media_sharing-media-adapter-types" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 38. <em>cl_dx9_media_adapter_type_khr values</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_dx9_media_adapter_type_khr</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Type of media adapters</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_ADAPTER_D3D9_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDirect3DDevice9 *</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_ADAPTER_D3D9EX_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDirect3DDevice9Ex *</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_ADAPTER_DXVA_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">IDXVAHD_Device *</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table id="cl_khr_dx9_media_sharing-media-adapter-sets" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 39. <em>cl_dx9_media_adapter_set_khr values</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_dx9_media_adapter_set_khr</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The preferred OpenCL devices associated with the media adapter.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_ALL_DEVICES_FOR_MEDIA_DX9_ADAPTER_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">All OpenCL devices that may interoperate with the media adapter</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><em>media_adapters</em> is an array of <em>num_media_adapters</em> entries. |
| Each entry specifies the actual adapter whose type is specified by |
| <em>media_adapters_type</em>. |
| The <em>media_adapters</em> must be one of the types described in the table |
| <a href="#cl_khr_dx9_media_sharing-media-adapter-types"><em>cl_dx9_media_adapter_type_khr |
| values</em></a>. |
| <em>media_adapter_set</em> specifies the set of adapters to return and must be one |
| of the values described in the table |
| <a href="#cl_khr_dx9_media_sharing-media-adapter-sets"><em>cl_dx9_media_adapter_set_khr |
| values</em></a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_entries</em> is the number of cl_device_id entries that can be added to |
| <em>devices</em>. |
| If <em>devices</em> is not <code>NULL</code>, the <em>num_entries</em> must be greater than zero.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>devices</em> returns a list of OpenCL devices found that support the list of |
| media adapters specified. |
| The cl_device_id values returned in <em>devices</em> can be used to identify a |
| specific OpenCL device. |
| If <em>devices</em> argument is <code>NULL</code>, this argument is ignored. |
| The number of OpenCL devices returned is the minimum of the value specified |
| by <em>num_entries</em> and the number of OpenCL devices that correspond to the |
| specified media adapters.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_devices</em> returns the number of OpenCL devices. |
| If <em>num_devices</em> is <code>NULL</code>, this argument is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetDeviceIDsFromDX9MediaAdapterKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_PLATFORM if <em>platform</em> is not a valid platform.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_media_adapters</em> is zero or if |
| <em>media_adapters_type</em> is <code>NULL</code> or if <em>media_adapters</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if any of the entries in <em>media_adapters_type</em> or |
| <em>media_adapters</em> is not a valid value.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>media_adapter_set</em> is not a valid value.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_entries</em> is equal to zero and <em>devices</em> is not |
| <code>NULL</code> or if both <em>num_devices</em> and <em>devices</em> are <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_DEVICE_NOT_FOUND if no OpenCL devices that correspond to adapters |
| specified in <em>media_adapters</em> and <em>media_adapters_type</em> were found.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_dx9_media_sharing-creating-media-resources-as-opencl-image-objects"><a class="anchor" href="#cl_khr_dx9_media_sharing-creating-media-resources-as-opencl-image-objects"></a>15.7.2. Creating Media Resources as OpenCL Image Objects</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromDX9MediaSurfaceKHR(cl_context context, |
| cl_mem_flags flags, |
| cl_dx9_media_adapter_type_khr adapter_type, |
| <span class="directive">void</span> *surface_info, |
| cl_uint plane, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an OpenCL image object from a media surface.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from a media adapter.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information. |
| Refer to <em>table 5.3</em> for a description of flags. |
| Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values |
| specified in <em>table 5.3</em> can be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>adapter_type</em> is a value from enumeration of supported adapters described |
| in the table |
| <a href="#cl_khr_dx9_media_sharing-media-adapter-types"><em>cl_dx9_media_adapter_type_khr |
| values</em></a>. |
| The type of <em>surface_info</em> is determined by the adapter type. |
| The implementation does not need to support all adapter types. |
| This approach provides flexibility to support additional adapter types in |
| the future. |
| Supported adapter types are CL_ADAPTER_D3D9_KHR, CL_ADAPTER_D3D9EX_KHR and |
| CL_ADAPTER_DXVA_KHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <em>adapter_type</em> is CL_ADAPTER_D3D9_KHR, CL_ADAPTER_D3D9EX_KHR or |
| CL_ADAPTER_DXVA_KHR, the <em>surface_info</em> points to the following structure:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="keyword">typedef</span> <span class="keyword">struct</span> _cl_dx9_surface_info_khr |
| { |
| IDirect3DSurface9 *resource; |
| HANDLE shared_handle; |
| } cl_dx9_surface_info_khr;</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>For DX9 surfaces, we need both the handle to the resource and the resource |
| itself to have a sufficient amount of information to eliminate a copy of the |
| surface for sharing in cases where this is possible. |
| Elimination of the copy is driver dependent. |
| <em>shared_handle</em> may be <code>NULL</code> and this may result in sub-optimal |
| performance.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>surface_info</em> is a pointer to one of the structures defined in the |
| <em>adapter_type</em> description above passed in as a void *.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>plane</em> is the plane of resource to share for planar surface formats. |
| For planar formats, we use the plane parameter to obtain a handle to the |
| specific plane (Y, U or V for example). |
| For non-planar formats used by media, <em>plane</em> must be 0.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. |
| If <em>errcode_ret</em> is <code>NULL</code>, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromDX9MediaSurfaceKHR</strong> returns a valid non-zero 2D image object |
| and <em>errcode_ret</em> is set to CL_SUCCESS if the 2D image object is created |
| successfully. |
| Otherwise it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>flags</em> are not valid or if |
| <em>plane</em> is not a valid plane of <em>resource</em> specified in <em>surface_info</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_DX9_MEDIA_SURFACE_KHR if <em>resource</em> specified in |
| <em>surface_info</em> is not a valid resource or is not associated with |
| <em>adapter_type</em> (e.g., <em>adapter_type</em> is set to CL_ADAPTER_D3D9_KHR and |
| <em>resource</em> is not a Direct3D 9 surface created in D3DPOOL_DEFAULT).</p> |
| </li> |
| <li> |
| <p>CL_INVALID_DX9_MEDIA_SURFACE_KHR if <em>shared_handle</em> specified in |
| <em>surface_info</em> is not <code>NULL</code> or a valid handle value.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the texture format of <em>resource</em> |
| is not listed in <a href="#cl_khr_dx9_media_sharing-fourcc-image-formats"><em>YUV |
| FourCC codes and corresponding OpenCL image format</em></a> or |
| <a href="#cl_khr_dx9_media_sharing-d3d-image-formats"><em>Direct3D formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if there are no devices in <em>context</em> that support |
| <em>adapter_type</em>.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The width and height of the returned OpenCL 2D image object are determined |
| by the width and height of the plane of resource. |
| The channel type and order of the returned image object is determined by the |
| format and plane of resource and are described in the table |
| <a href="#cl_khr_dx9_media_sharing-fourcc-image-formats"><em>YUV FourCC codes and |
| corresponding OpenCL image format</em></a> or |
| <a href="#cl_khr_dx9_media_sharing-d3d-image-formats"><em>Direct3D formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This call will increment the internal media surface reference count on <em>resource</em>. |
| The internal media surface reference count on <em>resource</em> will be decremented |
| when the OpenCL reference count on the returned OpenCL memory object drops |
| to zero.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_dx9_media_sharing-querying-media-surface-properties-of-memory-objects-created-from-media-surfaces"><a class="anchor" href="#cl_khr_dx9_media_sharing-querying-media-surface-properties-of-memory-objects-created-from-media-surfaces"></a>15.7.3. Querying Media Surface Properties of Memory Objects created from Media Surfaces</h4> |
| <div class="paragraph"> |
| <p>Properties of media surface objects may be queried using |
| <strong>clGetMemObjectInfo</strong> and <strong>clGetImageInfo</strong> with <em>param_name</em> |
| CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR and |
| CL_IMAGE_DX9_MEDIA_PLANE_KHR as described in <em>sections 5.4.3</em> and <em>5.3.6</em>.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_dx9_media_sharing-sharing-memory-objects-created-from-media-surfaces-between-a-media-adapter-and-opencl"><a class="anchor" href="#cl_khr_dx9_media_sharing-sharing-memory-objects-created-from-media-surfaces-between-a-media-adapter-and-opencl"></a>15.7.4. Sharing Memory Objects created from Media Surfaces between a Media Adapter and OpenCL</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to acquire OpenCL memory objects that have been created from a media |
| surface. |
| The media surfaces are acquired by the OpenCL context associated with |
| <em>command_queue</em> and can therefore be used by all command-queues associated |
| with the OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from media surfaces must be acquired before |
| they can be used by any OpenCL commands queued to a command-queue. |
| If an OpenCL memory object created from a media surface is used while it is |
| not currently acquired by OpenCL, the call attempting to use that OpenCL |
| memory object will return CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If CL_CONTEXT_INTEROP_USER_SYNC is not specified as CL_TRUE during context |
| creation, <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong> provides the synchronization |
| guarantee that any media adapter API calls involving the interop device(s) |
| used in the OpenCL context made before <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong> |
| is called will complete executing before <em>event</em> reports completion and |
| before the execution of any subsequent OpenCL work issued in <em>command_queue</em> |
| begins. |
| If the context was created with properties specifying |
| CL_CONTEXT_INTEROP_USER_SYNC as CL_TRUE, the user is responsible for |
| guaranteeing that any media adapter API calls involving the interop |
| device(s) used in the OpenCL context made before |
| <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong> is called have completed before |
| calling <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>command_queue</em> is a valid command-queue.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be acquired in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from media surfaces.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> then the function does |
| nothing and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects or if memory objects in <em>mem_objects</em> have not |
| been created from media surfaces.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from a device that can share the media surface referenced by |
| <em>mem_objects</em>.</p> |
| </li> |
| <li> |
| <p>CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR if memory objects in |
| <em>mem_objects</em> have previously been acquired using |
| <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong> but have not been released using |
| <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to release OpenCL memory objects that have been created from media |
| surfaces. |
| The media surfaces are released by the OpenCL context associated with |
| <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from media surfaces which have been acquired |
| by OpenCL must be released by OpenCL before they may be accessed by the |
| media adapter API. |
| Accessing a media surface while its corresponding OpenCL memory object is |
| acquired is in error and will result in undefined behavior, including but |
| not limited to possible OpenCL errors, data corruption, and program |
| termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If CL_CONTEXT_INTEROP_USER_SYNC is not specified as CL_TRUE during context |
| creation, <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> provides the synchronization |
| guarantee that any calls to media adapter APIs involving the interop |
| device(s) used in the OpenCL context made after the call to |
| <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> will not start executing until after |
| all events in <em>event_wait_list</em> are complete and all work already submitted |
| to <em>command_queue</em> completes execution. |
| If the context was created with properties specifying |
| CL_CONTEXT_INTEROP_USER_SYNC as CL_TRUE, the user is responsible for |
| guaranteeing that any media adapter API calls involving the interop |
| device(s) used in the OpenCL context made after |
| <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> will not start executing until after |
| the event returned by <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> reports completion.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be released in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from media surfaces.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> the function does |
| nothing and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> > 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects or if memory objects in <em>mem_objects</em> have not |
| been created from valid media surfaces.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_CONTEXT if context associated with <em>command_queue</em> was not |
| created from a media object.</p> |
| </li> |
| <li> |
| <p>CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR if memory objects in <em>mem_objects</em> |
| have not previously been acquired using |
| <strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong>, or have been released using |
| <strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong> since the last time that they were |
| acquired.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in |
| <em>event_wait_list</em> are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_dx9_media_sharing-event-command-types"><a class="anchor" href="#cl_khr_dx9_media_sharing-event-command-types"></a>15.7.5. Event Command Types for Sharing Memory Objects created from Media Surfaces</h4> |
| <div class="paragraph"> |
| <p>The following table describes the event command types for the OpenCL commands |
| to acquire and release OpenCL memory objects that have been created from |
| media surfaces:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 40. List of supported event command types</caption> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Events Created By</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Event Command Type</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueAcquireDX9MediaSurfacesKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>ACQUIRE_<wbr>DX9_<wbr>MEDIA_<wbr>SURFACES_<wbr>KHR</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueReleaseDX9MediaSurfacesKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>RELEASE_<wbr>DX9_<wbr>MEDIA_<wbr>SURFACES_<wbr>KHR</code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_dx9_media_sharing-surface-formats-for-media-surface-sharing"><a class="anchor" href="#cl_khr_dx9_media_sharing-surface-formats-for-media-surface-sharing"></a>15.7.6. Surface formats for Media Surface Sharing</h4> |
| <div class="paragraph"> |
| <p>This section includes the D3D surface formats that are supported when the |
| adapter type is one of the Direct 3D lineage. |
| Using a D3D surface format not listed here is an error. |
| To extend the use of this extension to support media adapters beyond |
| DirectX9, tables similar to the ones in this section will need to be defined |
| for the surface formats supported by the new media adapter. |
| All implementations that support this extension are required to support the |
| NV12 surface format; the other surface formats supported are the same |
| surface formats that the adapter you are sharing with supports as long as |
| they are listed in the table |
| <a href="#cl_khr_dx9_media_sharing-fourcc-image-formats"><em>YUV FourCC codes and |
| corresponding OpenCL image format</em></a> or in the table |
| <a href="#cl_khr_dx9_media_sharing-d3d-image-formats"><em>Direct3D formats and |
| corresponding OpenCL image formats</em></a>.</p> |
| </div> |
| <table id="cl_khr_dx9_media_sharing-fourcc-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 41. <em>YUV FourCC codes and corresponding OpenCL image format</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>FOUR CC code</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>CL image format</strong><br> |
| <strong>(channel order, channel data type)</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">FOURCC('N','V','1','2'), Plane 0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">FOURCC('N','V','1','2'), Plane 1</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">FOURCC('Y','V','1','2'), Plane 0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">FOURCC('Y','V','1','2'), Plane 1</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">FOURCC('Y','V','1','2'), Plane 2</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>In the table <a href="#cl_khr_dx9_media_sharing-fourcc-image-formats"><em>YUV FourCC |
| codes and corresponding OpenCL image format</em></a> above, NV12 Plane 0 |
| corresponds to the luminance (Y) channel and Plane 1 corresponds to the UV |
| channels. |
| The YV12 Plane 0 corresponds to the Y channel, Plane 1 corresponds to the V |
| channel and Plane 2 corresponds to the U channel. |
| Note that the YUV formats map to CL_R and CL_RG but do not perform any YUV |
| to RGB conversion and vice-versa.</p> |
| </div> |
| <table id="cl_khr_dx9_media_sharing-d3d-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 42. <em>Direct3D formats and corresponding OpenCL image formats</em></caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>D3D format</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>CL image format</strong><br> |
| <strong>(channel order, channel data type)</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_R32F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_R16F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_L16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_A, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_L8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_R, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_G32R32F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_G16R16F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_G16R16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A8L8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RG, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A32B32G32R32F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A16B16G16R16F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_HALF_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A16B16G16R16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A8B8G8R8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_X8B8G8R8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_RGBA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_A8R8G8B8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_BGRA, CL_UNORM_INT8</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">D3DFMT_X8R8G8B8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_BGRA, CL_UNORM_INT8</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: The D3D9 format names in the table above seem to imply that the |
| order of the color channels is switched relative to OpenCL but this is |
| not the case. |
| For example, the layout of channels for each pixel for D3DFMT_A32B32G32R32F |
| is the same as CL_RGBA, CL_FLOAT.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_depth_images"><a class="anchor" href="#cl_khr_depth_images"></a>16. Depth Images</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_depth_images</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds support for depth images.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension became a core feature in OpenCL 2.0.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_15"><a class="anchor" href="#_general_information_15"></a>16.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_15"><a class="anchor" href="#_version_history_15"></a>16.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_depth_images-additions-to-chapter-5"><a class="anchor" href="#cl_khr_depth_images-additions-to-chapter-5"></a>16.2. Additions to Chapter 5 of the OpenCL 1.2 Specification</h3> |
| <div class="paragraph"> |
| <p>This extension adds the following new image formats for depth images to <em>tables 5.6 and 5.7</em> of the OpenCL 1.2 specification.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 100%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Enum values that can be specified in channel_order</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEPTH.</strong> This format can only be used if channel data type = CL_UNORM_INT16 or CL_FLOAT.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Image Channel Data Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_UNORM_INT16</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Each channel component is a normalized unsigned 16-bit integer value</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_FLOAT</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Each channel component is a single precision floating-point value</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>This extension adds the following new image format to the minimum list of supported image formats described in <em>table 5.8</em>:</p> |
| </div> |
| <table id="cl_khr_depth_images-required-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 43. <em>Required Image Formats for</em> <strong>cl_khr_depth_images</strong></caption> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>num_channels</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>channel_order</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>channel_data_type</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEPTH</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_UNORM_INT16<br> |
| CL_FLOAT</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>NOTE:</p> |
| </div> |
| <div class="paragraph"> |
| <p>Depth image objects can be initialized, read and written using the appropriate CL APIs i.e. clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapImage and clEnqueueFillImage.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For clEnqueueFillImage, the fill color is a 4-component value where the R component refers to the depth value if the image format is CL_DEPTH. The fill color will be converted to the appropriate image channel format and order associated with image.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Update the text that describes the arg_value argument to clSetKernelArg with the following:</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the kernel argument is declared to be of type image2d_depth_t or image2d_array_depth_t, the arg_value entry will be a pointer to a depth image or depth image array object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following error condition for clSetKernelArg:</p> |
| </div> |
| <div class="paragraph"> |
| <p>CL_INVALID_MEM_OBJECT for an argument declared to be a depth image or a depth image |
| array and the argument value specified in arg_value does not follow the rules described above |
| for a depth memory object or memory array object argument.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_depth_images-additions-to-chapter-6"><a class="anchor" href="#cl_khr_depth_images-additions-to-chapter-6"></a>16.3. Additions to Chapter 6 of the OpenCL 1.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add the following new data types to <em>table 6.3</em> in <em>section 6.1.3</em> of the OpenCL 1.2 specification:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>image2d_depth_t</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2D depth image. Refer to <em>section 6.12.14</em> for a detailed |
| description of the built-in functions that use this type.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>image2d_array_depth_t</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2D depth image array. Refer to <em>section 6.12.14</em> for a |
| detailed description of the built-in functions that use this |
| type.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add the following to the bulleted list in section 6.12.14.1.1 - Determining the border color:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>If the image channel order is <code>CL_DEPTH</code>, the border value is <code>0.0f</code>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following built-in functions to section 6.12.14.2 - Built-in Image Read Functions:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">float <strong>read_imagef</strong>(read_only image2d_depth_t <em>image</em>, |
| sampler_t <em>sampler</em>, int2 <em>coord</em>)<br> |
| float <strong>read_imagef</strong>(read_only image2d_depth_t <em>image</em>, |
| sampler_t <em>sampler</em>, float2 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate (<em>coord.x</em>, <em>coord.y</em>) to do an element lookup in |
| the 2D depth image object specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value in the range [0.0, 1.0] |
| for depth image objects created with <em>image_channel_data_type</em> set to |
| <code>CL_UNORM_INT16</code> or <code>CL_UNORM_INT24</code>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value for depth image objects |
| created with <em>image_channel_data_type</em> set to <code>CL_FLOAT</code>.</p> |
| <p class="tableblock"> The <strong>read_imagef</strong> calls that take integer coordinates must use a |
| sampler with filter mode set to <code>CLK_FILTER_NEAREST</code>, normalized |
| coordinates set to <code>CLK_NORMALIZED_COORDS_FALSE</code> and addressing mode |
| set to <code>CLK_ADDRESS_CLAMP_TO_EDGE</code>, <code>CLK_ADDRESS_CLAMP</code> or |
| <code>CLK_ADDRESS_NONE</code>; otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imagef</strong> for depth image objects with |
| <em>image_channel_data_type</em> values not specified in the description |
| above are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">float <strong>read_imagef</strong>(read_only image2d_array_depth_t <em>image</em>, |
| sampler_t <em>sampler</em>, int4 <em>coord</em>)<br> |
| float <strong>read_imagef</strong>(read_only image2d_array_depth_t <em>image</em>, |
| sampler_t <em>sampler</em>, float4 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord.xy</em> to do an element lookup in the 2D image identified by |
| <em>coord.z</em> in the 2D depth image array specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value in the range [0.0, 1.0] |
| for depth image objects created with <em>image_channel_data_type</em> set to |
| <code>CL_UNORM_INT16</code> or <code>CL_UNORM_INT24</code>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value for depth image objects |
| created with <em>image_channel_data_type</em> set to <code>CL_FLOAT</code>.</p> |
| <p class="tableblock"> The <strong>read_imagef</strong> calls that take integer coordinates must use a |
| sampler with filter mode set to <code>CLK_FILTER_NEAREST</code>, normalized |
| coordinates set to <code>CLK_NORMALIZED_COORDS_FALSE</code> and addressing mode |
| set to <code>CLK_ADDRESS_CLAMP_TO_EDGE</code>, <code>CLK_ADDRESS_CLAMP</code> or |
| <code>CLK_ADDRESS_NONE</code>; otherwise the values returned are undefined.</p> |
| <p class="tableblock"> Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description |
| above are undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add the following built-in functions to section 6.12.14.3 - Built-in Image Sampler-less Read Functions:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">float <strong>read_imagef</strong>(image2d_depth_t <em>image</em>, int2 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate (<em>coord.x</em>, <em>coord.y</em>) to do an element lookup in |
| the 2D depth image object specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value in the range [0.0, 1.0] |
| for depth image objects created with <em>image_channel_data_type</em> set to |
| <code>CL_UNORM_INT16</code> or <code>CL_UNORM_INT24</code>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value for depth image objects |
| created with <em>image_channel_data_type</em> set to <code>CL_FLOAT</code>.</p> |
| <p class="tableblock"> Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description |
| above are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">float <strong>read_imagef</strong>(image2d_array_depth_t <em>image</em>, int4 <em>coord</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use <em>coord.xy</em> to do an element lookup in the 2D image identified by |
| <em>coord.z</em> in the 2D depth image array specified by <em>image</em>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value in the range [0.0, 1.0] |
| for depth image objects created with <em>image_channel_data_type</em> set to |
| <code>CL_UNORM_INT16</code> or <code>CL_UNORM_INT24</code>.</p> |
| <p class="tableblock"> <strong>read_imagef</strong> returns a floating-point value for depth image objects |
| created with <em>image_channel_data_type</em> set to <code>CL_FLOAT</code>.</p> |
| <p class="tableblock"> Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description |
| above are undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add the following built-in functions to section 6.12.14.4 – Built-in Image Write Functions:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imagef</strong>(image2d_depth_t <em>image</em>, int2 <em>coord</em>, |
| float <em>depth</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>depth</em> value to location specified by <em>coord.xy</em> in the 2D |
| depth image object specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is |
| done before writing the depth value. |
| <em>coord.x</em> and <em>coord.y</em> are considered to be unnormalized coordinates, |
| and must be in the range [0, image width-1], and [0, image height-1], |
| respectively.</p> |
| <p class="tableblock"> <strong>write_imagef</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to <code>CL_UNORM_INT16</code>, <code>CL_UNORM_INT24</code> or |
| <code>CL_FLOAT</code>. |
| Appropriate data format conversion will be done to convert depth value |
| from a floating-point value to actual data format associated with the |
| image.</p> |
| <p class="tableblock"> The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> for |
| image objects created with <em>image_channel_data_type</em> values not |
| specified in the description above or with (<em>x</em>, <em>y</em>) coordinate |
| values that are not in the range [0, image width-1] and [0, image |
| height-1], respectively, is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>write_imagef</strong>(image2d_array_depth_t <em>image</em>, int4 <em>coord</em>, |
| float <em>depth</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>depth</em> value to location specified by <em>coord.xy</em> in the 2D |
| image identified by <em>coord.z</em> in the 2D depth image array specified by |
| <em>image</em>. |
| Appropriate data format conversion to the specified image format is |
| done before writing the depth value. |
| <em>coord.x</em>, <em>coord.y</em> and <em>coord.z</em> are considered to be unnormalized |
| coordinates, and must be in the range [0, image width-1], [0, image |
| height-1], and [0, image number of layers-1], respectively.</p> |
| <p class="tableblock"> <strong>write_imagef</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to <code>CL_UNORM_INT16</code>, <code>CL_UNORM_INT24</code> or |
| <code>CL_FLOAT</code>. |
| Appropriate data format conversion will be done to convert depth value |
| from a floating-point value to actual data format associated with the |
| image.</p> |
| <p class="tableblock"> The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> for |
| image objects created with <em>image_channel_data_type</em> values not |
| specified in the description above or with (<em>x</em>, <em>y</em>, <em>z</em>) coordinate |
| values that are not in the range [0, image width-1], [0, image |
| height-1], [0, image number of layers-1], respectively, is undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add the following built-in functions to section 6.12.14.5 – Built-in Image Query Functions:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Function</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Description</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>get_image_width</strong>(image2d_depth_t <em>image</em>)<br> |
| int <strong>get_image_width</strong>(image2d_array_depth_t <em>image</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the image width in pixels.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>get_image_height</strong>(image2d_depth_t <em>image</em>)<br> |
| int <strong>get_image_height</strong>(image2d_array_depth_t <em>image</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the image height in pixels.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>get_image_channel_data_type</strong>(image2d_depth_t <em>image</em>)<br> |
| int <strong>get_image_channel_data_type</strong>(image2d_array_depth_t <em>image</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the channel data type. Valid values are:</p> |
| <p class="tableblock"> <code>CLK_UNORM_INT16</code><br> |
| <code>CLK_FLOAT</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>get_image_channel_order</strong>(image2d_depth_t <em>image</em>)<br> |
| int <strong>get_image_channel_order</strong>(image2d_array_depth_t <em>image</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the image channel order. Valid values are:</p> |
| <p class="tableblock"> <code>CLK_DEPTH</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int2 <strong>get_image_dim</strong>(image2d_depth_t <em>image</em>)<br> |
| int2 <strong>get_image_dim</strong>(image2d_array_depth_t <em>image</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the 2D image width and height as an int2 type. |
| The width is returned in the <em>x</em> component, and the height in the <em>y</em> |
| component.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"></td> |
| <td class="tableblock halign-left valign-top"></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t <strong>get_image_array_size</strong>(image2d_array_depth_t <em>image</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the number of images in the 2D image array.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add the following text below the table in section 6.12.14.6 - Mapping image channels to color values returned by read_image |
| and color values passed to write_image to image channels:</p> |
| </div> |
| <div class="paragraph"> |
| <p>For <code>CL_DEPTH</code> images, a scalar value is returned by <strong>read_imagef</strong> or |
| supplied to <strong>write_imagef</strong>.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_gl_depth_images"><a class="anchor" href="#cl_khr_gl_depth_images"></a>17. Sharing OpenGL and OpenGL ES Depth and Depth-Stencil Images</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_gl_depth_images</strong> extension. |
| The <strong>cl_khr_gl_depth_images</strong> extension extends OpenCL / OpenGL sharing (the |
| cl_khr_gl_sharing extension) defined in |
| <a href="#cl_khr_gl_sharing__memobjs">Creating OpenCL Memory Objects from OpenGL |
| Objects</a> to allow an OpenCL image to be created from an OpenGL depth or |
| depth-stencil texture.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_16"><a class="anchor" href="#_general_information_16"></a>17.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_16"><a class="anchor" href="#_version_history_16"></a>17.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_depth_images-additions-to-chapter-5"><a class="anchor" href="#cl_khr_gl_depth_images-additions-to-chapter-5"></a>17.2. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>The <strong>cl_khr_gl_depth_images</strong> extension extends OpenCL / OpenGL sharing by |
| allowing an OpenCL depth image to be created from an OpenGL depth or |
| depth-stencil texture. |
| Depth images with an image channel order of CL_DEPTH_STENCIL can only be |
| created using the <strong>clCreateFromGLTexture</strong> API.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds the following new image format for depth-stencil images |
| to <em>tables 5.6 and 5.7</em> of the OpenCL 2.2 specification.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 100%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Enum values that can be specified in channel_order</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEPTH_STENCIL</strong>. |
| This format can only be used if channel data type = CL_UNORM_INT24 or |
| CL_FLOAT.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Image Channel Data Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_UNORM_INT24</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Each channel component is a normalized unsigned 24-bit integer value</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_FLOAT</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Each channel component is a single precision floating-point value</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>This extension adds the following new image format to the minimum list of |
| supported image formats described in <em>tables 5.8.a</em> and <em>5.8.b</em>.</p> |
| </div> |
| <table id="cl_khr_gl_depth_images-required-image-formats" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 44. <em>Required Image Formats for</em> <strong>cl_khr_gl_depth_images</strong></caption> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| </colgroup> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>num_channels</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>channel_order</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>channel_data_type</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>read / write</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEPTH_STENCIL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_UNORM_INT24<br> |
| CL_FLOAT</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">read only</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>For the image format given by channel order of CL_DEPTH_STENCIL and channel |
| data type of CL_UNORM_INT24, the depth is stored as an unsigned normalized |
| 24-bit value.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For the image format given by channel order of CL_DEPTH_STENCIL and channel |
| data type of CL_FLOAT, each pixel is two 32-bit values. |
| The depth is stored as a single precision floating-point value followed by |
| the stencil which is stored as an 8-bit integer value.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The stencil value cannot be read or written using the <strong>read_imagef</strong> and |
| <strong>write_imagef</strong> built-in functions in an OpenCL kernel.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Depth image objects with an image channel order equal to CL_DEPTH_STENCIL |
| cannot be used as arguments to clEnqueueReadImage, clEnqueueWriteImage, |
| clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, |
| clEnqueueMapImage and clEnqueueFillImage and will return a |
| CL_INVALID_OPERATION error.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_depth_images-additions-to-extension-specification"><a class="anchor" href="#cl_khr_gl_depth_images-additions-to-extension-specification"></a>17.3. Additions to the OpenCL Extension Specification</h3> |
| <div class="paragraph"> |
| <p>The following new image formats are added to the table of |
| <a href="#cl_khr_gl_sharing__memobjs-mapping-of-image-formats">OpenGL internal |
| formats and corresponding OpenCL internal formats</a> in the OpenCL extension |
| specification. |
| If an OpenGL texture object with an internal format in this table is |
| successfully created by OpenGL, then there is guaranteed to be a mapping to |
| one of the corresponding OpenCL image format(s) in that table.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>GL internal format</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>CL image format</strong> |
| |
| <strong>(channel order, channel data type)</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_DEPTH_COMPONENT32F</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEPTH, CL_FLOAT</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_DEPTH_COMPONENT16</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEPTH, CL_UNORM_INT16</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_DEPTH24_STENCIL8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEPTH_STENCIL, CL_UNORM_INT24</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GL_DEPTH32F_STENCIL8</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_DEPTH_STENCIL, CL_FLOAT</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_gl_msaa_sharing"><a class="anchor" href="#cl_khr_gl_msaa_sharing"></a>18. Creating OpenCL Memory Objects from OpenGL MSAA Textures</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension extends the OpenCL / OpenGL sharing (the |
| cl_khr_gl_sharing extension) defined in |
| <a href="#cl_khr_gl_sharing__memobjs">Creating OpenCL Memory Objects from OpenGL |
| Objects</a> to allow an OpenCL image to be created from an OpenGL |
| multi-sampled (a.k.a. |
| MSAA) texture (color or depth).</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension name is <strong>cl_khr_gl_msaa_sharing</strong>. |
| This extension requires <strong>cl_khr_gl_depth_images</strong>.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_17"><a class="anchor" href="#_general_information_17"></a>18.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_17"><a class="anchor" href="#_version_history_17"></a>18.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_msaa_sharing-additions-to-extension-specification"><a class="anchor" href="#cl_khr_gl_msaa_sharing-additions-to-extension-specification"></a>18.2. Additions to the OpenCL Extension Specification</h3> |
| <div class="paragraph"> |
| <p>Allow <em>texture_target</em> argument to <strong>clCreateFromGLTexture</strong> to be |
| GL_TEXTURE_2D_MULTISAMPLE or GL_TEXTURE_2D_MULTISAMPLE_ARRAY.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <em>texture_target</em> is GL_TEXTURE_2D_MULTISAMPLE, <strong>clCreateFromGLTexture</strong> |
| creates an OpenCL 2D multi-sample image object from an OpenGL 2D |
| multi-sample texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <em>texture_target</em> is GL_TEXTURE_2D_MULTISAMPLE_ARRAY, |
| <strong>clCreateFromGLTexture</strong> creates an OpenCL 2D multi-sample array image object |
| from an OpenGL 2D multi-sample array texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Multi-sample OpenCL image objects can only be read from a kernel. |
| Multi-sample OpenCL image objects cannot be used as arguments to |
| clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, |
| clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapImage |
| and clEnqueueFillImage and will return a CL_INVALID_OPERATION error.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Add the following entry to the table describing |
| <a href="#cl_khr_gl_sharing__memobjs-clGetGLTextureInfo-queries">OpenGL texture info |
| that may be queried with clGetGLTextureInfo</a>:</strong></p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_gl_texture_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_GL_NUM_SAMPLES</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">GLsizei</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The <em>samples</em> argument passed to <strong>glTexImage2DMultisample</strong> or |
| <strong>glTexImage3DMultisample</strong>.</p> |
| <p class="tableblock"> If <em>image</em> is not an MSAA texture, 1 is returned.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_msaa_sharing-additions-to-chapter-5"><a class="anchor" href="#cl_khr_gl_msaa_sharing-additions-to-chapter-5"></a>18.3. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>The formats described in tables 5.8.a and 5.8.b of the OpenCL 2.2 |
| specification and the additional formats described in |
| <a href="#cl_khr_gl_depth_images-required-image-formats">required image formats for |
| cl_khr_gl_depth_images</a> also support OpenCL images created from an OpenGL |
| multi-sampled color or depth texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Update the text that describes the <em>arg_value</em> argument to clSetKernelArg with the |
| following:</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>“If the argument is a multi-sample 2D image, the <em>arg_value</em> entry must be |
| a pointer to a multi-sample image object. |
| If the argument is a multi-sample 2D depth image, the <em>arg_value</em> entry must |
| be a pointer to a multi-sample depth image object. |
| If the argument is a multi-sample 2D image array, the <em>arg_value</em> entry must |
| be a pointer to a multi-sample image array object. |
| If the argument is a multi-sample 2D depth image array, the <em>arg_value</em> |
| entry must be a pointer to a multi-sample depth image array object.”</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Updated error code text for clSetKernelArg is:</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Add the following text:</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>“CL_INVALID_MEM_OBJECT for an argument declared to be a multi-sample image, |
| multi-sample image array, multi-sample depth image or a multi-sample depth |
| image array and the argument value specified in <em>arg_value</em> does not follow |
| the rules described above for a depth memory object or memory array object |
| argument.”</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_gl_msaa_sharing-additions-to-chapter-6"><a class="anchor" href="#cl_khr_gl_msaa_sharing-additions-to-chapter-6"></a>18.4. Additions to Chapter 6 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p><strong>Add the following new data types to <em>table 6.3</em> in <em>section 6.1.3</em> of the |
| OpenCL 2.2 specification:</strong></p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>image2d_msaa_t</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2D multi-sample color image. |
| Refer to <em>section 6.13.14</em> for a detailed description of the built-in |
| functions that use this type.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>image2d_array_msaa_t</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2D multi-sample color image array. |
| Refer to <em>section 6.13.14</em> for a detailed description of the built-in |
| functions that use this type.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>image2d_msaa_depth_t</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2D multi-sample depth image. |
| Refer to <em>section 6.13.14</em> for a detailed description of the built-in |
| functions that use this type.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>image2d_array_msaa_depth_t</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A 2D multi-sample depth image array. |
| Refer to <em>section 6.13.14</em> for a detailed description of the built-in |
| functions that use this type.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><strong>Add the following built-in functions to section 6.13.14.3 — Built-in Image |
| Sampler-less Read Functions:</strong></p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| image2d_msaa_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> sample)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Use the coordinate <em>(coord.x, coord.y)</em> and <em>sample</em> to do an element lookup |
| in the 2D image object specified by <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns floating-point values in the range [0.0 …​ 1.0] for |
| image objects created with <em>image_channel_data_type</em> set to one of the |
| pre-defined packed formats or CL_UNORM_INT8, or CL_UNORM_INT16.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns floating-point values in the range [-1.0 …​ 1.0] for |
| image objects created with <em>image_channel_data_type</em> set to CL_SNORM_INT8, |
| or CL_SNORM_INT16.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns floating-point values for image objects created with |
| <em>image_channel_data_type</em> set to CL_HALF_FLOAT or CL_FLOAT.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above are |
| undefined.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">int4 read_imagei(image2d_msaa_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> sample) |
| |
| uint4 read_imageui(image2d_msaa_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> sample)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Use the coordinate <em>(coord.x, coord.y)</em> and <em>sample</em> to do an element lookup |
| in the 2D image object specified by <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagei</strong> and <strong>read_imageui</strong> return unnormalized signed integer and |
| unsigned integer values respectively. |
| Each channel will be stored in a 32-bit integer.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagei</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the following values:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_SIGNED_INT8,</p> |
| </li> |
| <li> |
| <p>CL_SIGNED_INT16, and</p> |
| </li> |
| <li> |
| <p>CL_SIGNED_INT32.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If the <em>image_channel_data_type</em> is not one of the above values, the values |
| returned by <strong>read_imagei</strong> are undefined.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imageui</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the following values:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_UNSIGNED_INT8,</p> |
| </li> |
| <li> |
| <p>CL_UNSIGNED_INT16, and</p> |
| </li> |
| <li> |
| <p>CL_UNSIGNED_INT32.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If the <em>image_channel_data_type</em> is not one of the above values, the values |
| returned by <strong>read_imageui</strong> are undefined.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef(image2d_array_msaa_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> sample)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Use <em>coord.xy</em> and <em>sample</em> to do an element lookup in the 2D image |
| identified by <em>coord.z</em> in the 2D image array specified by <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns floating-point values in the range [0.0 …​ 1.0] for |
| image objects created with <em>image_channel_data_type</em> set to one of the |
| pre-defined packed formats or CL_UNORM_INT8, or CL_UNORM_INT16.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns floating-point values in the range [-1.0 …​ 1.0] for |
| image objects created with <em>image_channel_data_type</em> set to CL_SNORM_INT8, |
| or CL_SNORM_INT16.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns floating-point values for image objects created with |
| <em>image_channel_data_type</em> set to CL_HALF_FLOAT or CL_FLOAT.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above are |
| undefined.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">int4 read_imagei(image2d_array_msaa_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> sample) |
| |
| uint4 read_imageui(image2d_array_msaa_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> sample)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Use <em>coord.xy</em> and <em>sample</em> to do an element lookup in the 2D image |
| identified by <em>coord.z</em> in the 2D image array specified by <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagei</strong> and <strong>read_imageui</strong> return unnormalized signed integer and |
| unsigned integer values respectively. |
| Each channel will be stored in a 32-bit integer.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagei</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the following values:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_SIGNED_INT8,</p> |
| </li> |
| <li> |
| <p>CL_SIGNED_INT16, and</p> |
| </li> |
| <li> |
| <p>CL_SIGNED_INT32.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If the <em>image_channel_data_type</em> is not one of the above values, the values |
| returned by <strong>read_imagei</strong> are undefined.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imageui</strong> can only be used with image objects created with |
| <em>image_channel_data_type</em> set to one of the following values:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_UNSIGNED_INT8,</p> |
| </li> |
| <li> |
| <p>CL_UNSIGNED_INT16, and</p> |
| </li> |
| <li> |
| <p>CL_UNSIGNED_INT32.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If the <em>image_channel_data_type</em> is not one of the above values, the values |
| returned by <strong>read_imageui</strong> are undefined.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">float</span> read_imagef(image2d_msaa_depth_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> sample)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Use the coordinate <em>(coord.x, coord.y)</em> and <em>sample</em> to do an element lookup |
| in the 2D depth image object specified by <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns a floating-point value in the range [0.0 …​ 1.0] for |
| depth image objects created with <em>image_channel_data_type</em> set to |
| CL_UNORM_INT16 or CL_UNORM_INT24.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns a floating-point value for depth image objects created |
| with <em>image_channel_data_type</em> set to CL_FLOAT.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above are |
| undefined.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">float</span> read_imagef(image2d_array_msaa_depth_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> sample)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Use <em>coord.xy</em> and <em>sample</em> to do an element lookup in the 2D image |
| identified by <em>coord.z</em> in the 2D depth image array specified by <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns a floating-point value in the range [0.0 …​ 1.0] for |
| depth image objects created with <em>image_channel_data_type</em> set to |
| CL_UNORM_INT16 or CL_UNORM_INT24.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>read_imagef</strong> returns a floating-point value for depth image objects created |
| with <em>image_channel_data_type</em> set to CL_FLOAT.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Values returned by <strong>read_imagef</strong> for image objects with |
| <em>image_channel_data_type</em> values not specified in the description above are |
| undefined.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note: When a multisample image is accessed in a kernel, the access takes one |
| vector of integers describing which pixel to fetch and an integer |
| corresponding to the sample number describing which sample within the pixel |
| to fetch. |
| <em>sample</em> identifies the sample position in the multi-sample image.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>For best performance, we recommend that <em>sample</em> be a literal value so it |
| is known at compile time and the OpenCL compiler can perform appropriate |
| optimizations for multi-sample reads on the device</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>No standard sampling instructions are allowed on the multisample image. |
| Accessing a coordinate outside the image and/or a sample that is outside the |
| number of samples associated with each pixel in the image is undefined.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Add the following built-in functions to section 6.13.14.5 — Built-in Image |
| Query Functions:</strong></p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> get_image_width(image2d_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_width(image2d_array_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_width(image2d_msaa_depth_t image) |
| |
| <span class="predefined-type">int</span> get_image_width(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the image width in pixels.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> get_image_height(image2d_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_height(image2d_array_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_height(image2d_msaa_depth_t image) |
| |
| <span class="predefined-type">int</span> get_image_height(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the image height in pixels.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> get_image_channel_data_type(image2d_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_channel_data_type(image2d_array_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_channel_data_type(image2d_msaa_depth_t image) |
| |
| <span class="predefined-type">int</span> get_image_channel_data_type(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the channel data type.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> get_image_channel_order(image2d_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_channel_order(image2d_array_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_channel_order(image2d_msaa_depth_t image) |
| |
| <span class="predefined-type">int</span> get_image_channel_order(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the image channel order.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">int2 get_image_dim(image2d_msaa_t image) |
| |
| int2 get_image_dim(image2d_array_msaa_t image) |
| |
| int2 get_image_dim(image2d_msaa_depth_t image) |
| |
| int2 get_image_dim(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the 2D image width and height as an int2 type. |
| The width is returned in the <em>x</em> component, and the height in the <em>y</em> |
| component.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">size_t get_image_array_size(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the number of images in the 2D image array.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> get_image_num_samples(image2d_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_samples(image2d_array_msaa_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_samples(image2d_msaa_depth_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_samples(image2d_array_msaa_depth_t image)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Return the number of samples in the 2D MSAA image.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_egl_event"><a class="anchor" href="#cl_khr_egl_event"></a>19. Creating OpenCL Event Objects from EGL Sync Objects</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_event-overview"><a class="anchor" href="#cl_khr_egl_event-overview"></a>19.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_egl_event</strong> extension. |
| This extension allows creating OpenCL event objects linked to EGL fence sync |
| objects, potentially improving efficiency of sharing images and buffers |
| between the two APIs. |
| The companion <strong>EGL_KHR_cl_event</strong> extension provides the complementary |
| functionality of creating an EGL sync object from an OpenCL event object.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_18"><a class="anchor" href="#_general_information_18"></a>19.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_18"><a class="anchor" href="#_version_history_18"></a>19.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_event-new-procedures-and-functions"><a class="anchor" href="#cl_khr_egl_event-new-procedures-and-functions"></a>19.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_event clCreateEventFromEGLSyncKHR(cl_context context, |
| CLeglSyncKHR sync, |
| CLeglDisplayKHR display, |
| cl_int *errcode_ret);</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_event-new-tokens"><a class="anchor" href="#cl_khr_egl_event-new-tokens"></a>19.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Returned by clCreateEventFromEGLSyncKHR if <em>sync</em> is not a valid EGLSyncKHR |
| handle created with respect to EGLDisplay <em>display</em>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_INVALID_EGL_OBJECT_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Returned by <strong>clGetEventInfo</strong> when <em>param_name</em> is CL_EVENT_COMMAND_TYPE:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_event-additions-to-chapter-5"><a class="anchor" href="#cl_khr_egl_event-additions-to-chapter-5"></a>19.5. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add the following to the fourth paragraph of <em>section 5.11</em> (prior to the |
| description of <strong>clWaitForEvents</strong>):</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Event objects can also be used to reflect the status of an EGL fence sync |
| object. |
| The sync object in turn refers to a fence command executing in an EGL client |
| API command stream. |
| This provides another method of coordinating sharing of EGL / EGL client API |
| objects with OpenCL. |
| Completion of EGL / EGL client API commands may be determined by placing an |
| EGL fence command after commands using eglCreateSyncKHR, creating an event |
| from the resulting EGL sync object using clCreateEventFromEGLSyncKHR and |
| then specifying it in the <em>event_wait_list</em> of a clEnqueueAcquire*** |
| command. |
| This method may be considerably more efficient than calling operations like |
| glFinish, and is referred to as <em>explicit synchronization</em>. |
| The application is responsible for ensuring the command stream associated |
| with the EGL fence is flushed to ensure the CL queue is submitted to the |
| device. |
| Explicit synchronization is most useful when an EGL client API context bound |
| to another thread is accessing the memory objects.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR to the valid <em>param_value</em> values |
| returned by <strong>clGetEventInfo</strong> for <em>param_name</em> CL_EVENT_COMMAND_TYPE (in the |
| third row and third column of <em>table 5.22</em>).</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add new <em>subsection 5.11.2</em>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“<strong>5.11.2 Linking Event Objects to EGL Synchronization Objects</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>An event object may be created by linking to an EGL <strong>sync object</strong>. |
| Completion of such an event object is equivalent to waiting for completion |
| of the fence command associated with the linked EGL sync object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_event clCreateEventFromEGLSyncKHR(cl_context context, |
| CLeglSyncKHR sync, |
| CLeglDisplayKHR display, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates a linked event object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> is a valid OpenCL context created from an OpenGL context or share |
| group, using the <strong>cl_khr_gl_sharing</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>sync</em> is the name of a sync object of type EGL_SYNC_FENCE_KHR created with |
| respect to EGLDisplay <em>display</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateEventFromEGLSyncKHR</strong> returns a valid OpenCL event object and |
| <em>errcode_ret</em> is set to CL_SUCCESS if the event object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context, or was not |
| created from a GL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EGL_OBJECT_KHR if <em>sync</em> is not a valid EGLSyncKHR object of |
| type EGL_SYNC_FENCE_KHR created with respect to EGLDisplay <em>display</em>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The parameters of an event object linked to an EGL sync object will return |
| the following values when queried with <strong>clGetEventInfo</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The CL_EVENT_COMMAND_QUEUE of a linked event is <code>NULL</code>, because the |
| event is not associated with any OpenCL command queue.</p> |
| </li> |
| <li> |
| <p>The CL_EVENT_COMMAND_TYPE of a linked event is |
| CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR, indicating that the event is |
| associated with an EGL sync object, rather than an OpenCL command.</p> |
| </li> |
| <li> |
| <p>The CL_EVENT_COMMAND_EXECUTION_STATUS of a linked event is either |
| CL_SUBMITTED, indicating that the fence command associated with the sync |
| object has not yet completed, or CL_COMPLETE, indicating that the fence |
| command has completed.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateEventFromEGLSyncKHR</strong> performs an implicit <strong>clRetainEvent</strong> on the |
| returned event object. |
| Creating a linked event object also places a reference on the linked EGL |
| sync object. |
| When the event object is deleted, the reference will be removed from the EGL |
| sync object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Events returned from <strong>clCreateEventFromEGLSyncKHR</strong> may only be consumed by |
| <strong>clEnqueueAcquire</strong>*** commands. |
| Passing such events to any other CL API that enqueues commands will generate |
| a CL_INVALID_EVENT error.”</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_event-additions-to-extension-specification"><a class="anchor" href="#cl_khr_egl_event-additions-to-extension-specification"></a>19.6. Additions to the OpenCL Extension Specification</h3> |
| <div class="paragraph"> |
| <p>Replace the second paragraph of |
| <a href="#cl_khr_gl_sharing__memobjs-synchronizing-opencl-and-opengl-access-to-shared-objects">Synchronizing OpenCL and OpenGL Access to Shared Objects</a> with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Prior to calling <strong>clEnqueueAcquireGLObjects</strong>, the application must ensure |
| that any pending EGL or EGL client API operations which access the objects |
| specified in <em>mem_objects</em> have completed.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the <strong>cl_khr_egl_event</strong> extension is supported and the EGL context in |
| question supports fence sync objects, <em>explicit synchronization</em> can be |
| achieved as set out in <em>section 5.7.1</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the <strong>cl_khr_egl_event</strong> extension is not supported, completion of EGL |
| client API commands may be determined by issuing and waiting for completion |
| of commands such as glFinish or vgFinish on all client API contexts with |
| pending references to these objects. |
| Some implementations may offer other efficient synchronization methods. |
| If such methods exist they will be described in platform-specific |
| documentation.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note that no synchronization methods other than glFinish and vgFinish are |
| portable between all EGL client API implementations and all OpenCL |
| implementations. |
| While this is the only way to ensure completion that is portable to all |
| platforms, these are expensive operations and their use should be avoided if |
| the cl_khr_egl_event extension is supported on a platform.”</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_event-issues"><a class="anchor" href="#cl_khr_egl_event-issues"></a>19.7. Issues</h3> |
| <div class="paragraph"> |
| <p>Most issues are shared with <strong>cl_khr_gl_event</strong> and are resolved as described |
| in that extension.</p> |
| </div> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>Should we support implicit synchronization?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: No, as this may be very difficult since the synchronization would |
| not be with EGL, it would be with currently bound EGL client APIs. |
| It would be necessary to know which client APIs might be bound, to validate |
| that they’re associated with the EGLDisplay associated with the OpenCL |
| context, and to reach into each such context.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Do we need to have typedefs to use EGL handles in OpenCL?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: Using typedefs for EGL handles.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should we restrict which CL APIs can be used with this cl_event?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: Use is limited to clEnqueueAcquire*** calls only.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What is the desired behaviour for this extension when EGLSyncKHR is of a |
| type other than EGL_SYNC_FENCE_KHR?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: This extension only requires support for EGL_SYNC_FENCE_KHR. |
| Support of other types is an implementation choice, and will result in |
| CL_INVALID_EGL_OBJECT_KHR if unsupported.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_egl_image"><a class="anchor" href="#cl_khr_egl_image"></a>20. Creating OpenCL Memory Objects from EGL Images</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_image-overview"><a class="anchor" href="#cl_khr_egl_image-overview"></a>20.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_egl_image</strong> extension. |
| This extension provides a mechanism for creating OpenCL memory objects from |
| EGLImages.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_19"><a class="anchor" href="#_general_information_19"></a>20.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_19"><a class="anchor" href="#_version_history_19"></a>20.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_image-new-procedures-and-functions"><a class="anchor" href="#cl_khr_egl_image-new-procedures-and-functions"></a>20.3. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromEGLImageKHR(cl_context context, |
| CLeglDisplayKHR display, |
| CLeglImageKHR image, |
| cl_mem_flags flags, |
| <span class="directive">const</span> cl_egl_image_properties_khr *properties, |
| cl_int *errcode_ret); |
| |
| cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event) |
| |
| cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_image-new-tokens"><a class="anchor" href="#cl_khr_egl_image-new-tokens"></a>20.4. New Tokens</h3> |
| <div class="paragraph"> |
| <p>New error codes:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_EGL_RESOURCE_NOT_ACQUIRED_KHR |
| CL_INVALID_EGL_OBJECT_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>New command types:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR |
| CL_COMMAND_RELEASE_EGL_OBJECTS_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_image-additions-to-chapter-5"><a class="anchor" href="#cl_khr_egl_image-additions-to-chapter-5"></a>20.5. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>In section 5.2.4, add the following text after the paragraph defining |
| clCreateImage:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_mem clCreateFromEGLImageKHR(cl_context context, |
| CLeglDisplayKHR display, |
| CLeglImageKHR image, |
| cl_mem_flags flags, |
| <span class="directive">const</span> cl_egl_image_properties_khr *properties, |
| cl_int *errcode_ret);</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates an EGLImage target of type cl_mem from the EGLImage source provided |
| as <em>image</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>display</em> should be of type EGLDisplay, cast into the type CLeglDisplayKHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>image</em> should be of type EGLImageKHR, cast into the type CLeglImageKHR. |
| Assuming no errors are generated in this function, the resulting image |
| object will be an EGLImage target of the specified EGLImage <em>image</em>. |
| The resulting cl_mem is an image object which may be used normally by all |
| OpenCL operations. |
| This maps to an image2d_t type in OpenCL kernel code.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>flags</em> is a bit-field that is used to specify usage information about the |
| memory object being created.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The possible values for <em>flags</em> are: CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and |
| CL_MEM_READ_WRITE.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For OpenCL 1.2 <em>flags</em> also accepts: CL_MEM_HOST_WRITE_ONLY, |
| CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension only requires support for CL_MEM_READ_ONLY, and for OpenCL |
| 1.2 CL_MEM_HOST_NO_ACCESS. |
| For OpenCL 1.1, a CL_INVALID_OPERATION will be returned for images which do |
| not support host mapping.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the value passed in <em>flags</em> is not supported by the OpenCL implementation |
| it will return CL_INVALID_VALUE. |
| The accepted <em>flags</em> may be dependent upon the texture format used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>properties</em> specifies a list of property names and their corresponding |
| values. |
| Each property name is immediately followed by the corresponding desired |
| value. |
| The list is terminated with 0. |
| No properties are currently supported with this version of the extension. |
| <em>properties</em> can be <code>NULL</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateFromEGLImageKHR</strong> returns a valid non-zero OpenCL image object and |
| <em>errcode_ret</em> is set to CL_SUCCESS if the image object is created |
| successfully. |
| Otherwise, it returns a <code>NULL</code> value with one of the following error values |
| returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid OpenCL context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>properties</em> contains invalid values, if <em>display</em> |
| is not a valid display object or if <em>flags</em> are not in the set defined |
| above.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EGL_OBJECT_KHR if <em>image</em> is not a valid EGLImage object.</p> |
| </li> |
| <li> |
| <p>CL_IMAGE_FORMAT_NOT_SUPPORTED if the OpenCL implementation is not able |
| to create a cl_mem compatible with the provided CLeglImageKHR for an |
| implementation-dependent reason (this could be caused by, but not |
| limited to, reasons such as unsupported texture formats, etc).</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if there are no devices in <em>context</em> that support |
| images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in table 4.3 is CL_FALSE) |
| or if the flags passed are not supported for that image type.”</p> |
| </li> |
| </ul> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_egl_image-lifetime-of-shared-objects"><a class="anchor" href="#cl_khr_egl_image-lifetime-of-shared-objects"></a>20.5.1. Lifetime of Shared Objects</h4> |
| <div class="paragraph"> |
| <p>An OpenCL memory object created from an EGL image remains valid according to |
| the lifetime behavior as described in EGL_KHR_image_base.</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Any EGLImage siblings exist in any client API context”</p> |
| </div> |
| <div class="paragraph"> |
| <p>For OpenCL this means that while the application retains a reference on the |
| cl_mem (the EGL sibling), the image remains valid.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_egl_image-synchronizing-opengl-and-egl-access-to-shared-objects"><a class="anchor" href="#cl_khr_egl_image-synchronizing-opengl-and-egl-access-to-shared-objects"></a>20.5.2. Synchronizing OpenCL and EGL Access to Shared Objects</h4> |
| <div class="paragraph"> |
| <p>In order to ensure data integrity, the application is responsible for |
| synchronizing access to shared CL/EGL objects by their respective APIs. |
| Failure to provide such synchronization may result in race conditions and |
| other undefined behavior including non-portability between implementations.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Prior to calling clEnqueueAcquireEGLObjectsKHR, the application must ensure |
| that any pending operations which access the objects specified in |
| mem_objects have completed. |
| This may be accomplished in a portable way by ceasing all client operations |
| on the resource, and issuing and waiting for completion of a glFinish |
| command on all GL contexts with pending references to these objects. |
| Implementations may offer more efficient synchronization methods, such as |
| synchronization primitives or fence operations.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Similarly, after calling clEnqueueReleaseEGLObjectsKHR, the application is |
| responsible for ensuring that any pending OpenCL operations which access the |
| objects specified in mem_objects have completed prior to executing |
| subsequent commands in other APIs which reference these objects. |
| This may be accomplished in a portable way by calling clWaitForEvents with |
| the event object returned by clEnqueueReleaseEGLObjectsKHR, or by calling |
| clFinish. |
| As above, some implementations may offer more efficient methods.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Attempting to access the data store of an EGLImage object after it has been |
| acquired by OpenCL and before it has been released will result in undefined |
| behavior. |
| Similarly, attempting to access a shared EGLImage object from OpenCL before |
| it has been acquired by the OpenCL command queue or after it has been |
| released, will result in undefined behavior.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_egl_image-sharing-memory-objects-created-from-egl-resources-between-egldisplays-and-opencl-contexts"><a class="anchor" href="#cl_khr_egl_image-sharing-memory-objects-created-from-egl-resources-between-egldisplays-and-opencl-contexts"></a>20.5.3. Sharing memory objects created from EGL resources between EGLDisplays and OpenCL contexts</h4> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to acquire OpenCL memory objects that have been created from EGL |
| resources. |
| The EGL objects are acquired by the OpenCL context associated with |
| <em>command_queue</em> and can therefore be used by all command-queues associated |
| with the OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from EGL resources must be acquired before |
| they can be used by any OpenCL commands queued to a command-queue. |
| If an OpenCL memory object created from an EGL resource is used while it is |
| not currently acquired by OpenCL, the call attempting to use that OpenCL |
| memory object will return CL_EGL_RESOURCE_NOT_ACQUIRED_KHR.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>command_queue</em> is a valid command-queue.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be acquired in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from EGL resources, within the context associated with <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueAcquireEGLObjectsKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> then the function does |
| nothing and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> &gt; 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects in the context associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EGL_OBJECT_KHR if memory objects in <em>mem_objects</em> have not |
| been created from EGL resources.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, |
| cl_uint num_objects, |
| <span class="directive">const</span> cl_mem *mem_objects, |
| cl_uint num_events_in_wait_list, |
| <span class="directive">const</span> cl_event *event_wait_list, |
| cl_event *event)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>is used to release OpenCL memory objects that have been created from EGL |
| resources. |
| The EGL objects are released by the OpenCL context associated with |
| <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL memory objects created from EGL resources which have been acquired by |
| OpenCL must be released by OpenCL before they may be accessed by EGL or by |
| EGL client APIs. |
| Accessing an EGL resource while its corresponding OpenCL memory object is |
| acquired is in error and will result in undefined behavior, including but |
| not limited to possible OpenCL errors, data corruption, and program |
| termination.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>command_queue</em> is a valid command-queue.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>num_objects</em> is the number of memory objects to be released in |
| <em>mem_objects</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>mem_objects</em> is a pointer to a list of OpenCL memory objects that were |
| created from EGL resources, within the context associated with <em>command_queue</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event_wait_list</em> and <em>num_events_in_wait_list</em> specify events that need to |
| complete before this particular command can be executed. |
| If <em>event_wait_list</em> is <code>NULL</code>, then this particular command does not wait |
| on any event to complete. |
| If <em>event_wait_list</em> is <code>NULL</code>, <em>num_events_in_wait_list</em> must be 0. |
| If <em>event_wait_list</em> is not <code>NULL</code>, the list of events pointed to by |
| <em>event_wait_list</em> must be valid and <em>num_events_in_wait_list</em> must be |
| greater than 0. |
| The events specified in <em>event_wait_list</em> act as synchronization points.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>event</em> returns an event object that identifies this command and |
| can be used to query or wait for this command to complete. |
| If <em>event</em> is <code>NULL</code> or the enqueue is unsuccessful, no event will be |
| created and therefore it will not be possible to query the status of this |
| command or to wait for this command to complete. |
| If <em>event_wait_list</em> and <em>event</em> are not <code>NULL</code>, <em>event</em> must not refer |
| to an element of the <em>event_wait_list</em> array.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clEnqueueReleaseEGLObjectsKHR</strong> returns CL_SUCCESS if the function is |
| executed successfully. |
| If <em>num_objects</em> is 0 and <em>mem_objects</em> is <code>NULL</code> then the function does |
| nothing and returns CL_SUCCESS. |
| Otherwise it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_VALUE if <em>num_objects</em> is zero and <em>mem_objects</em> is not a |
| <code>NULL</code> value or if <em>num_objects</em> &gt; 0 and <em>mem_objects</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_MEM_OBJECT if memory objects in <em>mem_objects</em> are not valid |
| OpenCL memory objects in the context associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EGL_OBJECT_KHR if memory objects in <em>mem_objects</em> have not |
| been created from EGL resources.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_COMMAND_QUEUE if <em>command_queue</em> is not a valid |
| command-queue.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_EVENT_WAIT_LIST if <em>event_wait_list</em> is <code>NULL</code> and |
| <em>num_events_in_wait_list</em> > 0, or <em>event_wait_list</em> is not <code>NULL</code> and |
| <em>num_events_in_wait_list</em> is 0, or if event objects in <em>event_wait_list</em> |
| are not valid events.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_egl_image-event-command-types"><a class="anchor" href="#cl_khr_egl_image-event-command-types"></a>20.5.4. Event Command Types for Sharing memory objects created from EGL resources</h4> |
| <div class="paragraph"> |
| <p>The following table describes the event command types for the OpenCL commands |
| to acquire and release OpenCL memory objects that have been created from |
| EGL resources:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 45. List of supported event command types</caption> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Events Created By</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Event Command Type</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueAcquireEGLObjectsKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>ACQUIRE_<wbr>EGL_<wbr>OBJECTS_<wbr>KHR</code></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>clEnqueueReleaseEGLObjectsKHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_COMMAND_<wbr>RELEASE_<wbr>EGL_<wbr>OBJECTS_<wbr>KHR</code></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_egl_image-issues"><a class="anchor" href="#cl_khr_egl_image-issues"></a>20.6. Issues</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>This extension does not support reference counting of the images, so the |
| onus is on the application to behave sensibly and not release the |
| underlying cl_mem object while the EGLImage is still being used.</p> |
| </li> |
| <li> |
| <p>In order to ensure data integrity, the application is responsible for |
| synchronizing access to shared CL/EGL image objects by their respective |
| APIs. |
| Failure to provide such synchronization may result in race conditions |
| and other undefined behavior. |
| This may be accomplished by calling clWaitForEvents with the event |
| objects returned by any OpenCL commands which use the shared image |
| object or by calling clFinish.</p> |
| </li> |
| <li> |
| <p>Currently CL_MEM_READ_ONLY is the only supported flag for <em>flags</em>.</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: Implementation will now return an error if writing to a shared |
| object that is not supported rather than disallowing it entirely.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Currently restricted to 2D image objects.</p> |
| </li> |
| <li> |
| <p>What should happen for YUV color-space conversion, multi plane images, |
| and chroma-siting, and channel mapping?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: YUV is no longer explicitly described in this extension. |
| Before this removal the behavior was dependent on the platform. |
| This extension explicitly leaves the YUV layout to the platform and EGLImage |
| source extension (i.e. is implementation specific). |
| Colorspace conversion must be applied by the application using a color |
| conversion matrix.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The expected extension path if YUV color-space conversion is to be supported |
| is to introduce a YUV image type and provide overloaded versions of the |
| read_image built-in functions.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Getting image information for a YUV image should return the original image |
| size (non quantized size) when all of Y U and V are present in the image. |
| If the planes have been separated then the actual dimensionality of the |
| separated plane should be reported. |
| For example with YUV 4:2:0 (NV12) with a YUV image of 256x256, the Y only |
| image would return 256x256 whereas the UV only image would return 128x128.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should an attribute list be used instead?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: function has been changed to use an attribute list.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What should happen for EGLImage extensions which introduce formats |
| without a mapping to an OpenCL image channel data type or channel order?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>RESOLVED: This extension does not define those formats. |
| It is expected that as additional EGL extensions are added to create EGL |
| images from other sources, an extension to CL will be introduced where |
| needed to represent those image types.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What are the guarantees to synchronization behavior provided by the |
| implementation?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The basic portable form of synchronization is to use a clFinish, as is the |
| case for GL interop. |
| In addition implementations which support the synchronization extensions |
| cl_khr_egl_event and EGL_KHR_cl_event can interoperate more efficiently as |
| described in those extensions.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_image2d_from_buffer"><a class="anchor" href="#cl_khr_image2d_from_buffer"></a>21. Creating a 2D Image From A Buffer</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_image2d_from_buffer</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension allows a 2D image to be created from an existing OpenCL buffer memory object.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension became a core feature in OpenCL 2.0.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_20"><a class="anchor" href="#_general_information_20"></a>21.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_20"><a class="anchor" href="#_version_history_20"></a>21.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_chapter_4_of_the_opencl_1_2_specification"><a class="anchor" href="#_additions_to_chapter_4_of_the_opencl_1_2_specification"></a>21.2. Additions to Chapter 4 of the OpenCL 1.2 Specification</h3> |
| <div class="paragraph"> |
| <p>The following table entry describes the additions to <em>table 4.3,</em> which allows applications to query the configuration information using <strong>clGetDeviceInfo</strong> for an OpenCL device that supports creating a 2D image from a buffer.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 40%;"> |
| <col style="width: 20%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_IMAGE_<br> |
| PITCH_ALIGNMENT_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_uint</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The row pitch alignment size in pixels for images created from a buffer. The value returned must be a power of 2.<br> |
| <br> |
| If the device does not support images, this value should be 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_IMAGE_BASE_<br> |
| ADDRESS_ALIGNMENT_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_uint</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">This query should be used when an image is created from a buffer which was created using <code>CL_MEM_USE_HOST_PTR</code>. The value returned must be a power of 2.<br> |
| <br> |
| This query specifies the minimum alignment in pixels of the <em>host_ptr</em> specified to <strong>clCreateBuffer</strong>.<br> |
| <br> |
| If the device does not support images, this value should be 0.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_chapter_5_of_the_opencl_1_2_specification"><a class="anchor" href="#_additions_to_chapter_5_of_the_opencl_1_2_specification"></a>21.3. Additions to Chapter 5 of the OpenCL 1.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add to Section 5.3.1: Creating Image Objects:</p> |
| </div> |
| <div class="paragraph"> |
| <p>A 2D image can be created from a buffer by specifying a <em>buffer</em> object in the <em>image_desc</em> passed to <strong>clCreateImage</strong> for an <em>image_type</em> equal to <code>CL_MEM_OBJECT_IMAGE2D</code>. When the 2D image from buffer is created, the client must specify the width, height and image format (i.e. channel order and channel data type). If these are not specified, <strong>clCreateImage</strong> returns a NULL value with <em>errcode_ret</em> set to <code>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR</code>. The pitch can be optionally specified. If the pitch is not specified, the pitch is computed as width × bytes per pixel based on the image format.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The pitch specified (or computed if pitch specified is 0) must be a multiple of the maximum of the <code>CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR</code> value for all devices in the context associated with the <em>buffer</em> that support images. Otherwise, <strong>clCreateImage</strong> returns a NULL value with <em>errcode_ret</em> set to <code>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the <em>buffer</em> was created with <code>CL_MEM_USE_HOST_PTR</code>, the <em>host_ptr</em> specified to <strong>clCreateBuffer</strong> must be aligned to the maximum of the <code>CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR</code> value for all devices in the context associated with the <em>buffer</em> that support images. Otherwise, <strong>clCreateImage</strong> returns a NULL value with <em>errcode_ret</em> set to <code>CL_INVALID_IMAGE_FORMAT_DESCRIPTOR</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The minimum list of supported image formats described in <em>table 5.8</em> of the OpenCL 1.2 specification must be supported for 2D images created from a buffer.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The OpenCL runtime APIs that operate on images (i.e. <strong>clEnqueueReadImage</strong>, <strong>clEnqueueWriteImage</strong>, <strong>clEnqueueFillImage</strong>, <strong>clEnqueueCopyImage</strong>, <strong>clEnqueueCopyImageToBuffer</strong>, <strong>clEnqueueCopyBufferToImage</strong> and <strong>clEnqueueMapImage</strong>) are supported for a 2D image created from a buffer.</p> |
| </div> |
| <div class="paragraph"> |
| <p>When the contents of a buffer object data store are modified, those changes are reflected in the contents of the 2D image object and vice-versa at corresponding synchronization points. The <em>image_height</em> × <em>image_row_pitch</em> specified in <em>image_desc</em> must be less than or equal to the size of the buffer object data store.</p> |
| </div> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| Concurrent reading from, writing to, and copying between both a buffer object and the 2D image object associated with the buffer object is undefined. Only reading from both a buffer object and 2D image object associated with the buffer object is defined. A 2D image and a 2D image created from a buffer use the same image type in OpenCL C (<code>image2d_t</code>). The image built-in functions described in <em>sections 6.12.14.2</em>, <em>6.12.14.3</em>, <em>6.12.14.4</em> and <em>6.12.14.5</em> for <code>image2d_t</code> behave the same way for a 2D image and a 2D image from a buffer. |
| </td> |
| </tr> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_initialize_memory"><a class="anchor" href="#cl_khr_initialize_memory"></a>22. Local and Private Memory Initialization</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Memory is allocated in various forms in OpenCL either explicitly (global |
| memory) or implicitly (local, private memory). |
| This allocation so far does not provide a straightforward mechanism to |
| initialize the memory on allocation. |
| In other words what is lacking is the equivalent of calloc for the currently |
| supported malloc-like capability. |
| This functionality is useful for a variety of reasons including ease of |
| debugging, application controlled limiting of visibility to previous |
| contents of memory and in some cases, optimization.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds support for initializing local and private memory before |
| a kernel begins execution. |
| This extension name is <strong>cl_khr_initialize_memory</strong>.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_21"><a class="anchor" href="#_general_information_21"></a>22.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_21"><a class="anchor" href="#_version_history_21"></a>22.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_initialize_memory-additions-to-chapter-4"><a class="anchor" href="#cl_khr_initialize_memory-additions-to-chapter-4"></a>22.2. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add a new context property to <em>table 4.5</em> in <em>section 4.4</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 22.2222%;"> |
| <col style="width: 44.4445%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_properties enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_MEMORY_INITIALIZE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_context_memory_initialize_khr</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Describes which memory types for the context must be initialized. |
| This is a bit-field, where the following values are currently supported:</p> |
| <p class="tableblock"> CL_CONTEXT_MEMORY_INITIALIZE_LOCAL_KHR — Initialize local memory to |
| zeros.</p> |
| <p class="tableblock"> CL_CONTEXT_MEMORY_INITIALIZE_PRIVATE_KHR — Initialize private memory to |
| zeros.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_initialize_memory-additions-to-chapter-6"><a class="anchor" href="#cl_khr_initialize_memory-additions-to-chapter-6"></a>22.3. Additions to Chapter 6 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Updates to <em>section 6.9</em> — Restrictions</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the context is created with CL_CONTEXT_MEMORY_INITIALIZE_KHR, appropriate |
| memory locations as specified by the bit-field are initialized with zeroes, |
| prior to the start of execution of any kernel. |
| The driver chooses when, prior to kernel execution, the initialization of |
| local and/or private memory is performed. |
| The only requirement is there should be no values set from outside the |
| context, which can be read during a kernel execution.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_terminate_context"><a class="anchor" href="#cl_khr_terminate_context"></a>23. Terminating OpenCL contexts</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>Today, OpenCL provides an API to release a context. |
| This operation is done only after all queues, memory objects, programs and |
| kernels are released, which in turn might wait for all ongoing operations to |
| complete. |
| However, there are cases in which a fast release is required, or release |
| operation cannot be done, as commands are stuck in mid execution. |
| An example of the first case can be program termination due to exception, or |
| quick shutdown due to low power. |
| Examples of the second case are when a kernel is running too long, or gets |
| stuck, or it may result from user action which makes the results of the |
| computation unnecessary.</p> |
| </div> |
| <div class="paragraph"> |
| <p>In many cases, the driver or the device is capable of speeding up the |
| closure of ongoing operations when the results are no longer required in a |
| much more expedient manner than waiting for all previously enqueued |
| operations to finish.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension implements a new query to check whether a device can |
| terminate an OpenCL context and adds an API to terminate a context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The extension name is <strong>cl_khr_terminate_context</strong>.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_22"><a class="anchor" href="#_general_information_22"></a>23.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_22"><a class="anchor" href="#_version_history_22"></a>23.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_terminate_context-additions-to-chapter-4"><a class="anchor" href="#cl_khr_terminate_context-additions-to-chapter-4"></a>23.2. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p>Add a new device property to <em>table 4.3</em> in <em>section 4.2</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 22.2222%;"> |
| <col style="width: 44.4445%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_TERMINATE_CAPABILITY_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_device_terminate_capability_khr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Describes the termination capability of the OpenCL device. |
| This is a bit-field, where the following values are currently supported:</p> |
| <p class="tableblock"> CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR - Indicates that context |
| termination is supported.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Add a new context property to <em>table 4.5</em> in <em>section 4.4</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 22.2222%;"> |
| <col style="width: 44.4445%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_context_properties enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_CONTEXT_TERMINATE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_bool</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Specifies whether the context can be terminated. |
| The default value is CL_FALSE.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>CL_CONTEXT_TERMINATE_KHR can be specified in the context properties only if |
| all devices associated with the context support |
| context termination (i.e. CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR is set |
| for CL_DEVICE_TERMINATE_CAPABILITY_KHR). |
| Otherwise, context creation fails with error code of CL_INVALID_PROPERTY.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The new function |
| </p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clTerminateContextKHR(cl_context context)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>terminates all pending work associated with the context and renders all data |
| owned by the context invalid. |
| It is the responsibility of the application to release all objects |
| associated with the context being terminated.</p> |
| </div> |
| <div class="paragraph"> |
| <p>When a context is terminated:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The execution status of enqueued commands will be CL_TERMINATED_KHR. |
| Event objects can be queried using <strong>clGetEventInfo</strong>. |
| Event callbacks can be registered and registered event callbacks will be |
| called with <em>event_command_status</em> set to CL_TERMINATED_KHR. |
| <strong>clWaitForEvents</strong> will return immediately for commands associated |
| with event objects specified in event_list. |
| The status of user events can be set. |
| Event objects can be retained and released. |
| <strong>clGetEventProfilingInfo</strong> returns CL_PROFILING_INFO_NOT_AVAILABLE.</p> |
| </li> |
| <li> |
| <p>The context is considered to be terminated. |
| A callback function registered when the context was created will be |
| called. |
| Only queries, retain and release operations can be performed on the |
| context. |
| All other APIs that use a context as an argument will return |
| CL_CONTEXT_TERMINATED_KHR.</p> |
| </li> |
| <li> |
| <p>The contents of the memory regions of the memory objects are undefined. |
| Queries, registering a destructor callback, retain and release |
| operations can be performed on the memory objects.</p> |
| </li> |
| <li> |
| <p>Once a context has been terminated, all OpenCL API calls that create |
| objects or enqueue commands will return CL_CONTEXT_TERMINATED_KHR. |
| APIs that release OpenCL objects will continue to operate as though |
| <strong>clTerminateContextKHR</strong> was not called.</p> |
| </li> |
| <li> |
| <p>The behavior of callbacks will remain unchanged, and will report |
| an appropriate error, if executing after termination of context. |
| This behavior is similar to enqueued commands, after the command queue |
| has become invalid.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clTerminateContextKHR</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid OpenCL context.</p> |
| </li> |
| <li> |
| <p>CL_CONTEXT_TERMINATED_KHR if <em>context</em> has already been terminated.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>context</em> was not created with |
| CL_CONTEXT_TERMINATE_KHR set to CL_TRUE.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>An implementation that supports this extension must be able to terminate |
| commands currently executing on devices or queued across all command-queues |
| associated with the context that is being terminated. |
| The implementation cannot implement this extension by waiting for currently |
| executing (or queued) commands to finish execution on devices associated |
| with this context (i.e. doing a <strong>clFinish</strong>).</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_spir"><a class="anchor" href="#cl_khr_spir"></a>24. Standard Portable Intermediate Representation Binaries</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension adds the ability to create an OpenCL program object from a |
| Standard Portable Intermediate Representation (SPIR) instance. |
| A SPIR instance is a vendor-neutral non-source representation for OpenCL C |
| programs.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The extension name is <strong>cl_khr_spir</strong>. |
| This extension has been superseded by the SPIR-V intermediate |
| representation, which is supported by the <strong>cl_khr_il_program</strong> extension, |
| and is a core feature in OpenCL 2.1.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_23"><a class="anchor" href="#_general_information_23"></a>24.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_23"><a class="anchor" href="#_version_history_23"></a>24.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_spir-additions-to-chapter-4"><a class="anchor" href="#cl_khr_spir-additions-to-chapter-4"></a>24.2. Additions to Chapter 4 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p><strong>Add a new device property to <em>table 4.3</em> in <em>section 4.2</em>:</strong></p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 28.5714%;"> |
| <col style="width: 14.2857%;"> |
| <col style="width: 57.1429%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_SPIR_VERSIONS</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">char[]</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">A space separated list of SPIR versions supported by the device.</p> |
| <p class="tableblock"> For example, returning <code>"1.2"</code> in this query implies that SPIR version 1.2 |
| is supported by the implementation.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_spir-additions-to-chapter-5"><a class="anchor" href="#cl_khr_spir-additions-to-chapter-5"></a>24.3. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="paragraph"> |
| <p><strong>Additions to <em>section 5.8.1</em> — Creating Program Objects:</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>“<strong>clCreateProgramWithBinary</strong> can be used to load a SPIR binary. |
| Once a program object has been created from a SPIR binary, <strong>clBuildProgram</strong> |
| can be called to build a program executable or <strong>clCompileProgram</strong> can be |
| called to compile the SPIR binary.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>Modify the CL_PROGRAM_BINARY_TYPE entry in <em>table 5.14</em> |
| (<strong>clGetProgramBuildInfo</strong>) to add a potential value |
| CL_PROGRAM_BINARY_TYPE_INTERMEDIATE:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 28.5714%;"> |
| <col style="width: 14.2857%;"> |
| <col style="width: 57.1429%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_program_build_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info. returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_PROGRAM_BINARY_TYPE</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_program_binary_type</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_PROGRAM_BINARY_TYPE_INTERMEDIATE — An intermediate (non-source) |
| representation for the program is loaded as a binary. |
| The program must be further processed with <strong>clCompileProgram</strong> or |
| <strong>clBuildProgram</strong>.</p> |
| <p class="tableblock"> If processed with <strong>clCompileProgram</strong>, the result will be a binary of type |
| CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or CL_PROGRAM_BINARY_TYPE_LIBRARY. |
| If processed with <strong>clBuildProgram</strong>, the result will be a binary of type |
| CL_PROGRAM_BINARY_TYPE_EXECUTABLE.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><strong>Additions to <em>section 5.8.4</em> — Compiler Options:</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>“The compile option <strong>-x spir</strong> must be specified to indicate that the binary |
| is in SPIR format, and the compile option <strong>-spir-std</strong> must be used to |
| specify the version of the SPIR specification that describes the format and |
| meaning of the binary. |
| For example, if the binary is as described in SPIR version 1.2, then |
| <strong>-spir-std=1.2</strong> must be specified. |
| Failing to specify these compile options may result in implementation |
| defined behavior.”</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Additions to <em>section 5.9.3</em> — Kernel Object Queries:</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>Modify the following text in clGetKernelArgInfo from:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Kernel argument information is only available if the program object |
| associated with <em>kernel</em> is created with <strong>clCreateProgramWithSource</strong> and the |
| program executable is built with the -cl-kernel-arg-info option specified in |
| <em>options</em> argument to <strong>clBuildProgram</strong> or <strong>clCompileProgram</strong>.”</p> |
| </div> |
| <div class="paragraph"> |
| <p>to:</p> |
| </div> |
| <div class="paragraph"> |
| <p>“Kernel argument information is only available if the program object |
| associated with <em>kernel</em> is created with <strong>clCreateProgramWithSource</strong> and the |
| program executable is built with the -cl-kernel-arg-info option specified in |
| <em>options</em> argument to <strong>clBuildProgram</strong> or <strong>clCompileProgram</strong>, or if the |
| program object associated with <em>kernel</em> is created with |
| <strong>clCreateProgramWithBinary</strong> and the program executable is built with the |
| -cl-kernel-arg-info and -x spir options specified in <em>options</em> argument to |
| <strong>clBuildProgram</strong> or <strong>clCompileProgram</strong>.”</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_il_program"><a class="anchor" href="#cl_khr_il_program"></a>25. Intermediate Language Programs</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_il_program</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds the ability to create programs with intermediate language (IL), |
| usually SPIR-V. Further information about the format and contents of SPIR-V may be |
| found in the SPIR-V Specification. Information about how SPIR-V modules behave in |
| the OpenCL environment may be found in the OpenCL SPIR-V Environment Specification.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The functionality described by this extension is a core feature in OpenCL 2.1.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_24"><a class="anchor" href="#_general_information_24"></a>25.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_24"><a class="anchor" href="#_version_history_24"></a>25.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_il_program-new-procedures-and-functions"><a class="anchor" href="#cl_khr_il_program-new-procedures-and-functions"></a>25.2. New Procedures and Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_program clCreateProgramWithILKHR(cl_context context, |
| <span class="directive">const</span> <span class="directive">void</span> *il, |
| size_t length, |
| cl_int *errcode_ret);</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_il_program-new-tokens"><a class="anchor" href="#cl_khr_il_program-new-tokens"></a>25.3. New Tokens</h3> |
| <div class="paragraph"> |
| <p>Accepted as a new <em>param_name</em> argument to <strong>clGetDeviceInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_DEVICE_IL_VERSION_KHR</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted as a new <em>param_name</em> argument to <strong>clGetProgramInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>CL_PROGRAM_IL_KHR</pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_il_program-additions-to-chapter-3"><a class="anchor" href="#cl_khr_il_program-additions-to-chapter-3"></a>25.4. Additions to Chapter 3 of the OpenCL 2.0 Specification</h3> |
| <div class="paragraph"> |
| <p>In section 3.1, replace the fourth paragraph with:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"Programmers provide programs in the form of intermediate language binaries (usually SPIR-V), OpenCL C source strings, or implementation-defined binary objects. The OpenCL platform provides a compiler to translate programs represented as intermediate language binaries or OpenCL C source strings into device program executables. The compiler may be <em>online</em> or <em>offline</em>. An <em>online compiler</em> is available during host program execution using standard APIs. An <em>offline compiler</em> is invoked outside of host program control, using platform-specific methods. The OpenCL runtime allows developers to get a previously compiled device program executable and to load and execute a previously compiled device program executable."</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_il_program-additions-to-chapter-4"><a class="anchor" href="#cl_khr_il_program-additions-to-chapter-4"></a>25.5. Additions to Chapter 4 of the OpenCL 2.0 Specification</h3> |
| <div class="paragraph"> |
| <p>Add a new device property to <strong>Table 4.3</strong> <em>OpenCL Device Queries</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 16.6666%;"> |
| <col style="width: 50.0001%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_IL_VERSION_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">char[]</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">The intermediate languages that can be supported by <strong>clCreateProgramWithILKHR</strong> for this device.<br> |
| <br> |
| Returns a space separated list of IL version strings of the form:<br> |
| <br> |
| &lt;IL_Prefix&gt;_&lt;Major_version&gt;.&lt;Minor_version&gt;<br> |
| <br> |
| A device that supports the <strong>cl_khr_il_program</strong> extension must support the “SPIR-V” IL prefix.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_il_program-additions-to-chapter-5"><a class="anchor" href="#cl_khr_il_program-additions-to-chapter-5"></a>25.6. Additions to Chapter 5 of the OpenCL 2.0 Specification</h3> |
| <div class="paragraph"> |
| <p>Add to Section 5.8.1: Creating Program Objects:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"The function</p> |
| </div> |
| <div class="paragraph"> |
| <p></p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_program clCreateProgramWithILKHR(cl_context context, |
| <span class="directive">const</span> <span class="directive">void</span> *il, |
| size_t length, |
| cl_int *errcode_ret);</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>creates a new program object for <em>context</em> using the <em>length</em> bytes of intermediate language pointed to by <em>il</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> must be a valid OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>il</em> is a pointer to a <em>length</em>-byte block of memory containing intermediate language.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>length</em> is the length of the block of memory pointed to by <em>il</em>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. If <em>errcode_ret</em> is NULL, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateProgramWithILKHR</strong> returns a valid non-zero program object and <em>errcode_ret</em> is set to CL_SUCCESS if the program object is created successfully. Otherwise, it returns a NULL value with one of the following error values returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if <em>il</em> is NULL or if <em>length</em> is zero.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if the <em>length</em>-byte block of memory pointed to by <em>il</em> does not contain well-formed intermediate language.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host."</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add to Section 5.8.2: Building Program Executables:</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following to the description of the <em>options</em> parameter to <strong>clBuildProgram</strong>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"Certain options are ignored when <em>program</em> is created with IL."</p> |
| </div> |
| <div class="paragraph"> |
| <p>Additionally, replace the error:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>program</em> was not created with <strong>clCreateProgramWithSource</strong> or <strong>clCreateProgramWithBinary</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>with:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>program</em> was not created with <strong>clCreateProgramWithSource</strong>, <strong>clCreateProgramWithILKHR</strong> or <strong>clCreateProgramWithBinary</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add to Section 5.8.3: Separate Compilation and Linking of Programs:</p> |
| </div> |
| <div class="paragraph"> |
| <p>Add the following to the description of the <em>options</em> parameter to <strong>clCompileProgram</strong>:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"Certain options are ignored when <em>program</em> is created with IL."</p> |
| </div> |
| <div class="paragraph"> |
| <p>Additionally, replace the error:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>program</em> has no source i.e. it has not been created with <strong>clCreateProgramWithSource</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>with:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_OPERATION if <em>program</em> was not created with <strong>clCreateProgramWithSource</strong> or <strong>clCreateProgramWithILKHR</strong>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Add to Section 5.8.4.1: Preprocessor Options,<br> |
| Add to Section 5.8.4.2: Math Intrinsic Options (for -cl-single-precision-constant only),<br> |
| Add to Section 5.8.4.3: Optimization Options,<br> |
| Add to Section 5.8.4.4: Options to Request or Suppress Warnings, and<br> |
| Add to Section 5.8.4.5: Options Controlling the OpenCL C Version:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"These options are ignored for programs created with IL."</p> |
| </div> |
| <div class="paragraph"> |
| <p>Change one entry and add one new entry to <strong>Table 5.17</strong> <em>clGetProgramInfo parameter queries</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_program_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Info returned in <em>param_value</em></strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_PROGRAM_SOURCE</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">char[]</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the program source code specified by <strong>clCreateProgramWithSource</strong>. The source string returned is a concatenation of all source strings |
| specified to <strong>clCreateProgramWithSource</strong> with a null terminator. The concatenation strips any nulls in the original source strings.<br> |
| <br> |
| If program is created using <strong>clCreateProgramWithBinary</strong>, <strong>clCreateProgramWithBuiltInKernels</strong>, or <strong>clCreateProgramWithILKHR</strong>, a null string or the appropriate program source code is returned depending on whether or not the program source code is stored in the binary.<br> |
| <br> |
| The actual number of characters that represents the program source code including the null terminator is returned in <em>param_value_size_ret</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_PROGRAM_IL_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">unsigned char[]</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the program IL for programs created with <strong>clCreateProgramWithILKHR</strong>.<br> |
| <br> |
| If program is created with <strong>clCreateProgramWithSource</strong>, <strong>clCreateProgramWithBinary</strong>, or <strong>clCreateProgramWithBuiltInKernels</strong>, the memory pointed to by <em>param_value</em> will be unchanged and <em>param_value_size_ret</em> will be set to zero.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_create_command_queue"><a class="anchor" href="#cl_khr_create_command_queue"></a>26. Creating Command Queues with Properties</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="_overview"><a class="anchor" href="#_overview"></a>26.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_create_command_queue</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension allows OpenCL 1.x devices to support an equivalent of the |
| <strong>clCreateCommandQueueWithProperties</strong> API that was added in OpenCL 2.0. |
| This allows OpenCL 1.x devices to support other optional extensions or |
| features that use the <strong>clCreateCommandQueueWithProperties</strong> API to specify |
| additional command queue properties that cannot be specified using the |
| OpenCL 1.x <strong>clCreateCommandQueue</strong> API.</p> |
| </div> |
| <div class="paragraph"> |
| <p>No new command queue properties are required by this extension. |
| Applications may use the existing CL_DEVICE_QUEUE_PROPERTIES query to |
| determine command queue properties that are supported by the device.</p> |
| </div> |
| <div class="paragraph"> |
| <p>OpenCL 2.x devices may support this extension for compatibility. In |
| this scenario, the function added by this extension will have the same |
| capabilities as the core <strong>clCreateCommandQueueWithProperties</strong> API. |
| Applications that only target OpenCL 2.x devices should use the core |
| OpenCL 2.x <strong>clCreateCommandQueueWithProperties</strong> API instead of this |
| extension API.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_25"><a class="anchor" href="#_general_information_25"></a>26.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_25"><a class="anchor" href="#_version_history_25"></a>26.2.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_functions"><a class="anchor" href="#_new_api_functions"></a>26.3. New API Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_command_queue clCreateCommandQueueWithPropertiesKHR( |
| cl_context context, |
| cl_device_id device, |
| <span class="directive">const</span> cl_queue_properties_khr *properties, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_types"><a class="anchor" href="#_new_api_types"></a>26.4. New API Types</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="keyword">typedef</span> cl_properties cl_queue_properties_khr;</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_the_opencl_1_2_specification"><a class="anchor" href="#_modifications_to_the_opencl_1_2_specification"></a>26.5. Modifications to the OpenCL 1.2 Specification</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">(Add to Table 5.2 for CL_QUEUE_PROPERTIES in Section 5.1) </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>Table 5.2</strong> <em>List of supported param_names by clGetCommandQueueInfo</em></p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 16.6666%;"> |
| <col style="width: 50.0001%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_command_queue_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Information returned in param_value</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_QUEUE_PROPERTIES</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_command_queue_properties</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the currently specified properties for the command-queue. |
| These properties are specified by the <em>properties</em> argument in |
| <strong>clCreateCommandQueue</strong>, or by the CL_QUEUE_PROPERTIES property value in |
| <strong>clCreateCommandQueueWithPropertiesKHR</strong>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </dd> |
| <dt class="hdlist1">(Add a new Section 5.1.1, <strong>Creating Command Queues With Properties</strong>) </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The function</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_command_queue clCreateCommandQueueWithPropertiesKHR( |
| cl_context context, |
| cl_device_id device, |
| <span class="directive">const</span> cl_queue_properties_khr *properties, |
| cl_int *errcode_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>allows creation of a command-queue from an array of properties |
| for the specified device.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>context</em> must be a valid OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>device</em> must be a device or sub-device associated with <em>context</em>. It |
| can either be in the list of devices and sub-devices specified when |
| <em>context</em> is created using <strong>clCreateContext</strong> or |
| be a root device with the same device type as specified when <em>context</em> |
| is created using <strong>clCreateContextFromType</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>properties</em> specifies a list of properties for the command-queue and |
| their corresponding values. Each property name is immediately followed |
| by the corresponding desired value. The list is terminated with 0. The |
| list of supported properties is described in the table below. If a |
| supported property and its value is not specified in <em>properties</em>, its |
| default value will be used. <em>properties</em> can be NULL in which case the |
| default values for supported command-queue properties will be used.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Table X.Y</strong> <em>List of supported cl_queue_properties_khr values and description</em></p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 16.6666%;"> |
| <col style="width: 50.0001%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Queue Properties</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property Value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_QUEUE_PROPERTIES</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_bitfield</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">This is a bitfield and can be set to a combination of the following |
| values:<br> |
| <br> |
| CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE - Determines whether the |
| commands queued in the command-queue are executed in-order or out-of-order. If |
| set, the commands in the command-queue are executed out-of-order. Otherwise, |
| commands are executed in-order.<br> |
| <br> |
| CL_QUEUE_PROFILING_ENABLE - Enable or disable profiling of commands in |
| the command-queue. If set, the profiling of commands is enabled. Otherwise, |
| profiling of commands is disabled.<br> |
| <br> |
| If CL_QUEUE_PROPERTIES is not specified, an in-order command queue that |
| does not support profiling of commands is created for the specified device.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><em>errcode_ret</em> will return an appropriate error code. If <em>errcode_ret</em> |
| is NULL, no error code is returned.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clCreateCommandQueueWithPropertiesKHR</strong> returns a valid non-zero |
| command-queue and <em>errcode_ret</em> is set to CL_SUCCESS if the |
| command-queue is created successfully. Otherwise, it returns a NULL |
| value with one of the following error values returned in <em>errcode_ret</em>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_INVALID_CONTEXT if <em>context</em> is not a valid context.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_DEVICE if <em>device</em> is not a valid device or is not associated |
| with <em>context</em>.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_VALUE if values specified in <em>properties</em> are not valid.</p> |
| </li> |
| <li> |
| <p>CL_INVALID_QUEUE_PROPERTIES if values specified in <em>properties</em> are |
| valid but are not supported by the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_RESOURCES if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p>CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required |
| by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_device_enqueue_local_arg_types"><a class="anchor" href="#cl_khr_device_enqueue_local_arg_types"></a>27. Device Enqueue Local Argument Types</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension allows arguments to blocks that are passed to the <strong>enqueue_kernel</strong> built-in |
| function to be pointers to any type (built-in or user-defined) in local memory, instead of |
| requiring arguments to blocks to be pointers to void in local memory.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The name of this extension is <strong>cl_khr_device_enqueue_local_arg_types</strong>.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_26"><a class="anchor" href="#_general_information_26"></a>27.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_26"><a class="anchor" href="#_version_history_26"></a>27.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_device_enqueue_local_arg_types-additions-to-chapter-6-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_device_enqueue_local_arg_types-additions-to-chapter-6-of-the-opencl-2.0-specification"></a>27.2. Additions to Chapter 6 of the OpenCL 2.0 C Specification</h3> |
| <div class="paragraph"> |
| <p>Modify the second paragraph of Section 6.13.17: Enqueuing Kernels:</p> |
| </div> |
| <div class="paragraph"> |
| <p>"The following table describes the list of built-in functions that can be used to enqueue a |
| kernel. We use the generic type name gentype to indicate the built-in OpenCL C scalar or |
| vector integer or floating-point data types, or any user defined type built from these scalar and |
| vector data types, which can be used as the type of the pointee of the arguments of the kernel |
| enqueue functions listed in table 6.31."</p> |
| </div> |
| <div class="paragraph"> |
| <p>Then, replace all occurrences of local void * in table 6.31 with local gentype *. For example:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> enqueue_kernel(queue_t queue, |
| kernel_enqueue_flags_t flags, |
| <span class="directive">const</span> ndrange_t ndrange, |
| <span class="directive">void</span> (^block)(local gentype *, ...), |
| uint size0, ... )</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Additionally, replace all occurrences of local void * in table 6.33 with local gentype *. For example:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint get_kernel_work_group_size( |
| <span class="directive">void</span> (^block)(local gentype *, ...))</code></pre> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_subgroups"><a class="anchor" href="#cl_khr_subgroups"></a>28. Subgroups</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_subgroups</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds support for implementation-controlled groups of work items, known as subgroups. |
| Subgroups behave similarly to work groups and have their own sets of built-ins and synchronization primitives. |
| Subgroups within a work group are independent, may make forward progress with respect to each other, and may map to optimized hardware structures where that makes sense.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Subgroups were promoted to a core feature in OpenCL 2.1; however, note that:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The subgroup OpenCL C built-in functions described by this extension must still be accessed as an OpenCL C extension in OpenCL 2.1.</p> |
| </li> |
| <li> |
| <p>Subgroup independent forward progress is an optional device property in OpenCL 2.1, see <code>CL_DEVICE_<wbr>SUB_<wbr>GROUP_<wbr>INDEPENDENT_<wbr>FORWARD_<wbr>PROGRESS</code>.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_27"><a class="anchor" href="#_general_information_27"></a>28.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_27"><a class="anchor" href="#_version_history_27"></a>28.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroups-additions-to-chapter-3-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_subgroups-additions-to-chapter-3-of-the-opencl-2.0-specification"></a>28.2. Additions to Chapter 3 of the OpenCL 2.0 Specification</h3> |
| |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroups-additions-to-section-3.2-execution-model"><a class="anchor" href="#cl_khr_subgroups-additions-to-section-3.2-execution-model"></a>28.3. Additions to section 3.2 — Execution Model</h3> |
| <div class="paragraph"> |
| <p>Within a work-group, work-items may be divided into sub-groups. |
| The mapping of work-items to sub-groups is implementation-defined and may be queried at runtime. |
| While sub-groups may be used in multi-dimensional work-groups, each subgroup is 1-dimensional and any given work-item may query which sub-group it is a member of.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Work items are mapped into subgroups through a combination of compile-time decisions and the parameters of the dispatch. |
| The mapping to subgroups is invariant for the duration of a kernel’s execution, across dispatches of a given kernel with the same launch parameters, and from one work-group to another within the dispatch (excluding the trailing edge work-groups in the presence of non-uniform work-group sizes). |
| In addition, all sub-groups within a work-group will be the same size, apart from the sub-group with the maximum index which may be smaller if the size of the work-group is not evenly divisible by the size of the sub-group.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Sub-groups execute concurrently within a given work-group and make independent forward progress with respect to each other even in the absence of work-group barrier operations. |
| Subgroups are able to internally synchronize using barrier operations without synchronizing with each other.</p> |
| </div> |
| <div class="paragraph"> |
| <p>In the degenerate case, with the extension enabled, a single sub-group must be supported for each work-group. |
| In this situation all sub-group scope functions alias their work-group level equivalents.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroups-additions-to-chapter-5-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_subgroups-additions-to-chapter-5-of-the-opencl-2.0-specification"></a>28.4. Additions to Chapter 5 of the OpenCL 2.0 Specification</h3> |
| <div class="paragraph"> |
| <p>The function</p> |
| </div> |
| <div class="paragraph"> |
| <p></p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">cl_int clGetKernelSubGroupInfoKHR(cl_kernel kernel, |
| cl_device_id device, |
| cl_kernel_sub_group_info param_name, |
| size_t input_value_size, |
| <span class="directive">const</span> <span class="directive">void</span> *input_value, |
| size_t param_value_size, |
| <span class="directive">void</span> *param_value, |
| size_t *param_value_size_ret)</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>returns information about the kernel object.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>kernel</em> specifies the kernel object being queried.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>device</em> identifies a specific device in the list of devices associated with |
| <em>kernel</em>. |
| The list of devices is the list of devices in the OpenCL context that is |
| associated with <em>kernel</em>. |
| If the list of devices associated with <em>kernel</em> is a single device, <em>device</em> |
| can be a <code>NULL</code> value.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_name</em> specifies the information to query. |
| The list of supported <em>param_name</em> types and the information returned in |
| <em>param_value</em> by <strong>clGetKernelSubGroupInfoKHR</strong> is described in the |
| <a href="#cl_khr_subgroups-kernel-subgroup-info-table">Kernel Object Subgroup Queries</a> table.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>input_value_size</em> is used to specify the size in bytes of memory pointed to |
| by <em>input_value</em>. |
| This size must be == size of input type as described in the table below.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>input_value</em> is a pointer to memory where the appropriate parameterization |
| of the query is passed from. |
| If <em>input_value</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value</em> is a pointer to memory where the appropriate result being |
| queried is returned. |
| If <em>param_value</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size</em> is used to specify the size in bytes of memory pointed to |
| by <em>param_value</em>. |
| This size must be ≥ size of return type as described in the |
| <a href="#cl_khr_subgroups-kernel-subgroup-info-table">Kernel Object Subgroup Queries</a> table.</p> |
| </div> |
| <div class="paragraph"> |
| <p><em>param_value_size_ret</em> returns the actual size in bytes of data being |
| queried by <em>param_name</em>. |
| If <em>param_value_size_ret</em> is <code>NULL</code>, it is ignored.</p> |
| </div> |
| <table id="cl_khr_subgroups-kernel-subgroup-info-table" class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 46. <strong>clGetKernelSubGroupInfoKHR</strong> parameter queries</caption> |
| <colgroup> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| <col style="width: 25%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_kernel_sub_group_info</strong></th> |
| <th class="tableblock halign-left valign-top">Input Type</th> |
| <th class="tableblock halign-left valign-top">Return Type</th> |
| <th class="tableblock halign-left valign-top">Info. returned in <em>param_value</em></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_KERNEL_<wbr>MAX_<wbr>SUB_<wbr>GROUP_<wbr>SIZE_<wbr>FOR_<wbr>NDRANGE_<wbr>KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the maximum sub-group size for this kernel. |
| All sub-groups must be the same size, while the last subgroup in |
| any work-group (i.e. the subgroup with the maximum index) could |
| be the same or smaller size.</p> |
| <p class="tableblock"> The <em>input_value</em> must be an array of size_t values |
| corresponding to the local work size parameter of the intended |
| dispatch. |
| The number of dimensions in the ND-range will be inferred from |
| the value specified for <em>input_value_size</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_KERNEL_<wbr>SUB_<wbr>GROUP_<wbr>COUNT_<wbr>FOR_<wbr>NDRANGE_<wbr>KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t *</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">size_t</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of sub-groups that will be present in each |
| work-group for a given local work size. |
| All workgroups, apart from the last work-group in each dimension |
| in the presence of non-uniform work-group sizes, will have the |
| same number of sub-groups.</p> |
| <p class="tableblock"> The <em>input_value</em> must be an array of size_t values |
| corresponding to the local work size parameter of the intended |
| dispatch. |
| The number of dimensions in the ND-range will be inferred from |
| the value specified for <em>input_value_size</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p><strong>clGetKernelSubGroupInfoKHR</strong> returns CL_SUCCESS if the function is executed |
| successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_INVALID_<wbr>DEVICE</code> if <em>device</em> is not in the list of devices associated |
| with <em>kernel</em> or if <em>device</em> is <code>NULL</code> but there is more than one device |
| associated with <em>kernel</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_<wbr>VALUE</code> if <em>param_name</em> is not valid, or if size in bytes |
| specified by <em>param_value_size</em> is &lt; size of return type as described in |
| the <a href="#cl_khr_subgroups-kernel-subgroup-info-table">Kernel Object Subgroup Queries</a> table |
| and <em>param_value</em> is not <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_<wbr>VALUE</code> if <em>param_name</em> is |
| <code>CL_KERNEL_<wbr>MAX_<wbr>SUB_<wbr>GROUP_<wbr>SIZE_<wbr>FOR_<wbr>NDRANGE_<wbr>KHR</code> and the size in bytes specified by |
| <em>input_value_size</em> is not valid or if <em>input_value</em> is <code>NULL</code>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_<wbr>KERNEL</code> if <em>kernel</em> is not a valid kernel object.</p> |
| </li> |
| <li> |
| <p><code>CL_OUT_<wbr>OF_<wbr>RESOURCES</code> if there is a failure to allocate resources required |
| by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p><code>CL_OUT_<wbr>OF_<wbr>HOST_<wbr>MEMORY</code> if there is a failure to allocate resources |
| required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroups-additions-to-chapter-6-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_subgroups-additions-to-chapter-6-of-the-opencl-2.0-specification"></a>28.5. Additions to Chapter 6 of the OpenCL 2.0 C Specification</h3> |
| <div class="sect3"> |
| <h4 id="cl_khr_subgroups-additions-to-section-6.13.1-work-item-functions"><a class="anchor" href="#cl_khr_subgroups-additions-to-section-6.13.1-work-item-functions"></a>28.5.1. Additions to section 6.13.1 — Work Item Functions</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph"> |
| <p>uint <strong>get_sub_group_size</strong> ()</p> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of work items in the subgroup. |
| This value is no more than the maximum subgroup size and is |
| implementation-defined based on a combination of the compiled kernel and |
| the dispatch dimensions. |
| This will be a constant value for the lifetime of the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph"> |
| <p>uint <strong>get_max_sub_group_size</strong> ()</p> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the maximum size of a subgroup within the dispatch. |
| This value will be invariant for a given set of dispatch dimensions and a |
| kernel object compiled for a given device.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph"> |
| <p>uint <strong>get_num_sub_groups</strong> ()</p> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of subgroups that the current work group is divided |
| into.</p> |
| <p class="tableblock"> This number will be constant for the duration of a work group’s execution. |
| If the kernel is executed with a non-uniform work group size |
| (i.e. the global_work_size values specified to <strong>clEnqueueNDRangeKernel</strong> |
| are not evenly divisible by the local_work_size values for any dimension), |
| calls to this built-in from some work groups may return different values |
| than calls to this built-in from other work groups.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph"> |
| <p>uint <strong>get_enqueued_num_sub_groups</strong> ()</p> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the same value as that returned by <strong>get_num_sub_groups</strong> if the |
| kernel is executed with a uniform work group size.</p> |
| <p class="tableblock"> If the kernel is executed with a non-uniform work group size, returns the |
| number of subgroups in each of the work groups that make up the uniform |
| region of the global range.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph"> |
| <p>uint <strong>get_sub_group_id</strong> ()</p> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>get_sub_group_id</strong> returns the subgroup ID which is a number from 0 .. |
| <strong>get_num_sub_groups</strong>() - 1.</p> |
| <p class="tableblock"> For <strong>clEnqueueTask</strong>, this returns 0.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="paragraph"> |
| <p>uint <strong>get_sub_group_local_id</strong> ()</p> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the unique work item ID within the current subgroup. |
| The mapping from <strong>get_local_id</strong>(<em>dimindx</em>) to <strong>get_sub_group_local_id</strong> |
| will be invariant for the lifetime of the work group.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_subgroups-additions-to-section-6.13.8-synchronization-functions"><a class="anchor" href="#cl_khr_subgroups-additions-to-section-6.13.8-synchronization-functions"></a>28.5.2. Additions to section 6.13.8 — Synchronization Functions</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 30%;"> |
| <col style="width: 70%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>sub_group_barrier</strong> (<br> |
| cl_mem_fence_flags <em>flags</em>)</p> |
| <p class="tableblock"> void <strong>sub_group_barrier</strong> (<br> |
| cl_mem_fence_flags <em>flags</em>, memory_scope <em>scope</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">All work items in a subgroup executing the kernel on a processor must |
| execute this function before any are allowed to continue execution beyond |
| the subgroup barrier. |
| This function must be encountered by all work items in a subgroup |
| executing the kernel. |
| These rules apply to ND-ranges implemented with uniform and non-uniform |
| work groups.</p> |
| <p class="tableblock"> If <strong>sub_group_barrier</strong> is inside a conditional statement, then all work |
| items within the subgroup must enter the conditional if any work item in |
| the subgroup enters the conditional statement and executes the |
| sub_group_barrier.</p> |
| <p class="tableblock"> If <strong>sub_group_barrier</strong> is inside a loop, all work items within the subgroup |
| must execute the sub_group_barrier for each iteration of the loop before |
| any are allowed to continue execution beyond the sub_group_barrier.</p> |
| <p class="tableblock"> The <strong>sub_group_barrier</strong> function also queues a memory fence (reads and |
| writes) to ensure correct ordering of memory operations to local or global |
| memory.</p> |
| <p class="tableblock"> The flags argument specifies the memory address space and can be set to a |
| combination of the following values:</p> |
| <p class="tableblock"> CLK_LOCAL_MEM_FENCE — The <strong>sub_group_barrier</strong> function will either flush |
| any variables stored in local memory or queue a memory fence to ensure |
| correct ordering of memory operations to local memory.</p> |
| <p class="tableblock"> CLK_GLOBAL_MEM_FENCE — The <strong>sub_group_barrier</strong> function will queue a |
| memory fence to ensure correct ordering of memory operations to global |
| memory. |
| This can be useful when work items, for example, write to buffer objects |
| and then want to read the updated data from these buffer objects.</p> |
| <p class="tableblock"> CLK_IMAGE_MEM_FENCE — The <strong>sub_group_barrier</strong> function will queue a memory |
| fence to ensure correct ordering of memory operations to image objects. |
| This can be useful when work items, for example, write to image objects |
| and then want to read the updated data from these image objects.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_subgroups-additions-to-section-6.13.11-atomic-functions"><a class="anchor" href="#cl_khr_subgroups-additions-to-section-6.13.11-atomic-functions"></a>28.5.3. Additions to section 6.13.11 — Atomic Functions</h4> |
| <div class="paragraph"> |
| <p>Add the following new value to the enumerated type <code>memory_scope</code> defined in |
| <em>section 6.13.11.4</em>.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre>memory_scope_sub_group</pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The <code>memory_scope_sub_group</code> specifies that the memory ordering constraints |
| given by <code>memory_order</code> apply to work items in a subgroup. |
| This memory scope can be used when performing atomic operations to global or |
| local memory.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_subgroups-add-a-new-section-6.13.X-sub-group-functions"><a class="anchor" href="#cl_khr_subgroups-add-a-new-section-6.13.X-sub-group-functions"></a>28.5.4. Add a new section 6.13.X — Sub-Group Functions</h4> |
| <div class="paragraph"> |
| <p>The table below describes OpenCL C programming language built-in functions that operate on a subgroup level. |
| These built-in functions must be encountered by all work items in the subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>sub_group_all</strong> (int <em>predicate</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Evaluates <em>predicate</em> for all work items in the subgroup and returns a |
| non-zero value if <em>predicate</em> evaluates to non-zero for all work items in |
| the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">int <strong>sub_group_any</strong> (int <em>predicate</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Evaluates <em>predicate</em> for all work items in the subgroup and returns a |
| non-zero value if <em>predicate</em> evaluates to non-zero for any work items in |
| the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sub_group_broadcast</strong> (<br> |
| gentype <em>x</em>, uint <em>sub_group_local_id</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Broadcast the value of <em>x</em> for work item identified by |
| <em>sub_group_local_id</em> (value returned by <strong>get_sub_group_local_id</strong>) to all |
| work items in the subgroup.</p> |
| <p class="tableblock"> <em>sub_group_local_id</em> must be the same value for all work items in the |
| subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sub_group_reduce_&lt;op&gt;</strong> (<br> |
| gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return result of reduction operation specified by <strong>&lt;op&gt;</strong> for all values of |
| <em>x</em> specified by work items in a subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sub_group_scan_exclusive_&lt;op&gt;</strong> (<br> |
| gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Do an exclusive scan operation specified by <strong>&lt;op&gt;</strong> of all values specified |
| by work items in a subgroup. |
| The scan results are returned for each work item.</p> |
| <p class="tableblock"> The scan order is defined by increasing subgroup local ID within the |
| subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">gentype <strong>sub_group_scan_inclusive_&lt;op&gt;</strong> (<br> |
| gentype <em>x</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Do an inclusive scan operation specified by <strong>&lt;op&gt;</strong> of all values specified |
| by work items in a subgroup. |
| The scan results are returned for each work item.</p> |
| <p class="tableblock"> The scan order is defined by increasing subgroup local ID within the |
| subgroup.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The <strong>&lt;op&gt;</strong> in <strong>sub_group_reduce_&lt;op&gt;</strong>, <strong>sub_group_scan_inclusive_&lt;op&gt;</strong> and <strong>sub_group_scan_exclusive_&lt;op&gt;</strong> defines the operator and can be <strong>add</strong>, <strong>min</strong> or <strong>max</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The exclusive scan operation takes a binary operator <strong>op</strong> with an identity I and <em>n</em> (where <em>n</em> is the size of the sub-group) elements [a<sub>0</sub>, a<sub>1</sub>, …​ a<sub>n-1</sub>] and returns [I, a<sub>0</sub>, (a<sub>0</sub> <strong>op</strong> a<sub>1</sub>), …​ (a<sub>0</sub> <strong>op</strong> a<sub>1</sub> <strong>op</strong> …​ <strong>op</strong> a<sub>n-2</sub>)].</p> |
| </div> |
| <div class="paragraph"> |
| <p>The inclusive scan operation takes a binary operator <strong>op</strong> with <em>n</em> (where <em>n</em> is the size of the sub-group) elements [a<sub>0</sub>, a<sub>1</sub>, …​ a<sub>n-1</sub>] and returns [a<sub>0</sub>, (a<sub>0</sub> <strong>op</strong> a<sub>1</sub>), …​ (a<sub>0</sub> <strong>op</strong> a<sub>1</sub> <strong>op</strong> …​ <strong>op</strong> a<sub>n-1</sub>)].</p> |
| </div> |
| <div class="paragraph"> |
| <p>If <strong>op</strong> = <strong>add</strong>, the identity I is 0. |
| If <strong>op</strong> = <strong>min</strong>, the identity I is <code>INT_MAX</code>, <code>UINT_MAX</code>, <code>LONG_MAX</code>, <code>ULONG_MAX</code>, for <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code> types and is <code>+INF</code> for |
| floating-point types. |
| Similarly if <strong>op</strong> = <strong>max</strong>, the identity I is <code>INT_MIN</code>, 0, <code>LONG_MIN</code>, 0 and <code>-INF</code>.</p> |
| </div> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| <div class="paragraph"> |
| <p>The order of floating-point operations is not guaranteed for the <strong>sub_group_reduce_&lt;op&gt;</strong>, <strong>sub_group_scan_inclusive_&lt;op&gt;</strong> and <strong>sub_group_scan_exclusive_&lt;op&gt;</strong> built-in functions that operate on <code>half</code>, <code>float</code> and <code>double</code> data types. |
| The order of these floating-point operations is also non-deterministic for a given sub-group.</p> |
| </div> |
| </td> |
| </tr> |
| </table> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_subgroups-additions-to-section-6.13.16-pipe-functions"><a class="anchor" href="#cl_khr_subgroups-additions-to-section-6.13.16-pipe-functions"></a>28.5.5. Additions to section 6.13.16 — Pipe Functions</h4> |
| <div class="paragraph"> |
| <p>The OpenCL C programming language implements the following built-in pipe |
| functions that operate at a subgroup level. |
| These built-in functions must be encountered by all work items in a subgroup |
| executing the kernel with the same argument values; otherwise the behavior |
| is undefined. |
| We use the generic type name <code>gentype</code> to indicate that the built-in OpenCL C |
| scalar or vector integer or floating-point data types, or any user defined |
| type built from these scalar and vector data types, can be used as the type |
| for the arguments to the pipe functions listed in <em>table 6.29</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">reserve_id_t <strong>sub_group_reserve_read_pipe</strong> (<br> |
| read_only pipe gentype <em>pipe</em>,<br> |
| uint <em>num_packets</em>)</p> |
| <p class="tableblock"> reserve_id_t <strong>sub_group_reserve_write_pipe</strong> (<br> |
| write_only pipe gentype <em>pipe</em>,<br> |
| uint <em>num_packets</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Reserve <em>num_packets</em> entries for reading from or writing to <em>pipe</em>. |
| Returns a valid non-zero reservation ID if the reservation is successful |
| and 0 otherwise.</p> |
| <p class="tableblock"> The reserved pipe entries are referred to by indices that go from 0 …​ |
| <em>num_packets</em> - 1.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">void <strong>sub_group_commit_read_pipe</strong> (<br> |
| read_only pipe gentype <em>pipe</em>,<br> |
| reserve_id_t <em>reserve_id</em>)</p> |
| <p class="tableblock"> void <strong>sub_group_commit_write_pipe</strong> (<br> |
| write_only pipe gentype <em>pipe</em>,<br> |
| reserve_id_t <em>reserve_id</em>)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Indicates that all reads and writes to <em>num_packets</em> associated with |
| reservation <em>reserve_id</em> are completed.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: Reservations made by a subgroup are ordered in the pipe as they are |
| ordered in the program. |
| Reservations made by different subgroups that belong to the same work group |
| can be ordered using subgroup synchronization. |
| The order of subgroup based reservations that belong to different work |
| groups is implementation defined.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_subgroups-additions-to-section-6.13.17.6-enqueuing-kernels-kernel-query-functions"><a class="anchor" href="#cl_khr_subgroups-additions-to-section-6.13.17.6-enqueuing-kernels-kernel-query-functions"></a>28.5.6. Additions to section 6.13.17.6 — Enqueuing Kernels (Kernel Query Functions)</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 55.5555%;"> |
| <col style="width: 44.4445%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Built-in Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">uint <strong>get_kernel_sub_group_count_for_ndrange</strong> (<br> |
| const ndrange_t <em>ndrange</em>,<br> |
| void (^block)(void));</p> |
| <p class="tableblock"> uint <strong>get_kernel_sub_group_count_for_ndrange</strong> (<br> |
| const ndrange_t <em>ndrange</em>,<br> |
| void (^block)(local void *, …​));</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of subgroups in each work group of the dispatch (except |
| for the last in cases where the global size does not divide cleanly into |
| work groups) given the combination of the passed ndrange and block.</p> |
| <p class="tableblock"> <em>block</em> specifies the block to be enqueued.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">uint <strong>get_kernel_max_sub_group_size_for_ndrange</strong> (<br> |
| const ndrange_t <em>ndrange</em>,<br> |
| void (^block)(void));</p> |
| <p class="tableblock"> uint <strong>get_kernel_max_sub_group_size_for_ndrange</strong> (<br> |
| const ndrange_t <em>ndrange</em>,<br> |
| void (^block)(local void *, …​));</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the maximum subgroup size for a block.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_mipmap_image"><a class="anchor" href="#cl_khr_mipmap_image"></a>29. Mipmaps</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes OpenCL support for mipmaps.</p> |
| </div> |
| <div class="paragraph"> |
| <p>There are two optional mipmap extensions. |
| The <strong>cl_khr_mipmap_image</strong> extension adds the ability to create a mip-mapped |
| image, enqueue commands to read/write/copy/map/unmap a region of a mipmapped |
| image, and built-in functions that can be used to read a mip-mapped image in |
| an OpenCL C program. |
| The <strong>cl_khr_mipmap_image_writes</strong> extension adds built-in functions that can |
| be used to write a mip-mapped image in an OpenCL C program. |
| If the <strong>cl_khr_mipmap_image_writes</strong> extension is supported by the OpenCL |
| device, the <strong>cl_khr_mipmap_image</strong> extension must also be supported.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_28"><a class="anchor" href="#_general_information_28"></a>29.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_28"><a class="anchor" href="#_version_history_28"></a>29.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_mipmap_image-additions-to-chapter-5"><a class="anchor" href="#cl_khr_mipmap_image-additions-to-chapter-5"></a>29.2. Additions to Chapter 5 of the OpenCL 2.2 Specification</h3> |
| <div class="sect3"> |
| <h4 id="cl_khr_mipmap_image-additions-to-section-5.3"><a class="anchor" href="#cl_khr_mipmap_image-additions-to-section-5.3"></a>29.2.1. Additions to section 5.3 — Image Objects</h4> |
| <div class="paragraph"> |
| <p>A mip-mapped 1D image, 1D image array, 2D image, 2D image array or 3D image |
| is created by specifying <em>num_mip_levels</em> to be a value greater than one in |
| the <em>image_desc</em> passed to <strong>clCreateImage</strong>. |
| The dimensions of a mip-mapped image can be a power of two or a non-power of |
| two. |
| Each successively smaller mipmap level is half the size of the previous |
| level. |
| If this half value is a fractional value, it is rounded down to the nearest |
| integer.</p> |
| </div> |
| <div class="paragraph"> |
| <p><strong>Restrictions</strong></p> |
| </div> |
| <div class="paragraph"> |
| <p>The following restrictions apply when mip-mapped images are created with |
| <strong>clCreateImage</strong>:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR cannot be specified if a |
| mip-mapped image is created.</p> |
| </li> |
| <li> |
| <p>The <em>host_ptr</em> argument to <strong>clCreateImage</strong> must be a <code>NULL</code> value.</p> |
| </li> |
| <li> |
| <p>Mip-mapped images cannot be created for CL_MEM_OBJECT_IMAGE1D_BUFFER |
| images, depth images or multi-sampled (i.e. msaa) images.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>Calls to <strong>clEnqueueReadImage</strong>, <strong>clEnqueueWriteImage</strong> and <strong>clEnqueueMapImage</strong> |
| can be used to read from or write to a specific mip-level of a mip-mapped |
| image. |
| If image argument is a 1D image, <em>origin</em>[1] specifies the mip-level to use. |
| If image argument is a 1D image array, <em>origin</em>[2] specifies the mip-level |
| to use. |
| If image argument is a 2D image, <em>origin</em>[2] specifies the mip-level to use. |
| If image argument is a 2D image array or a 3D image, <em>origin</em>[3] specifies |
| the mip-level to use.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Calls to <strong>clEnqueueCopyImage</strong>, <strong>clEnqueueCopyImageToBuffer</strong> and |
| <strong>clEnqueueCopyBufferToImage</strong> can also be used to copy from and to a specific |
| mip-level of a mip-mapped image. |
| If <em>src_image</em> argument is a 1D image, <em>src_origin</em>[1] specifies the |
| mip-level to use. |
| If <em>src_image</em> argument is a 1D image array, <em>src_origin</em>[2] specifies the |
| mip-level to use. |
| If <em>src_image</em> argument is a 2D image, <em>src_origin</em>[2] specifies the |
| mip-level to use. |
| If <em>src_image</em> argument is a 2D image array or a 3D image, <em>src_origin</em>[3] |
| specifies the mip-level to use. |
| If <em>dst_image</em> argument is a 1D image, <em>dst_origin</em>[1] specifies the |
| mip-level to use. |
| If <em>dst_image</em> argument is a 1D image array, <em>dst_origin</em>[2] specifies the |
| mip-level to use. |
| If <em>dst_image</em> argument is a 2D image, <em>dst_origin</em>[2] specifies the |
| mip-level to use. |
| If <em>dst_image</em> argument is a 2D image array or a 3D image, <em>dst_origin</em>[3] |
| specifies the mip-level to use.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the mip level specified is not a valid value, these functions return the |
| error CL_INVALID_MIP_LEVEL.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Calls to <strong>clEnqueueFillImage</strong> can be used to write to a specific mip-level of |
| a mip-mapped image. |
| If image argument is a 1D image, <em>origin</em>[1] specifies the mip-level to use. |
| If image argument is a 1D image array, <em>origin</em>[2] specifies the mip-level to |
| use. |
| If image argument is a 2D image, <em>origin</em>[2] specifies the mip-level to use. |
| If image argument is a 2D image array or a 3D image, <em>origin</em>[3] specifies the |
| mip-level to use.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="cl_khr_mipmap_image-additions-to-section-5.7"><a class="anchor" href="#cl_khr_mipmap_image-additions-to-section-5.7"></a>29.2.2. Additions to section 5.7 — Sampler Objects</h4> |
| <div class="paragraph"> |
| <p>Add the following sampler properties <em>to table 5.14</em> that can be specified |
| when a sampler object is created using <strong>clCreateSamplerWithProperties</strong>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 16.6666%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_sampler_properties enum</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Property Value</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Default Value</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_SAMPLER_MIP_FILTER_MODE_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_filter_mode</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">CL_FILTER_NEAREST</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_SAMPLER_LOD_MIN_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_float</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0.0f</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_SAMPLER_LOD_MAX_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">cl_float</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">MAXFLOAT</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: The sampler properties CL_SAMPLER_MIP_FILTER_MODE_KHR, |
| CL_SAMPLER_LOD_MIN_KHR and CL_SAMPLER_LOD_MAX_KHR cannot be specified with |
| any samplers initialized in the OpenCL program source. |
| Only the default values for these properties will be used. |
| To create a sampler with specific values for these properties, a sampler |
| object must be created with <strong>clCreateSamplerWithProperties</strong> and passed as an |
| argument to a kernel.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_mipmap_image-additions-to-chapter-6-of-the-opencl-2.0-specification"><a class="anchor" href="#cl_khr_mipmap_image-additions-to-chapter-6-of-the-opencl-2.0-specification"></a>29.3. Additions to Chapter 6 of the OpenCL 2.0 Specification</h3> |
| <div class="sect3"> |
| <h4 id="cl_khr_mipmap_image-additions-to-section-6.13.14-image-read-write-and-query-functions"><a class="anchor" href="#cl_khr_mipmap_image-additions-to-section-6.13.14-image-read-write-and-query-functions"></a>29.3.1. Additions to section 6.13.14 – Image Read, Write and Query Functions</h4> |
| <div class="paragraph"> |
| <p>The image read and write functions described in <em>sections 6.13.14.2</em>, |
| <em>6.13.14.3</em> and <em>6.13.14.4</em> read from and write to mip-level 0 if the |
| image argument is a mip-mapped image.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The following new built-in functions are added to <em>section 6.13.14.2</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 55.5555%;"> |
| <col style="width: 44.4445%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image2d_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| int4 read_imagei( |
| read_only image2d_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| uint4 read_imageui( |
| read_only image2d_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| <span class="predefined-type">float</span> read_imagef( |
| read_only image2d_depth_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>coord.xy</em> to do an element lookup in the mip-level specified by <em>lod</em> in the 2D image object specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image2d_t image, |
| sampler_t sampler, |
| float2 coord, |
| float2 gradient_x, |
| float2 gradient_y) |
| |
| int4 read_imagei( |
| read_only image2d_t image, |
| sampler_t sampler, |
| float2 coord, |
| float2 gradient_x, |
| float2 gradient_y) |
| |
| uint4 read_imageui( |
| read_only image2d_t image, |
| sampler_t sampler, |
| float2 coord, |
| float2 gradient_x, |
| float2 gradient_y) |
| |
| <span class="predefined-type">float</span> read_imagef( |
| read_only image2d_depth_t image, |
| sampler_t sampler, |
| float2 coord, |
| float2 gradient_x, |
| float2 gradient_y)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the gradients to compute the lod and coordinate <em>coord.xy</em> to do an element lookup in the mip-level specified by the computed lod in the 2D image object specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image1d_t image, |
| sampler_t sampler, |
| <span class="predefined-type">float</span> coord, |
| <span class="predefined-type">float</span> lod) |
| |
| int4 read_imagei( |
| read_only image1d_t image, |
| sampler_t sampler, |
| <span class="predefined-type">float</span> coord, |
| <span class="predefined-type">float</span> lod) |
| |
| uint4 read_imageui( |
| read_only image1d_t image, |
| sampler_t sampler, |
| <span class="predefined-type">float</span> coord, |
| <span class="predefined-type">float</span> lod)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>coord</em> to do an element lookup in the mip-level specified by <em>lod</em> in the 1D image object specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image1d_t image, |
| sampler_t sampler, |
| <span class="predefined-type">float</span> coord, |
| <span class="predefined-type">float</span> gradient_x, |
| <span class="predefined-type">float</span> gradient_y) |
| |
| int4 read_imagei( |
| read_only image1d_t image, |
| sampler_t sampler, |
| <span class="predefined-type">float</span> coord, |
| <span class="predefined-type">float</span> gradient_x, |
| <span class="predefined-type">float</span> gradient_y) |
| |
| uint4 read_imageui( |
| read_only image1d_t image, |
| sampler_t sampler, |
| <span class="predefined-type">float</span> coord, |
| <span class="predefined-type">float</span> gradient_x, |
| <span class="predefined-type">float</span> gradient_y)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the gradients to compute the lod and coordinate <em>coord</em> to do an element lookup in the mip-level specified by the computed lod in the 1D image object specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image3d_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| int4 read_imagei( |
| read_only image3d_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| uint4 read_imageui( |
| read_only image3d_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>coord.xyz</em> to do an element lookup in the mip-level specified by <em>lod</em> in the 3D image object specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image3d_t image, |
| sampler_t sampler, |
| float4 coord, |
| float4 gradient_x, |
| float4 gradient_y) |
| |
| int4 read_imagei( |
| read_only image3d_t image, |
| sampler_t sampler, |
| float4 coord, |
| float4 gradient_x, |
| float4 gradient_y) |
| |
| uint4 read_imageui( |
| read_only image3d_t image, |
| sampler_t sampler, |
| float4 coord, |
| float4 gradient_x, |
| float4 gradient_y)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the gradients to compute the lod and coordinate <em>coord.xyz</em> to do an element lookup in the mip-level specified by the computed lod in the 3D image object specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image1d_array_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| int4 read_imagei( |
| read_only image1d_array_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| uint4 read_imageui( |
| read_only image1d_array_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> lod)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>coord.x</em> to do an element lookup in the 1D image identified by <em>coord.y</em> and mip-level specified by <em>lod</em> in the 1D image array specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image1d_array_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> gradient_x, |
| <span class="predefined-type">float</span> gradient_y) |
| |
| int4 read_imagei( |
| read_only image1d_array_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> gradient_x, |
| <span class="predefined-type">float</span> gradient_y) |
| |
| uint4 read_imageui( |
| read_only image1d_array_t image, |
| sampler_t sampler, |
| float2 coord, |
| <span class="predefined-type">float</span> gradient_x, |
| <span class="predefined-type">float</span> gradient_y)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the gradients to compute the lod and coordinate <em>coord.x</em> to do an element lookup in the 1D image identified by <em>coord.y</em> and mip-level specified by the computed lod in the 1D image array specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image2d_array_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| int4 read_imagei( |
| read_only image2d_array_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| uint4 read_imageui( |
| read_only image2d_array_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod) |
| |
| <span class="predefined-type">float</span> read_imagef( |
| read_only image2d_array_depth_t image, |
| sampler_t sampler, |
| float4 coord, |
| <span class="predefined-type">float</span> lod)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the coordinate <em>coord.xy</em> to do an element lookup in the 2D image identified by <em>coord.z</em> and mip-level specified by <em>lod</em> in the 2D image array specified by <em>image</em>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">float4 read_imagef( |
| read_only image2d_array_t image, |
| sampler_t sampler, |
| float4 coord, |
| float2 gradient_x, |
| float2 gradient_y) |
| |
| int4 read_imagei( |
| read_only image2d_array_t image, |
| sampler_t sampler, |
| float4 coord, |
| float2 gradient_x, |
| float2 gradient_y) |
| |
| uint4 read_imageui( |
| read_only image2d_array_t image, |
| sampler_t sampler, |
| float4 coord, |
| float2 gradient_x, |
| float2 gradient_y) |
| |
| <span class="predefined-type">float</span> read_imagef( |
| read_only image2d_array_depth_t image, |
| sampler_t sampler, |
| float4 coord, |
| float2 gradient_x, |
| float2 gradient_y)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Use the gradients to compute the lod and coordinate <em>coord.xy</em> to do an element lookup in the 2D image identified by <em>coord.z</em> and mip-level specified by the computed lod in the 2D image array specified by <em>image</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| CL_SAMPLER_NORMALIZED_COORDS must be CL_TRUE for built-in functions described in the table above that read from a mip-mapped image; otherwise the behavior is undefined. |
| The value specified in the <em>lod</em> argument is clamped to the minimum of (actual number of mip-levels – 1) in the image or the value specified for CL_SAMPLER_LOD_MAX_KHR. |
| </td> |
| </tr> |
| </table> |
| </div> |
| <div class="paragraph"> |
| <p>The following new built-in functions are added to <em>section 6.13.14.4</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span> write_imagef( |
| write_only image2d_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| float4 color) |
| |
| <span class="directive">void</span> write_imagei( |
| write_only image2d_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| int4 color) |
| |
| <span class="directive">void</span> write_imageui( |
| write_only image2d_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| uint4 color) |
| |
| <span class="directive">void</span> write_imagef( |
| write_only image2d_depth_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| <span class="predefined-type">float</span> depth)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.xy</em> in the mip-level specified by <em>lod</em> in the 2D image object specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done before writing the color value. |
| <em>coord.x</em> and <em>coord.y</em> are considered to be unnormalized coordinates and must be in the range 0 .. image width of mip-level specified by <em>lod</em> – 1, and 0 .. image height of mip-level specified by <em>lod</em> – 1.</p> |
| <p class="tableblock">The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> if (<em>x</em>, <em>y</em>) coordinate values are not in the range (0 .. image width of the mip-level specified by <em>lod</em> – 1, 0 .. image height of the mip-level specified by <em>lod</em> – 1) or <em>lod</em> value exceeds the (number of mip-levels in the image – 1) is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span> write_imagef( |
| write_only image1d_t image, |
| <span class="predefined-type">int</span> coord, |
| <span class="predefined-type">int</span> lod, |
| float4 color) |
| |
| <span class="directive">void</span> write_imagei( |
| write_only image1d_t image, |
| <span class="predefined-type">int</span> coord, |
| <span class="predefined-type">int</span> lod, |
| int4 color) |
| |
| <span class="directive">void</span> write_imageui( |
| write_only image1d_t image, |
| <span class="predefined-type">int</span> coord, |
| <span class="predefined-type">int</span> lod, |
| uint4 color)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord</em> in the mip-level specified by <em>lod</em> in the 1D image object specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done before writing the color value. |
| <em>coord</em> is considered to be an unnormalized coordinate and must be in the range 0 .. image width of the mip-level |
| specified by <em>lod</em> – 1.</p> |
| <p class="tableblock">The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> if coordinate value is not in the range (0 .. image width of the mip-level specified by <em>lod</em> – 1) or <em>lod</em> value exceeds the (number of mip-levels in the image – 1), is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span> write_imagef( |
| write_only image1d_array_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| float4 color) |
| |
| <span class="directive">void</span> write_imagei( |
| write_only image1d_array_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| int4 color) |
| |
| <span class="directive">void</span> write_imageui( |
| write_only image1d_array_t image, |
| int2 coord, |
| <span class="predefined-type">int</span> lod, |
| uint4 color)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.x</em> in the 1D image identified by <em>coord.y</em> and mip-level <em>lod</em> in the 1D image array specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done before writing the color value. |
| <em>coord.x</em> and <em>coord.y</em> are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level specified by <em>lod</em> – 1 and 0 .. image number of layers – 1.</p> |
| <p class="tableblock">The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> if (<em>x</em>, <em>y</em>) coordinate values are not in the range (0 .. image width of the mip-level specified by <em>lod</em> – 1, 0 .. image number of layers – 1), respectively or <em>lod</em> value exceeds the (number of mip-levels in the image – 1), is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span> write_imagef( |
| write_only image2d_array_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| float4 color) |
| |
| <span class="directive">void</span> write_imagei( |
| write_only image2d_array_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| int4 color) |
| |
| <span class="directive">void</span> write_imageui( |
| write_only image2d_array_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| uint4 color) |
| |
| <span class="directive">void</span> write_imagef( |
| write_only image2d_array_depth_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| <span class="predefined-type">float</span> depth)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write <em>color</em> value to location specified by <em>coord.xy</em> in the 2D image identified by <em>coord.z</em> and mip-level <em>lod</em> in the 2D image array specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done before writing the color value. |
| <em>coord.x</em>, <em>coord.y</em> and <em>coord.z</em> are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level specified by <em>lod</em> – 1, 0 .. image height of the mip-level specified by <em>lod</em> – 1 and 0 .. image number of layers – 1.</p> |
| <p class="tableblock">The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> if (<em>x</em>, <em>y, z</em>) coordinate values are not in the range (0 .. image width of the mip-level specified by <em>lod</em> – 1, 0 .. image height of the mip-level specified by <em>lod</em> – 1, 0 .. image number of layers – 1), respectively or <em>lod</em> value exceeds the (number of mip-levels in the image – 1), is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span> write_imagef( |
| write_only image3d_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| float4 color) |
| |
| <span class="directive">void</span> write_imagei( |
| write_only image3d_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| int4 color) |
| |
| <span class="directive">void</span> write_imageui( |
| write_only image3d_t image, |
| int4 coord, |
| <span class="predefined-type">int</span> lod, |
| uint4 color)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write color value to location specified by <em>coord.xyz</em> and mip-level <em>lod</em> in the 3D image object specified by <em>image</em>. |
| Appropriate data format conversion to the specified image format is done before writing the color value. |
| <em>coord.x</em>, <em>coord.y</em> and <em>coord.z</em> are considered to be unnormalized coordinates and must be in the range 0 .. image width of the mip-level |
| specified by <em>lod</em> – 1, 0 .. image height of the mip-level specified by <em>lod</em> – 1 and 0 .. image depth of the mip-level specified by <em>lod</em> – 1.</p> |
| <p class="tableblock">The behavior of <strong>write_imagef</strong>, <strong>write_imagei</strong> and <strong>write_imageui</strong> if (<em>x</em>, <em>y, z</em>) coordinate values are not in the range (0 .. image width of the mip-level specified by <em>lod</em> – 1, 0 .. image height of the mip-level specified by <em>lod</em> – 1, 0 .. image depth of the mip-level specified by <em>lod</em> – 1), respectively or <em>lod</em> value exceeds the (number of mip-levels in the image – 1), is undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>The following new built-in functions are added to <em>section 6.13.14.5</em>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> get_image_num_mip_levels( |
| image1d_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_mip_levels( |
| image2d_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_mip_levels( |
| image3d_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_mip_levels( |
| image1d_array_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_mip_levels( |
| image2d_array_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_mip_levels( |
| image2d_depth_t image) |
| |
| <span class="predefined-type">int</span> get_image_num_mip_levels( |
| image2d_array_depth_t image)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Return the number of mip-levels.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_mipmap_image-additions-to-creating-opencl-memory-objects-from-opengl-objects"><a class="anchor" href="#cl_khr_mipmap_image-additions-to-creating-opencl-memory-objects-from-opengl-objects"></a>29.4. Additions to <a href="#cl_khr_gl_sharing__memobjs">Creating OpenCL Memory Objects from OpenGL Objects</a></h3> |
| <div class="paragraph"> |
| <p>If both the <strong><code>cl_khr_mipmap_image</code></strong> and <strong><code>cl_khr_gl_sharing</code></strong> extensions are |
| supported by the OpenCL device, the <strong><code>cl_khr_gl_sharing</code></strong> extension may also |
| be used to create a mipmapped OpenCL image from a mipmapped OpenGL texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p>To create a mipmapped OpenCL image from a mipmapped OpenGL texture, pass a |
| negative value as the <em>miplevel</em> argument to <strong>clCreateFromGLTexture</strong>. |
| If <em>miplevel</em> is a negative value then an OpenCL mipmapped image object is |
| created from a mipmapped OpenGL texture object, instead of an OpenCL image |
| object for a specific miplevel of the OpenGL texture.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note: For a detailed description of how the level of detail is computed, |
| please refer to <em>section 3.9.7</em> of the OpenGL 3.0 specification.</p> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_srgb_image_writes"><a class="anchor" href="#cl_khr_srgb_image_writes"></a>30. sRGB Image Writes</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_srgb_image_writes</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension enables kernels to write to sRGB images using the <strong>write_imagef</strong> built-in function. |
| The sRGB image formats that may be written to will be returned by <strong>clGetSupportedImageFormats</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>When the image is an sRGB image, the <strong>write_imagef</strong> built-in function will perform the linear to sRGB conversion. |
| Only the R, G, and B components are converted from linear to sRGB; the A component is written as-is.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_29"><a class="anchor" href="#_general_information_29"></a>30.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_29"><a class="anchor" href="#_version_history_29"></a>30.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_priority_hints"><a class="anchor" href="#cl_khr_priority_hints"></a>31. Priority Hints</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_priority_hints</strong> extension. |
| This extension adds priority hints for OpenCL, but does not specify the |
| scheduling behavior or minimum guarantees. |
| It is expected that the user guides associated with each implementation |
| which supports this extension will describe the scheduling behavior |
| guarantees.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_30"><a class="anchor" href="#_general_information_30"></a>31.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_30"><a class="anchor" href="#_version_history_30"></a>31.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_priority_hints-host-side-api-modifications"><a class="anchor" href="#cl_khr_priority_hints-host-side-api-modifications"></a>31.2. Host-side API modifications</h3> |
| <div class="paragraph"> |
| <p>The function <strong><code>clCreateCommandQueueWithProperties</code></strong> (Section 5.1) is |
| extended to support a priority value as part of the <em>properties</em> argument.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The priority property applies to OpenCL command queues that belong to the |
| same OpenCL context.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The properties field accepts the <code>CL_QUEUE_PRIORITY_KHR</code> property, with a |
| value of type <code>cl_queue_priority_khr</code>, which can be one of:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_QUEUE_PRIORITY_HIGH_KHR</code></p> |
| </li> |
| <li> |
| <p><code>CL_QUEUE_PRIORITY_MED_KHR</code></p> |
| </li> |
| <li> |
| <p><code>CL_QUEUE_PRIORITY_LOW_KHR</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If <code>CL_QUEUE_PRIORITY_KHR</code> is not specified then the default priority is |
| <code>CL_QUEUE_PRIORITY_MED_KHR</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>To the error section for <strong><code>clCreateCommandQueueWithProperties</code></strong>, the |
| following is added:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_INVALID_QUEUE_PROPERTIES</code> if the <code>CL_QUEUE_PRIORITY_KHR</code> property is |
| specified and the queue is a <code>CL_QUEUE_ON_DEVICE</code>.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_throttle_hints"><a class="anchor" href="#cl_khr_throttle_hints"></a>32. Throttle Hints</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_throttle_hints</strong> extension. |
| This extension adds throttle hints for OpenCL, but does not specify the |
| throttling behavior or minimum guarantees. |
| It is expected that the user guide associated with each implementation which |
| supports this extension will describe the throttling behavior guarantees.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note that the throttle hint is orthogonal to functionality defined in |
| <strong>cl_khr_priority_hints</strong> extension. |
| For example, a task may have high priority (<code>CL_QUEUE_PRIORITY_HIGH_KHR</code>) |
| but should at the same time be executed at an optimized throttle setting |
| (<code>CL_QUEUE_THROTTLE_LOW_KHR</code>).</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_31"><a class="anchor" href="#_general_information_31"></a>32.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_31"><a class="anchor" href="#_version_history_31"></a>32.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_throttle_hints-host-side-api-modifications"><a class="anchor" href="#cl_khr_throttle_hints-host-side-api-modifications"></a>32.2. Host-side API modifications</h3> |
| <div class="paragraph"> |
| <p>The function <strong><code>clCreateCommandQueueWithProperties</code></strong> (Section 5.1) is |
| extended to support a new <code>CL_QUEUE_THROTTLE_KHR</code> value as part of the |
| <em>properties</em> argument.</p> |
| </div> |
| <div class="paragraph"> |
| <p>The <code>CL_QUEUE_THROTTLE_KHR</code> property accepts one of the following values:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_QUEUE_THROTTLE_HIGH_KHR</code> (full throttle, i.e., OK to consume more |
| energy)</p> |
| </li> |
| <li> |
| <p><code>CL_QUEUE_THROTTLE_MED_KHR</code> (normal throttle)</p> |
| </li> |
| <li> |
| <p><code>CL_QUEUE_THROTTLE_LOW_KHR</code> (optimized/lowest energy consumption)</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>If <code>CL_QUEUE_THROTTLE_KHR</code> is not specified then the default throttle setting is |
| <code>CL_QUEUE_THROTTLE_MED_KHR</code>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>To the error section for <strong><code>clCreateCommandQueueWithProperties</code></strong>, the |
| following is added:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_INVALID_QUEUE_PROPERTIES</code> if the <code>CL_QUEUE_THROTTLE_KHR</code> property is |
| specified and the queue is a <code>CL_QUEUE_ON_DEVICE</code>.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_subgroup_named_barrier"><a class="anchor" href="#cl_khr_subgroup_named_barrier"></a>33. Named Barriers for Subgroups</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_subgroup_named_barrier</strong> extension. |
| This extension adds barrier operations that cover subsets of an OpenCL |
| work-group. |
| Only the OpenCL API changes are described in this section. |
| Please refer to the SPIR-V specification for information about using |
| subgroup named barriers in the SPIR-V intermediate representation, and to |
| the OpenCL C++ specification for descriptions of the subgroup named |
| barrier built-in functions in the OpenCL C++ kernel language.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_32"><a class="anchor" href="#_general_information_32"></a>33.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_32"><a class="anchor" href="#_version_history_32"></a>33.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_changes_to_opencl_specification"><a class="anchor" href="#_changes_to_opencl_specification"></a>33.2. Changes to OpenCL specification</h3> |
| <div class="paragraph"> |
| <p>Add to <em>table 4.3</em>:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 16.6666%;"> |
| <col style="width: 50.0001%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Return Type</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>cl_uint</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Maximum number of named barriers in a work-group for any given |
| kernel-instance running on the device. |
| The minimum value is 8.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_extended_async_copies"><a class="anchor" href="#cl_khr_extended_async_copies"></a>34. Extended Async Copies (Provisional)</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_extended_async_copies</strong> provisional extension. |
| This extension augments built-in asynchronous copy functions to OpenCL C |
| to support more patterns:</p> |
| </div> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>for async copy between 2D source and 2D destination.</p> |
| </li> |
| <li> |
| <p>for async copy between 3D source and 3D destination.</p> |
| </li> |
| </ol> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_33"><a class="anchor" href="#_general_information_33"></a>34.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_33"><a class="anchor" href="#_version_history_33"></a>34.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0.9.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version (provisional).</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_extended_async_copies-additions-to-chapter-6-of-the-opencl-specification"><a class="anchor" href="#cl_khr_extended_async_copies-additions-to-chapter-6-of-the-opencl-specification"></a>34.2. Additions to Chapter 6 of the OpenCL C Specification</h3> |
| <div class="paragraph"> |
| <p>The following new built-in functions are added to the <em>Async Copies from Global to |
| Local Memory, Local to Global Memory, and Prefetch</em> functions described in <em>section 6.12.10</em> |
| and <em>section 6.13.10</em> of the OpenCL 1.2 and OpenCL 2.0 C specifications.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Note that <strong>async_work_group_strided_copy</strong> is a special case of |
| <strong>async_work_group_copy_2D2D</strong>, namely one which copies a single column to a |
| single line or vice versa. |
| For example:<br> |
| <code>async_work_group_strided_copy(dst, src, num_gentypes, src_stride)</code> is equal to<br> |
| <code>async_work_group_copy_2D2D(dst, src, 1, num_gentypes, src_stride-1, 1)</code></p> |
| </div> |
| <div class="paragraph"> |
| <p>These new built-in functions support the same <code>gentype</code> generic type names as |
| the standard asynchronous copy functions unless otherwise stated.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">event_t async_work_group_copy_2D2D( |
| __local gentype *dst, |
| <span class="directive">const</span> __global gentype *src, |
| size_t num_elements_per_line, |
| size_t num_lines, |
| size_t src_stride, |
| size_t dst_stride, |
| event_t event) |
| |
| event_t async_work_group_copy_2D2D( |
| __global gentype *dst, |
| <span class="directive">const</span> __local gentype *src, |
| size_t num_elements_per_line, |
| size_t num_lines, |
| size_t src_stride, |
| size_t dst_stride, |
| event_t event)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Perform an asynchronous copy of <em>num_lines</em> lines from <em>src</em> to <em>dst</em>. Each line |
| contains <em>num_elements_per_line</em> <code>gentype</code> elements. After each line of |
| transfer, <em>src</em> address is incremented by |
| (<em>src_stride</em> + <em>num_elements_per_line</em>) <code>gentype</code> elements, |
| <em>dst</em> address is incremented by |
| (<em>dst_stride</em> + <em>num_elements_per_line</em>) <code>gentype</code> elements |
| for the next line of transfer.</p> |
| <p class="tableblock">For these functions, the stride describes the number of elements between |
| the <strong>end</strong> of the current line and the <strong>beginning</strong> of the next line, i.e., |
| without overlap.</p> |
| <p class="tableblock">Returns an event object that can be used by <strong>wait_group_events</strong> to wait |
| for the async copy to finish. The <em>event</em> argument can also be used to |
| associate the <strong>async_work_group_copy_2D2D</strong> with a previous async copy |
| allowing an event to be shared by multiple async copies; |
| otherwise <em>event</em> should be zero.</p> |
| <p class="tableblock">If <em>event</em> argument is non-zero, the event object supplied in <em>event</em> |
| argument will be returned.</p> |
| <p class="tableblock">This function does not perform any implicit synchronization of source |
| data such as using a <strong>barrier</strong> before performing the copy.</p> |
| <p class="tableblock">The behavior of <strong>async_work_group_copy_2D2D</strong> is undefined if the |
| <em>num_elements_per_line</em> or <em>src_stride</em> or <em>dst_stride</em> values cause |
| the <em>src</em> or <em>dst</em> addresses to exceed the upper bounds of the address |
| space during the copy.</p> |
| <p class="tableblock">The async copy is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; |
| otherwise the results are undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">event_t async_work_group_copy_3D3D( |
| __local gentype *dst, |
| <span class="directive">const</span> __global gentype *src, |
| size_t num_elements_per_line, |
| size_t num_lines, |
| size_t src_line_stride, |
| size_t dst_line_stride, |
| size_t num_planes, |
| size_t src_plane_stride, |
| size_t dst_plane_stride, |
| event_t event) |
| |
| event_t async_work_group_copy_3D3D( |
| __global gentype *dst, |
| <span class="directive">const</span> __local gentype *src, |
| size_t num_elements_per_line, |
| size_t num_lines, |
| size_t src_line_stride, |
| size_t dst_line_stride, |
| size_t num_planes, |
| size_t src_plane_stride, |
| size_t dst_plane_stride, |
| event_t event)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Perform an async copy of <em>num_planes</em> times <em>num_lines</em> lines from <em>src</em> to |
| <em>dst</em> arranged in <em>num_planes</em> planes. Each plane contains <em>num_lines</em> |
| lines. Each line contains <em>num_elements_per_line</em> <code>gentype</code> elements. |
| After each line of transfer, <em>src</em> address is incremented by |
| (<em>src_line_stride</em> + <em>num_elements_per_line</em>) <code>gentype</code> elements, <em>dst</em> |
| address is incremented by (<em>dst_line_stride</em> + <em>num_elements_per_line</em>) |
| <code>gentype</code> elements for the next line of transfer. For the last line of a |
| plane, an additional <em>src_plane_stride</em> <code>gentype</code> elements is added to |
| <em>src</em> address, and an additional <em>dst_plane_stride</em> <code>gentype</code> elements is |
| added to <em>dst</em> address.</p> |
| <p class="tableblock">Returns an event object that can be used by <strong>wait_group_events</strong> to wait |
| for the async copy to finish. The <em>event</em> argument can also be used to |
| associate the <strong>async_work_group_copy_3D3D</strong> with a previous async copy |
| allowing an event to be shared by multiple async copies; |
| otherwise <em>event</em> should be zero.</p> |
| <p class="tableblock">If <em>event</em> argument is non-zero, the event object supplied in <em>event</em> |
| argument will be returned.</p> |
| <p class="tableblock">This function does not perform any implicit synchronization of source |
| data such as using a <strong>barrier</strong> before performing the copy.</p> |
| <p class="tableblock">The behavior of <strong>async_work_group_copy_3D3D</strong> is undefined if any of |
| <em>num_elements_per_line</em>, <em>src_line_stride</em>, <em>dst_line_stride</em>, |
| <em>src_plane_stride</em> or <em>dst_plane_stride</em> values cause the <em>src</em> or <em>dst</em> |
| addresses to exceed the upper bounds of the address space during the copy.</p> |
| <p class="tableblock">The async copy is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; |
| otherwise the results are undefined.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. If you have feedback please create an issue on <a href="https://github.com/KhronosGroup/OpenCL-Docs/" class="bare">https://github.com/KhronosGroup/OpenCL-Docs/</a> |
| </td> |
| </tr> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_async_work_group_copy_fence"><a class="anchor" href="#cl_khr_async_work_group_copy_fence"></a>35. Async Work Group Copy Fence (Provisional)</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_async_work_group_copy_fence</strong> provisional extension. |
| The extension adds a new built-in function to OpenCL C to establish a memory synchronization ordering of asynchronous copies.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_34"><a class="anchor" href="#_general_information_34"></a>35.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_34"><a class="anchor" href="#_version_history_34"></a>35.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-04-21</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">0.9.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version (provisional).</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_async_work_group_copy_fence-additions-to-chapter-6-of-the-opencl-specification"><a class="anchor" href="#cl_khr_async_work_group_copy_fence-additions-to-chapter-6-of-the-opencl-specification"></a>35.2. Additions to Chapter 6 of the OpenCL C Specification</h3> |
| <div class="paragraph"> |
| <p>The following new built-in function is added to the <em>Async Copies from Global to |
| Local Memory, Local to Global Memory, and Prefetch</em> functions described in <em>section 6.12.10</em> |
| and <em>section 6.13.10</em> of the OpenCL 1.2 and OpenCL 2.0 C specifications:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="directive">void</span> async_work_group_copy_fence( |
| cl_mem_fence_flags flags)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Orders async copies produced by the work-items of a work-group executing |
| a kernel. Async copies preceding the <strong>async_work_group_copy_fence</strong> must |
| complete their access to the designated memory or memories, |
| including both reads-from and writes-to it, before async copies |
| following the fence are allowed to start accessing these memories. |
| In other words, every async copy preceding the <strong>async_work_group_copy_fence</strong> |
| must happen-before every async copy following the fence, with respect to |
| the designated memory or memories.</p> |
| <p class="tableblock">The <em>flags</em> argument specifies the memory address space and can be set to a |
| combination of the following literal values:</p> |
| <p class="tableblock"><code>CLK_LOCAL_MEM_FENCE</code><br> |
| <code>CLK_GLOBAL_MEM_FENCE</code></p> |
| <p class="tableblock">The async fence is performed by all work-items in a work-group and this |
| built-in function must therefore be encountered by all work-items in a |
| work-group executing the kernel with the same argument values; |
| otherwise the results are undefined. This rule applies to ND-ranges |
| implemented with uniform and non-uniform work-groups.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| This is a preview of an OpenCL provisional extension specification that has been Ratified under the Khronos Intellectual Property Framework. It is being made publicly available prior to being uploaded to the Khronos registry to enable review and feedback from the community. If you have feedback, please create an issue on <a href="https://github.com/KhronosGroup/OpenCL-Docs/" class="bare">https://github.com/KhronosGroup/OpenCL-Docs/</a>. |
| </td> |
| </tr> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_device_uuid"><a class="anchor" href="#cl_khr_device_uuid"></a>36. Unique Device Identifiers</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This section describes the <strong>cl_khr_device_uuid</strong> extension.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension adds the ability to query a universally unique identifier |
| (UUID) for an OpenCL driver and OpenCL device. |
| The UUIDs returned by the query may be used to identify drivers and devices |
| across processes or APIs.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_35"><a class="anchor" href="#_general_information_35"></a>36.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_35"><a class="anchor" href="#_version_history_35"></a>36.1.1. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-08-27</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_additions_to_chapter_4_of_the_opencl_3_0_api_specification"><a class="anchor" href="#_additions_to_chapter_4_of_the_opencl_3_0_api_specification"></a>36.2. Additions to Chapter 4 of the OpenCL 3.0 API Specification</h3> |
| <div class="paragraph"> |
| <p>Add to Table 5 - OpenCL Device Queries:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 5. OpenCL Device Queries</caption> |
| <colgroup> |
| <col style="width: 30%;"> |
| <col style="width: 20%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top">Return Type</th> |
| <th class="tableblock halign-left valign-top">Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_UUID_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_uchar[CL_UUID_SIZE_KHR]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a universally unique identifier (UUID) for the device.</p> |
| <p class="tableblock"> Device UUIDs must be immutable for a given device across processes, driver APIs, driver versions, and system reboots.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DRIVER_UUID_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_uchar[CL_UUID_SIZE_KHR]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a universally unique identifier (UUID) for the software driver for the device.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_LUID_VALID_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_bool</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <code>CL_TRUE</code> if the device has a valid LUID and <code>CL_FALSE</code> otherwise.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_LUID_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_uchar[CL_LUID_SIZE_KHR]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a locally unique identifier (LUID) for the device.</p> |
| <p class="tableblock"> It is not an error to query <code>CL_DEVICE_LUID_KHR</code> when <code>CL_DEVICE_LUID_VALID_KHR</code> returns <code>CL_FALSE</code>, but in this case the returned LUID value is undefined.</p> |
| <p class="tableblock"> When <code>CL_DEVICE_LUID_VALID_KHR</code> returns <code>CL_TRUE</code>, and the OpenCL device is running on the Windows operating system, the returned LUID value can be cast to an <code>LUID</code> object and must be equal to the locally unique identifier of an <code>IDXGIAdapter1</code> object that corresponds to the OpenCL device.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_NODE_MASK_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_uint</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a node mask for the device.</p> |
| <p class="tableblock"> It is not an error to query <code>CL_DEVICE_NODE_MASK_KHR</code> when <code>CL_DEVICE_LUID_VALID_KHR</code> returns <code>CL_FALSE</code>, but in this case the returned node mask is undefined.</p> |
| <p class="tableblock"> When <code>CL_DEVICE_LUID_VALID_KHR</code> returns <code>CL_TRUE</code>, the returned node mask must contain exactly one set bit. |
| If the OpenCL device is running on an operating system that supports the Direct3D 12 API and the OpenCL device corresponds to an individual device in a linked device adapter, the returned node mask identifies the Direct3D 12 node corresponding to the OpenCL device. |
| Otherwise, the returned node mask must be <code>1</code>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| While <code>CL_DEVICE_UUID_KHR</code> is specified to remain consistent across driver versions and system reboots, it is not intended to be usable as a serializable persistent identifier for a device. |
| It may change when a device is physically added to, removed from, or moved to a different connector in a system while that system is powered down. |
| Further, there is no reasonable way to verify with conformance testing that a given device retains the same UUID in a given system across all driver versions supported in that system. |
| While implementations should make every effort to report consistent device UUIDs across driver versions, applications should avoid relying on the persistence of this value for uses other than identifying compatible devices for external object sharing purposes. |
| </td> |
| </tr> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_extended_versioning"><a class="anchor" href="#cl_khr_extended_versioning"></a>37. Extended versioning</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension introduces new platform and device queries that return detailed |
| version information to applications. It makes it possible to return the exact |
| revision of the specification or intermediate languages supported by an |
| implementation. It also enables implementations to communicate a version |
| number for each of the extensions they support and removes the requirement |
| for applications to process strings to test for the presence of an extension, |
| intermediate language, or built-in kernel.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_36"><a class="anchor" href="#_general_information_36"></a>37.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_name_strings"><a class="anchor" href="#_name_strings"></a>37.1.1. Name Strings</h4> |
| <div class="paragraph"> |
| <p><code>cl_khr_extended_versioning</code></p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_contributors"><a class="anchor" href="#_contributors"></a>37.1.2. Contributors</h4> |
| <div class="paragraph"> |
| <p>Kévin Petit, Arm Ltd.<br> |
| Ben Ashbaugh, Intel<br> |
| Alastair Murray, Codeplay Software Ltd.<br> |
| Einar Hov, Arm Ltd.</p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_version_history_36"><a class="anchor" href="#_version_history_36"></a>37.1.3. Version history</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-02-12</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Initial version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_dependencies"><a class="anchor" href="#_dependencies"></a>37.1.4. Dependencies</h4> |
| <div class="paragraph"> |
| <p>This extension is written against the OpenCL Specification |
| Version 2.2, Revision 11.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension requires OpenCL 1.0.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_types_2"><a class="anchor" href="#_new_api_types_2"></a>37.2. New API Types</h3> |
| <div class="sect3"> |
| <h4 id="_version"><a class="anchor" href="#_version"></a>37.2.1. Version</h4> |
| <div class="paragraph"> |
| <p>This extension introduces a new scheme to encode detailed |
| (major, minor, patch/revision) version information into a single 32-bit unsigned |
| integer:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The major version is using bits 31-22</p> |
| </li> |
| <li> |
| <p>The minor version is using bits 21-12</p> |
| </li> |
| <li> |
| <p>The patch version is using bits 11-0</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>This scheme enables two versions to be ordered using the standard C/C++ operators. |
| Macros are provided to extract individual fields or compose a full version |
| from the individual fields.</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="keyword">typedef</span> cl_uint cl_version_khr; |
| |
| <span class="preprocessor">#define</span> CL_VERSION_MAJOR_BITS_KHR (<span class="integer">10</span>) |
| <span class="preprocessor">#define</span> CL_VERSION_MINOR_BITS_KHR (<span class="integer">10</span>) |
| <span class="preprocessor">#define</span> CL_VERSION_PATCH_BITS_KHR (<span class="integer">12</span>) |
| |
| <span class="preprocessor">#define</span> CL_VERSION_MAJOR_MASK_KHR ((<span class="integer">1</span> &lt;&lt; CL_VERSION_MAJOR_BITS_KHR) - <span class="integer">1</span>) |
| <span class="preprocessor">#define</span> CL_VERSION_MINOR_MASK_KHR ((<span class="integer">1</span> &lt;&lt; CL_VERSION_MINOR_BITS_KHR) - <span class="integer">1</span>) |
| <span class="preprocessor">#define</span> CL_VERSION_PATCH_MASK_KHR ((<span class="integer">1</span> &lt;&lt; CL_VERSION_PATCH_BITS_KHR) - <span class="integer">1</span>) |
| |
| <span class="preprocessor">#define</span> CL_VERSION_MAJOR_KHR(version) \ |
| ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) |
| <span class="preprocessor">#define</span> CL_VERSION_MINOR_KHR(version) \ |
| (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) |
| <span class="preprocessor">#define</span> CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) |
| |
| <span class="preprocessor">#define</span> CL_MAKE_VERSION_KHR(major, minor, patch) \ |
| ((((major) & CL_VERSION_MAJOR_MASK_KHR) &lt;&lt; (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ |
| (((minor) & CL_VERSION_MINOR_MASK_KHR) &lt;&lt; CL_VERSION_PATCH_BITS_KHR) | \ |
| ((patch) & CL_VERSION_PATCH_MASK_KHR))</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_name_and_version"><a class="anchor" href="#_name_and_version"></a>37.2.2. Name and version</h4> |
| <div class="paragraph"> |
| <p>This extension adds a structure that can be used to describe a combination of a |
| name alongside a version number:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#define</span> CL_NAME_VERSION_MAX_NAME_SIZE_KHR <span class="integer">64</span> |
| |
| <span class="keyword">typedef</span> <span class="keyword">struct</span> _cl_name_version_khr { |
| cl_version_khr version; |
| <span class="predefined-type">char</span> name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; |
| } cl_name_version_khr;</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The <code>name</code> field is an array of <code>CL_NAME_VERSION_MAX_NAME_SIZE_KHR</code> bytes used as |
| storage for a NUL-terminated string whose maximum length is therefore |
| <code>CL_NAME_VERSION_MAX_NAME_SIZE_KHR - 1</code>.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_enums"><a class="anchor" href="#_new_api_enums"></a>37.3. New API Enums</h3> |
| <div class="paragraph"> |
| <p>Accepted values for the <em>param_name</em> parameter to <strong>clGetPlatformInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">CL_PLATFORM_NUMERIC_VERSION_KHR |
| CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>Accepted values for the <em>param_name</em> parameter to <strong>clGetDeviceInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">CL_DEVICE_NUMERIC_VERSION_KHR |
| CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR |
| CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR |
| CL_DEVICE_ILS_WITH_VERSION_KHR |
| CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_the_opencl_api_specification"><a class="anchor" href="#_modifications_to_the_opencl_api_specification"></a>37.4. Modifications to the OpenCL API Specification</h3> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">(Modify Section 4.1, <strong>Querying Platform Info</strong>) </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">(Add the following to Table 3, <em>List of supported param_names by clGetPlatformInfo</em>) </dt> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 37.5%;"> |
| <col style="width: 25%;"> |
| <col style="width: 37.5%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top">cl_platform_info</th> |
| <th class="tableblock halign-left valign-top">Return Type</th> |
| <th class="tableblock halign-left valign-top">Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_PLATFORM_NUMERIC_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_version_khr</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns detailed (major, minor, patch) numeric version information. The major |
| and minor version numbers returned must match those returned via |
| <code>CL_PLATFORM_VERSION</code>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_name_version_khr[]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns an array of description (name and version) structures. The same |
| extension name must not be reported more than once. The list of extensions |
| reported must match the list reported via <code>CL_PLATFORM_EXTENSIONS</code>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">(Modify Section 4.2, <strong>Querying Devices</strong>) </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">(Add the following to Table 5, <em>List of supported param_names by clGetDeviceInfo</em>) </dt> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 37.5%;"> |
| <col style="width: 25%;"> |
| <col style="width: 37.5%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top">cl_device_info</th> |
| <th class="tableblock halign-left valign-top">Return Type</th> |
| <th class="tableblock halign-left valign-top">Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_NUMERIC_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_version_khr</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns detailed (major, minor, patch) numeric version information. The major |
| and minor version numbers returned must match those returned via |
| <code>CL_DEVICE_VERSION</code>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_version_khr</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns detailed (major, minor, patch) numeric version information. The major |
| and minor version numbers returned must match those returned via |
| <code>CL_DEVICE_OPENCL_C_VERSION</code>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_name_version_khr[]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns an array of description (name and version) structures. The same |
| extension name must not be reported more than once. The list of extensions |
| reported must match the list reported via <code>CL_DEVICE_EXTENSIONS</code>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_ILS_WITH_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_name_version_khr[]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns an array of descriptions (name and version) for all supported |
| Intermediate Languages. Intermediate Languages with the same name may be |
| reported more than once but each name and major/minor version combination |
| may only be reported once. The list of intermediate languages reported must |
| match the list reported via <code>CL_DEVICE_IL_VERSION</code>.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_name_version_khr[]</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns an array of descriptions for the built-in kernels supported by the device. |
| Each built-in kernel may only be reported once. The list of reported kernels must |
| match the list returned via <code>CL_DEVICE_BUILT_IN_KERNELS</code>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="openblock"> |
| <div class="content"> |
| |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_conformance_tests"><a class="anchor" href="#_conformance_tests"></a>37.5. Conformance tests</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>Each of the new queries described in this extension must be attempted and |
| succeed.</p> |
| </li> |
| <li> |
| <p>It must be verified that the information returned by all queries that |
| extend existing queries is consistent with the information returned |
| by existing queries.</p> |
| </li> |
| <li> |
| <p>Some of the queries introduced by this extension impose uniqueness constraints |
| on the list of returned values. It must be verified that these constraints are |
| satisfied.</p> |
| </li> |
| </ol> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_issues"><a class="anchor" href="#_issues"></a>37.6. Issues</h3> |
| <div class="olist arabic"> |
| <ol class="arabic"> |
| <li> |
| <p>What compatibility policy should we define (e.g. a <em>revision</em> has to be |
| backwards-compatible with previous ones)?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: No general rules as that wouldn’t be testable. Here’s a recommended policy:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>Patch version bump: only clarifications and small/obvious bugfixes.</p> |
| </li> |
| <li> |
| <p>Minor version bump: backwards-compatible changes only.</p> |
| </li> |
| <li> |
| <p>Major version bump: backwards compatibility may break.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Do we want versioning for built-in kernels as returned by <code>CL_DEVICE_BUILT_IN_KERNELS</code>?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: No immediate use-case for versioning but being able to get a list of |
| individual kernels without parsing a string is desirable. Adding |
| <code>CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR</code>.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What is the behaviour of the queries that return an array of structures when |
| there are no elements to return?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: The query succeeds and the size returned is zero.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>What value should be returned when version information is not available?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: If a patch version is not available, it should be reported as 0. |
| If no version information is available, 0.0.0 should be reported. |
| These values have been chosen as they are guaranteed to be lower |
| than or equal to any other version.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should we add a query to report SPIR-V extended instruction sets?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: It is unlikely that we will introduce many SPIR-V extended |
| instruction sets without an accompanying API extension. Decided |
| not to do this.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Should the queries for which the old-style query doesn’t exist in a given |
| OpenCL version be present (e.g. <code>CL_DEVICE_ILS_WITH_VERSION_KHR</code> |
| prior to OpenCL 2.1 or without support for <code>cl_khr_il_program</code> or |
| <code>CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR</code> on OpenCL 1.0)?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: All the queries are always present. |
| <code>CL_DEVICE_ILS_WITH_VERSION_KHR</code> returns an empty set |
| when Intermediate Languages are not supported. |
| <code>CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR</code> always returns 1.0 on an |
| OpenCL 1.0 platform.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| <li> |
| <p>Is reporting multiple Intermediate Languages with the same name and major/minor |
| versions but differing patch versions allowed?</p> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p><strong>RESOLVED</strong>: No. This isn’t aligned with the intended use for patch versions and |
| makes it harder for implementations to guarantee consistency with |
| the existing IL queries.</p> |
| </div> |
| </div> |
| </div> |
| </li> |
| </ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_extended_subgroup_functions"><a class="anchor" href="#_extended_subgroup_functions"></a>38. Extended Subgroup Functions</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="extended-subgroups"><a class="anchor" href="#extended-subgroups"></a>38.1. Overview</h3> |
| <div class="paragraph"> |
| <p>This section describes a family of extensions that provide extended subgroup functionality. |
| The extensions in this family are:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>cl_khr_subgroup_extended_types</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_subgroup_non_uniform_vote</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_subgroup_ballot</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_subgroup_non_uniform_arithmetic</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_subgroup_shuffle</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_subgroup_shuffle_relative</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_subgroup_clustered_reduce</code></p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>The functionality added by these extensions includes:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>Additional data type support for subgroup broadcast, scan, and reduction functions;</p> |
| </li> |
| <li> |
| <p>The ability to elect a single work item from a subgroup to perform a task;</p> |
| </li> |
| <li> |
| <p>The ability to hold votes among work items in a subgroup;</p> |
| </li> |
| <li> |
| <p>The ability to collect and operate on ballots from work items in the subgroup;</p> |
| </li> |
| <li> |
| <p>The ability to use some subgroup functions, such as any, all, broadcasts, scans, and reductions within non-uniform flow control;</p> |
| </li> |
| <li> |
| <p>Additional scan and reduction operators;</p> |
| </li> |
| <li> |
| <p>Additional ways to exchange data among work items in a subgroup;</p> |
| </li> |
| <li> |
| <p>Clustered reductions, which operate on a subset of work items in the subgroup.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p>This section describes changes to the OpenCL C Language for these extensions. |
| There are no new API functions or enums added by these extensions.</p> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_37"><a class="anchor" href="#_general_information_37"></a>38.2. General information</h3> |
| <div class="sect3"> |
| <h4 id="_version_history_37"><a class="anchor" href="#_version_history_37"></a>38.2.1. Version history</h4> |
| <div class="paragraph"> |
| <p>For all of the extensions described in this section:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2020-12-15</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">First assigned version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="extended-subgroups-summary"><a class="anchor" href="#extended-subgroups-summary"></a>38.3. Summary of New OpenCL C Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_extended_types:</span> |
| |
| <span class="comment">// Note: Existing functions supporting additional data types.</span> |
| |
| gentype sub_group_broadcast( gentype value, uint index ) |
| |
| gentype sub_group_reduce_add( gentype value ) |
| gentype sub_group_reduce_min( gentype value ) |
| gentype sub_group_reduce_max( gentype value ) |
| |
| gentype sub_group_scan_inclusive_add( gentype value ) |
| gentype sub_group_scan_inclusive_min( gentype value ) |
| gentype sub_group_scan_inclusive_max( gentype value ) |
| |
| gentype sub_group_scan_exclusive_add( gentype value ) |
| gentype sub_group_scan_exclusive_min( gentype value ) |
| gentype sub_group_scan_exclusive_max( gentype value ) |
| |
| <span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_non_uniform_vote:</span> |
| |
| <span class="predefined-type">int</span> sub_group_elect() |
| |
| <span class="predefined-type">int</span> sub_group_non_uniform_all( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_any( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_all_equal( gentype value ) |
| |
| <span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_ballot:</span> |
| |
| gentype sub_group_non_uniform_broadcast( gentype value, uint index ) |
| gentype sub_group_broadcast_first( gentype value ) |
| |
| uint4 sub_group_ballot( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_inverse_ballot( uint4 value ) |
| <span class="predefined-type">int</span> sub_group_ballot_bit_extract( uint4 value, uint index ) |
| uint sub_group_ballot_bit_count( uint4 value ) |
| uint sub_group_ballot_inclusive_scan( uint4 value ) |
| uint sub_group_ballot_exclusive_scan( uint4 value ) |
| uint sub_group_ballot_find_lsb( uint4 value ) |
| uint sub_group_ballot_find_msb( uint4 value ) |
| |
| uint4 get_sub_group_eq_mask() |
| uint4 get_sub_group_ge_mask() |
| uint4 get_sub_group_gt_mask() |
| uint4 get_sub_group_le_mask() |
| uint4 get_sub_group_lt_mask() |
| |
| <span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_non_uniform_arithmetic:</span> |
| |
| gentype sub_group_non_uniform_reduce_add( gentype value ) |
| gentype sub_group_non_uniform_reduce_mul( gentype value ) |
| gentype sub_group_non_uniform_reduce_min( gentype value ) |
| gentype sub_group_non_uniform_reduce_max( gentype value ) |
| gentype sub_group_non_uniform_reduce_and( gentype value ) |
| gentype sub_group_non_uniform_reduce_or( gentype value ) |
| gentype sub_group_non_uniform_reduce_xor( gentype value ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_reduce_logical_and( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_reduce_logical_or( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_reduce_logical_xor( <span class="predefined-type">int</span> predicate ) |
| |
| gentype sub_group_non_uniform_scan_inclusive_add( gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_mul( gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_min( gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_max( gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_and( gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_or( gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_xor( gentype value ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_inclusive_logical_and( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_inclusive_logical_or( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_inclusive_logical_xor( <span class="predefined-type">int</span> predicate ) |
| |
| gentype sub_group_non_uniform_scan_exclusive_add( gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_mul( gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_min( gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_max( gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_and( gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_or( gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_xor( gentype value ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_exclusive_logical_and( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_exclusive_logical_or( <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_exclusive_logical_xor( <span class="predefined-type">int</span> predicate ) |
| |
| <span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_shuffle:</span> |
| |
| gentype sub_group_shuffle( gentype value, uint index ) |
| gentype sub_group_shuffle_xor( gentype value, uint mask ) |
| |
| <span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_shuffle_relative:</span> |
| |
| gentype sub_group_shuffle_up( gentype value, uint delta ) |
| gentype sub_group_shuffle_down( gentype value, uint delta ) |
| |
| <span class="comment">// These functions are available to devices supporting</span> |
| <span class="comment">// cl_khr_subgroup_clustered_reduce:</span> |
| |
| gentype sub_group_clustered_reduce_add( gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_mul( gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_min( gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_max( gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_and( gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_or( gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_xor( gentype value, uint clustersize ) |
| <span class="predefined-type">int</span> sub_group_clustered_reduce_logical_and( <span class="predefined-type">int</span> predicate, uint clustersize ) |
| <span class="predefined-type">int</span> sub_group_clustered_reduce_logical_or( <span class="predefined-type">int</span> predicate, uint clustersize ) |
| <span class="predefined-type">int</span> sub_group_clustered_reduce_logical_xor( <span class="predefined-type">int</span> predicate, uint clustersize )</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_extended_types"><a class="anchor" href="#cl_khr_subgroup_extended_types"></a>38.4. Extended Types</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_extended_types</code>. |
| This extension adds additional supported data types to the existing subgroup broadcast, scan, and reduction functions.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_modify_the_existing_section_describing_subgroup_functions"><a class="anchor" href="#_modify_the_existing_section_describing_subgroup_functions"></a>38.4.1. Modify the Existing Section Describing Subgroup Functions</h4> |
| <div class="paragraph"> |
| <p>Modify the first paragraph in this section that describes <code>gentype</code> type support for the subgroup <code>broadcast</code>, <code>scan</code>, and <code>reduction</code> functions to add scalar <code>char</code>, <code>uchar</code>, <code>short</code>, and <code>ushort</code> support, and to additionally add built-in vector type support for <code>broadcast</code> specifically. |
| The functions in the table and their descriptions remain unchanged by this extension:</p> |
| </div> |
| <div class="paragraph"> |
| <p>The table below describes OpenCL C programming language built-in functions that operate on a subgroup level. |
| These built-in functions must be encountered by all work items in the subgroup executing the kernel. |
| We use the generic type name <code>gentype</code> to indicate the built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <div class="paragraph"> |
| <p>For the <code>sub_group_broadcast</code> function, the generic type name <code>gentype</code> may additionally be one of the supported built-in vector data types <code>char<em>n</em></code>, <code>uchar<em>n</em></code>, <code>short<em>n</em></code>, <code>ushort<em>n</em></code>, <code>int<em>n</em></code>, <code>uint<em>n</em></code>, <code>long<em>n</em></code>, <code>ulong<em>n</em></code>, <code>float<em>n</em></code>, <code>double<em>n</em></code> (if double precision is supported), or <code>half<em>n</em></code> (if half precision is supported).</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_non_uniform_vote"><a class="anchor" href="#cl_khr_subgroup_non_uniform_vote"></a>38.5. Votes and Elections</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_non_uniform_vote</code>. |
| This extension adds the ability to elect a single work item from a subgroup to perform a task and to hold votes among work items in a subgroup.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_add_a_new_section_6_15_x_subgroup_vote_and_elect_built_in_functions"><a class="anchor" href="#_add_a_new_section_6_15_x_subgroup_vote_and_elect_built_in_functions"></a>38.5.1. Add a new Section 6.15.X - Subgroup Vote and Elect Built-in Functions</h4> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions to elect a single work item in a subgroup to perform a task and to collectively vote to determine a boolean condition for the subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_elect()</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Elects a single work item in the subgroup to perform a task. |
| This function will return true (nonzero) for the active work item in the subgroup with the smallest subgroup local ID, and false (zero) for all other active work items in the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_non_uniform_all( |
| <span class="predefined-type">int</span> predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Examines <em>predicate</em> for all active work items in the subgroup and returns a non-zero value if <em>predicate</em> is non-zero for all active work items in the subgroup and zero otherwise.</p> |
| <p class="tableblock">Note: This behavior is the same as <code>sub_group_all</code> from <code>cl_khr_subgroups</code> and OpenCL 2.1, except this function need not be encountered by all work items in the subgroup executing the kernel.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_non_uniform_any( |
| <span class="predefined-type">int</span> predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Examines <em>predicate</em> for all active work items in the subgroup and returns a non-zero value if <em>predicate</em> is non-zero for any active work item in the subgroup and zero otherwise.</p> |
| <p class="tableblock">Note: This behavior is the same as <code>sub_group_any</code> from <code>cl_khr_subgroups</code> and OpenCL 2.1, except this function need not be encountered by all work items in the subgroup executing the kernel.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_non_uniform_all_equal( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Examines <em>value</em> for all active work items in the subgroup and returns a non-zero value if <em>value</em> is equivalent for all active work items in the subgroup and zero otherwise.</p> |
| <p class="tableblock">Integer types use a bitwise test for equality. Floating-point types use an ordered floating-point test for equality.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_ballot"><a class="anchor" href="#cl_khr_subgroup_ballot"></a>38.6. Ballots</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_ballot</code>. |
| This extension adds the ability to collect and operate on ballots from work items in the subgroup.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_add_a_new_section_6_15_x_subgroup_ballot_built_in_functions"><a class="anchor" href="#_add_a_new_section_6_15_x_subgroup_ballot_built_in_functions"></a>38.6.1. Add a new Section 6.15.X - Subgroup Ballot Built-in Functions</h4> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions to allow work items in a subgroup to collect and operate on ballots from work items in the subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel.</p> |
| </div> |
| <div class="paragraph"> |
| <p>For the <code>sub_group_non_uniform_broadcast</code> and <code>sub_group_broadcast_first</code> functions, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <div class="paragraph"> |
| <p>For the <code>sub_group_non_uniform_broadcast</code> function, the generic type name <code>gentype</code> may additionally be one of the supported built-in vector data types <code>char<em>n</em></code>, <code>uchar<em>n</em></code>, <code>short<em>n</em></code>, <code>ushort<em>n</em></code>, <code>int<em>n</em></code>, <code>uint<em>n</em></code>, <code>long<em>n</em></code>, <code>ulong<em>n</em></code>, <code>float<em>n</em></code>, <code>double<em>n</em></code> (if double precision is supported), or <code>half<em>n</em></code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_broadcast( |
| gentype value, |
| uint index )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>value</em> for the work item with subgroup local ID equal to <em>index</em>.</p> |
| <p class="tableblock">Behavior is undefined when the value of <em>index</em> is not equivalent for all active work items in the subgroup.</p> |
| <p class="tableblock">The return value is undefined if the work item with subgroup local ID equal to <em>index</em> is inactive or if <em>index</em> is greater than or equal to the size of the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_broadcast_first( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>value</em> for the work item with the smallest subgroup local ID among active work items in the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint4 sub_group_ballot( |
| <span class="predefined-type">int</span> predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a bitfield combining the <em>predicate</em> values from all work items in the subgroup. |
| Bit zero of the first vector component represents the subgroup local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing subgroup local IDs. |
| The representative bit in the bitfield is set if the work item is active and the <em>predicate</em> is non-zero, and is unset otherwise.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_inverse_ballot( |
| uint4 value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the predicate value for this work item in the subgroup from the bitfield <em>value</em> representing predicate values from all work items in the subgroup. |
| The predicate return value will be non-zero if the bit in the bitfield <em>value</em> for this work item is set, and zero otherwise.</p> |
| <p class="tableblock">Behavior is undefined when <em>value</em> is not equivalent for all active work items in the subgroup.</p> |
| <p class="tableblock">This is a specialized function that may perform better than the equivalent <code>sub_group_ballot_bit_extract</code> on some implementations.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_ballot_bit_extract( |
| uint4 value, |
| uint index )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the predicate value for the work item with subgroup local ID equal to <em>index</em> from the bitfield <em>value</em> representing predicate values from all work items in the subgroup. |
| The predicate return value will be non-zero if the bit in the bitfield <em>value</em> for the work item with subgroup local ID equal to <em>index</em> is set, and zero otherwise.</p> |
| <p class="tableblock">The predicate return value is undefined if <em>index</em> is greater than or equal to the size of the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint sub_group_ballot_bit_count( |
| uint4 value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of bits that are set in the bitfield <em>value</em>, only considering the bits in <em>value</em> that represent predicate values from all work items in the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint sub_group_ballot_inclusive_scan( |
| uint4 value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of bits that are set in the bitfield <em>value</em>, only considering the bits in <em>value</em> representing work items with a subgroup local ID less than or equal to this work item’s subgroup local ID.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint sub_group_ballot_exclusive_scan( |
| uint4 value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the number of bits that are set in the bitfield <em>value</em>, only considering the bits in <em>value</em> representing work items with a subgroup local ID less than this work item’s subgroup local ID.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint sub_group_ballot_find_lsb( |
| uint4 value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the smallest subgroup local ID with a bit set in the bitfield <em>value</em>, only considering the bits in <em>value</em> that represent predicate values from all work items in the subgroup. |
| If no bits representing predicate values from all work items in the subgroup are set in the bitfield <em>value</em> then the return value is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint sub_group_ballot_find_msb( |
| uint4 value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the largest subgroup local ID with a bit set in the bitfield <em>value</em>, only considering the bits in <em>value</em> that represent predicate values from all work items in the subgroup. |
| If no bits representing predicate values from all work items in the subgroup are set in the bitfield <em>value</em> then the return value is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint4 get_sub_group_eq_mask()</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Generates a bitmask of all work items in the subgroup, where the bit is set in the bitmask if the bit index equals the subgroup local ID and unset otherwise. |
| Bit zero of the first vector component represents the subgroup local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing subgroup local IDs.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint4 get_sub_group_ge_mask()</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Generates a bitmask of all work items in the subgroup, where the bit is set in the bitmask if the bit index is greater than or equal to the subgroup local ID and less than the maximum subgroup size, and unset otherwise. |
| Bit zero of the first vector component represents the subgroup local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing subgroup local IDs.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint4 get_sub_group_gt_mask()</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Generates a bitmask of all work items in the subgroup, where the bit is set in the bitmask if the bit index is greater than the subgroup local ID and less than the maximum subgroup size, and unset otherwise. |
| Bit zero of the first vector component represents the subgroup local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing subgroup local IDs.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint4 get_sub_group_le_mask()</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Generates a bitmask of all work items in the subgroup, where the bit is set in the bitmask if the bit index is less than or equal to the subgroup local ID and unset otherwise. |
| Bit zero of the first vector component represents the subgroup local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing subgroup local IDs.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">uint4 get_sub_group_lt_mask()</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Generates a bitmask of all work items in the subgroup, where the bit is set in the bitmask if the bit index is less than the subgroup local ID and unset otherwise. |
| Bit zero of the first vector component represents the subgroup local ID zero, with higher-order bits and subsequent vector components representing, in order, increasing subgroup local IDs.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_non_uniform_arithmetic"><a class="anchor" href="#cl_khr_subgroup_non_uniform_arithmetic"></a>38.7. Non-Uniform Arithmetic</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_non_uniform_arithmetic</code>. |
| This extension adds the ability to use some subgroup functions within non-uniform flow control, including additional scan and reduction operators.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_add_a_new_section_6_15_x_non_uniform_subgroup_scan_and_reduction_built_in_functions"><a class="anchor" href="#_add_a_new_section_6_15_x_non_uniform_subgroup_scan_and_reduction_built_in_functions"></a>38.7.1. Add a new Section 6.15.X - Non-Uniform Subgroup Scan and Reduction Built-in Functions</h4> |
| <div class="sect4"> |
| <h5 id="_arithmetic_operations"><a class="anchor" href="#_arithmetic_operations"></a>38.7.1.1. Arithmetic Operations</h5> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions that perform simple arithmetic operations across work items in a subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 60%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_reduce_add( |
| gentype value ) |
| gentype sub_group_non_uniform_reduce_min( |
| gentype value ) |
| gentype sub_group_non_uniform_reduce_max( |
| gentype value ) |
| gentype sub_group_non_uniform_reduce_mul( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the summation, multiplication, minimum, or maximum of <em>value</em> for all active work items in the subgroup.</p> |
| <p class="tableblock">Note: This behavior is the same as the <strong>add</strong>, <strong>min</strong>, and <strong>max</strong> reduction built-in functions from <code>cl_khr_subgroups</code> and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the subgroup executing the kernel.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_scan_inclusive_add( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_min( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_max( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_mul( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of an inclusive scan operation, which is the summation, multiplication, minimum, or maximum of <em>value</em> for all active work items in the subgroup with a subgroup local ID less than or equal to this work item’s subgroup local ID.</p> |
| <p class="tableblock">Note: This behavior is the same as the <strong>add</strong>, <strong>min</strong>, and <strong>max</strong> inclusive scan built-in functions from <code>cl_khr_subgroups</code> and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the subgroup executing the kernel.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_scan_exclusive_add( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_min( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_max( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_mul( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of an exclusive scan operation, which is the summation, multiplication, minimum, or maximum of <em>value</em> for all active work items in the subgroup with a subgroup local ID less than this work item’s subgroup local ID.</p> |
| <p class="tableblock">If there is no active work item in the subgroup with a subgroup local ID less than this work item’s subgroup local ID then an identity value <code>I</code> is returned. |
| For <strong>add</strong>, the identity value is <code>0</code>. |
| For <strong>min</strong>, the identity value is the largest representable value for integer types, or <code>+INF</code> for floating point types. |
| For <strong>max</strong>, the identity value is the minimum representable value for integer types, or <code>-INF</code> for floating point types. |
| For <strong>mul</strong>, the identity value is <code>1</code>.</p> |
| <p class="tableblock">Note: This behavior is the same as the <strong>add</strong>, <strong>min</strong>, and <strong>max</strong> exclusive scan built-in functions from <code>cl_khr_subgroups</code> and OpenCL 2.1, except these functions support additional types and need not be encountered by all work items in the subgroup executing the kernel.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: The order of floating-point operations is not guaranteed for the subgroup scan and reduction built-in functions that operate on floating point types, and the order of operations may additionally be non-deterministic for a given subgroup.</p> |
| </div> |
| </div> |
| <div class="sect4"> |
| <h5 id="_bitwise_operations"><a class="anchor" href="#_bitwise_operations"></a>38.7.1.2. Bitwise Operations</h5> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions that perform simple bitwise integer operations across work items in a subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, or <code>ulong</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 60%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_reduce_and( |
| gentype value ) |
| gentype sub_group_non_uniform_reduce_or( |
| gentype value ) |
| gentype sub_group_non_uniform_reduce_xor( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the bitwise <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>value</em> for all active work items in the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_scan_inclusive_and( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_or( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_inclusive_xor( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of an inclusive scan operation, which is the bitwise <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>value</em> for all active work items in the subgroup with a subgroup local ID less than or equal to this work item’s subgroup local ID.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_non_uniform_scan_exclusive_and( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_or( |
| gentype value ) |
| gentype sub_group_non_uniform_scan_exclusive_xor( |
| gentype value )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of an exclusive scan operation, which is the bitwise <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>value</em> for all active work items in the subgroup with a subgroup local ID less than this work item’s subgroup local ID.</p> |
| <p class="tableblock">If there is no active work item in the subgroup with a subgroup local ID less than this work item’s subgroup local ID then an identity value <code>I</code> is returned. |
| For <strong>and</strong>, the identity value is <code>~0</code> (all bits set). |
| For <strong>or</strong> and <strong>xor</strong>, the identity value is <code>0</code>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect4"> |
| <h5 id="_logical_operations"><a class="anchor" href="#_logical_operations"></a>38.7.1.3. Logical Operations</h5> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions that perform simple logical operations across work items in a subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For these functions, a non-zero <em>predicate</em> argument or return value is logically <code>true</code> and a zero <em>predicate</em> argument or return value is logically <code>false</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 66.6666%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_non_uniform_reduce_logical_and( |
| <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_reduce_logical_or( |
| <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_reduce_logical_xor( |
| <span class="predefined-type">int</span> predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the logical <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>predicate</em> for all active work items in the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_non_uniform_scan_inclusive_logical_and( |
| <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_inclusive_logical_or( |
| <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_inclusive_logical_xor( |
| <span class="predefined-type">int</span> predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of an inclusive scan operation, which is the logical <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>predicate</em> for all active work items in the subgroup with a subgroup local ID less than or equal to this work item’s subgroup local ID.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_non_uniform_scan_exclusive_logical_and( |
| <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_exclusive_logical_or( |
| <span class="predefined-type">int</span> predicate ) |
| <span class="predefined-type">int</span> sub_group_non_uniform_scan_exclusive_logical_xor( |
| <span class="predefined-type">int</span> predicate )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the result of an exclusive scan operation, which is the logical <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>predicate</em> for all active work items in the subgroup with a subgroup local ID less than this work item’s subgroup local ID.</p> |
| <p class="tableblock">If there is no active work item in the subgroup with a subgroup local ID less than this work item’s subgroup local ID then an identity value <code>I</code> is returned. |
| For <strong>and</strong>, the identity value is <code>true</code> (non-zero). |
| For <strong>or</strong> and <strong>xor</strong>, the identity value is <code>false</code> (zero).</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_shuffle"><a class="anchor" href="#cl_khr_subgroup_shuffle"></a>38.8. General Purpose Shuffles</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_shuffle</code>. |
| This extension adds additional ways to exchange data among work items in a subgroup.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_add_a_new_section_6_15_x_subgroup_shuffle_built_in_functions"><a class="anchor" href="#_add_a_new_section_6_15_x_subgroup_shuffle_built_in_functions"></a>38.8.1. Add a new Section 6.15.X - Subgroup Shuffle Built-in Functions</h4> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions that allow work items in a subgroup to exchange data. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_shuffle( |
| gentype value, uint index )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>value</em> for the work item with subgroup local ID equal to <em>index</em>. |
| The shuffle <em>index</em> need not be the same for all work items in the subgroup.</p> |
| <p class="tableblock">The return value is undefined if the work item with subgroup local ID equal to <em>index</em> is inactive or if <em>index</em> is greater than or equal to the size of the subgroup.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_shuffle_xor( |
| gentype value, uint mask )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>value</em> for the work item with subgroup local ID equal to this work item’s subgroup local ID xor’d with <em>mask</em>. |
| The shuffle <em>mask</em> need not be the same for all work items in the subgroup.</p> |
| <p class="tableblock">The return value is undefined if the work item with subgroup local ID equal to the calculated index is inactive or if the calculated index is greater than or equal to the size of the subgroup.</p> |
| <p class="tableblock">This is a specialized function that may perform better than the equivalent <code>sub_group_shuffle</code> on some implementations.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_shuffle_relative"><a class="anchor" href="#cl_khr_subgroup_shuffle_relative"></a>38.9. Relative Shuffles</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_shuffle_relative</code>. |
| This extension adds specialized ways to exchange data among work items in a subgroup that may perform better on some implementations.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_add_a_new_section_6_15_x_subgroup_relative_shuffle_built_in_functions"><a class="anchor" href="#_add_a_new_section_6_15_x_subgroup_relative_shuffle_built_in_functions"></a>38.9.1. Add a new Section 6.15.X - Subgroup Relative Shuffle Built-in Functions</h4> |
| <div class="paragraph"> |
| <p>The table below describes specialized OpenCL C programming language built-in functions that allow work items in a subgroup to exchange data. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_shuffle_up( |
| gentype value, uint delta )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>value</em> for the work item with subgroup local ID equal to this work item’s subgroup local ID minus <em>delta</em>. |
| The shuffle <em>delta</em> need not be the same for all work items in the subgroup.</p> |
| <p class="tableblock">The return value is undefined if the work item with subgroup local ID equal to the calculated index is inactive, or <em>delta</em> is greater than this work item’s subgroup local ID.</p> |
| <p class="tableblock">This is a specialized function that may perform better than the equivalent <code>sub_group_shuffle</code> on some implementations.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_shuffle_down( |
| gentype value, uint delta )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns <em>value</em> for the work item with subgroup local ID equal to this work item’s subgroup local ID plus <em>delta</em>. |
| The shuffle <em>delta</em> need not be the same for all work items in the subgroup.</p> |
| <p class="tableblock">The return value is undefined if the work item with subgroup local ID equal to the calculated index is inactive, or this work item’s subgroup local ID plus <em>delta</em> is greater than or equal to the size of the subgroup.</p> |
| <p class="tableblock">This is a specialized function that may perform better than the equivalent <code>sub_group_shuffle</code> on some implementations.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="cl_khr_subgroup_clustered_reduce"><a class="anchor" href="#cl_khr_subgroup_clustered_reduce"></a>38.10. Clustered Reductions</h3> |
| <div class="paragraph"> |
| <p>This section describes functionality added by <code>cl_khr_subgroup_clustered_reduce</code>. |
| This extension adds support for clustered reductions that operate on a subset of work items in the subgroup.</p> |
| </div> |
| <div class="sect3"> |
| <h4 id="_add_a_new_section_6_15_x_subgroup_clustered_reduction_built_in_functions"><a class="anchor" href="#_add_a_new_section_6_15_x_subgroup_clustered_reduction_built_in_functions"></a>38.10.1. Add a new Section 6.15.X - Subgroup Clustered Reduction Built-in Functions</h4> |
| <div class="paragraph"> |
| <p>This section describes arithmetic operations that are performed on a subset of work items in a subgroup, referred to as a cluster. |
| A cluster is described by a specified cluster size. |
| Work items in a subgroup are assigned to clusters such that for cluster size <em>n</em>, the <em>n</em> work items in the subgroup with the smallest subgroup local IDs are assigned to the first cluster, then the <em>n</em> remaining work items with the smallest subgroup local IDs are assigned to the next cluster, and so on. |
| The specified cluster size must be an integer constant expression that is a power-of-two. |
| Behavior is undefined if the specified cluster size is greater than the maximum size of a subgroup within the dispatch.</p> |
| </div> |
| <div class="sect4"> |
| <h5 id="_arithmetic_operations_2"><a class="anchor" href="#_arithmetic_operations_2"></a>38.10.1.1. Arithmetic Operations</h5> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions that perform simple arithmetic operations on a cluster of work items in a subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, <code>ulong</code>, <code>float</code>, <code>double</code> (if double precision is supported), or <code>half</code> (if half precision is supported).</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_clustered_reduce_add( |
| gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_mul( |
| gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_min( |
| gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_max( |
| gentype value, uint clustersize )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the summation, multiplication, minimum, or maximum of <em>value</em> for all active work items in the subgroup within a cluster of the specified <em>clustersize</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="paragraph"> |
| <p>Note: The order of floating-point operations is not guaranteed for the subgroup clustered reduction built-in functions that operate on floating point types, and the order of operations may additionally be non-deterministic for a given subgroup.</p> |
| </div> |
| </div> |
| <div class="sect4"> |
| <h5 id="_bitwise_operations_2"><a class="anchor" href="#_bitwise_operations_2"></a>38.10.1.2. Bitwise Operations</h5> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions to perform simple bitwise integer operations across a cluster of work items in a subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For the functions below, the generic type name <code>gentype</code> may be one of the supported built-in scalar data types <code>char</code>, <code>uchar</code>, <code>short</code>, <code>ushort</code>, <code>int</code>, <code>uint</code>, <code>long</code>, or <code>ulong</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype sub_group_clustered_reduce_and( |
| gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_or( |
| gentype value, uint clustersize ) |
| gentype sub_group_clustered_reduce_xor( |
| gentype value, uint clustersize )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the bitwise <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>value</em> for all active work items in the subgroup within a cluster of the specified <em>clustersize</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect4"> |
| <h5 id="_logical_operations_2"><a class="anchor" href="#_logical_operations_2"></a>38.10.1.3. Logical Operations</h5> |
| <div class="paragraph"> |
| <p>The table below describes the OpenCL C programming language built-in functions to perform simple logical operations across a cluster of work items in a subgroup. |
| These functions need not be encountered by all work items in a subgroup executing the kernel. |
| For these functions, a non-zero <em>predicate</em> argument or return value is logically <code>true</code> and a zero <em>predicate</em> argument or return value is logically <code>false</code>.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 60%;"> |
| <col style="width: 40%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="predefined-type">int</span> sub_group_clustered_reduce_logical_and( |
| <span class="predefined-type">int</span> predicate, uint clustersize ) |
| <span class="predefined-type">int</span> sub_group_clustered_reduce_logical_or( |
| <span class="predefined-type">int</span> predicate, uint clustersize ) |
| <span class="predefined-type">int</span> sub_group_clustered_reduce_logical_xor( |
| <span class="predefined-type">int</span> predicate, uint clustersize )</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the logical <strong>and</strong>, <strong>or</strong>, or <strong>xor</strong> of <em>predicate</em> for all active work items in the subgroup within a cluster of the specified <em>clustersize</em>.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="extended-subgroups-mapping"><a class="anchor" href="#extended-subgroups-mapping"></a>38.11. Function Mapping and Capabilities</h3> |
| <div class="paragraph"> |
| <p>This section describes a possible mapping between OpenCL built-in functions and SPIR-V instructions and required SPIR-V capabilities.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This section is informational and non-normative.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3333%;"> |
| <col style="width: 33.3334%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>OpenCL C Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>SPIR-V BuiltIn or Instruction</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Enabling SPIR-V Capability</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For OpenCL 2.1 or <code>cl_khr_subgroups</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​size</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupSize</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Kernel</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​max_​sub_​group_​size</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupMaxSize</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Kernel</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​num_​sub_​groups</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>NumSubgroups</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Kernel</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​enqueued_​num_​sub_​groups</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>NumEnqueuedSubgroups</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Kernel</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​id</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupId</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Kernel</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​local_​id</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupLocalInvocationId</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Kernel</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​barrier</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpControlBarrier</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">None Needed</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​all</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupAll</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​any</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupAny</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​broadcast</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupBroadcast</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupIAdd</strong>, <strong>OpGroupFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMin</strong>, <strong>OpGroupUMin</strong>, <strong>OpGroupFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMax</strong>, <strong>OpGroupUMax</strong>, <strong>OpGroupFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​exclusive_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupIAdd</strong>, <strong>OpGroupFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​exclusive_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMin</strong>, <strong>OpGroupUMin</strong>, <strong>OpGroupFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​exclusive_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMax</strong>, <strong>OpGroupUMax</strong>, <strong>OpGroupFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​inclusive_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupIAdd</strong>, <strong>OpGroupFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​inclusive_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMin</strong>, <strong>OpGroupUMin</strong>, <strong>OpGroupFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​inclusive_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMax</strong>, <strong>OpGroupUMax</strong>, <strong>OpGroupFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reserve_​read_​pipe</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupReserveReadPipePackets</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Pipes</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reserve_​write_​pipe</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupReserveWritePipePackets</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Pipes</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​commit_​read_​pipe</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupCommitReadPipe</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Pipes</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​commit_​write_​pipe</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupCommitWritePipe</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Pipes</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​kernel_​sub_​group_​count_​for_​ndrange</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGetKernelNDrangeSubGroupCount</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DeviceEnqueue</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​kernel_​max_​sub_​group_​size_​for_​ndrange</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGetKernelNDrangeMaxSubGroupSize</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>DeviceEnqueue</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_extended_types</code>:<br> |
| Note: This extension adds new types to uniform subgroup operations.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​broadcast</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupBroadcast</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupIAdd</strong>, <strong>OpGroupFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMin</strong>, <strong>OpGroupUMin</strong>, <strong>OpGroupFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMax</strong>, <strong>OpGroupUMax</strong>, <strong>OpGroupFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​exclusive_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupIAdd</strong>, <strong>OpGroupFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​exclusive_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMin</strong>, <strong>OpGroupUMin</strong>, <strong>OpGroupFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​exclusive_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMax</strong>, <strong>OpGroupUMax</strong>, <strong>OpGroupFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​inclusive_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupIAdd</strong>, <strong>OpGroupFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​inclusive_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMin</strong>, <strong>OpGroupUMin</strong>, <strong>OpGroupFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​scan_​inclusive_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupSMax</strong>, <strong>OpGroupUMax</strong>, <strong>OpGroupFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>Groups</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_non_uniform_vote</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​elect</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformElect</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniform</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​all</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformAll</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformVote</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​any</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformAny</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformVote</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​all_​equal</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformAllEqual</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformVote</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_ballot</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​broadcast</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBroadcast</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​broadcast_​first</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBroadcastFirst</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallot</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​inverse_​ballot</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformInverseBallot</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot_​bit_​extract</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallotBitExtract</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot_​bit_​count</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallotBitCount</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot_​inclusive_​scan</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallotBitCount</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot_​exclusive_​scan</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallotBitCount</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot_​find_​lsb</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallotFindLSB</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​ballot_​find_​msb</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBallotFindMSB</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​eq_​mask</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupEqMask</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​ge_​mask</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupGeMask</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​gt_​mask</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupGtMask</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​le_​mask</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupLeMask</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>get_​sub_​group_​lt_​mask</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>SubgroupLtMask</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformBallot</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_non_uniform_arithmetic</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIAdd</strong>, <strong>OpGroupNonUniformFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​mul</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIMul</strong>, <strong>OpGroupNonUniformFMul</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMin</strong>, <strong>OpGroupNonUniformUMin</strong>, <strong>OpGroupNonUniformFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMax</strong>, <strong>OpGroupNonUniformUMax</strong>, <strong>OpGroupNonUniformFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​logical_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​logical_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​reduce_​logical_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIAdd</strong>, <strong>OpGroupNonUniformFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​mul</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIMul</strong>, <strong>OpGroupNonUniformFMul</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMin</strong>, <strong>OpGroupNonUniformUMin</strong>, <strong>OpGroupNonUniformFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMax</strong>, <strong>OpGroupNonUniformUMax</strong>, <strong>OpGroupNonUniformFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​logical_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​logical_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​inclusive_​logical_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​exclusive_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIAdd</strong>, <strong>OpGroupNonUniformFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​exclusive_​mul</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIMul</strong>, <strong>OpGroupNonUniformFMul</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​scan_​exclusive_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMin</strong>, <strong>OpGroupNonUniformUMin</strong>, <strong>OpGroupNonUniformFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMax</strong>, <strong>OpGroupNonUniformUMax</strong>, <strong>OpGroupNonUniformFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​non_​uniform_​​scan_​exclusive_​logical_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformArithmetic</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_shuffle</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​shuffle</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformShuffle</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformShuffle</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​shuffle_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformShuffleXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformShuffle</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_shuffle_relative</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​shuffle_​up</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformShuffleUp</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformShuffleRelative</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​shuffle_​down</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformShuffleDown</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformShuffleRelative</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top" colspan="3"><p class="tableblock">For <code>cl_khr_subgroup_clustered_reduce</code>:</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​clustered_​reduce_​add</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIAdd</strong>, <strong>OpGroupNonUniformFAdd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​clustered_​reduce_​mul</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformIMul</strong>, <strong>OpGroupNonUniformFMul</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​clustered_​reduce_​min</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMin</strong>, <strong>OpGroupNonUniformUMin</strong>, <strong>OpGroupNonUniformFMin</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​max</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformSMax</strong>, <strong>OpGroupNonUniformUMax</strong>, <strong>OpGroupNonUniformFMax</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformBitwiseXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​logical_​and</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalAnd</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​logical_​or</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalOr</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>sub_​group_​reduce_​clustered_​logical_​xor</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>OpGroupNonUniformLogicalXor</strong></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><strong>GroupNonUniformClustered</strong></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_pci_bus_info"><a class="anchor" href="#cl_khr_pci_bus_info"></a>39. PCI Bus Information Query</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension adds a new query to obtain PCI bus information about an OpenCL |
| device.</p> |
| </div> |
| <div class="paragraph"> |
| <p>Not all OpenCL devices have PCI bus information, either due to the device not |
| being connected to the system through a PCI interface or due to platform |
| specific restrictions and policies. Thus this extension is only expected to be |
| supported by OpenCL devices which can provide the information.</p> |
| </div> |
| <div class="paragraph"> |
| <p>As a consequence, applications should always check for the presence of the |
| extension string for each individual OpenCL device for which they intend to |
| issue the new query and should not make any assumptions about the |
| availability of the extension on any given platform.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_38"><a class="anchor" href="#_general_information_38"></a>39.1. General information</h3> |
| <div class="sect3"> |
| <h4 id="_name_strings_2"><a class="anchor" href="#_name_strings_2"></a>39.1.1. Name Strings</h4> |
| <div class="paragraph"> |
| <p><code>cl_khr_pci_bus_info</code></p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_version_history_38"><a class="anchor" href="#_version_history_38"></a>39.1.2. Version History</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2021-04-19</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Initial version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_dependencies_2"><a class="anchor" href="#_dependencies_2"></a>39.1.3. Dependencies</h4> |
| <div class="paragraph"> |
| <p>This extension is written against the OpenCL API Specification Version V3.0.6.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension requires OpenCL 1.0.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_types_3"><a class="anchor" href="#_new_api_types_3"></a>39.2. New API Types</h3> |
| <div class="paragraph"> |
| <p>Structure returned by the device info query for <code>CL_DEVICE_PCI_BUS_INFO_KHR</code>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="keyword">typedef</span> <span class="keyword">struct</span> _cl_device_pci_bus_info_khr { |
| cl_uint pci_domain; |
| cl_uint pci_bus; |
| cl_uint pci_device; |
| cl_uint pci_function; |
| } cl_device_pci_bus_info_khr;</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_enums_2"><a class="anchor" href="#_new_api_enums_2"></a>39.3. New API Enums</h3> |
| <div class="paragraph"> |
| <p>Accepted value for the <em>param_name</em> parameter to <strong>clGetDeviceInfo</strong>:</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c"><span class="preprocessor">#define</span> CL_DEVICE_PCI_BUS_INFO_KHR <span class="hex">0x410F</span></code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_the_opencl_api_specification_2"><a class="anchor" href="#_modifications_to_the_opencl_api_specification_2"></a>39.4. Modifications to the OpenCL API Specification</h3> |
| <div class="sect3"> |
| <h4 id="_section_4_2_querying_devices"><a class="anchor" href="#_section_4_2_querying_devices"></a>39.4.1. Section 4.2 - Querying Devices:</h4> |
| <div class="paragraph"> |
| <p>Add to Table 5 - OpenCL Device Queries:</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 5. OpenCL Device Queries</caption> |
| <colgroup> |
| <col style="width: 30%;"> |
| <col style="width: 20%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>cl_device_info</strong></th> |
| <th class="tableblock halign-left valign-top">Return Type</th> |
| <th class="tableblock halign-left valign-top">Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>CL_DEVICE_PCI_BUS_INFO_KHR</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><code>cl_device_pci_bus_info_khr</code></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns PCI bus information for the device.</p> |
| <p class="tableblock"> The PCI bus information is returned as a single structure that includes |
| the PCI bus domain, the PCI bus identifier, the PCI device identifier, and |
| the PCI device function identifier.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_extended_bit_ops"><a class="anchor" href="#cl_khr_extended_bit_ops"></a>40. Extended Bit Operations</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension adds OpenCL C functions for performing extended bit operations. |
| Specifically, the following functions are added:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>bitfield insert: insert bits from one source operand into another source operand.</p> |
| </li> |
| <li> |
| <p>bitfield extract: extract bits from a source operand, with sign- or zero-extension.</p> |
| </li> |
| <li> |
| <p>bit reverse: reverse the bits of a source operand.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_39"><a class="anchor" href="#_general_information_39"></a>40.1. General Information</h3> |
| <div class="sect3"> |
| <h4 id="_name_strings_3"><a class="anchor" href="#_name_strings_3"></a>40.1.1. Name Strings</h4> |
| <div class="paragraph"> |
| <p><code>cl_khr_extended_bit_ops</code></p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_version_history_39"><a class="anchor" href="#_version_history_39"></a>40.1.2. Version History</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2021-04-22</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Initial version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_dependencies_3"><a class="anchor" href="#_dependencies_3"></a>40.1.3. Dependencies</h4> |
| <div class="paragraph"> |
| <p>This extension is written against the OpenCL 3.0 C Language Specification and the OpenCL SPIR-V Environment Specification Version V3.0.6.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension requires OpenCL 1.0.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_opencl_c_functions"><a class="anchor" href="#_new_opencl_c_functions"></a>40.2. New OpenCL C Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code>gentype bitfield_insert( gentype base, gentype insert, uint offset, uint count ) |
| igentype bitfield_extract_signed( gentype base, uint offset, uint count ) |
| ugentype bitfield_extract_unsigned( gentype base, uint offset, uint count ) |
| gentype bit_reverse( gentype base )</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_the_opencl_c_specification"><a class="anchor" href="#_modifications_to_the_opencl_c_specification"></a>40.3. Modifications to the OpenCL C Specification</h3> |
| <div class="sect3"> |
| <h4 id="_modify_section_6_15_3_integer_functions"><a class="anchor" href="#_modify_section_6_15_3_integer_functions"></a>40.3.1. Modify Section 6.15.3. Integer Functions:</h4> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Add a new Section 6.15.3.X. Extended Bit Operations: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>The functions described in the following table can be used with built-in scalar or vector integer types to perform extended bit operations. |
| The functions that operate on vector types operate component-wise. |
| The description is per-component.</p> |
| </div> |
| <div class="paragraph"> |
| <p>In the table below, the generic type name <code>gentype</code> refers to the built-in integer types <code>char</code>, <code>char<em>n</em></code>, <code>uchar</code>, <code>uchar<em>n</em></code>, <code>short</code>, <code>short<em>n</em></code>, <code>ushort</code>, <code>ushort<em>n</em></code>, <code>int</code>, <code>int<em>n</em></code>, <code>uint</code>, <code>uint<em>n</em></code>, <code>long</code>, <code>long<em>n</em></code>, <code>ulong</code>, and <code>ulong<em>n</em></code>. |
| The generic type name <code>igentype</code> refers to the built-in signed integer types <code>char</code>, <code>char<em>n</em></code>, <code>short</code>, <code>short<em>n</em></code>, <code>int</code>, <code>int<em>n</em></code>, <code>long</code>, and <code>long<em>n</em></code>. |
| The generic type name <code>ugentype</code> refers to the built-in unsigned integer types <code>uchar</code>, <code>uchar<em>n</em></code>, <code>ushort</code>, <code>ushort<em>n</em></code>, <code>uint</code>, <code>uint<em>n</em></code>, <code>ulong</code>, and <code>ulong<em>n</em></code>. |
| <em>n</em> is 2, 3, 4, 8, or 16.</p> |
| </div> |
| <table class="tableblock frame-all grid-all stretch"> |
| <caption class="title">Table 47. Built-in Scalar and Vector Extended Bit Operations</caption> |
| <colgroup> |
| <col style="width: 50%;"> |
| <col style="width: 50%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Function</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype bitfield_insert( |
| gentype base, gentype insert, |
| uint offset, uint count)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns a copy of <em>base</em>, with a modified bitfield that comes from <em>insert</em>.</p> |
| <p class="tableblock">Any bits of the result value numbered outside [<em>offset</em>, <em>offset</em> + <em>count</em> - 1] (inclusive) will come from the corresponding bits in <em>base</em>.</p> |
| <p class="tableblock">Any bits of the result value numbered inside [<em>offset</em>, <em>offset</em> + <em>count</em> - 1] (inclusive) will come from the bits numbered [0, <em>count</em> - 1] (inclusive) of <em>insert</em>.</p> |
| <p class="tableblock"><em>count</em> is the number of bits to be modified. |
| If <em>count</em> equals 0, the return value will be equal to <em>base</em>.</p> |
| <p class="tableblock">If <em>count</em> or <em>offset</em> or <em>offset</em> + <em>count</em> is greater than the number of bits in <code>gentype</code> (for scalar types) or components of <code>gentype</code> (for vector types), the result is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">igentype bitfield_extract_signed( |
| gentype base, |
| uint offset, uint count)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns an extracted bitfield from <em>base</em> with sign extension. |
| The type of the return value is always a signed type.</p> |
| <p class="tableblock">The bits of <em>base</em> numbered in [<em>offset</em>, <em>offset</em> + <em>count</em> - 1] (inclusive) are returned as the bits numbered in [0, <em>count</em> - 1] (inclusive) of the result. |
| The remaining bits in the result will be sign extended by replicating the bit numbered <em>offset</em> + <em>count</em> - 1 of <em>base</em>.</p> |
| <p class="tableblock"><em>count</em> is the number of bits to be extracted. |
| If <em>count</em> equals 0, the result is 0.</p> |
| <p class="tableblock">If <em>count</em> or <em>offset</em> or <em>offset</em> + <em>count</em> is greater than the number of bits in <code>gentype</code> (for scalar types) or components of <code>gentype</code> (for vector types), the result is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">ugentype bitfield_extract_unsigned( |
| gentype base, |
| uint offset, uint count)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns an extracted bitfield from <em>base</em> with zero extension. |
| The type of the return value is always an unsigned type.</p> |
| <p class="tableblock">The bits of <em>base</em> numbered in [<em>offset</em>, <em>offset</em> + <em>count</em> - 1] (inclusive) are returned as the bits numbered in [0, <em>count</em> - 1] (inclusive) of the result. |
| The remaining bits in the result will be zero.</p> |
| <p class="tableblock"><em>count</em> is the number of bits to be extracted. |
| If <em>count</em> equals 0, the result is 0.</p> |
| <p class="tableblock">If <em>count</em> or <em>offset</em> or <em>offset</em> + <em>count</em> is greater than the number of bits in <code>gentype</code> (for scalar types) or components of <code>gentype</code> (for vector types), the result is undefined.</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><div class="content"><div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code data-lang="c">gentype bit_reverse( |
| gentype base)</code></pre> |
| </div> |
| </div></div></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Returns the value of <em>base</em> with reversed bits. |
| That is, the bit numbered <em>n</em> of the result value will be taken from the bit numbered <em>width</em> - <em>n</em> - 1 of <em>base</em> (for scalar types) or a component of <em>base</em> (for vector types), where <em>width</em> is the number of bits of <code>gentype</code> (for scalar types) or components of <code>gentype</code> (for vector types).</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_the_opencl_spir_v_environment_specification"><a class="anchor" href="#_modifications_to_the_opencl_spir_v_environment_specification"></a>40.4. Modifications to the OpenCL SPIR-V Environment Specification</h3> |
| <div class="sect3"> |
| <h4 id="_add_to_section_5_opencl_extensions"><a class="anchor" href="#_add_to_section_5_opencl_extensions"></a>40.4.1. Add to Section 5 - OpenCL Extensions</h4> |
| <div class="dlist"> |
| <dl> |
| <dt class="hdlist1">Add a new Section 5.2.X - <code>cl_khr_extended_bit_ops</code>: </dt> |
| <dd> |
| <div class="openblock"> |
| <div class="content"> |
| <div class="paragraph"> |
| <p>If the OpenCL environment supports the extension <code>cl_khr_extended_bit_ops</code>, then the environment must accept modules that declare use of the extension <code>SPV_KHR_bit_instructions</code> via <strong>OpExtension</strong>.</p> |
| </div> |
| <div class="paragraph"> |
| <p>If the OpenCL environment supports the extension <code>cl_khr_extended_bit_ops</code> and use of the SPIR-V extension <code>SPV_KHR_bit_instructions</code> is declared in the module via <strong>OpExtension</strong>, then the environment must accept modules that declare the <strong>BitInstructions</strong> capability.</p> |
| </div> |
| </div> |
| </div> |
| </dd> |
| </dl> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="cl_khr_suggested_local_work_size"><a class="anchor" href="#cl_khr_suggested_local_work_size"></a>41. Suggested Local Work Size Query</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>This extension adds the ability to query a suggested local work group size for a kernel running on a device for a specified global work size and global work offset. |
| The suggested local work group size will match the work group size that would be chosen if the kernel were enqueued with the specified global work size and global work offset and a <code>NULL</code> local work size.</p> |
| </div> |
| <div class="paragraph"> |
| <p>By using the suggested local work group size query an application has greater insight into the local work group size chosen by the OpenCL implementation, and the OpenCL implementation need not re-compute the local work group size if the same kernel is enqueued multiple times with the same parameters.</p> |
| </div> |
| <div class="sect2"> |
| <h3 id="_general_information_40"><a class="anchor" href="#_general_information_40"></a>41.1. General Information</h3> |
| <div class="sect3"> |
| <h4 id="_name_strings_4"><a class="anchor" href="#_name_strings_4"></a>41.1.1. Name Strings</h4> |
| <div class="paragraph"> |
| <p><code>cl_khr_suggested_local_work_size</code></p> |
| </div> |
| </div> |
| <div class="sect3"> |
| <h4 id="_version_history_40"><a class="anchor" href="#_version_history_40"></a>41.1.2. Version History</h4> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 20%;"> |
| <col style="width: 20%;"> |
| <col style="width: 60%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Date</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Version</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Description</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2021-04-22</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">1.0.0</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Initial version.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="sect3"> |
| <h4 id="_dependencies_4"><a class="anchor" href="#_dependencies_4"></a>41.1.3. Dependencies</h4> |
| <div class="paragraph"> |
| <p>This extension is written against the OpenCL API Specification Version V3.0.6.</p> |
| </div> |
| <div class="paragraph"> |
| <p>This extension requires OpenCL 1.0.</p> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_new_api_functions_2"><a class="anchor" href="#_new_api_functions_2"></a>41.2. New API Functions</h3> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code>cl_int clGetKernelSuggestedLocalWorkSizeKHR( |
| cl_command_queue command_queue, |
| cl_kernel kernel, |
| cl_uint work_dim, |
| const size_t *global_work_offset, |
| const size_t *global_work_size, |
| size_t *suggested_local_work_size);</code></pre> |
| </div> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_modifications_to_the_opencl_api_specification_3"><a class="anchor" href="#_modifications_to_the_opencl_api_specification_3"></a>41.3. Modifications to the OpenCL API Specification</h3> |
| <div class="sect3"> |
| <h4 id="_section_5_9_kernel_objects"><a class="anchor" href="#_section_5_9_kernel_objects"></a>41.3.1. Section 5.9 - Kernel Objects:</h4> |
| <div class="sect4"> |
| <h5 id="_new_section_5_9_4_x_suggested_local_work_size_query"><a class="anchor" href="#_new_section_5_9_4_x_suggested_local_work_size_query"></a>41.3.1.1. New Section 5.9.4.X - Suggested Local Work Size Query</h5> |
| <div class="paragraph"> |
| <p>To query a suggested local work size for a kernel object, call the function</p> |
| </div> |
| <div class="listingblock"> |
| <div class="content"> |
| <pre class="CodeRay highlight"><code>cl_int clGetKernelSuggestedLocalWorkSizeKHR( |
| cl_command_queue command_queue, |
| cl_kernel kernel, |
| cl_uint work_dim, |
| const size_t *global_work_offset, |
| const size_t *global_work_size, |
| size_t *suggested_local_work_size);</code></pre> |
| </div> |
| </div> |
| <div class="paragraph"> |
| <p>The returned suggested local work size is expected to match the local work size that would be chosen if the specified kernel object, with the same kernel arguments, were enqueued into the specified command queue with the specified global work size, specified global work offset, and with a <code>NULL</code> local work size.</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><em>command_queue</em> specifies the command queue and device for the query.</p> |
| </li> |
| <li> |
| <p><em>kernel</em> specifies the kernel object and kernel arguments for the query. |
| The OpenCL context associated with <em>kernel</em> and <em>command_queue</em> must be the same.</p> |
| </li> |
| <li> |
| <p><em>work_dim</em> specifies the number of work dimensions in the input global work offset and global work size, and the output suggested local work size.</p> |
| </li> |
| <li> |
| <p><em>global_work_offset</em> can be used to specify an array of at least <em>work_dim</em> global ID offset values for the query. |
| This is optional and may be <code>NULL</code> to indicate there is no global ID offset.</p> |
| </li> |
| <li> |
| <p><em>global_work_size</em> is an array of at least <em>work_dim</em> values describing the global work size for the query.</p> |
| </li> |
| <li> |
| <p><em>suggested_local_work_size</em> is an output array of at least <em>work_dim</em> values that will contain the result of the query.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="paragraph"> |
| <p><strong>clGetKernelSuggestedLocalWorkSizeKHR</strong> returns <code>CL_SUCCESS</code> if the query executed successfully. |
| Otherwise, it returns one of the following errors:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>CL_INVALID_COMMAND_QUEUE</code> if <em>command_queue</em> is not a valid host command queue.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_KERNEL</code> if <em>kernel</em> is not a valid kernel object.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_CONTEXT</code> if the context associated with <em>kernel</em> is not the same as the context associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_PROGRAM_EXECUTABLE</code> if there is no successfully built program executable available for <em>kernel</em> for the device associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_KERNEL_ARGS</code> if all argument values for <em>kernel</em> have not been set.</p> |
| </li> |
| <li> |
| <p><code>CL_MISALIGNED_SUB_BUFFER_OFFSET</code> if a sub-buffer object is set as an argument to <em>kernel</em> and the offset specified when the sub-buffer object was created is not aligned to <code>CL_DEVICE_MEM_BASE_ADDR_ALIGN</code> for the device associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_IMAGE_SIZE</code> if an image object is set as an argument to <em>kernel</em> and the image dimensions are not supported by the device associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_IMAGE_FORMAT_NOT_SUPPORTED</code> if an image object is set as an argument to <em>kernel</em> and the image format is not supported by the device associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_OPERATION</code> if an SVM pointer is set as an argument to <em>kernel</em> and the device associated with <em>command_queue</em> does not support SVM or the required SVM capabilities for the SVM pointer.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_WORK_DIMENSION</code> if <em>work_dim</em> is not a valid value (i.e. a value between 1 and <code>CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS</code>).</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_GLOBAL_WORK_SIZE</code> if <em>global_work_size</em> is <code>NULL</code> or if any of the values specified in <em>global_work_size</em> are 0.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_GLOBAL_WORK_SIZE</code> if any of the values specified in <em>global_work_size</em> exceed the maximum value representable by <code>size_t</code> on the device associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_INVALID_GLOBAL_OFFSET</code> if the value specified in <em>global_work_size</em> plus the corresponding value in <em>global_work_offset</em> for any dimension exceeds the maximum value representable by <code>size_t</code> on the device associated with <em>command_queue</em>.</p> |
| </li> |
| <li> |
| <p><code>CL_OUT_OF_RESOURCES</code> if there is a failure to allocate resources required by the OpenCL implementation on the device.</p> |
| </li> |
| <li> |
| <p><code>CL_OUT_OF_HOST_RESOURCES</code> if there is a failure to allocate resources required by the OpenCL implementation on the host.</p> |
| </li> |
| </ul> |
| </div> |
| <div class="admonitionblock note"> |
| <table> |
| <tr> |
| <td class="icon"> |
| <i class="fa icon-note" title="Note"></i> |
| </td> |
| <td class="content"> |
| These error conditions are consistent with error conditions for <strong>clEnqueueNDRangeKernel</strong>. |
| </td> |
| </tr> |
| </table> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="spirv_extensions"><a class="anchor" href="#spirv_extensions"></a>42. Extensions to the OpenCL SPIR-V Environment</h2> |
| <div class="sectionbody"> |
| <div class="paragraph"> |
| <p>An OpenCL SPIR-V environment may be modified by OpenCL extensions. |
| Please refer to the OpenCL SPIR-V Environment Specification for descriptions of how OpenCL extensions modify an OpenCL SPIR-V environment. |
| In addition to the extensions described in this document, the OpenCL SPIR-V Environment Specification also describes how the following OpenCL extensions modify an OpenCL SPIR-V environment:</p> |
| </div> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p><code>cl_khr_spirv_no_integer_wrap_decoration</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_spirv_extended_debug_info</code></p> |
| </li> |
| <li> |
| <p><code>cl_khr_spirv_linkonce_odr</code></p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_extensions_promoted_to_core_features"><a class="anchor" href="#_extensions_promoted_to_core_features"></a>Appendix A: Extensions Promoted to Core Features</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="_for_opencl_1_1"><a class="anchor" href="#_for_opencl_1_1"></a>A.1. For OpenCL 1.1:</h3> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The functionality previously described by <strong>cl_khr_byte_addressable_store</strong> is now part of the core feature set.</p> |
| </li> |
| <li> |
| <p>The functionality previously described by <strong>cl_khr_global_int32_base_atomics</strong>, <strong>cl_khr_global_int32_extended_atomics</strong>, <strong>cl_khr_local_int32_base_atomics</strong>, and <strong>cl_khr_local_int32_extended_atomics</strong> is now part of the core feature set.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_for_opencl_1_2"><a class="anchor" href="#_for_opencl_1_2"></a>A.2. For OpenCL 1.2:</h3> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The functionality previously described by <strong>cl_khr_fp64</strong> is now an optional core feature.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_for_opencl_2_0"><a class="anchor" href="#_for_opencl_2_0"></a>A.3. For OpenCL 2.0:</h3> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The functionality described by <strong>cl_khr_3d_image_writes</strong> is now part of the core feature set.</p> |
| </li> |
| <li> |
| <p>The functionality described by <strong>cl_khr_create_command_queue</strong> is part of the core feature set.</p> |
| </li> |
| <li> |
| <p>The functionality described by <strong>cl_khr_depth_images</strong> is now part of the core feature set.</p> |
| </li> |
| <li> |
| <p>The functionality described by <strong>cl_khr_image2d_from_buffer</strong> is now part of the core feature set.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_for_opencl_2_1"><a class="anchor" href="#_for_opencl_2_1"></a>A.4. For OpenCL 2.1:</h3> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The functionality described by <strong>cl_khr_il_program</strong> is now part of the core feature set.</p> |
| </li> |
| <li> |
| <p>The API functionality described by <strong>cl_khr_subgroups</strong> is now part of the core API feature set, but the built-in functions described by <strong>cl_khr_subgroups</strong> must still be accessed as an extension to the OpenCL 2.0 C Language specification.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <div class="sect2"> |
| <h3 id="_for_opencl_3_0"><a class="anchor" href="#_for_opencl_3_0"></a>A.5. For OpenCL 3.0:</h3> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The built-in functions described by <strong>cl_khr_subgroups</strong> are now supported in OpenCL C 3.0 when the <code>__opencl_c_subgroups</code> feature is supported.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_deprecated_extensions"><a class="anchor" href="#_deprecated_extensions"></a>Appendix B: Deprecated Extensions</h2> |
| <div class="sectionbody"> |
| <div class="sect2"> |
| <h3 id="_for_opencl_1_1_2"><a class="anchor" href="#_for_opencl_1_1_2"></a>B.1. For OpenCL 1.1:</h3> |
| <div class="ulist"> |
| <ul> |
| <li> |
| <p>The <strong>cl_khr_select_fprounding_mode</strong> extension has been deprecated. |
| Its use is no longer recommended.</p> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </div> |
| </div> |
| <div class="sect1"> |
| <h2 id="_quick_reference"><a class="anchor" href="#_quick_reference"></a>Appendix C: Quick Reference</h2> |
| <div class="sectionbody"> |
| <table class="tableblock frame-all grid-all stretch"> |
| <colgroup> |
| <col style="width: 45.4545%;"> |
| <col style="width: 36.3636%;"> |
| <col style="width: 18.1819%;"> |
| </colgroup> |
| <thead> |
| <tr> |
| <th class="tableblock halign-left valign-top"><strong>Extension Name</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Brief Description</strong></th> |
| <th class="tableblock halign-left valign-top"><strong>Status</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_3d_image_writes">cl_khr_3d_image_writes</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write to 3D images</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 2.0</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_async_work_group_copy_fence">cl_khr_async_work_group_copy_fence</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Asynchronous Copy Fences</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Provisional Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_byte_addressable_store">cl_khr_byte_addressable_store</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Read and write from 8-bit and 16-bit pointers</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 1.1</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_create_command_queue">cl_khr_create_command_queue</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">API to Create Command Queues with Properties</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 2.0</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_d3d10_sharing">cl_khr_d3d10_sharing</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share Direct3D 10 Buffers and Textures with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_d3d11_sharing">cl_khr_d3d11_sharing</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share Direct3D 11 Buffers and Textures with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_depth_images">cl_khr_depth_images</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Single Channel Depth Images</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 2.0</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_device_enqueue_local_arg_types">cl_khr_device_enqueue_local_arg_types</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Pass Non-Void Local Pointers to Child Kernels</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_device_uuid">cl_khr_device_uuid</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Unique Device and Driver Identifier Queries</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_dx9_media_sharing">cl_khr_dx9_media_sharing</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share DirectX 9 Media Surfaces with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_egl_event">cl_khr_egl_event</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share EGL Sync Objects with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_egl_image">cl_khr_egl_image</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share EGL Images with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_extended_async_copies">cl_khr_extended_async_copies</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">2D and 3D Async Copies</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Provisional Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_extended_bit_ops">cl_khr_extended_bit_ops</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Bit Insert, Extract, and Reverse Operations</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_extended_versioning">cl_khr_extended_versioning</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extend versioning of platform, devices, extensions, etc.</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_fp16">cl_khr_fp16</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Operations on 16-bit Floating-Point Values</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_fp64">cl_khr_fp64</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Operations on 64-bit Floating-Point Values</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Optional Core Feature in OpenCL 1.2</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_gl_depth_images">cl_khr_gl_depth_images</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share OpenGL Depth Images with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_gl_event">cl_khr_gl_event</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share OpenGL Fence Sync Objects with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_gl_msaa_sharing">cl_khr_gl_msaa_sharing</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share OpenGL MSAA Textures with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_gl_sharing">cl_khr_gl_sharing</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Share OpenGL Buffers and Textures with OpenCL</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_int32_atomics">cl_khr_global_int32_base_atomics</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Basic Atomic Operations on 32-bit Integers in Global Memory</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 1.1</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_int32_atomics">cl_khr_global_int32_extended_atomics</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extended Atomic Operations on 32-bit Integers in Global Memory</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 1.1</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_icd-opencl">cl_khr_icd</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Installable Client Drivers</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_il_program">cl_khr_il_program</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Support for Intermediate Language (IL) Programs (SPIR-V)</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 2.1</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_image2d_from_buffer">cl_khr_image2d_from_buffer</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Create 2D Images from Buffers</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 2.0</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_initialize_memory">cl_khr_initialize_memory</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Initialize Local and Private Memory on Allocation</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_int64_atomics">cl_khr_int64_base_atomics</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Basic Atomic Operations on 64-bit Integers in Global and Local Memory</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_int64_atomics">cl_khr_int64_extended_atomics</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extended Atomic Operations on 64-bit Integers in Global and Local Memory</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_int32_atomics">cl_khr_local_int32_base_atomics</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Basic Atomic Operations on 32-bit Integers in Local Memory</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 1.1</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_int32_atomics">cl_khr_local_int32_extended_atomics</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extended Atomic Operations on 32-bit Integers in Local Memory</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 1.1</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_mipmap_image">cl_khr_mipmap_image</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Create and Use Images with Mipmaps</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_mipmap_image">cl_khr_mipmap_image_writes</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write to Images with Mipmaps</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_pci_bus_info">cl_khr_pci_bus_info</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Query PCI Bus Information for an OpenCL Device</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_priority_hints">cl_khr_priority_hints</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Create Command Queues with Different Priorities</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_select_fprounding_mode">cl_khr_select_fprounding_mode</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Set the Current Kernel Rounding Mode</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">DEPRECATED</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_spir">cl_khr_spir</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Standard Portable Intermediate Representation Programs</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension, Superseded by IL Programs / SPIR-V</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_srgb_image_writes">cl_khr_srgb_image_writes</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Write to sRGB Images</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroups">cl_khr_subgroups</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Sub-Groupings of Work Items</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Core Feature in OpenCL 2.1 (with minor changes)</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_ballot">cl_khr_subgroup_ballot</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Exchange Ballots Among Sub-Groupings of Work Items</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_clustered_reduce">cl_khr_subgroup_clustered_reduce</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Clustered Reductions for Sub-Groupings of Work Items</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_extended_types">cl_khr_subgroup_extended_types</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Additional Type Support for Sub-Group Functions</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_named_barrier">cl_khr_subgroup_named_barrier</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Barriers for Subsets of a Work Group</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_non_uniform_arithmetic">cl_khr_subgroup_non_uniform_arithmetic</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Sub-Group Arithmetic Functions in Non-Uniform Control Flow</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_non_uniform_vote">cl_khr_subgroup_non_uniform_vote</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Hold Votes Among Sub-Groupings of Work Items</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_shuffle">cl_khr_subgroup_shuffle</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">General-Purpose Shuffles Among Sub-Groupings of Work Items</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_subgroup_shuffle_relative">cl_khr_subgroup_shuffle_relative</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Relative Shuffles Among Sub-Groupings of Work Items</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_suggested_local_work_size">cl_khr_suggested_local_work_size</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Query a Suggested Local Work Size</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_terminate_context">cl_khr_terminate_context</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Terminate an OpenCL Context</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| <tr> |
| <td class="tableblock halign-left valign-top"><p class="tableblock"><a href="#cl_khr_throttle_hints">cl_khr_throttle_hints</a></p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Create Command Queues with Different Throttle Policies</p></td> |
| <td class="tableblock halign-left valign-top"><p class="tableblock">Extension</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| </div> |
| <div id="footer"> |
| <div id="footer-text"> |
| Version v3.0.7<br> |
| Last updated 2021-04-23 12:57:31 -0700 |
| </div> |
| </div> |
| |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.4/MathJax.js?config=TeX-MML-AM_HTMLorMML"></script> |
| </body> |
| </html> |