Merge pull request #50 from kpet/master

Update cl_arm_get_core_id specification and enums
diff --git a/extensions/arm/cl_arm_get_core_id.txt b/extensions/arm/cl_arm_get_core_id.txt
index 4962a85..5e05b56 100644
--- a/extensions/arm/cl_arm_get_core_id.txt
+++ b/extensions/arm/cl_arm_get_core_id.txt
@@ -8,12 +8,13 @@
-    Robert Elliott
-    Hui Chen
+    Robert Elliott, ARM Ltd. (robert.elliott 'at'
+    Hui Chen, ARM Ltd.       (hui.chen 'at'
+    Kevin Petit, ARM Ltd.    (kevin.petit 'at'
-    Robert Elliott, ARM (robert.elliott 'at'
+    Kevin Petit, ARM Ltd. (kevin.petit 'at'
@@ -21,7 +22,7 @@
-    Revision: #1, April 2nd, 2013
+    Revision: #2, Feb 26th, 2018
@@ -29,20 +30,25 @@
-    Requires OpenCL version 1.0 or later.
+    Requires OpenCL version 1.2 or later.
-    This extension provides a built-in function which returns the physical core
-    id (OpenCL Compute Unit) that a work-group is running on. This value is
+    This extension provides a built-in function which returns a unique ID
+    for the compute unit that a work-group is running on.  This value is
     uniform for a work-group.
     This value can be used for a core-specific cache or atomic pool where the
     storage is required to be in global memory and persistent (but not ordered)
-    between work-groups. This does not provide any additional ordering on top
+    between work-groups.  This does not provide any additional ordering on top
     of the existing guarantees between workgroups, nor does it provide any
     guarantee of concurrent execution.
+    The IDs for the compute units may not be consecutive and applications must
+    make sure they allocate enough memory to accommodate all the compute units
+    present on the device.  A device info query allows the application to
+    know the IDs associated with the compute units on a given device.
     The extension string cl_arm_core_id is returned for devices and platforms
     which support this extension.
@@ -56,20 +62,32 @@
 New Procedures and Functions
+    Device Info query
+      CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM (return type cl_ulong) returns a
+      bitfield where each bit set indicates the presence of a compute unit whose
+      ID is the bit position.  The highest ID for any compute unit on the device
+      is the position of the most significant bit set.  The total number of
+      elements an application should allocate in an array indexed by core IDs is
+      thus given by:
+        ALLOC = sizeof(cl_ulong) * 8 - CLZ(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM)
     Built-in Function
       uint arm_get_core_id( void )
-      Returns a uint that represents the physical core id, in the range 0 to
+      Returns the compute unit id as a uint, in the range [0, ALLOC-1]
-      // Host code, size pool based on required_instance_size * number_of_compute_units
+      // Host code, size pool based on required_instance_size * ALLOC
       size_t required_instance_size = 1024; 
-      cl_mem core_pool = clCreateBuffer( ctx, CL_MEM_READ_WRITE, required_instance_size * CL_DEVICE_MAX_COMPUTE_UNITS, NULL, NULL );
+      cl_mem core_pool = clCreateBuffer(context,
+                                        CL_MEM_READ_WRITE,
+                                        required_instance_size * ALLOC, NULL, NULL);
       // Device/Kernel code, select memory instance
       kernel void test( global char *per_core_pool, global char *input, uint required_instance_size  )
@@ -83,4 +101,10 @@
     OpenCL kernel code Now has access to:
       #pragma OPENCL EXTENSION cl_arm_core_id : enable
-      The define cl_arm_core_id is also present
+      The preprocessor macro cl_arm_core_id is also present
+Revision History
+    Revision: #1, Apr  2nd, 2013 - Initial revision
+    Revision: #2, Feb 26th, 2018 - Added support for sparsely allocated compute
+                                   unit IDs.
diff --git a/xml/cl.xml b/xml/cl.xml
index 75394cb..d06a58b 100644
--- a/xml/cl.xml
+++ b/xml/cl.xml
@@ -1035,7 +1035,7 @@
         <enum value="0x40BC"      name="CL_COMMAND_SVM_MEMFILL_ARM"/>
         <enum value="0x40BD"      name="CL_COMMAND_SVM_MAP_ARM"/>
         <enum value="0x40BE"      name="CL_COMMAND_SVM_UNMAP_ARM"/>
-            <unused start="0x40BF"/>
+        <enum value="0x40BF"      name="CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM"/>
     <enums namespace="CL" start="0x40C0" end="0x40CF" vendor="Qualcomm" comment="Per Bug 10726">