From d66a8c9a34eb654e6a63ea577654e1e557871291 Mon Sep 17 00:00:00 2001
From: Gil Pitney
Date: Thu, 14 May 2015 20:36:45 +0000
Subject: Implemented clCreateSubDevices() PARTITION_EQUALLY capability.

This is a WIP patch beginning the addition of the v1.2 device fission feature.

Signed-off-by: Gil Pitney
---
 src/api/api_device.cpp     | 10 ++++-
 src/core/cpu/device.cpp    | 85 +++++++++++++++++++++++++++++++++++++-----
 src/core/cpu/device.h      | 14 +++++--
 src/core/deviceinterface.h | 18 +++++++++
 src/core/platform.cpp      |  2 +-
 5 files changed, 114 insertions(+), 15 deletions(-)

diff --git a/src/api/api_device.cpp b/src/api/api_device.cpp
index 7d1bd9e..392b860 100644
--- a/src/api/api_device.cpp
+++ b/src/api/api_device.cpp
@@ -87,7 +87,8 @@ clCreateSubDevices(cl_device_id in_device,
     if (!in_device->isA(Coal::Object::T_Device))
         return CL_INVALID_DEVICE;
 
-    return CL_SUCCESS;
+    Coal::DeviceInterface *iface = (Coal::DeviceInterface *)in_device;
+    return iface->createSubDevices(properties, num_devices, out_devices, num_devices_ret);
 }
 
 cl_int
@@ -96,7 +97,9 @@ clRetainDevice(cl_device_id device)
     if (!device->isA(Coal::Object::T_Device))
         return CL_INVALID_DEVICE;
 
+    device->reference();
+
     return CL_SUCCESS;
 }
 
 cl_int
@@ -105,5 +108,10 @@ clReleaseDevice(cl_device_id device)
     if (!device->isA(Coal::Object::T_Device))
         return CL_INVALID_DEVICE;
 
+    // NOTE(review): OpenCL 1.2 leaves the reference count of root devices
+    // unchanged; confirm root devices are never deleted through this path.
+    if (device->dereference())
+        delete device;
+
     return CL_SUCCESS;
 }
diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp
index 4ade11d..a8d6aaf 100644
--- a/src/core/cpu/device.cpp
+++ b/src/core/cpu/device.cpp
@@ -65,12 +65,24 @@ using namespace Coal;
 #define ONE_GIGABYTE (1024 * ONE_MEGABYTE)
 #define HALF_GIGABYTE (512 * ONE_MEGABYTE)
 
-CPUDevice::CPUDevice()
-: DeviceInterface(), p_cores(0), p_num_events(0), p_workers(0), p_stop(false),
+//TODO: #define MAX_PARTITION_PROPS (2)
+#define MAX_PARTITION_PROPS (1)
+
+CPUDevice::CPUDevice(DeviceInterface *parent_device, unsigned int cores)
+: DeviceInterface(), p_num_events(0), p_workers(0), p_stop(false),
   p_initialized(false)
 {
-    // Get info about the system
-    p_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    // If this is a root device, then the number of cores is that of the system...
+    p_parent_device = parent_device;
+    if (p_parent_device == NULL) {
+        p_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    }
+    else {
+        // Otherwise, it was computed by createSubDevices and passed in:
+        p_cores = cores;
+    }
+
+    // Determine frequency:
     p_cpu_mhz = 0.0f;
 
     std::filebuf fb;
@@ -313,6 +325,57 @@ bool CPUDevice::gotEnoughToWorkOn()
     return p_num_events > 0;
 }
 
+/**
+ * \brief Partition the cores of this device into equally sized sub-devices.
+ *
+ * Only a {CL_DEVICE_PARTITION_EQUALLY, n, 0} property list is accepted so
+ * far; anything else is rejected with CL_INVALID_VALUE.
+ *
+ * \param properties Zero-terminated partition property list
+ * \param num_devices Size of out_devices (ignored when out_devices is NULL)
+ * \param out_devices NULL, or array receiving the new sub-device ids
+ * \param num_devices_ret NULL, or ptr to num subdevices allowed per partition scheme
+ * \return CL_SUCCESS, or CL_INVALID_VALUE if the arguments are invalid or unsupported
+ */
+cl_int CPUDevice::createSubDevices(
+    const cl_device_partition_property * properties,
+    cl_uint                              num_devices,
+    cl_device_id *                       out_devices,
+    cl_uint *                            num_devices_ret)
+{
+    // TODO: CL_DEVICE_PARTITION_BY_COUNTS is not implemented yet.
+    if (!properties || properties[0] != CL_DEVICE_PARTITION_EQUALLY)
+        return CL_INVALID_VALUE;
+
+    // Validate in the property's own (signed, pointer-sized) type so that
+    // negative or oversized values are rejected rather than truncated.
+    // properties[2] is only read once properties[1] is a plausible size.
+    const cl_device_partition_property requested = properties[1];
+    if (requested <= 0 ||
+        requested > (cl_device_partition_property)numCPUs() ||
+        properties[2] != 0)
+        return CL_INVALID_VALUE;
+
+    const unsigned int partition_size = (unsigned int)requested;
+    const cl_uint num_new_devices = numCPUs() / partition_size; // discards fraction.
+
+    // out_devices may be NULL when the caller only queries the count; only
+    // allocate sub-devices when there is somewhere to return them.
+    if (out_devices) {
+        if (num_devices < num_new_devices)
+            return CL_INVALID_VALUE;
+
+        for (cl_uint i = 0; i < num_new_devices; i++)
+            out_devices[i] = (cl_device_id)new CPUDevice(this, partition_size);
+    }
+
+    if (num_devices_ret)
+        *num_devices_ret = num_new_devices;
+
+    return CL_SUCCESS;
+}
+
+
 unsigned int CPUDevice::numCPUs() const
 {
     return p_cores;
@@ -358,7 +421,7 @@ cl_int CPUDevice::info(cl_device_info param_name,
         cl_platform_id cl_platform_id_var;
         size_t work_dims[MAX_WORK_DIMS];
         cl_device_id cl_device_id_var;
-        cl_device_partition_property cl_device_partition_property_var;
+        cl_device_partition_property cl_device_partition_property_var[MAX_PARTITION_PROPS];
         cl_device_affinity_domain cl_device_affinity_domain_var;
     };
 
@@ -668,21 +731,23 @@ cl_int CPUDevice::info(cl_device_info param_name,
             STRING_ASSIGN("OpenCL C 1.2 LLVM " LLVM_VERSION);
             break;
 
-        /* Until device fission added, these return nominal values: */
         case CL_DEVICE_PARENT_DEVICE:
-            SIMPLE_ASSIGN(cl_device_id, NULL);
+            SIMPLE_ASSIGN(cl_device_id, p_parent_device);
             break;
         case CL_DEVICE_PARTITION_MAX_SUB_DEVICES:
-            SIMPLE_ASSIGN(cl_uint, 0);
+            SIMPLE_ASSIGN(cl_uint, numCPUs());
             break;
         case CL_DEVICE_PARTITION_PROPERTIES:
-            SIMPLE_ASSIGN(cl_device_partition_property, 0);
+            value_length = MAX_PARTITION_PROPS * sizeof(cl_device_partition_property);
+            cl_device_partition_property_var[0] = CL_DEVICE_PARTITION_EQUALLY;
+            //TODO: cl_device_partition_property_var[1] = CL_DEVICE_PARTITION_BY_COUNTS;
+            value = &cl_device_partition_property_var;
             break;
         case CL_DEVICE_PARTITION_AFFINITY_DOMAIN:
             SIMPLE_ASSIGN(cl_device_affinity_domain, 0);
             break;
         case CL_DEVICE_REFERENCE_COUNT:
-            SIMPLE_ASSIGN(cl_uint, 1);
+            SIMPLE_ASSIGN(cl_uint, references());
             break;
 
         default:
diff --git a/src/core/cpu/device.h b/src/core/cpu/device.h
index a0ad6ef..b2f5711 100644
--- a/src/core/cpu/device.h
+++ b/src/core/cpu/device.h
@@ -63,7 +63,7 @@ class Kernel;
 class CPUDevice : public DeviceInterface
 {
     public:
-        CPUDevice();
+        CPUDevice(DeviceInterface *parent_device, unsigned int cores);
         ~CPUDevice();
 
         /**
@@ -91,13 +91,19 @@ class CPUDevice : public DeviceInterface
         Event *getEvent(bool &stop);
         bool gotEnoughToWorkOn();
 
-        unsigned int numCPUs() const;   /*!< \brief Number of logical CPU cores on the system */
+        cl_int createSubDevices(
+            const cl_device_partition_property * properties,
+            cl_uint                              num_devices,
+            cl_device_id *                       out_devices,
+            cl_uint *                            num_devices_ret);
+
+        unsigned int numCPUs() const;   /*!< \brief Number of cores in this (sub)device */
         float cpuMhz() const;           /*!< \brief Speed of the CPU in Mhz */
 
         std::string builtinsHeader(void) const { return "cpu.h"; }
 
     private:
-        unsigned int p_cores, p_num_events;
+        unsigned int p_cores, p_num_events, p_compute_units;
         float p_cpu_mhz;
         std::string p_device_name;
         pthread_t *p_workers;
@@ -106,6 +112,8 @@ class CPUDevice : public DeviceInterface
         pthread_cond_t p_events_cond;
         pthread_mutex_t p_events_mutex;
         bool p_stop, p_initialized;
+
+        DeviceInterface *p_parent_device;
 };
 
 }
diff --git a/src/core/deviceinterface.h b/src/core/deviceinterface.h
index a321a9e..7bd16c7 100644
--- a/src/core/deviceinterface.h
+++ b/src/core/deviceinterface.h
@@ -166,6 +166,24 @@ class DeviceInterface : public Object
          * \brief Ask device if it has enough work in its queue
          */
         virtual bool gotEnoughToWorkOn() { return false; }
+
+        /**
+         * \brief Partition compute units of the device into subdevices.
+         * \param properties Specifies how to partition this device.
+         * \param num_devices Size of out_devices
+         * \param out_devices Array of device_ids of the new subdevices.
+         * \param num_devices_ret NULL, or ptr to num subdevices allowed per partition scheme
+         * \return CL_SUCCESS, or an error code; this default supports no partitioning.
+         */
+        virtual cl_int createSubDevices(
+            const cl_device_partition_property * properties,
+            cl_uint                              num_devices,
+            cl_device_id *                       out_devices,
+            cl_uint *                            num_devices_ret)
+        {
+            return CL_INVALID_VALUE;
+        }
+
 };
 
 /**
diff --git a/src/core/platform.cpp b/src/core/platform.cpp
index 2ec65f5..e6f8c03 100644
--- a/src/core/platform.cpp
+++ b/src/core/platform.cpp
@@ -104,7 +104,7 @@ namespace Coal
         char filename[] = "/var/lock/opencl";
         p_lock_fd = begin_file_lock_crit_section(filename);
 
-        p_devices.push_back((_cl_device_id*)new Coal::CPUDevice);
+        p_devices.push_back((_cl_device_id*)new Coal::CPUDevice(NULL,0));
 
         // Driver class only exists for the DSPDevice, so need this guard:
 #ifndef SHAMROCK_BUILD
-- 
cgit v1.2.3