aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorGil Pitney <gil.pitney@linaro.org>2014-10-28 18:00:42 -0700
committerGil Pitney <gil.pitney@linaro.org>2014-10-28 18:00:42 -0700
commit61b2c94d9e64758e55730be6a3fc9006c171db85 (patch)
treef564f09ebf93ba293dfa225bd374df6f1f37aa01 /src/core
Initial Commit: Based on TI OpenCL v0.8, originally based on clover.
This is a continuation of the clover OpenCL project: http://people.freedesktop.org/~steckdenis/clover based on the contributions from Texas Instruments for Keystone II DSP device: git.ti.com/opencl and adding contributions from Linaro for ARM CPU-only support. See README.txt for more info, and build instructions. Signed-off-by: Gil Pitney <gil.pitney@linaro.org>
Diffstat (limited to 'src/core')
-rw-r--r--src/core/commandqueue.cpp1018
-rw-r--r--src/core/commandqueue.h494
-rw-r--r--src/core/compiler.cpp342
-rw-r--r--src/core/compiler.h138
-rw-r--r--src/core/config.h9
-rw-r--r--src/core/config.h.cmake9
-rw-r--r--src/core/context.cpp236
-rw-r--r--src/core/context.h104
-rw-r--r--src/core/cpu/buffer.cpp128
-rw-r--r--src/core/cpu/buffer.h77
-rw-r--r--src/core/cpu/builtins.cpp503
-rw-r--r--src/core/cpu/builtins.h144
-rw-r--r--src/core/cpu/device.cpp675
-rw-r--r--src/core/cpu/device.h113
-rw-r--r--src/core/cpu/kernel.cpp734
-rw-r--r--src/core/cpu/kernel.h325
-rw-r--r--src/core/cpu/program.cpp174
-rw-r--r--src/core/cpu/program.h102
-rw-r--r--src/core/cpu/sampler.cpp769
-rw-r--r--src/core/cpu/worker.cpp274
-rw-r--r--src/core/cpu/worker.h45
-rw-r--r--src/core/deviceinterface.h352
-rw-r--r--src/core/dsp/buffer.cpp149
-rw-r--r--src/core/dsp/buffer.h61
-rw-r--r--src/core/dsp/cmem.cpp271
-rw-r--r--src/core/dsp/cmem.h64
-rw-r--r--src/core/dsp/core_scheduler.h62
-rw-r--r--src/core/dsp/database.h112
-rw-r--r--src/core/dsp/device.cpp1135
-rw-r--r--src/core/dsp/device.h151
-rw-r--r--src/core/dsp/driver.cpp34
-rw-r--r--src/core/dsp/driver.h100
-rw-r--r--src/core/dsp/driver_hawking.cpp451
-rw-r--r--src/core/dsp/driver_shannon.cpp313
-rw-r--r--src/core/dsp/dspheap.h200
-rw-r--r--src/core/dsp/dspmem.h59
-rw-r--r--src/core/dsp/genfile_cache.cpp94
-rw-r--r--src/core/dsp/genfile_cache.h101
-rw-r--r--src/core/dsp/kernel.cpp718
-rw-r--r--src/core/dsp/kernel.h119
-rw-r--r--src/core/dsp/mailbox.h114
-rw-r--r--src/core/dsp/memmap.h120
-rw-r--r--src/core/dsp/message.h115
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.c200
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.h53
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_elf32.h160
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.c1101
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.h30
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.cpp825
-rw-r--r--src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.h101
-rw-r--r--src/core/dsp/ocl_load/CMakeLists.txt26
-rw-r--r--src/core/dsp/ocl_load/DLOAD/ArrayList.c122
-rw-r--r--src/core/dsp/ocl_load/DLOAD/ArrayList.h92
-rw-r--r--src/core/dsp/ocl_load/DLOAD/Queue.h194
-rw-r--r--src/core/dsp/ocl_load/DLOAD/Stack.h155
-rw-r--r--src/core/dsp/ocl_load/DLOAD/dload.c3534
-rw-r--r--src/core/dsp/ocl_load/DLOAD/dload.h334
-rw-r--r--src/core/dsp/ocl_load/DLOAD/dload_endian.c151
-rw-r--r--src/core/dsp/ocl_load/DLOAD/dload_endian.h58
-rw-r--r--src/core/dsp/ocl_load/DLOAD/elf32.c652
-rw-r--r--src/core/dsp/ocl_load/DLOAD/elf32.h756
-rw-r--r--src/core/dsp/ocl_load/DLOAD/relocate.h64
-rw-r--r--src/core/dsp/ocl_load/DLOAD/symtab.h72
-rw-r--r--src/core/dsp/ocl_load/DLOAD/util.h89
-rw-r--r--src/core/dsp/ocl_load/DLOAD/version.h63
-rw-r--r--src/core/dsp/ocl_load/DLOAD/virtual_targets.h90
-rw-r--r--src/core/dsp/ocl_load/DLOAD_API/api_version_change.log33
-rw-r--r--src/core/dsp/ocl_load/DLOAD_API/dload_api.h700
-rw-r--r--src/core/dsp/ocl_load/DLOAD_SYM/symtab.c417
-rw-r--r--src/core/dsp/ocl_load/README8
-rw-r--r--src/core/dsp/ocl_load/Stack.h182
-rw-r--r--src/core/dsp/ocl_load/ocl_load.c139
-rw-r--r--src/core/dsp/program.cpp633
-rw-r--r--src/core/dsp/program.h92
-rw-r--r--src/core/dsp/shmem.cpp539
-rw-r--r--src/core/dsp/shmem.h134
-rw-r--r--src/core/dsp/source_cache.h114
-rw-r--r--src/core/dsp/u_concurrent_map.h137
-rw-r--r--src/core/dsp/u_concurrent_stack.h124
-rw-r--r--src/core/dsp/u_lockable.h109
-rw-r--r--src/core/dsp/u_locks_pthread.h137
-rw-r--r--src/core/dsp/utils.h85
-rw-r--r--src/core/dsp/wga.cpp464
-rw-r--r--src/core/dsp/wga.h72
-rw-r--r--src/core/dsp/worker.cpp519
-rw-r--r--src/core/events.cpp1519
-rw-r--r--src/core/events.h718
-rw-r--r--src/core/icd.cpp145
-rw-r--r--src/core/icd.h44
-rw-r--r--src/core/kernel.cpp637
-rw-r--r--src/core/kernel.h326
-rw-r--r--src/core/memobject.cpp960
-rw-r--r--src/core/memobject.h302
-rw-r--r--src/core/object.cpp115
-rw-r--r--src/core/object.h133
-rw-r--r--src/core/platform.cpp227
-rw-r--r--src/core/platform.h65
-rw-r--r--src/core/program.cpp846
-rw-r--r--src/core/program.h250
-rw-r--r--src/core/propertylist.h119
-rw-r--r--src/core/sampler.cpp247
-rw-r--r--src/core/sampler.h115
-rw-r--r--src/core/util.cpp68
-rw-r--r--src/core/util.h41
104 files changed, 31745 insertions, 0 deletions
diff --git a/src/core/commandqueue.cpp b/src/core/commandqueue.cpp
new file mode 100644
index 0000000..662dad1
--- /dev/null
+++ b/src/core/commandqueue.cpp
@@ -0,0 +1,1018 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file commandqueue.cpp
+ * \brief Command queue
+ */
+
+#include "commandqueue.h"
+#include "context.h"
+#include "deviceinterface.h"
+#include "propertylist.h"
+#include "events.h"
+
+#include <cstring>
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <stdio.h>
+
+using namespace Coal;
+
+#define OOO_QUEUE_PUSH_EVENTS_THRESHOLD 64
+
+/******************************************************************************
+* CommandQueue::CommandQueue
+******************************************************************************/
+CommandQueue::CommandQueue(Context *ctx,
+                           DeviceInterface *device,
+                           cl_command_queue_properties properties,
+                           cl_int *errcode_ret)
+: Object(Object::T_CommandQueue, ctx), p_device(device),
+  p_num_events_in_queue(0), p_num_events_on_device(0),
+  p_num_events_completed(0),
+  p_properties(properties), p_flushed(true)
+{
+    // Create the mutex/condition pair that guards the event lists. This is
+    // done unconditionally because the destructor always destroys them.
+    pthread_mutex_init(&p_event_list_mutex, 0);
+    pthread_cond_init(&p_event_list_cond, 0);
+
+    // A queue may only target a device that is part of its context. The
+    // outcome is reported through errcode_ret, which must not be NULL.
+    if (ctx->hasDevice(device))
+    {
+        p_device->init();
+        *errcode_ret = checkProperties();
+    }
+    else
+    {
+        *errcode_ret = CL_INVALID_DEVICE;
+    }
+}
+
+/******************************************************************************
+* CommandQueue::~CommandQueue()
+* Releases every event still parked on the released-events list, then
+* destroys the mutex and condition variable created by the constructor.
+******************************************************************************/
+CommandQueue::~CommandQueue()
+{
+ cleanReleasedEvents(); // needs the mutex, so it must run before the destroy calls
+ // Free the mutex and the condition variable
+ pthread_mutex_destroy(&p_event_list_mutex);
+ pthread_cond_destroy(&p_event_list_cond);
+}
+
+/******************************************************************************
+* cl_int CommandQueue::info
+* Answers a cl_command_queue_info query about this queue.
+*   param_name           - CL_QUEUE_CONTEXT, CL_QUEUE_DEVICE,
+*                          CL_QUEUE_REFERENCE_COUNT or CL_QUEUE_PROPERTIES
+*   param_value_size     - size in bytes of the buffer at param_value
+*   param_value          - destination buffer; may be NULL to query the size
+*   param_value_size_ret - if non-NULL, receives the size of the value
+* Returns CL_SUCCESS, or CL_INVALID_VALUE when param_name is not handled or
+* the supplied buffer is smaller than the value.
+* NOTE(review): SIMPLE_ASSIGN is defined outside this file; it presumably
+* stores the value in the matching <type>_var union member and points
+* value / value_length at it -- confirm against its definition.
+******************************************************************************/
+cl_int CommandQueue::info(cl_command_queue_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ void *value = 0;
+ size_t value_length = 0;
+
+ union {
+ cl_uint cl_uint_var;
+ cl_device_id cl_device_id_var;
+ cl_context cl_context_var;
+ cl_command_queue_properties cl_command_queue_properties_var;
+ };
+
+ switch (param_name)
+ {
+ case CL_QUEUE_CONTEXT:
+ SIMPLE_ASSIGN(cl_context, parent());
+ break;
+
+ case CL_QUEUE_DEVICE:
+ SIMPLE_ASSIGN(cl_device_id, p_device);
+ break;
+
+ case CL_QUEUE_REFERENCE_COUNT:
+ SIMPLE_ASSIGN(cl_uint, references());
+ break;
+
+ case CL_QUEUE_PROPERTIES:
+ SIMPLE_ASSIGN(cl_command_queue_properties, p_properties);
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ if (param_value && param_value_size < value_length) // buffer given but too small
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+/******************************************************************************
+* cl_int CommandQueue::setProperty
+******************************************************************************/
+/**
+ * Enables or disables a set of command-queue property bits.
+ *
+ * \param properties     bitfield of the properties to change
+ * \param enable         non-zero to set the bits, zero to clear them
+ * \param old_properties if non-NULL, receives the bitfield as it was before
+ *                       this call
+ * \return CL_SUCCESS, or the error reported by checkProperties(). On error
+ *         the queue keeps the properties it had before the call, so a
+ *         rejected request can no longer leave the queue with an invalid
+ *         or unsupported bitfield.
+ */
+cl_int CommandQueue::setProperty(cl_command_queue_properties properties,
+                                 cl_bool enable,
+                                 cl_command_queue_properties *old_properties)
+{
+    const cl_command_queue_properties previous = p_properties;
+
+    if (old_properties)
+        *old_properties = previous;
+
+    if (enable)
+        p_properties |= properties;
+    else
+        p_properties &= ~properties;
+
+    // checkProperties() validates p_properties itself, so the candidate value
+    // has to be stored first; undo the change if it is rejected.
+    const cl_int rs = checkProperties();
+
+    if (rs != CL_SUCCESS)
+        p_properties = previous;
+
+    return rs;
+}
+
+/******************************************************************************
+* cl_int CommandQueue::checkProperties
+******************************************************************************/
+cl_int CommandQueue::checkProperties() const
+{
+    // Step 1: reject any bit that is not one of the two properties this
+    // implementation knows about.
+    cl_command_queue_properties allowed =
+        CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
+        CL_QUEUE_PROFILING_ENABLE;
+
+    if ((p_properties & ~allowed) != 0)
+        return CL_INVALID_VALUE;
+
+    // Step 2: ask the device which properties it supports; the answer
+    // overwrites the mask used above.
+    const cl_int query_status =
+        p_device->info(CL_DEVICE_QUEUE_PROPERTIES,
+                       sizeof(cl_command_queue_properties),
+                       &allowed,
+                       0);
+
+    if (query_status != CL_SUCCESS)
+        return query_status;
+
+    // Step 3: every requested bit must be supported by the device.
+    if ((p_properties & ~allowed) != 0)
+        return CL_INVALID_QUEUE_PROPERTIES;
+
+    return CL_SUCCESS;
+}
+
+/******************************************************************************
+* void CommandQueue::flush()
+******************************************************************************/
+void CommandQueue::flush()
+{
+    // Sleep on the condition variable until the queue reports itself as
+    // "flushed"; p_flushed is only read while holding the list mutex.
+    pthread_mutex_lock(&p_event_list_mutex);
+
+    while (p_flushed == false)
+    {
+        pthread_cond_wait(&p_event_list_cond, &p_event_list_mutex);
+    }
+
+    pthread_mutex_unlock(&p_event_list_mutex);
+
+    // Drop the references of events that were parked for release.
+    cleanReleasedEvents();
+}
+
+/******************************************************************************
+* void CommandQueue::finish()
+******************************************************************************/
+void CommandQueue::finish()
+{
+    // pushEventsOnDevice() may leave completed events in the list; sweep them
+    // out first so the wait below cannot get stuck on them.
+    cleanEvents();
+
+    // Completed events are removed from the queue, so "everything finished"
+    // is the same as "the queue is empty". Wait for that under the mutex.
+    pthread_mutex_lock(&p_event_list_mutex);
+
+    while (p_num_events_in_queue != 0)
+    {
+        pthread_cond_wait(&p_event_list_cond, &p_event_list_mutex);
+    }
+
+    pthread_mutex_unlock(&p_event_list_mutex);
+
+    // Drop the references of events that were parked for release.
+    cleanReleasedEvents();
+}
+
+/******************************************************************************
+* cl_int CommandQueue::queueEvent(Event *event)
+******************************************************************************/
+cl_int CommandQueue::queueEvent(Event *event)
+{
+    // Give the device a chance to attach its own data to the event (for
+    // instance, a pointer at which memory would be mapped). Nothing is
+    // queued if that fails.
+    const cl_int init_status = p_device->initEventDeviceData(event);
+
+    if (init_status != CL_SUCCESS)
+        return init_status;
+
+    // Add the event to the tail of the list and mark the queue as dirty.
+    pthread_mutex_lock(&p_event_list_mutex);
+    p_events.push_back(event);
+    p_num_events_in_queue += 1;
+    p_flushed = false;
+    pthread_mutex_unlock(&p_event_list_mutex);
+
+    // Record the "queued" timestamp when profiling is on.
+    const bool profiling = (p_properties & CL_QUEUE_PROFILING_ENABLE) != 0;
+
+    if (profiling)
+        event->updateTiming(Event::Queue);
+
+    // Look for events that can now go to the device, then release the
+    // events that were parked for deletion.
+    pushEventsOnDevice();
+    cleanReleasedEvents();
+
+    return CL_SUCCESS;
+}
+
+/******************************************************************************
+* void CommandQueue::releaseEvent()
+* Parks event e on the released-events list. The event is not released here;
+* cleanReleasedEvents() later calls clReleaseEvent() on every parked event.
+******************************************************************************/
+void CommandQueue::releaseEvent(Event *e)
+{
+ pthread_mutex_lock(&p_event_list_mutex);
+ p_released_events.push_back(e); // list is shared, hence the mutex
+ pthread_mutex_unlock(&p_event_list_mutex);
+}
+
+/******************************************************************************
+* void CommandQueue::cleanEvents()
+* Removes Complete events from p_events and parks them on
+* p_released_events. For an in-order queue the sweep stops at the first event
+* that is not Complete. Broadcasts on the condition variable when the queue
+* becomes empty, and deletes the queue itself if its reference count is 0.
+* Callers must not touch the queue after this returns unless they hold a
+* reference of their own.
+******************************************************************************/
+void CommandQueue::cleanEvents()
+{
+ bool is_inorder =
+ (p_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0;
+
+ pthread_mutex_lock(&p_event_list_mutex);
+
+ // No need to cleanEvents() every time an event finishes, so that we can
+ // save on the event traversal time. 16 is a number that can be tuned
+ // (e.g. using ooo example). The sweep is skipped only while the device
+ // still has work and some queued events are not yet completed.
+ if (p_num_events_completed < 16 && p_num_events_on_device > 0 &&
+ p_num_events_in_queue - p_num_events_completed > 0)
+ {
+ pthread_mutex_unlock(&p_event_list_mutex);
+ return;
+ }
+
+ std::list<Event *>::iterator it = p_events.begin(), oldit;
+
+ while (it != p_events.end())
+ {
+ Event *event = *it;
+
+ if (event->status() == Event::Complete)
+ {
+ // We cannot be deleted from inside us
+ event->setReleaseParent(false);
+ oldit = it; // advance first: erase() invalidates the erased iterator
+ ++it;
+
+ p_num_events_in_queue -= 1;
+ p_num_events_completed -= 1;
+ p_events.erase(oldit);
+ // put Completed events into another list
+ // let main thread release/delete them
+ p_released_events.push_back(event);
+ }
+ else if (is_inorder)
+ {
+ // In Order Queue events are dispatched and completed in Order
+ break;
+ }
+ else
+ {
+ ++it;
+ }
+ }
+
+ // We have cleared the list, so wake up the sleeping threads
+ if (p_num_events_in_queue == 0)
+ pthread_cond_broadcast(&p_event_list_cond);
+
+ pthread_mutex_unlock(&p_event_list_mutex);
+
+ // Check now if we have to be deleted
+ if (references() == 0)
+ {
+ delete this; // no member may be accessed after this statement
+ }
+}
+
+/******************************************************************************
+* void CommandQueue::cleanReleasedEvents()
+* Calls clReleaseEvent() on every event parked on p_released_events and
+* empties that list, all while holding the event-list mutex.
+* !!! Can only be called by the main thread !!! According to the original
+* authors, new/delete and malloc/free are not thread safe on ARM, so the main
+* thread is the ONLY one allowed to handle them.
+******************************************************************************/
+void CommandQueue::cleanReleasedEvents()
+{
+ pthread_mutex_lock(&p_event_list_mutex);
+
+ while (! p_released_events.empty())
+ {
+ Event *event = p_released_events.front();
+ clReleaseEvent((cl_event)event); // drops one reference on the event
+ p_released_events.pop_front();
+ }
+
+ pthread_mutex_unlock(&p_event_list_mutex);
+}
+
+/******************************************************************************
+* void CommandQueue::pushEventsOnDevice()
+* Walks p_events under the event-list mutex, moves Complete events to the
+* released list, and submits to the device every Queued event whose wait
+* list is empty, subject to in-order and barrier rules. Updates p_flushed and
+* broadcasts on the condition variable when the queue is flushed.
+* Who is calling this function:
+* (ready_event, one_event_completed_on_device)
+* (not NULL, * ): worker thread, push till this one ready event
+* ( NULL, true ): worker thread, one completes, push rest on this queue
+* ( NULL, false): main thread, queued a new event, push this queue
+******************************************************************************/
+void CommandQueue::pushEventsOnDevice(Event *ready_event,
+ bool one_event_completed_on_device)
+{
+ int non_complete_events_traversed = 0;
+ bool is_ooo = (p_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
+ bool do_profile = (p_properties & CL_QUEUE_PROFILING_ENABLE) != 0;
+
+ pthread_mutex_lock(&p_event_list_mutex);
+
+ if (one_event_completed_on_device) // bookkeeping for the event that just finished
+ {
+ p_num_events_on_device -= 1;
+ p_num_events_completed += 1;
+ }
+
+ // No need to push more events on Device if 1) device has already got
+ // enough to work on, and 2) not pushing won't cause starvation of this
+ // commandqueue. Not pushing can save p_event_list traversal time.
+ // 2 is a QoS number, set to 2 for the time being
+ // imagine there are multiple commandqueues on same device
+ if(is_ooo && ready_event == NULL &&
+ p_num_events_on_device > 2 && p_device->gotEnoughToWorkOn())
+ {
+ pthread_mutex_unlock(&p_event_list_mutex);
+ return;
+ }
+
+ // Explore the events in p_events and push on the device all of them that
+ // are :
+ //
+ // - Not already pushed (in Event::Queued state)
+ // - Not after a barrier, except if we begin with a barrier
+ // - If we are in-order, only the first event in Event::Queued state can
+ // be pushed
+
+ std::list<Event *>::iterator it = p_events.begin();
+ std::list<Event *>::iterator oldit;
+ bool first = true;
+
+ // We assume that we will flush the command queue (submit all the events)
+ // This will be changed in the while() when we know that not all events
+ // are submitted.
+ p_flushed = true;
+
+ while (it != p_events.end())
+ {
+ Event *event = *it;
+
+ // If the event is completed, remove it
+ if (event->status() == Event::Complete)
+ {
+ event->setReleaseParent(false);
+ oldit = it; // advance first: erase() invalidates the erased iterator
+ ++it;
+
+ p_num_events_completed -= 1;
+ p_num_events_in_queue -= 1;
+ p_events.erase(oldit);
+ // put Completed events into another list
+ // let main thread release/delete them
+ p_released_events.push_back(event);
+ continue;
+ }
+
+ // If OOO queue threshold is met, skip examining the rest of events
+ // NOTE(review): this break does not clear p_flushed, so the queue can be
+ // reported as flushed while unexamined events remain -- confirm intent.
+ if(ready_event == NULL &&
+ non_complete_events_traversed > OOO_QUEUE_PUSH_EVENTS_THRESHOLD)
+ break;
+ non_complete_events_traversed += 1;
+
+ // We cannot do out-of-order, so we can only push the first event.
+ if (!is_ooo && !first)
+ {
+ p_flushed = false; // There are remaining events.
+ break;
+ }
+
+ // Stop if we encounter a barrier that isn't the first event in the list.
+ if (event->type() == Event::Barrier && !first)
+ {
+ // We have events to wait, stop
+ p_flushed = false;
+ break;
+ }
+
+ // Completed events and first barriers are out, it remains real events
+ // that have to block in-order execution.
+ first = false;
+
+ // If the event is not "pushable" (in Event::Queued state), skip it
+ // It is either Submitted or Running.
+ if (event->status() != Event::Queued)
+ {
+ // Intended event is scheduled, skip the rest in queue
+ if (event == ready_event) break;
+
+ ++it;
+ continue;
+ }
+
+ // Check that all the waiting-on events of this event are finished
+ if (! event->waitEventsAllCompleted())
+ {
+ p_flushed = false;
+ // If we encounter a WaitForEvents event that is not "finished",
+ // don't push events after it.
+ if (event->type() == Event::WaitForEvents)
+ break;
+
+ // The event has its dependencies not already met.
+ ++it;
+ continue;
+ }
+
+ if (event->isInstantaneous())
+ {
+ // Set the event as completed. This will call pushEventsOnDevice,
+ // again, so release the lock to avoid a deadlock. We also return
+ // because the recursive call will continue our work.
+ pthread_mutex_unlock(&p_event_list_mutex);
+ event->setStatus(Event::Complete);
+ return;
+ }
+
+ // The event can be pushed, if we need to
+ if (do_profile) event->updateTiming(Event::Submit);
+
+ event->setStatus(Event::Submitted);
+ p_num_events_on_device += 1;
+ p_device->pushEvent(event); // 'it' is not advanced: the next pass sees a non-Queued event and skips it
+ }
+
+ if (ready_event != NULL && p_flushed) // targeted push: flushed only if the queue is empty
+ p_flushed = (p_num_events_in_queue == 0);
+
+ if (p_flushed)
+ pthread_cond_broadcast(&p_event_list_cond);
+
+ pthread_mutex_unlock(&p_event_list_mutex);
+}
+
+/******************************************************************************
+* Event **CommandQueue::events(unsigned int &count)
+******************************************************************************/
+/**
+ * Returns a snapshot of the events currently held by the queue.
+ *
+ * Every returned event has been retained with reference(), so it stays valid
+ * even if it completes in another thread while the snapshot is in use.
+ *
+ * \param count receives the number of events stored in the returned array
+ * \param include_completed_events when false, events whose status is
+ *        Event::Complete are left out of the snapshot
+ * \return an array obtained from std::malloc() holding \p count events.
+ *         NULL is returned (with \p count set to 0) when the queue is empty
+ *         or when the allocation fails; the previous code dereferenced the
+ *         NULL pointer in the latter case.
+ */
+Event **CommandQueue::events(unsigned int &count,
+                             bool include_completed_events)
+{
+    Event **result = NULL;
+    unsigned int index = 0;
+
+    pthread_mutex_lock(&p_event_list_mutex);
+
+    // Size the array from the list that is actually copied, so the buffer can
+    // never be smaller than the number of entries written below.
+    const size_t capacity = p_events.size();
+
+    if (capacity > 0)
+        result = (Event **)std::malloc(capacity * sizeof(Event *));
+
+    if (result != NULL)
+    {
+        // Copy each wanted event of the list into result, retaining it.
+        for (std::list<Event *>::iterator it = p_events.begin();
+             it != p_events.end(); ++it)
+        {
+            Event *e = *it;
+
+            if (!include_completed_events && e->status() == Event::Complete)
+                continue;
+
+            e->reference();
+            result[index] = e;
+            ++index;
+        }
+    }
+
+    count = index;
+
+    // Now result contains an immutable list of events. Even if the events
+    // become completed in another thread while result is used, the events
+    // are retained and so guaranteed to remain valid.
+    pthread_mutex_unlock(&p_event_list_mutex);
+
+    return result;
+}
+
+/******************************************************************************
+* Event::Event
+* Creates an event owned by the command queue parent (may be NULL) in the
+* given initial status, and registers it as a dependent of every event in
+* event_wait_list that is not already complete.
+* errcode_ret is written only on the failure paths of this constructor:
+*   CL_INVALID_EVENT_WAIT_LIST when the list pointer and count disagree or
+*   an entry is NULL;
+*   CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST when an entry has a negative
+*   status.
+* NOTE(review): nothing here stores CL_SUCCESS; presumably callers or derived
+* constructors initialise *errcode_ret beforehand -- confirm.
+******************************************************************************/
+Event::Event(CommandQueue *parent,
+ Status status,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret)
+: Object(Object::T_Event, parent),
+ p_status(status), p_device_data(0)
+{
+ // Initialize the locking machinery
+ pthread_cond_init(&p_state_change_cond, 0);
+ pthread_mutex_init(&p_state_mutex, 0);
+
+ std::memset(&p_timing, 0, sizeof(p_timing)); // 0 means "timestamp not recorded yet"
+
+ // Check sanity of parameters
+ if (!event_wait_list && num_events_in_wait_list)
+ {
+ *errcode_ret = CL_INVALID_EVENT_WAIT_LIST;
+ return;
+ }
+
+ if (event_wait_list && !num_events_in_wait_list)
+ {
+ *errcode_ret = CL_INVALID_EVENT_WAIT_LIST;
+ return;
+ }
+
+ // Check that none of the events in event_wait_list is in an error state
+ for (cl_uint i=0; i<num_events_in_wait_list; ++i)
+ {
+ if (event_wait_list[i] == 0)
+ {
+ *errcode_ret = CL_INVALID_EVENT_WAIT_LIST;
+ return;
+ }
+ else if (event_wait_list[i]->status() < 0)
+ {
+ *errcode_ret = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+ return;
+ }
+ }
+
+ if (parent && num_events_in_wait_list > 0) // events without a queue record no dependencies
+ {
+ pthread_mutex_lock(&p_state_mutex);
+ for (cl_uint i=0; i<num_events_in_wait_list; ++i)
+ {
+ // if event_wait_list[i] is already COMPLETE, don't add it!!!
+ if (event_wait_list[i]->addDependentEvent(this))
+ p_wait_events.push_back(event_wait_list[i]);
+ }
+ pthread_mutex_unlock(&p_state_mutex);
+ }
+}
+
+/******************************************************************************
+* void Event::freeDeviceData()
+******************************************************************************/
+void Event::freeDeviceData()
+{
+    // Nothing to free for events that have no queue or no device data.
+    if (!parent() || !p_device_data)
+        return;
+
+    // Ask the owning queue which device it targets, then let that device
+    // dispose of whatever it attached to this event.
+    CommandQueue *queue = (CommandQueue *)parent();
+    DeviceInterface *device = 0;
+
+    queue->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *), &device, 0);
+
+    device->freeEventDeviceData(this);
+}
+
+/******************************************************************************
+* Event::~Event()
+* Destroys the state mutex and the state-change condition variable that the
+* constructor initialised. Device data is not freed here; see
+* freeDeviceData().
+******************************************************************************/
+Event::~Event()
+{
+ pthread_mutex_destroy(&p_state_mutex);
+ pthread_cond_destroy(&p_state_change_cond);
+}
+
+/******************************************************************************
+* bool Event::isInstantaneous()
+******************************************************************************/
+bool Event::isInstantaneous() const
+{
+    // Marker, User, Barrier and WaitForEvents events are "dummy" events: they
+    // have nothing to execute on a device and are completed directly after
+    // being "submitted". Every other type does real work.
+    const int kind = type();
+
+    return kind == Marker ||
+           kind == User ||
+           kind == Barrier ||
+           kind == WaitForEvents;
+}
+
+/******************************************************************************
+* int Event::setStatusHelper (helper of void Event::setStatus)
+* Under the state mutex: stores the new status, wakes every thread waiting on
+* the state-change condition, and runs the callbacks registered for that
+* status (a status that is not positive runs the callbacks registered for
+* Complete).
+* Returns the number of dependent events recorded at the time of the call.
+* The callbacks run while the state mutex is held, so a callback must not
+* call anything that locks it again (status(), setCallback(), ...).
+******************************************************************************/
+int Event::setStatusHelper(Status status)
+{
+ int num_dependent_events;
+
+ // TODO: If status < 0, terminate all the events depending on us.
+ pthread_mutex_lock(&p_state_mutex);
+ p_status = status;
+ num_dependent_events = p_dependent_events.size();
+
+ pthread_cond_broadcast(&p_state_change_cond);
+
+ // Call the callbacks
+ std::multimap<Status, CallbackData>::const_iterator it;
+ std::pair<std::multimap<Status, CallbackData>::const_iterator,
+ std::multimap<Status, CallbackData>::const_iterator> ret;
+
+ ret = p_callbacks.equal_range(status > 0 ? status : Complete); // error statuses map to Complete
+
+ for (it=ret.first; it!=ret.second; ++it)
+ {
+ const CallbackData &data = (*it).second;
+ data.callback((cl_event)this, p_status, data.user_data);
+ }
+
+ pthread_mutex_unlock(&p_state_mutex);
+
+ return num_dependent_events;
+}
+/*
+ * void Event::setStatus
+ * Records the new status through setStatusHelper(). For user events, and for
+ * queue-owned events reaching Complete, it then removes this event from the
+ * wait list of each dependent event and asks the affected command queues to
+ * push more work on their device. Any other transition only updates the
+ * status and runs the callbacks.
+ */
+void Event::setStatus(Status status)
+{
+ if (type() == Event::User || (parent() && status == Complete))
+ {
+ CommandQueue *cq = (CommandQueue *) parent(); // NULL for user events
+
+ int num_dependent_events = setStatusHelper(status);
+ /*---------------------------------------------------------------------
+ * From this point on, the event could be dereferenced to 0 and deleted!
+ * Thus we cannot call flushQueues(). Need to save these queues.
+ *--------------------------------------------------------------------*/
+
+ /*---------------------------------------------------------------------
+ * Notify dependent events, remove dependence, and push them if possible
+ *--------------------------------------------------------------------*/
+ for (int i = 0; i < num_dependent_events; i += 1)
+ {
+ Event *d_event = p_dependent_events[i];
+ CommandQueue *q = (CommandQueue *) d_event->parent();
+ if (d_event->removeWaitEvent(this) && q != NULL) // order!
+ {
+ q->pushEventsOnDevice(d_event, (cq == q)); // true only for our own queue: one of its events completed
+ if (cq == q) cq = NULL; // own queue already notified, skip the final push below
+ }
+ }
+
+ /*---------------------------------------------------------------------
+ * Inform our parent to push other events to the device if haven't done
+ * so already. UserEvent's parent is NULL.
+ *--------------------------------------------------------------------*/
+ if (cq != NULL) cq->pushEventsOnDevice(NULL, true);
+ }
+ else
+ setStatusHelper(status);
+}
+
+/**
+ * Registers \p event as depending on this event.
+ *
+ * \return false when this event is already Complete (nothing is recorded),
+ *         true when the dependency was recorded; in that case this event is
+ *         also retained once.
+ */
+bool Event::addDependentEvent(Event *event)
+{
+    bool registered = false;
+
+    pthread_mutex_lock(&p_state_mutex);
+
+    if (p_status != Event::Complete)
+    {
+        p_dependent_events.push_back(event);
+        Object::reference(); // retain this event
+        registered = true;
+    }
+
+    pthread_mutex_unlock(&p_state_mutex);
+
+    return registered;
+}
+
+/**
+ * Removes \p event from this event's wait list and hands it to its own
+ * command queue (if it has one) for a deferred release.
+ *
+ * \return true when the wait list is empty after the removal.
+ */
+bool Event::removeWaitEvent(Event *event)
+{
+    pthread_mutex_lock(&p_state_mutex);
+    p_wait_events.remove(event);
+    const bool nothing_left = p_wait_events.empty();
+    pthread_mutex_unlock(&p_state_mutex);
+
+    // Events without a queue (parent is NULL) are not parked anywhere.
+    CommandQueue *owner = (CommandQueue *) event->parent();
+
+    if (owner != NULL)
+        owner->releaseEvent(event);
+
+    return nothing_left;
+}
+
+/**
+ * Tells whether this event still waits on other events.
+ *
+ * \return true when the wait list is empty.
+ */
+bool Event::waitEventsAllCompleted()
+{
+    // YUAN TODO: p_wait_events is always shrinking, is lock necessary?
+    //            it is a little bit faster without having to lock!!!
+    // The locked read is the variant that has always been compiled in.
+    pthread_mutex_lock(&p_state_mutex);
+    const bool none_pending = p_wait_events.empty();
+    pthread_mutex_unlock(&p_state_mutex);
+
+    return none_pending;
+}
+
+/******************************************************************************
+* void Event::reference, dereference
+* The reference count is updated under the state mutex, because the main
+* thread and the worker threads may all update it.
+* reference() adds one reference through Object::reference().
+******************************************************************************/
+void Event::reference()
+{
+ pthread_mutex_lock(&p_state_mutex);
+ Object::reference();
+ pthread_mutex_unlock(&p_state_mutex);
+}
+
+/**
+ * Drops one reference while holding the state mutex.
+ *
+ * \return whatever Object::dereference() reports.
+ */
+bool Event::dereference()
+{
+    pthread_mutex_lock(&p_state_mutex);
+    const bool outcome = Object::dereference();
+    pthread_mutex_unlock(&p_state_mutex);
+
+    return outcome;
+}
+
+/******************************************************************************
+* void Event::setDeviceData
+* Stores an opaque pointer for this event; deviceData() returns it. The
+* pointer is stored as is, without taking the state mutex.
+******************************************************************************/
+void Event::setDeviceData(void *data)
+{
+ p_device_data = data;
+}
+
+/******************************************************************************
+* void Event::updateTiming
+******************************************************************************/
+/**
+ * Records the current time, in microseconds, for one profiling milestone.
+ *
+ * A milestone is written at most once: later calls for the same \p timing
+ * are ignored (NDRangeKernel for example may report it several times).
+ * Values of \p timing that are not below Max are ignored too.
+ *
+ * \param timing the milestone to stamp (index into p_timing)
+ */
+void Event::updateTiming(Timing timing)
+{
+    if (timing >= Max)
+        return;
+
+    pthread_mutex_lock(&p_state_mutex);
+
+    // Don't update more than one time
+    if (p_timing[timing] == 0)
+    {
+        // Zero-initialised so that a failure of both clocks yields 0 ("not
+        // recorded") instead of an indeterminate value.
+        struct timespec tp = { 0, 0 };
+
+        if (clock_gettime(CLOCK_MONOTONIC, &tp) != 0)
+            clock_gettime(CLOCK_REALTIME, &tp);
+
+        // Widen to 64 bits BEFORE multiplying: where time_t is 32 bits wide,
+        // tv_sec * 1000000 overflows once the clock passes about 2147 seconds.
+        cl_ulong rs = (cl_ulong)tp.tv_sec * 1000000; // seconds -> microseconds
+        rs += (cl_ulong)tp.tv_nsec / 1000;            // nanoseconds -> microseconds
+
+        p_timing[timing] = rs;
+    }
+
+    pthread_mutex_unlock(&p_state_mutex);
+}
+
+/******************************************************************************
+* Event::Status Event::status() const
+******************************************************************************/
+Event::Status Event::status() const
+{
+    // HACK : this accessor is const, but reading p_status safely requires
+    // locking the (non-mutable) state mutex, hence the cast.
+    Event *self = const_cast<Event *>(this);
+
+    pthread_mutex_lock(&self->p_state_mutex);
+    const Status current = p_status;
+    pthread_mutex_unlock(&self->p_state_mutex);
+
+    return current;
+}
+
+/******************************************************************************
+* void Event::waitForStatus(Status status)
+******************************************************************************/
+void Event::waitForStatus(Status status)
+{
+    pthread_mutex_lock(&p_state_mutex);
+
+    // Stop waiting as soon as the requested status is reached, or the status
+    // is no longer positive.
+    while (!(p_status == status || p_status <= 0))
+        pthread_cond_wait(&p_state_change_cond, &p_state_mutex);
+
+    pthread_mutex_unlock(&p_state_mutex);
+}
+
+/******************************************************************************
+* void *Event::deviceData()
+* Returns the opaque pointer last stored with setDeviceData(), or 0 if none
+* was stored since construction.
+******************************************************************************/
+void *Event::deviceData()
+{
+ return p_device_data;
+}
+
+/******************************************************************************
+* void Event::setCallback
+******************************************************************************/
+void Event::setCallback(cl_int command_exec_callback_type,
+                        event_callback callback,
+                        void *user_data)
+{
+    // Bundle the function and its user pointer, keyed by the status that
+    // should trigger it.
+    CallbackData data;
+    data.callback = callback;
+    data.user_data = user_data;
+
+    std::pair<Status, CallbackData> entry((Status)command_exec_callback_type,
+                                          data);
+
+    // p_callbacks is read by setStatusHelper() under the same mutex.
+    pthread_mutex_lock(&p_state_mutex);
+    p_callbacks.insert(entry);
+    pthread_mutex_unlock(&p_state_mutex);
+}
+
+/******************************************************************************
+* cl_int Event::info
+* Answers a cl_event_info query about this event.
+*   param_name           - CL_EVENT_COMMAND_QUEUE, CL_EVENT_CONTEXT,
+*                          CL_EVENT_COMMAND_TYPE,
+*                          CL_EVENT_COMMAND_EXECUTION_STATUS or
+*                          CL_EVENT_REFERENCE_COUNT
+*   param_value_size     - size in bytes of the buffer at param_value
+*   param_value          - destination buffer; may be NULL to query the size
+*   param_value_size_ret - if non-NULL, receives the size of the value
+* Returns CL_SUCCESS, or CL_INVALID_VALUE when param_name is not handled or
+* the supplied buffer is smaller than the value.
+* NOTE(review): SIMPLE_ASSIGN is defined outside this file; it presumably
+* stores the value in the matching <type>_var union member and points
+* value / value_length at it -- confirm against its definition.
+******************************************************************************/
+cl_int Event::info(cl_event_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ void *value = 0;
+ size_t value_length = 0;
+
+ union {
+ cl_command_queue cl_command_queue_var;
+ cl_context cl_context_var;
+ cl_command_type cl_command_type_var;
+ cl_int cl_int_var;
+ cl_uint cl_uint_var;
+ };
+
+ switch (param_name)
+ {
+ case CL_EVENT_COMMAND_QUEUE:
+ SIMPLE_ASSIGN(cl_command_queue, parent());
+ break;
+
+ case CL_EVENT_CONTEXT:
+ if (parent()) // queue-owned event: the context is the queue's parent
+ {
+ SIMPLE_ASSIGN(cl_context, parent()->parent());
+ }
+ else
+ {
+ if (type() == User) // user events carry their own context
+ SIMPLE_ASSIGN(cl_context, ((UserEvent *)this)->context())
+ else
+ SIMPLE_ASSIGN(cl_context, 0);
+ }
+ break;
+
+ case CL_EVENT_COMMAND_TYPE:
+ SIMPLE_ASSIGN(cl_command_type, type());
+ break;
+
+ // avoid status() call, if called from callbacks, we deadlock on mutex
+ case CL_EVENT_COMMAND_EXECUTION_STATUS:
+ SIMPLE_ASSIGN(cl_int, p_status);
+ break;
+
+ case CL_EVENT_REFERENCE_COUNT:
+ SIMPLE_ASSIGN(cl_uint, references());
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ if (param_value && param_value_size < value_length) // buffer given but too small
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+/******************************************************************************
+* cl_int Event::profilingInfo(
+* Answers a cl_profiling_info query (queued / submit / start / end time).
+* The timestamps are stored in microseconds by updateTiming() and are
+* multiplied by 1000 here, so the reported values are in nanoseconds.
+* Returns CL_PROFILING_INFO_NOT_AVAILABLE for user events, when the owning
+* queue has profiling disabled, or when the event is not Complete;
+* CL_INVALID_VALUE for an unknown param_name or a buffer that is too small;
+* CL_SUCCESS otherwise.
+* NOTE(review): for any event that is not a user event, parent() is used
+* without a NULL check -- presumably only user events lack a queue; confirm.
+******************************************************************************/
+cl_int Event::profilingInfo(cl_profiling_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ if (type() == Event::User)
+ return CL_PROFILING_INFO_NOT_AVAILABLE;
+
+ // Check that the Command Queue has profiling enabled
+ cl_command_queue_properties queue_props;
+ cl_int rs;
+
+ rs = ((CommandQueue *)parent())->info(CL_QUEUE_PROPERTIES,
+ sizeof(cl_command_queue_properties),
+ &queue_props, 0);
+
+ if (rs != CL_SUCCESS)
+ return rs;
+
+ if ((queue_props & CL_QUEUE_PROFILING_ENABLE) == 0)
+ return CL_PROFILING_INFO_NOT_AVAILABLE;
+
+ // avoid status() call, if called from callbacks, we deadlock on mutex
+ if (p_status != Event::Complete)
+ return CL_PROFILING_INFO_NOT_AVAILABLE;
+
+ void *value = 0;
+ size_t value_length = 0;
+ cl_ulong cl_ulong_var;
+
+ switch (param_name)
+ {
+ case CL_PROFILING_COMMAND_QUEUED:
+ SIMPLE_ASSIGN(cl_ulong, 1000*p_timing[Queue]); // microseconds -> nanoseconds
+ break;
+
+ case CL_PROFILING_COMMAND_SUBMIT:
+ SIMPLE_ASSIGN(cl_ulong, 1000*p_timing[Submit]);
+ break;
+
+ case CL_PROFILING_COMMAND_START:
+ SIMPLE_ASSIGN(cl_ulong, 1000*p_timing[Start]);
+ break;
+
+ case CL_PROFILING_COMMAND_END:
+ SIMPLE_ASSIGN(cl_ulong, 1000*p_timing[End]);
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ if (param_value && param_value_size < value_length) // buffer given but too small
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
diff --git a/src/core/commandqueue.h b/src/core/commandqueue.h
new file mode 100644
index 0000000..7d2c65e
--- /dev/null
+++ b/src/core/commandqueue.h
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file commandqueue.h
+ * \brief Command queue and base class for events
+ */
+
+#ifndef __COMMANDQUEUE_H__
+#define __COMMANDQUEUE_H__
+
+#include "object.h"
+
+#include <CL/cl.h>
+#include <pthread.h>
+
+#include <map>
+#include <list>
+#include <vector>
+
+namespace Coal
+{
+
+class Context;
+class DeviceInterface;
+class Event;
+
+/**
+ * \brief Command queue
+ *
+ * This class holds a list of events that will be pushed on a given device.
+ *
+ * More details are given on the \ref events page.
+ */
+class CommandQueue : public Object
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param ctx context the command queue is created in
+ * \param device device the command queue is created for
+ * \param properties properties of the command queue
+ * \param errcode_ret return value
+ * \note NOTE(review): parameter roles are inferred from their names and
+ * types, the constructor body is not in this header; confirm in
+ * commandqueue.cpp
+ */
+ CommandQueue(Context *ctx,
+ DeviceInterface *device,
+ cl_command_queue_properties properties,
+ cl_int *errcode_ret);
+ ~CommandQueue();
+
+ /**
+ * \brief Queue an event
+ * \param event event to be queued
+ * \return \c CL_SUCCESS if success, otherwise an error code
+ */
+ cl_int queueEvent(Event *event);
+
+ /**
+ * \brief Information about the command queue
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int info(cl_command_queue_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ /**
+ * \brief Set properties of the command queue
+ * \note This function is deprecated and only there for OpenCL 1.0
+ * compatibility
+ * \param properties property to enable or disable
+ * \param enable true to enable the property, false to disable it
+ * \param old_properties old value of the properties, ignored if NULL
+ * \return \c CL_SUCCESS if all is good, an error code if \p properties is
+ * invalid
+ */
+ cl_int setProperty(cl_command_queue_properties properties,
+ cl_bool enable,
+ cl_command_queue_properties *old_properties);
+
+ /**
+ * \brief Check the properties given
+ * \return \c CL_SUCCESS if they are valid, an error code otherwise
+ */
+ cl_int checkProperties() const;
+
+ /**
+ * \brief Push events on the device
+ *
+ * This function implements a big part of what is described in
+ * \ref events .
+ *
+ * It is called by \c Coal::Event::setStatus() when an event is
+ * completed, or by \c queueEvent(). Its purpose is to explore the list
+ * of queued events (\c p_events) and to call
+ * \c Coal::DeviceInterface::pushEvent() for each event meeting its push
+ * conditions.
+ *
+ * \param ready_event is known to be pushable, push events in the
+ * queue till this point, skip the events after this one.
+ *
+ * \param one_event_completed_on_device can be used to differentiate
+ * whether this function is called by worker thread when an event is
+ * completed, or by main thread's queueEvent().
+ *
+ * \section conditions Conditions
+ *
+ * If the command queue has the \c CL_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ * property disabled, an event can be pushed only if all the previous
+ * ones in the list are completed with success. This way, an event
+ * must be completed before any other can be pushed. This ensures
+ * in-order execution.
+ *
+ * If this property is enabled, more complex heuristics are used.
+ *
+ * The event list \c p_events is explored from top to bottom. At each
+ * loop iteration, checks are performed to see if the event can be pushed.
+ *
+ * - When a \c Coal::BarrierEvent is encountered, no more events can be
+ * pushed, except if the \c Coal::BarrierEvent is the first in the list,
+ * as that means there are no other events that can be pushed, so the
+ * barrier can go away
+ * - All events that are already pushed or finished are skipped
+ * - The wait list of the event is then explored to ensure that all its
+ * dependencies are met.
+ * - Finally, if the event passes all the tests, it is either pushed on
+ * the device, or simply set to \c Coal::Event::Complete if it's a
+ * dummy event (see \c Coal::Event::isInstantaneous()).
+ */
+ void pushEventsOnDevice(Event *ready_event = NULL,
+ bool one_event_completed_on_device = false);
+
+ /**
+ * \brief Push an event onto the \c p_released_events list
+ *
+ * Later main thread will perform release event action.
+ */
+ void releaseEvent(Event *e);
+
+ /**
+ * \brief Remove from the event list completed events
+ *
+ * This function is called periodically to clean the event list from
+ * completed events.
+ *
+ * It is needed to do that out of \c pushEventsOnDevice() as deleting
+ * event may \c dereference() this command queue, and also delete it. It
+ * would produce crashes.
+ */
+ void cleanEvents();
+
+ /**
+ * \brief Release events on the released event list
+ *
+ * This function is called periodically to release the events on the
+ * released events list. This is only performed on the main thread
+ * because deleting/freeing memory from worker thread has caused
+ * weird memory problems on ARM.
+ *
+ */
+ void cleanReleasedEvents();
+
+ /**
+ * \brief Flush the command queue
+ *
+ * Pushes all the events on the device, and then return. The event
+ * don't need to be completed after this call.
+ */
+ void flush();
+
+ /**
+ * \brief Finish the command queue
+ *
+ * Pushes the events like \c flush() but also wait for them to be
+ * completed before returning.
+ */
+ void finish();
+
+ /**
+ * \brief Return all the events in the command queue
+ * \note Retains all the events
+ * \param count number of events in the event queue
+ * \param include_completed_events default to true
+ * \return events currently in the event queue
+ */
+ Event **events(unsigned int &count,
+ bool include_completed_events = true);
+
+ private:
+ // NOTE(review): the roles below are inferred from the member names and
+ // from the comments in this header; confirm against commandqueue.cpp.
+ DeviceInterface *p_device; // device of this queue
+ cl_int p_num_events_in_queue; // event counters
+ cl_int p_num_events_on_device;
+ cl_int p_num_events_completed;
+ cl_command_queue_properties p_properties; // see setProperty()/checkProperties()
+
+ std::list<Event *> p_events; // queued events, explored by pushEventsOnDevice()
+ std::list<Event *> p_released_events; // filled by releaseEvent(), emptied by cleanReleasedEvents()
+ pthread_mutex_t p_event_list_mutex;
+ pthread_cond_t p_event_list_cond;
+ bool p_flushed;
+};
+
+/**
+ * \brief Base class for all events
+ *
+ * This class contains logic common to all the events.
+ *
+ * Beside handling OpenCL-specific stuff, \c Coal::Event objects do nothing
+ * implementation-wise. They do not compile kernels, copy data around, etc.
+ * They only contain static and immutable data that is then used by the devices
+ * to actually implement the event.
+ */
+class Event : public Object
+{
+ public:
+ /**
+ * \brief Event type
+ *
+ * This allows objects using \c Coal::Event to know which event it is,
+ * and to cast it to the correct sub-class.
+ */
+ enum Type
+ {
+ NDRangeKernel = CL_COMMAND_NDRANGE_KERNEL,
+ TaskKernel = CL_COMMAND_TASK,
+ NativeKernel = CL_COMMAND_NATIVE_KERNEL,
+ ReadBuffer = CL_COMMAND_READ_BUFFER,
+ WriteBuffer = CL_COMMAND_WRITE_BUFFER,
+ CopyBuffer = CL_COMMAND_COPY_BUFFER,
+ ReadImage = CL_COMMAND_READ_IMAGE,
+ WriteImage = CL_COMMAND_WRITE_IMAGE,
+ CopyImage = CL_COMMAND_COPY_IMAGE,
+ CopyImageToBuffer = CL_COMMAND_COPY_IMAGE_TO_BUFFER,
+ CopyBufferToImage = CL_COMMAND_COPY_BUFFER_TO_IMAGE,
+ MapBuffer = CL_COMMAND_MAP_BUFFER,
+ MapImage = CL_COMMAND_MAP_IMAGE,
+ UnmapMemObject = CL_COMMAND_UNMAP_MEM_OBJECT,
+ Marker = CL_COMMAND_MARKER,
+ AcquireGLObjects = CL_COMMAND_ACQUIRE_GL_OBJECTS,
+ ReleaseGLObjects = CL_COMMAND_RELEASE_GL_OBJECTS,
+ ReadBufferRect = CL_COMMAND_READ_BUFFER_RECT,
+ WriteBufferRect = CL_COMMAND_WRITE_BUFFER_RECT,
+ CopyBufferRect = CL_COMMAND_COPY_BUFFER_RECT,
+ User = CL_COMMAND_USER,
+ // The two types below are not given a CL_COMMAND_* value here: they
+ // simply take the next two enumerator values after CL_COMMAND_USER.
+ Barrier,
+ WaitForEvents
+ };
+
+ /**
+ * \brief Event status
+ */
+ enum Status
+ {
+ Queued = CL_QUEUED, /*!< \brief Simply queued in a command queue */
+ Submitted = CL_SUBMITTED, /*!< \brief Submitted to a device */
+ Running = CL_RUNNING, /*!< \brief Running on the device */
+ Complete = CL_COMPLETE /*!< \brief Completed */
+ };
+
+ /**
+ * \brief Function that can be called when an event changes status
+ */
+ typedef void (CL_CALLBACK *event_callback)(cl_event, cl_int, void *);
+
+ /**
+ * Structure used internally by \c Coal::Event to store for each event
+ * status the callbacks to call with the corresponding \c user_data.
+ */
+ struct CallbackData
+ {
+ event_callback callback; /*!< Function to call */
+ void *user_data; /*!< Pointer to pass as its third argument */
+ };
+
+ /**
+ * \brief Timing counters of an event
+ */
+ enum Timing
+ {
+ Queue, /*!< Time when the event was queued */
+ Submit, /*!< Time when the event was submitted to the device */
+ Start, /*!< Time when its execution began on the device */
+ End, /*!< Time when its execution finished */
+ Max /*!< Number of items in this enum */
+ };
+
+ public:
+ /**
+ * \brief Constructor
+ * \param parent parent \c Coal::CommandQueue
+ * \param status \c Status the event has when it is created
+ * \param num_events_in_wait_list number of events to wait on
+ * \param event_wait_list list of events to wait on
+ * \param errcode_ret return value
+ */
+ Event(CommandQueue *parent,
+ Status status,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ void freeDeviceData(); /*!< \brief Call \c Coal::DeviceInterface::freeEventDeviceData() */
+ virtual ~Event(); /*!< \brief Destructor */
+
+ /**
+ * \brief Type of the event
+ * \return type of the event
+ */
+ virtual Type type() const = 0;
+
+ /**
+ * \brief Dummy event
+ *
+ * A dummy event is an event that doesn't have to be pushed on a device,
+ * it is only a hint for \c Coal::CommandQueue
+ *
+ * \return true if the event is dummy
+ */
+ bool isInstantaneous() const;
+
+ /**
+ * \brief Set the event status
+ *
+ * This function calls the event callbacks, and
+ * \c Coal::CommandQueue::pushEventsOnDevice() if \p status is
+ * \c Complete .
+ *
+ * \param status new status of the event
+ */
+ void setStatus(Status status);
+
+ /**
+ * \brief Increase Event reference count
+ *
+ * This function uses mutex to protect the reference count
+ * \c update in the underlying object.
+ */
+ void reference();
+
+ /**
+ * \brief Decrease Event reference count
+ *
+ * This function uses mutex to protect the reference count
+ * \c update in the underlying object.
+ *
+ * \return true if the reference count is decreased to 0
+ */
+ bool dereference();
+
+ /**
+ * \brief Set device-specific data
+ * \param data device-specific data
+ */
+ void setDeviceData(void *data);
+
+ /**
+ * \brief Update timing info
+ *
+ * This function reads current system time and puts it in \c p_timing
+ *
+ * \param timing timing event having just finished
+ */
+ void updateTiming(Timing timing);
+
+ /**
+ * \brief Status
+ * \return status of the event
+ */
+ Status status() const;
+
+ /**
+ * \brief Wait for a specified status
+ *
+ * This function blocks until the event's status is set to \p status
+ * by another thread.
+ *
+ * \param status the status the event must have for the function to return
+ */
+ void waitForStatus(Status status);
+
+ /**
+ * \brief Device-specific data
+ * \return data set using \c setDeviceData()
+ */
+ void *deviceData();
+
+ /**
+ * \brief Add a callback for this event
+ * \param command_exec_callback_type status the event must have in order
+ * to have the callback called
+ * \param callback callback function
+ * \param user_data user data given to the callback
+ */
+ void setCallback(cl_int command_exec_callback_type,
+ event_callback callback,
+ void *user_data);
+
+ /**
+ * \brief Info about the event
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int info(cl_event_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ /**
+ * \brief Profiling info
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int profilingInfo(cl_profiling_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ /**
+ * \brief Call \c Coal::CommandQueue::pushEventsOnDevice() for each command queue
+ * in which this event is queued or each queue with an event waiting on this event
+ */
+ void flushQueues();
+
+
+ /**
+ * \brief Add event to p_dependent_events, which will be notified when
+ * current event completes. If current event is already complete,
+ * no need to add and return false.
+ * \param event the event to be notified
+ */
+ bool addDependentEvent(Event *event);
+
+ /**
+ * \brief Remove event from p_wait_events, which should be waited on
+ * before current event can start. When p_wait_events becomes empty,
+ * return true to indicate that current event is ready to be pushed.
+ * \param event the event to be removed from p_wait_events
+ */
+ bool removeWaitEvent(Event *event);
+
+ /**
+ * \brief Check if there are no more events to wait on before current
+ * event can start.
+ */
+ bool waitEventsAllCompleted();
+
+ private:
+ /**
+ * \brief Helper function for setStatus()
+ * return number of dependent events
+ */
+ int setStatusHelper(Status status);
+
+ private:
+ pthread_cond_t p_state_change_cond;
+ pthread_mutex_t p_state_mutex;
+
+ Status p_status;
+ void *p_device_data;
+ // Callbacks registered with setCallback(), keyed by the status that
+ // triggers them
+ std::multimap<Status, CallbackData> p_callbacks;
+
+ // One counter per Timing value, written by updateTiming().
+ // NOTE(review): the counters are 32-bit (cl_uint) while profilingInfo()
+ // reports them multiplied by 1000 as cl_ulong; confirm that 32 bits give
+ // enough range for the time unit stored here.
+ cl_uint p_timing[Max];
+
+ // p_wait_events: I must wait until these events complete
+ // p_dependent_events: when I complete, I should notify these events
+ std::list<const Event *> p_wait_events;
+ std::vector<Event *> p_dependent_events;
+};
+
+}
+
+// Global-namespace struct that adds nothing to Coal::CommandQueue.
+// Presumably this gives a definition to the opaque type behind the
+// cl_command_queue handle of <CL/cl.h> -- confirm against the CL headers.
+struct _cl_command_queue : public Coal::CommandQueue
+{};
+
+// Global-namespace struct that adds nothing to Coal::Event.
+// Presumably this gives a definition to the opaque type behind the
+// cl_event handle of <CL/cl.h> -- confirm against the CL headers.
+struct _cl_event : public Coal::Event
+{};
+
+#endif
diff --git a/src/core/compiler.cpp b/src/core/compiler.cpp
new file mode 100644
index 0000000..d4d5240
--- /dev/null
+++ b/src/core/compiler.cpp
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file compiler.cpp
+ * \brief Compiler wrapper around Clang
+ */
+
+#include "compiler.h"
+#include "deviceinterface.h"
+
+#include <cstring>
+#include <cstdio>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <clang/Frontend/CompilerInvocation.h>
+#include <clang/Frontend/TextDiagnosticPrinter.h>
+#include <clang/Frontend/LangStandard.h>
+#include <clang/Basic/Diagnostic.h>
+#include <clang/CodeGen/CodeGenAction.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/Host.h>
+#include <llvm/Support/MemoryBuffer.h> // ASW
+#include <llvm/IR/Module.h>
+#include <llvm/IR/LLVMContext.h>
+#include <sys/stat.h>
+
+std::string get_ocl_dsp();
+
+using namespace Coal;
+
+// Builds an idle compiler for \p device: no module yet, optimizations on,
+// no diagnostic printer, and a log stream that writes into p_log.
+Compiler::Compiler(DeviceInterface *device)
+    : p_device(device),
+      p_module(NULL),
+      p_optimize(true),
+      p_log_stream(p_log),
+      p_log_printer(NULL)
+{
+}
+
+// Nothing is released explicitly here; members clean up after themselves.
+Compiler::~Compiler()
+{
+}
+
+/**
+ * \brief Compile OpenCL C \p source into an LLVM module with Clang
+ *
+ * Configures the Clang instance (code generation, diagnostics, header
+ * search, preprocessor, language and target options), applies the user
+ * supplied build \p options, then runs an "emit LLVM only" action. The
+ * produced module is kept in \c p_module and the diagnostics in \c p_log.
+ *
+ * \param options build options, separated by whitespace. Recognized:
+ *        -I, -D (with or without a space before the argument),
+ *        -cl-single-precision-constant, -cl-opt-disable, -cl-mad-enable,
+ *        -cl-unsafe-math-optimizations, -cl-finite-math-only,
+ *        -cl-fast-relaxed-math, -w, -Werror and -cl-std=CL1.1
+ * \param source buffer holding the program source
+ * \return 0 (false) on success, 1 (true) if the diagnostics engine cannot be
+ *         created or the compilation fails, \c CL_INVALID_BUILD_OPTIONS if
+ *         an option is not recognized
+ */
+int Compiler::compile(const std::string &options,
+                      llvm::MemoryBuffer *source)
+{
+    /* Set options */
+    p_options = options;
+
+    clang::CodeGenOptions &codegen_opts = p_compiler.getCodeGenOpts();
+    clang::DiagnosticOptions &diag_opts = p_compiler.getDiagnosticOpts();
+    clang::FrontendOptions &frontend_opts = p_compiler.getFrontendOpts();
+    clang::HeaderSearchOptions &header_opts = p_compiler.getHeaderSearchOpts();
+    clang::LangOptions &lang_opts = p_compiler.getLangOpts();
+    clang::TargetOptions &target_opts = p_compiler.getTargetOpts();
+    clang::PreprocessorOptions &prep_opts = p_compiler.getPreprocessorOpts();
+    clang::CompilerInvocation &invocation = p_compiler.getInvocation();
+
+    // Set codegen options
+    codegen_opts.setDebugInfo(clang::CodeGenOptions::NoDebugInfo);
+    codegen_opts.AsmVerbose = true;
+    codegen_opts.CodeModel = "default";
+
+    // level 3 is too much for the pocl transformations.
+    codegen_opts.OptimizationLevel = 2;
+
+    // Set diagnostic options
+    diag_opts.Pedantic = true;
+    diag_opts.ShowColumn = true;
+    diag_opts.ShowLocation = true;
+    diag_opts.ShowCarets = false;
+    diag_opts.ShowFixits = true;
+    diag_opts.ShowColors = false;
+    diag_opts.ErrorLimit = 19;
+    diag_opts.MessageLength = 0;
+
+    // Set frontend options
+    frontend_opts.ProgramAction = clang::frontend::EmitLLVMOnly;
+    frontend_opts.DisableFree = true;
+
+    // Set header search options
+    header_opts.Verbose = false;
+    header_opts.UseBuiltinIncludes = false;
+    header_opts.UseStandardSystemIncludes = false;
+    header_opts.UseStandardCXXIncludes = false;
+
+    // Set preprocessor options
+    prep_opts.RetainRemappedFileBuffers = true;
+    //prep_opts.ImplicitPCHInclude = "/usr/share/ti/opencl/clc.h";
+    prep_opts.Includes.push_back("clc.h");
+    prep_opts.Includes.push_back(p_device->builtinsHeader());
+
+    // Set lang options
+    lang_opts.NoBuiltin = true;
+    lang_opts.OpenCL = true;
+    lang_opts.CPlusPlus = false;
+
+    // Set target options
+    // Initialized so that a failing query cannot leave an indeterminate value
+    cl_device_type devtype = 0;
+    p_device->info(CL_DEVICE_TYPE, sizeof(devtype), &devtype, 0);
+
+    if (devtype == CL_DEVICE_TYPE_CPU) {
+        // Originally: target_opts.Triple = llvm::sys::getHostTriple();
+        target_opts.Triple = llvm::sys::getDefaultTargetTriple();
+    }
+    else // devtype != CL_DEVICE_TYPE_CPU
+    {
+        // For 6X, use the 'spir' target, since it implements opencl specs
+        target_opts.Triple = "spir-unknown-unknown-unknown";
+
+        // Currently, llp6x does not handle fused multiply and add
+        // llvm intrinsics (llvm.fmuladd.*). Disable generating these
+        // intrinsics using clang -ffp-contract=off option
+        codegen_opts.setFPContractMode(clang::CodeGenOptions::FPC_Off);
+    }
+
+    // Parse the user options
+    std::istringstream options_stream(options);
+    std::string token;
+    bool Werror = false, inI = false, inD = false;
+
+#ifndef SHAMROCK_BUILD
+    // Add opencl-headers' package default install include path as location to search
+    std::string header_path(get_ocl_dsp());
+#else // TODO: /usr/include/CL is where opencl headers go, but use ENV vars?
+    std::string header_path("/usr/include/CL");
+#endif
+    header_opts.AddPath(header_path, clang::frontend::Angled, false, false);
+
+
+    while (options_stream >> token)
+    {
+        if (inI)
+        {
+            // token is an include path
+            header_opts.AddPath(token, clang::frontend::Angled, false, false);
+            inI = false;
+            continue;
+        }
+        else if (inD)
+        {
+            // token is name or name=value
+            prep_opts.addMacroDef(token);
+            inD = false;
+            continue;
+        }
+
+        //Handle -I xxx or -Ixxx. Assuming no other -I option prefix
+        if (token == "-I")
+        {
+            inI = true;
+        }
+        else if (token.compare(0,2,"-I") == 0)
+        {
+            header_opts.AddPath(token.substr(2), clang::frontend::Angled, false,
+                                false);
+        }
+        //Handle -D xxx or -Dxxx. Assuming no other -D option prefix
+        else if (token == "-D")
+        {
+            inD = true;
+        }
+        else if (token.compare(0,2,"-D") == 0) //Handle -Dxxx (no space between)
+        {
+            prep_opts.addMacroDef(token.substr(2));
+        }
+        else if (token == "-cl-single-precision-constant")
+        {
+            lang_opts.SinglePrecisionConstants = true;
+        }
+        else if (token == "-cl-opt-disable")
+        {
+            p_optimize = false;
+            codegen_opts.OptimizationLevel = 0;
+        }
+        else if (token == "-cl-mad-enable")
+        {
+            codegen_opts.LessPreciseFPMAD = true;
+        }
+        else if (token == "-cl-unsafe-math-optimizations")
+        {
+            codegen_opts.UnsafeFPMath = true;
+        }
+        else if (token == "-cl-finite-math-only")
+        {
+            codegen_opts.NoInfsFPMath = true;
+            codegen_opts.NoNaNsFPMath = true;
+        }
+        else if (token == "-cl-fast-relaxed-math")
+        {
+            codegen_opts.UnsafeFPMath = true;
+            codegen_opts.NoInfsFPMath = true;
+            codegen_opts.NoNaNsFPMath = true;
+            lang_opts.FastRelaxedMath = true;
+        }
+        else if (token == "-w")
+        {
+            diag_opts.IgnoreWarnings = true;
+        }
+        else if (token == "-Werror")
+        {
+            Werror = true;
+        }
+        else if (token == "-cl-std=CL1.1")
+        {
+            // Accepted, nothing to configure
+        }
+        else
+        {
+            return CL_INVALID_BUILD_OPTIONS;
+        }
+    }
+
+    add_macrodefs_for_supported_opencl_extensions(prep_opts);
+
+    // Set invocation options
+    //invocation.setLangDefaults(lang_opts,clang::IK_OpenCL);
+    invocation.setLangDefaults(lang_opts,clang::IK_OpenCL, clang::LangStandard::lang_opencl12);
+
+    // Create the diagnostics engine
+    p_log_printer = new clang::TextDiagnosticPrinter(p_log_stream, &diag_opts);
+    p_compiler.createDiagnostics(p_log_printer);
+
+    // Without a diagnostics engine nothing can be compiled: report a failure
+    // (non-zero), not the success value.
+    if (!p_compiler.hasDiagnostics())
+        return true;
+
+    p_compiler.getDiagnostics().setWarningsAsErrors(Werror);
+
+    // Feed the compiler with source
+    frontend_opts.Inputs.push_back(clang::FrontendInputFile("program.cl", clang::IK_OpenCL));
+
+    //ASW TODO cleanup
+#if 0
+    prep_opts.addRemappedFile("program.cl", source);
+#else
+
+    const llvm::StringRef s_data(source->getBuffer());
+    const llvm::StringRef s_name("<source>");
+    llvm::MemoryBuffer *buffer =
+        llvm::MemoryBuffer::getMemBuffer(s_data, s_name);
+
+    prep_opts.addRemappedFile("program.cl", buffer);
+#endif
+
+    //timespec t0, t1;
+    //clock_gettime(CLOCK_MONOTONIC, &t0);
+    // Compile
+
+    clang::CodeGenAction *Act = new clang::EmitLLVMOnlyAction(&llvm::getGlobalContext());
+    const bool compiled = p_compiler.ExecuteAction(*Act);
+
+    // Make sure everything written to the stream has reached p_log before
+    // the log is read, on the failure path as well.
+    p_log_stream.flush();
+
+    if (!compiled)
+    {
+        // DEBUG
+        std::cout << log() << std::endl;
+        delete Act;     // the action is owned by this function
+        return true;
+    }
+
+    //clock_gettime(CLOCK_MONOTONIC, &t1);
+    //printf("clang time: %6.4f secs\n",
+    //(float)t1.tv_sec-t0.tv_sec+(t1.tv_nsec-t0.tv_nsec)/1e9);
+
+    // Take the module out of the action before the action is destroyed
+    p_module = Act->takeModule();
+    delete Act;
+
+    // uncomment to debug the llvm IR
+    // p_module->dump();
+
+    return false;
+}
+
+// Query the device to get list of supported OpenCL extensions. Standard
+// requires that each supported extension has a macro definition with the
+// same name as the extension.
+//
+// prep_opts: preprocessor options receiving one macro definition per
+//            whitespace-separated name of the device's extensions string.
+//
+// If the device does not report an extensions string, no macro is added.
+void Compiler::add_macrodefs_for_supported_opencl_extensions
+                                 (clang::PreprocessorOptions &prep_opts)
+{
+    // First ask only for the length of the extensions string
+    size_t size = 0;
+    if (p_device->info(CL_DEVICE_EXTENSIONS, 0, NULL, &size) != CL_SUCCESS
+        || size == 0)
+        return;
+
+    // Zero-filled and one byte longer than reported, so the text is
+    // NUL-terminated whatever the device writes into the first size bytes.
+    std::string extensions(size + 1, '\0');
+
+    if (p_device->info(CL_DEVICE_EXTENSIONS, size, &extensions[0], NULL)
+        != CL_SUCCESS)
+        return;
+
+    // Create macro definitions from the extension names. c_str() stops at
+    // the first NUL, so the padding never ends up in a token.
+    std::istringstream extensions_stream(extensions.c_str());
+    std::string token;
+
+    while (extensions_stream >> token)
+        prep_opts.addMacroDef(token);
+}
+
+// Read-only access to the log text accumulated in p_log.
+const std::string &Compiler::log() const
+{
+    return this->p_log;
+}
+
+// Read-only access to the option string stored by compile().
+const std::string &Compiler::options() const
+{
+    return this->p_options;
+}
+
+// Optimization flag: true by default, cleared by compile() when the
+// -cl-opt-disable option is given.
+bool Compiler::optimize() const
+{
+    return this->p_optimize;
+}
+
+// Module stored by the last successful compile(); 0 until then.
+llvm::Module *Compiler::module() const
+{
+    return this->p_module;
+}
+
+// Adds \p log at the end of the text kept in p_log.
+void Compiler::appendLog(const std::string &log)
+{
+    p_log.append(log);
+}
diff --git a/src/core/compiler.h b/src/core/compiler.h
new file mode 100644
index 0000000..58788e6
--- /dev/null
+++ b/src/core/compiler.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file compiler.h
+ * \brief Compiler wrapped around Clang
+ */
+
+#ifndef __COMPILER_H__
+#define __COMPILER_H__
+
+#include <string>
+
+#include <clang/Frontend/CompilerInstance.h>
+#include <llvm/Support/raw_ostream.h>
+
+namespace llvm
+{
+ class MemoryBuffer;
+ class Module;
+}
+
+namespace clang
+{
+ class TextDiagnosticPrinter;
+}
+
+namespace Coal
+{
+
+class DeviceInterface;
+
+/**
+ * \brief Compiler using Clang
+ *
+ * This class builds a Clang instance, runs it and then retains compilation logs
+ * and produced data.
+ */
+class Compiler
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param device \c Coal::DeviceInterface for which code will be compiled
+ */
+ Compiler(DeviceInterface *device);
+ ~Compiler();
+
+ /**
+ * \brief Compile \p source to produce a LLVM module
+ * \param options options given to the compiler, described in the OpenCL spec
+ * \param source source to be compiled
+ * \return 0 (false) if the compilation is successful, 1 (true) if Clang
+ * fails to compile the source, \c CL_INVALID_BUILD_OPTIONS if an
+ * option is not recognized
+ * \sa module()
+ * \sa log()
+ */
+ int compile(const std::string &options, llvm::MemoryBuffer *source);
+
+ /**
+ * \brief Compilation log
+ * \note \c appendLog() can also be used to append custom info at the end
+ * of the log, for instance to keep compilation and linking logs
+ * in the same place
+ * \return log
+ */
+ const std::string &log() const;
+
+ /**
+ * \brief Options given at \c compile()
+ * \return options used during compilation
+ */
+ const std::string &options() const;
+
+ /**
+ * \brief Optimization enabled
+ * \return false if -cl-opt-disable was given in the options, true otherwise
+ */
+ bool optimize() const;
+
+ /**
+ * \brief LLVM module generated
+ * \return LLVM module generated by the compilation, 0 if an error occurred
+ */
+ llvm::Module *module() const;
+
+ /**
+ * \brief Append a string to the log
+ *
+ * This function can be used to append linking or code-gen logs to the
+ * internal compilation log kept by this class
+ *
+ * \param log log to be appended
+ */
+ void appendLog(const std::string &log);
+
+ private:
+ DeviceInterface *p_device;
+ clang::CompilerInstance p_compiler;
+ llvm::Module *p_module;
+ bool p_optimize;
+
+ // p_log must stay declared before p_log_stream: the constructor builds
+ // the stream on top of p_log, and members are initialized in
+ // declaration order.
+ std::string p_log, p_options;
+ llvm::raw_string_ostream p_log_stream;
+ clang::TextDiagnosticPrinter *p_log_printer;
+
+ /**
+ * \brief Define one preprocessor macro per OpenCL extension reported by
+ * the device
+ * \param prep_opts preprocessor options receiving the macro definitions
+ */
+ void add_macrodefs_for_supported_opencl_extensions
+ (clang::PreprocessorOptions &prep_opts);
+
+};
+
+}
+
+#endif
diff --git a/src/core/config.h b/src/core/config.h
new file mode 100644
index 0000000..e1e401b
--- /dev/null
+++ b/src/core/config.h
@@ -0,0 +1,9 @@
+/* Build configuration constants. */
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+/* Version strings; COAL_VERSION is empty in this copy. */
+#define LLVM_VERSION "3.5.0svn"
+#define COAL_VERSION ""
+
+/* Presumably the maximum number of work-item dimensions -- confirm at the
+ * places where it is used. */
+#define MAX_WORK_DIMS 3
+
+#endif
diff --git a/src/core/config.h.cmake b/src/core/config.h.cmake
new file mode 100644
index 0000000..ccf87b7
--- /dev/null
+++ b/src/core/config.h.cmake
@@ -0,0 +1,9 @@
+/* Template of config.h. The @NAME@ placeholders are presumably substituted
+ * by CMake's configure_file() -- confirm in the CMakeLists. */
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+#define LLVM_VERSION "@LLVM_VERSION@"
+#define COAL_VERSION "@Coal_VERSION@"
+
+/* Presumably the maximum number of work-item dimensions -- confirm at the
+ * places where it is used. */
+#define MAX_WORK_DIMS 3
+
+#endif
diff --git a/src/core/context.cpp b/src/core/context.cpp
new file mode 100644
index 0000000..e9129ff
--- /dev/null
+++ b/src/core/context.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file context.cpp
+ * \brief Context
+ */
+
+#include "context.h"
+#include "deviceinterface.h"
+#include "propertylist.h"
+#include "platform.h"
+
+#include <cstring>
+#include <cstdlib>
+
+#include <llvm/Support/TargetSelect.h>
+
+using namespace Coal;
+
+/**
+ * \brief No-op notification callback
+ *
+ * Installed by the \c Context constructor when the client passes no
+ * \c pfn_notify, so the stored callback pointer is never null.
+ */
+static void default_pfn_notify(const char *, const void *, size_t, void *)
+{
+    // Intentionally empty: every argument is ignored
+}
+
+/**
+ * \brief Build a context from a property list and a set of devices
+ *
+ * On any failure an error code is stored in \p errcode_ret and the
+ * constructor returns early. \p errcode_ret is only written on failure
+ * (and with the result of each device query); it must not be null.
+ *
+ * \param properties 0, or a list of (name, value) pairs terminated by a 0
+ *        name. Only \c CL_CONTEXT_PLATFORM is accepted, at most once.
+ * \param num_devices number of entries in \p devices
+ * \param devices devices to attach to the context
+ * \param pfn_notify notification callback, replaced by a no-op when null
+ * \param user_data opaque pointer kept alongside \p pfn_notify
+ * \param errcode_ret receives the error code
+ */
+Context::Context(const cl_context_properties *properties,
+                 cl_uint num_devices,
+                 const cl_device_id *devices,
+                 void (CL_CALLBACK *pfn_notify)(const char *, const void *,
+                                                size_t, void *),
+                 void *user_data,
+                 cl_int *errcode_ret)
+: Object(Object::T_Context, 0), p_properties(0), p_pfn_notify(pfn_notify),
+  p_user_data(user_data), p_devices(0), p_num_devices(0), p_props_len(0),
+  p_platform(&the_platform)
+{
+    if (!p_pfn_notify)
+        p_pfn_notify = &default_pfn_notify;
+
+    // Initialize LLVM, this can be done more than one time per program
+    llvm::InitializeNativeTarget();
+    llvm::InitializeNativeTargetAsmPrinter();
+    llvm::InitializeNativeTargetAsmParser();
+
+    // Explore the properties
+    if (properties)
+    {
+        // Walk the list with a typed cursor instead of a byte pointer and a
+        // function-local macro (which used to leak into the rest of the file)
+        const cl_context_properties *cursor = properties;
+        bool platform_seen = false;
+
+        while (true)
+        {
+            cl_context_properties prop = *cursor++;
+
+            if (!prop)
+                break;
+
+            // Only one CL_CONTEXT_PLATFORM entry is understood
+            if (prop != CL_CONTEXT_PLATFORM || platform_seen)
+            {
+                *errcode_ret = CL_INVALID_PROPERTY;
+                return;
+            }
+
+            p_platform = (cl_platform_id)*cursor++;
+            platform_seen = true;
+        }
+
+        // Length in bytes, terminating 0 included
+        size_t props_len = (cursor - properties) * sizeof(cl_context_properties);
+
+        // properties may be allocated on the stack of the client application
+        // copy it into a real buffer
+        p_properties = (cl_context_properties *)std::malloc(props_len);
+        p_props_len = props_len;
+
+        if (!p_properties)
+        {
+            *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+            return;
+        }
+
+        std::memcpy((void *)p_properties, (const void *)properties, props_len);
+    }
+
+    // Verify that the platform is good
+    if (p_platform != &the_platform)
+    {
+        *errcode_ret = CL_INVALID_PLATFORM;
+        return;
+    }
+
+    // Explore the devices
+    p_devices = (DeviceInterface **)std::malloc(num_devices * sizeof(DeviceInterface *));
+
+    if (!p_devices)
+    {
+        *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+        return;
+    }
+
+    for (cl_uint i=0; i<num_devices; ++i)
+    {
+        cl_device_id device = devices[i];
+
+        if (device == 0)
+        {
+            *errcode_ret = CL_INVALID_DEVICE;
+            return;
+        }
+
+        // Verify that the device is available
+        cl_bool device_available;
+
+        *errcode_ret = device->info(CL_DEVICE_AVAILABLE,
+                                    sizeof(device_available),
+                                    &device_available,
+                                    0);
+
+        if (*errcode_ret != CL_SUCCESS)
+            return;
+
+        if (!device_available)
+        {
+            *errcode_ret = CL_DEVICE_NOT_AVAILABLE;
+            return;
+        }
+
+        // Add the device to the list. The count only ever covers validated
+        // entries, so an early return above never exposes uninitialized
+        // slots through hasDevice() or info().
+        p_devices[i] = (DeviceInterface *)device;
+        p_num_devices = i + 1;
+    }
+}
+
+/**
+ * \brief Release the device array and the copied property list
+ */
+Context::~Context()
+{
+    // free() accepts a null pointer, so no guards are needed
+    std::free((void *)p_devices);
+    std::free((void *)p_properties);
+}
+
+/**
+ * \brief Query a piece of information about this context
+ *
+ * \param param_name one of \c CL_CONTEXT_REFERENCE_COUNT,
+ *        \c CL_CONTEXT_NUM_DEVICES, \c CL_CONTEXT_DEVICES or
+ *        \c CL_CONTEXT_PROPERTIES
+ * \param param_value_size size in bytes of \p param_value
+ * \param param_value destination buffer, may be 0 to only query the size
+ * \param param_value_size_ret if not 0, receives the size of the value
+ * \return \c CL_SUCCESS, or \c CL_INVALID_VALUE for an unknown
+ *         \p param_name or a destination buffer that is too small
+ */
+cl_int Context::info(cl_context_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ void *value = 0;
+ size_t value_length = 0;
+
+ // NOTE(review): SIMPLE_ASSIGN / MEM_ASSIGN are not defined in this file
+ // (presumably propertylist.h). They appear to fill value and
+ // value_length, using this union as scratch storage -- confirm.
+ union {
+ cl_uint cl_uint_var;
+ };
+
+ switch (param_name)
+ {
+ case CL_CONTEXT_REFERENCE_COUNT:
+ SIMPLE_ASSIGN(cl_uint, references());
+ break;
+
+ case CL_CONTEXT_NUM_DEVICES:
+ SIMPLE_ASSIGN(cl_uint, p_num_devices);
+ break;
+
+ case CL_CONTEXT_DEVICES:
+ MEM_ASSIGN(p_num_devices * sizeof(DeviceInterface *), p_devices);
+ break;
+
+ case CL_CONTEXT_PROPERTIES:
+ MEM_ASSIGN(p_props_len, p_properties);
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ // A destination buffer was given but cannot hold the value
+ if (param_value && param_value_size < value_length)
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value && value_length /* CONTEXT_PROPERTIES can be of length 0 */)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+/**
+ * \brief Tell whether \p device is one of the devices of this context
+ * \param device device to look for
+ * \return true if \p device is stored in the device list, false otherwise
+ */
+bool Context::hasDevice(DeviceInterface *device) const
+{
+    unsigned int idx = 0;
+
+    while (idx < p_num_devices)
+    {
+        if (p_devices[idx] == device)
+        {
+            return true;
+        }
+
+        ++idx;
+    }
+
+    return false;
+}
diff --git a/src/core/context.h b/src/core/context.h
new file mode 100644
index 0000000..4712d25
--- /dev/null
+++ b/src/core/context.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file context.h
+ * \brief OpenCL context
+ */
+
+#ifndef __CONTEXT_H__
+#define __CONTEXT_H__
+
+#include "object.h"
+
+#include <CL/cl.h>
+
+namespace Coal
+{
+
+class DeviceInterface;
+
+/**
+ * \brief OpenCL context
+ *
+ * This class is the root of all OpenCL objects, except \c Coal::DeviceInterface.
+ */
+class Context : public Object
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param properties properties of the context
+ * \param num_devices number of devices that will be used
+ * \param devices \c Coal::DeviceInterface to be used
+ * \param pfn_notify function to call when an error arises, to give
+ * more detail
+ * \param user_data user data to pass to \p pfn_notify
+ * \param errcode_ret return code
+ */
+ Context(const cl_context_properties *properties,
+ cl_uint num_devices,
+ const cl_device_id *devices,
+ void (CL_CALLBACK *pfn_notify)(const char *, const void *,
+ size_t, void *),
+ void *user_data,
+ cl_int *errcode_ret);
+ /**
+ * \brief Destructor, frees the property copy and the device array
+ */
+ ~Context();
+
+ /**
+ * \brief Info about the context
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int info(cl_context_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ /**
+ * \brief Check that this context contains a given \p device
+ * \param device device to check
+ * \return whether this context contains \p device
+ */
+ bool hasDevice(DeviceInterface *device) const;
+
+ private:
+ // malloc'ed copy of the property list given at construction, 0 if none
+ cl_context_properties *p_properties;
+ // Notification callback; the constructor substitutes a no-op when the
+ // client gives none
+ void (CL_CALLBACK *p_pfn_notify)(const char *, const void *,
+ size_t, void *);
+ // Client pointer stored next to p_pfn_notify
+ void *p_user_data;
+
+ // malloc'ed array of the devices attached to this context
+ DeviceInterface **p_devices;
+ // Number of entries in p_devices / size in bytes of p_properties
+ unsigned int p_num_devices, p_props_len;
+ // Platform of the context, checked against the_platform at construction
+ cl_platform_id p_platform;
+};
+
+}
+
+/**
+ * \brief Definition of the \c _cl_context struct as an empty subclass of
+ * \c Coal::Context (the OpenCL headers declare \c cl_context as a pointer
+ * to this struct)
+ */
+struct _cl_context : public Coal::Context
+{};
+
+#endif
diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp
new file mode 100644
index 0000000..9125872
--- /dev/null
+++ b/src/core/cpu/buffer.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/buffer.cpp
+ * \brief CPU buffer
+ */
+
+#include "buffer.h"
+#include "device.h"
+
+#include "../memobject.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+using namespace Coal;
+
+/**
+ * \brief Create the CPU-side representation of a memory object
+ *
+ * No memory is allocated here. The buffer either borrows storage (the
+ * parent's data for a sub-buffer, the host pointer for
+ * \c CL_MEM_USE_HOST_PTR) or stays empty until \c allocate() is called.
+ *
+ * \param device device for which the buffer is created
+ * \param buffer memory object described by this device buffer
+ * \param rs if not null, receives \c CL_SUCCESS (nothing can fail here)
+ */
+CPUBuffer::CPUBuffer(CPUDevice *device, MemObject *buffer, cl_int *rs)
+: DeviceBuffer(), p_device(device), p_buffer(buffer), p_data(0),
+  p_data_malloced(false)
+{
+    // The header documents rs as the return code; it used to be left
+    // untouched, which made the result depend on the caller's initialization
+    if (rs)
+        *rs = CL_SUCCESS;
+
+    if (buffer->type() == MemObject::SubBuffer)
+    {
+        // We need to create this CPUBuffer based on the CPUBuffer of the
+        // parent buffer
+        SubBuffer *subbuf = (SubBuffer *)buffer;
+        MemObject *parent = subbuf->parent();
+        CPUBuffer *parentcpubuf = (CPUBuffer *)parent->deviceBuffer(device);
+
+        // NOTE(review): assumes the parent's device buffer exists and already
+        // has its data -- confirm against the callers
+        char *tmp_data = (char *)parentcpubuf->data();
+        tmp_data += subbuf->offset();
+
+        p_data = (void *)tmp_data;
+    }
+    else if (buffer->flags() & CL_MEM_USE_HOST_PTR)
+    {
+        // We use the host ptr, we are already allocated
+        p_data = buffer->host_ptr();
+    }
+
+    // NOTE: This function can also reject Image buffers by setting a value
+    //       != CL_SUCCESS in rs.
+}
+
+/**
+ * \brief Release the storage, but only if \c allocate() malloc'ed it
+ */
+CPUBuffer::~CPUBuffer()
+{
+    // Borrowed storage (host pointer, parent buffer) is not ours to free
+    if (!p_data_malloced)
+        return;
+
+    std::free((void *)p_data);
+}
+
+/**
+ * \brief Pointer to the buffer's data
+ * \return the stored data pointer, 0 if nothing has been set yet
+ */
+void *CPUBuffer::data() const
+{
+ return p_data;
+}
+
+/**
+ * \brief Pointer to the buffer's data, same value as \c data()
+ */
+void *CPUBuffer::nativeGlobalPointer() const
+{
+ return data();
+}
+
+/**
+ * \brief Make sure the buffer has storage and fill it if requested
+ *
+ * Storage is malloc'ed only when no data pointer was set at construction.
+ * For non-sub-buffers created with \c CL_MEM_COPY_HOST_PTR, the content of
+ * the host pointer is then copied in. On success the memory object is told
+ * that this device buffer is allocated.
+ *
+ * \return false if the buffer size is 0 or the allocation fails
+ */
+bool CPUBuffer::allocate()
+{
+    const size_t bytes = p_buffer->size();
+
+    // A zero-sized buffer means something went wrong
+    if (bytes == 0)
+    {
+        return false;
+    }
+
+    // We don't use a host ptr, we need to allocate a buffer
+    if (p_data == 0)
+    {
+        void *fresh = std::malloc(bytes);
+
+        if (fresh == 0)
+        {
+            return false;
+        }
+
+        p_data = fresh;
+        p_data_malloced = true;
+    }
+
+    const bool is_sub_buffer = (p_buffer->type() == MemObject::SubBuffer);
+
+    if (!is_sub_buffer && (p_buffer->flags() & CL_MEM_COPY_HOST_PTR))
+    {
+        std::memcpy(p_data, p_buffer->host_ptr(), bytes);
+    }
+
+    // Say to the memobject that we are allocated
+    p_buffer->deviceAllocated(this);
+
+    return true;
+}
+
+/**
+ * \brief Device given to the constructor of this buffer
+ */
+DeviceInterface *CPUBuffer::device() const
+{
+ return p_device;
+}
+
+/**
+ * \brief Tell whether the buffer currently has a data pointer
+ * \return true if the data pointer is not null
+ */
+bool CPUBuffer::allocated() const
+{
+    if (p_data)
+    {
+        return true;
+    }
+
+    return false;
+}
diff --git a/src/core/cpu/buffer.h b/src/core/cpu/buffer.h
new file mode 100644
index 0000000..d88c9e5
--- /dev/null
+++ b/src/core/cpu/buffer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file buffer.h
+ * \brief CPU buffer
+ */
+
+#ifndef __CPU_BUFFER_H__
+#define __CPU_BUFFER_H__
+
+#include "../deviceinterface.h"
+
+namespace Coal
+{
+
+class CPUDevice;
+class MemObject;
+
+/**
+ * \brief CPU implementation of \c Coal::MemObject
+ *
+ * This class is responsible of the actual allocation of buffer objects, using
+ * \c malloc() or by reusing a given \c host_ptr.
+ */
+class CPUBuffer : public DeviceBuffer
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param device Device for which the buffer is allocated
+ * \param buffer \c Coal::MemObject holding information about the buffer
+ * \param rs return code (\c CL_SUCCESS if all is good)
+ */
+ CPUBuffer(CPUDevice *device, MemObject *buffer, cl_int *rs);
+ ~CPUBuffer(); /*!< \brief Frees the data if \c allocate() malloc'ed it */
+
+ bool allocate(); /*!< \brief Allocate the data if needed, false on failure */
+ DeviceInterface *device() const; /*!< \brief Device given at construction */
+ void *data() const; /*!< \brief Pointer to the buffer's data */
+ void *nativeGlobalPointer() const; /*!< \brief Same pointer as \c data() */
+ bool allocated() const; /*!< \brief Whether the data pointer is set */
+
+ private:
+ CPUDevice *p_device; // device given at construction
+ MemObject *p_buffer; // memory object this buffer belongs to
+ void *p_data; // storage: malloc'ed, host pointer, or inside the parent
+ bool p_data_malloced; // true when p_data must be freed by the destructor
+};
+
+}
+
+#endif
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
new file mode 100644
index 0000000..137d34e
--- /dev/null
+++ b/src/core/cpu/builtins.cpp
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/builtins.cpp
+ * \brief Native OpenCL C built-in functions
+ *
+ * All these built-ins are directly called by kernels. When the LLVM JIT
+ * sees a function name it doesn't know, it calls \c getBuiltin() with this
+ * name as parameter. This function then returns the address of an actual
+ * function implementation, that finally gets called by the kernel when
+ * it is run.
+ */
+
+#include "builtins.h"
+#include "kernel.h"
+#include "buffer.h"
+
+#include "../events.h"
+#include "../memobject.h"
+
+#include <sys/mman.h>
+#include <signal.h>
+
+#include <llvm/IR/Function.h>
+
+#include <iostream>
+#include <cstring>
+#include <cmath>
+#include <boost/math/special_functions.hpp>
+
+#include <stdio.h>
+
+using namespace Coal;
+
+/**
+ * \brief Address of a pixel inside image storage
+ * \param base start of the image data
+ * \param x,y,z pixel coordinates
+ * \param row_pitch size in bytes of a row
+ * \param slice_pitch size in bytes of a slice
+ * \param bytes_per_pixel size in bytes of a pixel
+ * \return \p base advanced by z slices, y rows and x pixels
+ */
+unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z,
+                         size_t row_pitch, size_t slice_pitch,
+                         unsigned int bytes_per_pixel)
+{
+    const size_t slice_offset = z * slice_pitch;
+    const size_t row_offset = y * row_pitch;
+    const size_t pixel_offset = x * bytes_per_pixel;
+
+    return base + (slice_offset + row_offset + pixel_offset);
+}
+
+/*
+ * TLS-related functions
+ */
+__thread Coal::CPUKernelWorkGroup *g_work_group; /*!< \brief \c Coal::CPUKernelWorkGroup currently running on this thread */
+__thread void *work_items_data; /*!< \brief Space allocated for work-items stacks, see \ref barrier */
+__thread size_t work_items_size; /*!< \brief Size of \c work_items_data, see \ref barrier */
+
+/* Record the work-group the built-ins of this thread must act on. */
+void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current)
+{
+ g_work_group = current;
+}
+
+/* Return this thread's cached work-item area; its size goes in \p size. */
+void *getWorkItemsData(size_t &size)
+{
+ size = work_items_size;
+ return work_items_data;
+}
+
+/* Replace this thread's cached work-item area and its recorded size. */
+void setWorkItemsData(void *ptr, size_t size)
+{
+ work_items_data = ptr;
+ work_items_size = size;
+}
+
+/*
+ * Actual built-ins implementations
+ */
+/**
+ * \brief Number of work dimensions of this work-group
+ */
+cl_uint CPUKernelWorkGroup::getWorkDim() const
+{
+ return p_work_dim;
+}
+
+/**
+ * \brief Global ID of the current work-item in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return the work-group's starting global ID plus the local ID of the
+ *         current work-item, or 0 if \p dimindx is out of range
+ */
+size_t CPUKernelWorkGroup::getGlobalId(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 0;
+
+    return p_global_id_start_offset[dimindx] + p_current_context->local_id[dimindx];
+}
+
+/**
+ * \brief Global work size in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return global work size of the event, or 1 if \p dimindx is out of range
+ */
+size_t CPUKernelWorkGroup::getGlobalSize(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 1;
+
+    return p_event->global_work_size(dimindx);
+}
+
+/**
+ * \brief Local work size in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return local work size of the event, or 1 if \p dimindx is out of range
+ */
+size_t CPUKernelWorkGroup::getLocalSize(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 1;
+
+    return p_event->local_work_size(dimindx);
+}
+
+/**
+ * \brief Local ID of the current work-item in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return local ID of the current work-item context, or 0 if \p dimindx is
+ *         out of range
+ */
+size_t CPUKernelWorkGroup::getLocalID(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 0;
+
+    return p_current_context->local_id[dimindx];
+}
+
+/**
+ * \brief Number of work-groups in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return global work size divided by local work size, or 1 if
+ *         \p dimindx is out of range
+ */
+size_t CPUKernelWorkGroup::getNumGroups(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 1;
+
+    return (p_event->global_work_size(dimindx) /
+            p_event->local_work_size(dimindx));
+}
+
+/**
+ * \brief Index of this work-group in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return index of the work-group, or 0 if \p dimindx is out of range
+ */
+size_t CPUKernelWorkGroup::getGroupID(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 0;
+
+    return p_index[dimindx];
+}
+
+/**
+ * \brief Global work offset in one dimension
+ * \param dimindx dimension, valid from 0 to \c getWorkDim() - 1
+ * \return global work offset of the event, or 0 if \p dimindx is out of
+ *         range
+ */
+size_t CPUKernelWorkGroup::getGlobalOffset(cl_uint dimindx) const
+{
+    // dimindx == p_work_dim is already one past the last valid dimension
+    if (dimindx >= p_work_dim)
+        return 0;
+
+    return p_event->global_work_offset(dimindx);
+}
+
+/**
+ * \brief Work-group barrier, implemented by switching between work-items
+ *
+ * The first call of a work-group sets up one context (bookkeeping plus a
+ * stack) per work-item in a thread-local area that is kept and reused
+ * between calls. Every call then switches to the next work-item, creating
+ * its context on first use, and returns once control comes back to the
+ * calling work-item.
+ *
+ * \param flags memory fence flags, unused by this implementation
+ */
+void CPUKernelWorkGroup::barrier(unsigned int flags)
+{
+    p_had_barrier = true;
+
+    // Allocate or reuse TLS memory for the stacks (it isn't freed between
+    // the work groups, and even the kernels, so if we need less space than
+    // allocated, it's good)
+    if (!p_contexts)
+    {
+        if (p_current_work_item != 0)
+        {
+            // Completely abnormal, it means that not every work-items
+            // encounter the barrier
+            std::cerr << "*** Not every work-items of "
+                      << p_kernel->function()->getName().str()
+                      << " calls barrier(); !" << std::endl;
+            return;
+        }
+
+        // Allocate or reuse the stacks
+        size_t contexts_size;
+        p_contexts = getWorkItemsData(contexts_size);
+        size_t needed_size = p_num_work_items * (p_stack_size + sizeof(Context));
+
+        if (!p_contexts || contexts_size < needed_size)
+        {
+            // We must allocate a new space
+            if (p_contexts)
+                munmap(p_contexts, contexts_size);
+
+            p_contexts = mmap(0, needed_size, PROT_EXEC | PROT_READ | PROT_WRITE, /* People say a stack must be executable */
+                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+            if (p_contexts == MAP_FAILED)
+            {
+                // mmap reports failure with MAP_FAILED, not a null pointer.
+                // Forget the (already unmapped) cached area and give up, as
+                // in the abnormal case above.
+                p_contexts = 0;
+                setWorkItemsData(0, 0);
+
+                std::cerr << "*** Unable to map work-item stacks for "
+                          << p_kernel->function()->getName().str()
+                          << std::endl;
+                return;
+            }
+
+            // Record the size of the NEW mapping. Recording the previous
+            // size made every later call believe the area was too small and
+            // unmap it with the wrong length.
+            setWorkItemsData(p_contexts, needed_size);
+        }
+
+        // NOTE(review): a reused area still holds the previous contents,
+        // including the initialized flags tested below -- confirm that they
+        // are reset elsewhere before reuse.
+
+        // Now that we have a real main context, initialize it
+        p_current_context = getContextAddr(0);
+        p_current_context->initialized = 1;
+
+        // Clear every dimension: the copy below transfers MAX_WORK_DIMS
+        // entries to the next work-item
+        std::memset(p_current_context->local_id, 0, MAX_WORK_DIMS * sizeof(size_t));
+
+        getcontext(&p_current_context->context);
+    }
+
+    // Take the next context
+    p_current_work_item++;
+    if (p_current_work_item == p_num_work_items) p_current_work_item = 0;
+
+    Context *next = getContextAddr(p_current_work_item);
+    Context *main = getContextAddr(0);  // The context not created with makecontext
+
+    // If the next context isn't initialized, initialize it.
+    // Note: mmap zeroes the memory, so next->initialized == 0 if it isn't initialized
+    if (next->initialized == 0)
+    {
+        next->initialized = 1;
+
+        // local-id of next is the one of the current context, but incVec'ed
+        std::memcpy(next->local_id, p_current_context->local_id,
+                    MAX_WORK_DIMS * sizeof(size_t));
+
+        incVec(p_work_dim, next->local_id, p_max_local_id);
+
+        // Initialize the next context
+        if (getcontext(&next->context) != 0)
+            return;
+
+        // Get its stack. It is located at next + sizeof(Context)
+        char *stack = (char *)next;
+        stack += sizeof(Context);
+
+        // When the work-item finishes, execution resumes in the main context
+        next->context.uc_link = &main->context;
+        next->context.uc_stack.ss_sp = stack;
+        next->context.uc_stack.ss_size = p_stack_size;
+
+        // Tell it to run the kernel function
+        makecontext(&next->context, (void (*)())p_kernel_func_addr, 1, p_args);
+    }
+
+    // Switch to the next context
+    ucontext_t *cur = &p_current_context->context;
+    p_current_context = next;
+
+    swapcontext(cur, &next->context);
+
+    // When we return here, it means that all the other work items encountered
+    // a barrier and that we returned to this one. We can continue.
+}
+
+/**
+ * \brief Report on standard output that a kernel uses an unknown built-in
+ * \param name name of the function that could not be resolved
+ */
+void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const
+{
+    std::ostream &out = std::cout;
+
+    // First line: the missing symbol
+    out << "OpenCL: Non-existant builtin function ";
+    out << name;
+    out << std::endl;
+
+    // Second line: the kernel that references it
+    const std::string kernel_name = p_kernel->function()->getName().str();
+
+    out << " found in ";
+    out << kernel_name;
+    out << '.';
+    out << std::endl;
+}
+
+/*
+ * Built-in functions
+ */
+
+/*
+ * Work-item built-ins. Each one forwards to the work-group currently running
+ * on this thread (g_work_group). The dimension index is spelled cl_uint
+ * everywhere; some of these used the non-standard uint typedef.
+ */
+
+/* get_global_id(): forwards to CPUKernelWorkGroup::getGlobalId(). */
+static size_t get_global_id(cl_uint dimindx)
+{
+    return g_work_group->getGlobalId(dimindx);
+}
+
+/* get_work_dim(): forwards to CPUKernelWorkGroup::getWorkDim(). */
+static cl_uint get_work_dim()
+{
+    return g_work_group->getWorkDim();
+}
+
+/* get_global_size(): forwards to CPUKernelWorkGroup::getGlobalSize(). */
+static size_t get_global_size(cl_uint dimindx)
+{
+    return g_work_group->getGlobalSize(dimindx);
+}
+
+/* get_local_size(): forwards to CPUKernelWorkGroup::getLocalSize(). */
+static size_t get_local_size(cl_uint dimindx)
+{
+    return g_work_group->getLocalSize(dimindx);
+}
+
+/* get_local_id(): forwards to CPUKernelWorkGroup::getLocalID(). */
+static size_t get_local_id(cl_uint dimindx)
+{
+    return g_work_group->getLocalID(dimindx);
+}
+
+/* get_num_groups(): forwards to CPUKernelWorkGroup::getNumGroups(). */
+static size_t get_num_groups(cl_uint dimindx)
+{
+    return g_work_group->getNumGroups(dimindx);
+}
+
+/* get_group_id(): forwards to CPUKernelWorkGroup::getGroupID(). */
+static size_t get_group_id(cl_uint dimindx)
+{
+    return g_work_group->getGroupID(dimindx);
+}
+
+/* get_global_offset(): forwards to CPUKernelWorkGroup::getGlobalOffset(). */
+static size_t get_global_offset(cl_uint dimindx)
+{
+    return g_work_group->getGlobalOffset(dimindx);
+}
+
+/* barrier(): forwards to CPUKernelWorkGroup::barrier(). */
+static void barrier(unsigned int flags)
+{
+    g_work_group->barrier(flags);
+}
+
+// Images
+
+/* Image query helpers, registered by getBuiltin() under "__cpu_" names. */
+
+/* Width of the image. */
+static int get_image_width(Image2D *image)
+{
+ return image->width();
+}
+
+/* Height of the image. */
+static int get_image_height(Image2D *image)
+{
+ return image->height();
+}
+
+/* Depth of the image, 1 when the object is not a 3D image. */
+static int get_image_depth(Image3D *image)
+{
+ if (image->type() != MemObject::Image3D)
+ return 1;
+
+ return image->depth();
+}
+
+/* Channel data type field of the image format. */
+static int get_image_channel_data_type(Image2D *image)
+{
+ return image->format().image_channel_data_type;
+}
+
+/* Channel order field of the image format. */
+static int get_image_channel_order(Image2D *image)
+{
+ return image->format().image_channel_order;
+}
+
+/* Stores the channel order and data type of the image format in *order and
+ * *type, then returns what the current work-group's getImageData() gives
+ * for (x, y, z). */
+static void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
+{
+ *order = image->format().image_channel_order;
+ *type = image->format().image_channel_data_type;
+
+ return g_work_group->getImageData(image, x, y, z);
+}
+
+/* Whether the object is a 3D image. */
+static bool is_image_3d(Image3D *image)
+{
+ return (image->type() == MemObject::Image3D ? 1 : 0);
+}
+
+/*
+ * Image write/read helpers, registered by getBuiltin() under "__cpu_" names.
+ * Each one forwards its arguments unchanged to writeImage() or readImage()
+ * of the work-group currently running on this thread. The variants differ
+ * only in the color/result element type (float, int32_t, uint32_t) and, for
+ * reads, in the coordinate type (int or float).
+ */
+
+/* Write with a float color. */
+static void write_imagef(Image2D *image, int x, int y, int z, float *color)
+{
+ g_work_group->writeImage(image, x, y, z, color);
+}
+
+/* Write with an int32_t color. */
+static void write_imagei(Image2D *image, int x, int y, int z, int32_t *color)
+{
+ g_work_group->writeImage(image, x, y, z, color);
+}
+
+/* Write with a uint32_t color. */
+static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color)
+{
+ g_work_group->writeImage(image, x, y, z, color);
+}
+
+/* Read into a float result, int coordinates. */
+static void read_imagefi(float *result, Image2D *image, int x, int y, int z,
+ int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/* Read into an int32_t result, int coordinates. */
+static void read_imageii(int32_t *result, Image2D *image, int x, int y, int z,
+ int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/* Read into a uint32_t result, int coordinates. */
+static void read_imageuii(uint32_t *result, Image2D *image, int x, int y, int z,
+ int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/* Read into a float result, float coordinates. */
+static void read_imageff(float *result, Image2D *image, float x, float y,
+ float z, int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/* Read into an int32_t result, float coordinates. */
+static void read_imageif(int32_t *result, Image2D *image, float x, float y,
+ float z, int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/* Read into a uint32_t result, float coordinates. */
+static void read_imageuif(uint32_t *result, Image2D *image, float x, float y,
+ float z, int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/* Dummy function to plug missing ARM ABI EH fxns: */
+/* Empty function; getBuiltin() returns it for the __aeabi_unwind_cpp_pr0/1/2
+ * symbols. */
+static void dummy_fxn(void)
+{
+}
+
+
+/*
+ * Bridge between LLVM and us
+ */
+/* Empty function; getBuiltin() returns it for any name it does not know. */
+static void unimplemented_stub()
+{
+}
+
+/**
+ * \brief Resolve the name of a built-in to the address of its implementation
+ *
+ * Unknown names are reported through \c builtinNotFound() of the work-group
+ * running on this thread, and resolve to an empty stub.
+ *
+ * \param name name of the function to resolve
+ * \return address of the native implementation, never null
+ */
+void *getBuiltin(const std::string &name)
+{
+    struct BuiltinEntry
+    {
+        const char *symbol;
+        void *address;
+    };
+
+    static const BuiltinEntry known_builtins[] = {
+        // Work-item functions
+        { "get_global_id", (void *)&get_global_id },
+        { "get_work_dim", (void *)&get_work_dim },
+        { "get_global_size", (void *)&get_global_size },
+        { "get_local_size", (void *)&get_local_size },
+        { "get_local_id", (void *)&get_local_id },
+        { "get_num_groups", (void *)&get_num_groups },
+        { "get_group_id", (void *)&get_group_id },
+        { "get_global_offset", (void *)&get_global_offset },
+        { "barrier", (void *)&barrier },
+
+        // Images
+        { "__cpu_get_image_width", (void *)&get_image_width },
+        { "__cpu_get_image_height", (void *)&get_image_height },
+        { "__cpu_get_image_depth", (void *)&get_image_depth },
+        { "__cpu_get_image_channel_data_type", (void *)&get_image_channel_data_type },
+        { "__cpu_get_image_channel_order", (void *)&get_image_channel_order },
+        { "__cpu_image_data", (void *)&image_data },
+        { "__cpu_is_image_3d", (void *)&is_image_3d },
+        { "__cpu_write_imagef", (void *)&write_imagef },
+        { "__cpu_write_imagei", (void *)&write_imagei },
+        { "__cpu_write_imageui", (void *)&write_imageui },
+        { "__cpu_read_imagefi", (void *)&read_imagefi },
+        { "__cpu_read_imageii", (void *)&read_imageii },
+        { "__cpu_read_imageuii", (void *)&read_imageuii },
+        { "__cpu_read_imageff", (void *)&read_imageff },
+        { "__cpu_read_imageif", (void *)&read_imageif },
+        { "__cpu_read_imageuif", (void *)&read_imageuif },
+
+        // Debugging and ARM ABI EH placeholders
+        { "debug", (void *)&printf },
+        { "__aeabi_unwind_cpp_pr0", (void *)&dummy_fxn },
+        { "__aeabi_unwind_cpp_pr1", (void *)&dummy_fxn },
+        { "__aeabi_unwind_cpp_pr2", (void *)&dummy_fxn },
+
+        // Math library disambiguation for OpenCL double functions of the same name.
+        { "builtin_sincos", (void *)&sincos },
+        { "builtin_lgamma_r", (void *)&lgamma_r },
+        { "builtin_modf", (void *)&modf },
+        { "builtin_remquo", (void *)&remquo },
+        { "builtin_pow", (void *)&pow },
+        { "builtin_exp10f", (void *)&exp10f },
+        { "builtin_exp10", (void *)&exp10 },
+
+#if 0
+        // Other misc functions Khronos tests say are builtins, though not in the spec!
+        { "memcpy", (void *)&memcpy },
+#endif
+    };
+
+    const size_t count = sizeof(known_builtins) / sizeof(known_builtins[0]);
+
+    for (size_t i = 0; i < count; ++i)
+    {
+        if (name == known_builtins[i].symbol)
+            return known_builtins[i].address;
+    }
+
+    // Function not found
+    g_work_group->builtinNotFound(name);
+
+    return (void *)&unimplemented_stub;
+}
diff --git a/src/core/cpu/builtins.h b/src/core/cpu/builtins.h
new file mode 100644
index 0000000..69143ea
--- /dev/null
+++ b/src/core/cpu/builtins.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file builtins.h
+ * \brief CPU built-in functions
+ */
+#ifndef __BUILTINS_H__
+#define __BUILTINS_H__
+
+#include <string>
+
+namespace Coal {
+ class CPUKernelWorkGroup;
+}
+
+/**
+ * \brief Set the current kernel work-group of this thread
+ * \param current \c Coal::CPUKernelWorkGroup to be set in \c g_work_group.
+ */
+void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current);
+
+/**
+ * \brief Return the address of a built-in function given its name
+ * \param name name of the built-in whose address is requested
+ */
+void *getBuiltin(const std::string &name);
+
+/**
+ * \brief Work-item stacks
+ * \see \ref barrier
+ * \param size size of the allocated space for stacks
+ * \return address of the allocated space for stacks
+ */
+void *getWorkItemsData(size_t &size);
+
+/**
+ * \brief Set work-item stacks
+ * \see \ref barrier
+ * \param ptr address of allocated space for stacks
+ * \param size size of the allocated space for stacks
+ */
+void setWorkItemsData(void *ptr, size_t size);
+
+/**
+ * \brief Increment a n-component vector given a maximum value
+ *
+ * This function treats \p vec as an odometer-style counter: the first element
+ * is incremented, and each time an element exceeds its maximum in \p maxs it
+ * wraps to zero and the next element is incremented in turn.
+ *
+ * For example, if \p dims is \c 3, \p vec starts at <tt>{0, 0, 0}</tt> and
+ * \p maxs is <tt>{2, 3, 1}</tt>, repeatedly calling this function with the
+ * same vector will produce the following results:
+ *
+ * \code
+ * {1, 0, 0}
+ * {2, 0, 0}
+ * {0, 1, 0}
+ * {1, 1, 0}
+ * {2, 1, 0}
+ * {0, 2, 0}
+ * {1, 2, 0}
+ * {2, 2, 0}
+ * ...
+ * \endcode
+ *
+ * Until \p vec reaches <tt>{2, 3, 1}</tt>.
+ *
+ * \param dims number of elements in the vectors
+ * \param vec vector whose elements will be incremented
+ * \param maxs vector containing a maximum value above which each corresponding
+ * element of \p vec cannot go.
+ * \return false if the increment was ok, true if \p vec was already at its
+ * maximum value and couldn't be further incremented (in which case every
+ * element has wrapped back to zero).
+ */
+template<typename T>
+bool incVec(unsigned long dims, T *vec, T *maxs)
+{
+    // Odometer-style increment: element 0 is the fastest-moving digit.
+    for (unsigned int idx = 0; idx < dims; ++idx)
+    {
+        vec[idx] += 1;
+
+        // Still within range: there is no carry to propagate, we are done.
+        if (!(vec[idx] > maxs[idx]))
+            return false;
+
+        // Went past its maximum: wrap to zero and carry into the next element.
+        vec[idx] = 0;
+    }
+
+    // Either the carry ran out of the last element (every element wrapped to
+    // zero, report overflow) or there was nothing to increment (dims == 0).
+    return dims != 0;
+}
+
+/**
+ * \brief Address of a pixel in an image
+ *
+ * This function is heavily used when Clover needs to address a pixel or a byte
+ * in a rectangular or three-dimensional image or buffer.
+ *
+ * \param base address of the first pixel in the image (address of the image itself)
+ * \param x X coordinate, cannot be bigger or equal to \c width
+ * \param y Y coordinate, cannot be bigger or equal to \c height
+ * \param z Z coordinate, cannot be bigger or equal to \c depth (1 for 2D arrays)
+ * \param row_pitch size in bytes of a row of pixels in the image
+ * \param slice_pitch size in bytes of a slice in a 3D array
+ * \param bytes_per_pixel bytes per pixel (1 for simple buffers), used when
+ * coordinates are in pixels and not in bytes.
+ */
+unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z,
+ size_t row_pitch, size_t slice_pitch,
+ unsigned int bytes_per_pixel);
+
+#endif
+
diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp
new file mode 100644
index 0000000..eb3fcb1
--- /dev/null
+++ b/src/core/cpu/device.cpp
@@ -0,0 +1,675 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/device.cpp
+ * \brief CPU Device
+ */
+
+#include "device.h"
+#include "buffer.h"
+#include "kernel.h"
+#include "program.h"
+#include "worker.h"
+#include "builtins.h"
+
+#include <core/config.h>
+#include "../propertylist.h"
+#include "../commandqueue.h"
+#include "../events.h"
+#include "../memobject.h"
+#include "../kernel.h"
+#include "../program.h"
+#include "../util.h"
+
+#include <cstring>
+#include <cstdlib>
+#include <unistd.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+using namespace Coal;
+
+#if !(defined(DSPC868X) || defined(SHAMROCK_BUILD))
+#include "../dsp/shmem.h"
+// unsigned arm_speed();
+#endif
+
+#define ONE_GIGABYTE (1 << 30)
+
+/**
+ * \brief Gather static information about the host CPU
+ *
+ * Reads the number of online processors, then scans /proc/cpuinfo for the
+ * clock speed ("cpu MHz") and a human-readable name ("model name" or
+ * "Processor"). If no clock speed is found there, cpufreq's
+ * cpuinfo_max_freq is tried, and finally 1000 MHz is assumed.
+ *
+ * No threads are created here; that is done by init().
+ */
+CPUDevice::CPUDevice()
+: DeviceInterface(), p_cores(0), p_num_events(0), p_workers(0), p_stop(false),
+  p_initialized(false)
+{
+    // sysconf() returns -1 on error. Storing that in the unsigned p_cores
+    // would make init() try to allocate and start ~4 billion workers, so fall
+    // back to a single core when the query fails.
+    long online_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    p_cores = (online_cores > 0) ? (unsigned int)online_cores : 1;
+    p_cpu_mhz = 0.0f;
+
+    std::ifstream cpuinfo("/proc/cpuinfo");
+
+    // good() (rather than !eof()) also ends the loop when the file could not
+    // be opened or a read error occurs, instead of spinning forever.
+    while (cpuinfo.good())
+    {
+        std::string key, value;
+
+        // Each entry looks like "key<tabs>: value"
+        std::getline(cpuinfo, key, ':');
+        cpuinfo.ignore(1);                  // Skip the space after the colon
+        std::getline(cpuinfo, value);
+
+        if (key.compare(0, 7, "cpu MHz") == 0)
+        {
+            std::istringstream ss(value);
+            ss >> p_cpu_mhz;
+        }
+
+        if (key.compare(0, 10, "model name") == 0)
+            p_device_name = value;
+
+        if (key.compare(0, 9, "Processor") == 0)
+            p_device_name = value;
+    }
+
+    if (p_cpu_mhz == 0.0f)
+    {
+        // The value read from this file is divided by 1000 to obtain MHz
+        std::string file("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq");
+        std::ifstream fs(file.c_str());
+        if (fs) { fs >> p_cpu_mhz; p_cpu_mhz /= 1000; }
+    }
+
+    // Last resort: report a nominal 1 GHz
+    if (p_cpu_mhz == 0.0f) p_cpu_mhz = 1000.0;
+
+#if !defined(DSPC868X)
+    // p_cpu_mhz = arm_speed();
+#endif
+}
+
+
+/**
+ * \brief Start the worker threads (only the first call does anything)
+ */
+void CPUDevice::init()
+{
+    if (!p_initialized)
+    {
+        // Synchronisation objects guarding the event queue
+        pthread_cond_init(&p_events_cond, 0);
+        pthread_mutex_init(&p_events_mutex, 0);
+
+        // One worker thread per CPU reported by numCPUs()
+        const unsigned int worker_count = numCPUs();
+        p_workers = (pthread_t *)std::malloc(worker_count * sizeof(pthread_t));
+
+        for (unsigned int idx = 0; idx != worker_count; ++idx)
+            pthread_create(p_workers + idx, 0, &worker, this);
+
+        p_initialized = true;
+    }
+}
+
+/**
+ * \brief Stop and join the worker threads, then release what init() created
+ *
+ * Does nothing if init() was never called.
+ */
+CPUDevice::~CPUDevice()
+{
+    if (p_initialized)
+    {
+        // Raise the stop flag under the lock and wake every thread waiting on
+        // the condition variable so it can notice the flag.
+        pthread_mutex_lock(&p_events_mutex);
+        p_stop = true;
+        pthread_cond_broadcast(&p_events_cond);
+        pthread_mutex_unlock(&p_events_mutex);
+
+        const unsigned int worker_count = numCPUs();
+
+        for (unsigned int idx = 0; idx != worker_count; ++idx)
+            pthread_join(p_workers[idx], 0);
+
+        // All workers are gone: tear down what init() set up
+        std::free((void *)p_workers);
+        pthread_mutex_destroy(&p_events_mutex);
+        pthread_cond_destroy(&p_events_cond);
+    }
+}
+
+/** \brief Create the CPU-specific buffer object backing \p buffer */
+DeviceBuffer *CPUDevice::createDeviceBuffer(MemObject *buffer, cl_int *rs)
+{
+    CPUBuffer *cpu_buffer = new CPUBuffer(this, buffer, rs);
+
+    return (DeviceBuffer *)cpu_buffer;
+}
+
+/** \brief Create the CPU-specific program object for \p program */
+DeviceProgram *CPUDevice::createDeviceProgram(Program *program)
+{
+    CPUProgram *cpu_program = new CPUProgram(this, program);
+
+    return (DeviceProgram *)cpu_program;
+}
+
+/** \brief Create the CPU-specific kernel object for \p kernel and its LLVM \p function */
+DeviceKernel *CPUDevice::createDeviceKernel(Kernel *kernel,
+                                            llvm::Function *function)
+{
+    CPUKernel *cpu_kernel = new CPUKernel(this, kernel, function);
+
+    return (DeviceKernel *)cpu_kernel;
+}
+
+/**
+ * \brief Prepare the device-specific part of \p event before it is queued
+ *
+ * - Map events get the host pointer (and, for images, the pitches) they will
+ *   hand back to the caller.
+ * - Kernel events make sure the program's JIT is initialised and receive a
+ *   \c CPUKernelEvent as device data.
+ * - Every other event type needs nothing.
+ *
+ * \return \c CL_SUCCESS, or \c CL_INVALID_PROGRAM_EXECUTABLE if the JIT could
+ *         not be initialised for a kernel event.
+ */
+cl_int CPUDevice::initEventDeviceData(Event *event)
+{
+    switch (event->type())
+    {
+        case Event::MapBuffer:
+        {
+            MapBufferEvent *map_event = (MapBufferEvent *)event;
+            CPUBuffer *cpu_buffer =
+                (CPUBuffer *)map_event->buffer()->deviceBuffer(this);
+            unsigned char *base = (unsigned char *)cpu_buffer->data();
+
+            // The mapped pointer is the buffer's data advanced by the offset
+            map_event->setPtr((void *)(base + map_event->offset()));
+            break;
+        }
+
+        case Event::MapImage:
+        {
+            MapImageEvent *map_event = (MapImageEvent *)event;
+            Image2D *image = (Image2D *)map_event->buffer();
+            CPUBuffer *cpu_buffer = (CPUBuffer *)image->deviceBuffer(this);
+            unsigned char *base = (unsigned char *)cpu_buffer->data();
+
+            // Address of the pixel at the requested origin
+            unsigned char *mapped = imageData(base,
+                                              map_event->origin(0),
+                                              map_event->origin(1),
+                                              map_event->origin(2),
+                                              image->row_pitch(),
+                                              image->slice_pitch(),
+                                              image->pixel_size());
+
+            map_event->setPtr((void *)mapped);
+            map_event->setRowPitch(image->row_pitch());
+            map_event->setSlicePitch(image->slice_pitch());
+            break;
+        }
+
+        case Event::UnmapMemObject:
+            // Nothing to do
+            break;
+
+        case Event::NDRangeKernel:
+        case Event::TaskKernel:
+        {
+            KernelEvent *kernel_event = (KernelEvent *)event;
+            Program *program = (Program *)kernel_event->kernel()->parent();
+            CPUProgram *cpu_program =
+                (CPUProgram *)program->deviceDependentProgram(this);
+
+            // The kernel cannot run without a working JIT
+            if (!cpu_program->initJIT())
+                return CL_INVALID_PROGRAM_EXECUTABLE;
+
+            // Attach the CPU-specific bookkeeping to the event
+            CPUKernelEvent *cpu_event = new CPUKernelEvent(this, kernel_event);
+            kernel_event->setDeviceData((void *)cpu_event);
+            break;
+        }
+
+        default:
+            break;
+    }
+
+    return CL_SUCCESS;
+}
+
+/**
+ * \brief Release the device data attached by initEventDeviceData()
+ *
+ * Only kernel events own device data (a \c CPUKernelEvent).
+ */
+void CPUDevice::freeEventDeviceData(Event *event)
+{
+    switch (event->type())
+    {
+        case Event::NDRangeKernel:
+        case Event::TaskKernel:
+            // Deleting a null pointer is a no-op, so no explicit check is needed
+            delete (CPUKernelEvent *)event->deviceData();
+            break;
+
+        default:
+            break;
+    }
+}
+
+/** \brief Append \p event to the device queue and wake the waiting workers */
+void CPUDevice::pushEvent(Event *event)
+{
+    pthread_mutex_lock(&p_events_mutex);
+
+    // The element count is tracked by hand next to the list
+    // (way faster than STL list::size()).
+    p_events.push_back(event);
+    ++p_num_events;
+
+    pthread_cond_broadcast(&p_events_cond);
+    pthread_mutex_unlock(&p_events_mutex);
+}
+
+/**
+ * \brief Block until an event is available (or the device is stopping)
+ *
+ * The event at the front of the queue is returned. It is removed from the
+ * queue unless it is a kernel event whose \c CPUKernelEvent::reserve() returns
+ * false, in which case it stays at the front for the next caller.
+ *
+ * \param stop set to true when the device is shutting down; left untouched
+ *             otherwise
+ * \return the event to run, or 0 when \p stop has been set
+ */
+Event *CPUDevice::getEvent(bool &stop)
+{
+    pthread_mutex_lock(&p_events_mutex);
+
+    // Sleep until there is work to do or we are asked to stop
+    while (!p_stop && p_num_events == 0)
+        pthread_cond_wait(&p_events_cond, &p_events_mutex);
+
+    Event *next = 0;
+
+    if (p_stop)
+    {
+        stop = true;
+    }
+    else
+    {
+        next = p_events.front();
+
+        // Non-kernel events are always consumed; kernel events decide through
+        // reserve() whether this hand-out is the one that consumes them.
+        bool consumed = true;
+
+        if (next->type() == Event::NDRangeKernel ||
+            next->type() == Event::TaskKernel)
+        {
+            CPUKernelEvent *kernel_event = (CPUKernelEvent *)next->deviceData();
+            consumed = kernel_event->reserve();
+        }
+
+        if (consumed)
+        {
+            --p_num_events;
+            p_events.pop_front();
+        }
+    }
+
+    pthread_mutex_unlock(&p_events_mutex);
+
+    return next;
+}
+
+/******************************************************************************
+* Device's decision about whether CommandQueue should push more events over
+* This number could be tuned (e.g. using ooo example). Note that p_num_events
+* are in device's queue, but not yet executed.
+******************************************************************************/
+bool CPUDevice::gotEnoughToWorkOn()
+{
+    // True as soon as at least one event is waiting in the device queue
+    return p_num_events != 0;
+}
+
+// Number of processors recorded by the constructor; also the number of worker
+// threads created by init().
+unsigned int CPUDevice::numCPUs() const
+{
+ return p_cores;
+}
+
+// CPU clock speed in MHz as determined by the constructor.
+float CPUDevice::cpuMhz() const
+{
+ return p_cpu_mhz;
+}
+
+// From the inner parentheses to the outer ones:
+//
+// sizeof * 8 => 8
+// -1 => 7
+// 1 << $ => 10000000
+// -1 => 01111111
+// *2 => 11111110
+// +1 => 11111111
+//
+// A simple way to do this is (1 << (sizeof(type) * 8)) - 1, but it overflows
+// the type (for int8, 1 << $ = 100000000 = 256 > 255)
+#define TYPE_MAX(type) ((((type)1 << ((sizeof(type) * 8) - 1)) - 1) * 2 + 1)
+
+// Report one property of the CPU device.
+//
+// param_name selects the property. The switch below points `value` at the
+// data to return and sets `value_length` to its size in bytes; the
+// SIMPLE_ASSIGN / STRING_ASSIGN macros are defined outside this file and
+// presumably do exactly that for scalar and string values (TODO confirm).
+//
+// Returns CL_INVALID_VALUE when param_name is not handled, or when
+// param_value is non-null but param_value_size is smaller than the value.
+// Otherwise the size is stored in *param_value_size_ret (if non-null), the
+// value is copied to param_value (if non-null) and CL_SUCCESS is returned.
+cl_int CPUDevice::info(cl_device_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ void *value = 0;
+ size_t value_length = 0;
+
+ // Scratch storage for the returned value, one member per possible type
+ union {
+ cl_device_type cl_device_type_var;
+ cl_uint cl_uint_var;
+ size_t size_t_var;
+ cl_ulong cl_ulong_var;
+ cl_bool cl_bool_var;
+ cl_device_fp_config cl_device_fp_config_var;
+ cl_device_mem_cache_type cl_device_mem_cache_type_var;
+ cl_device_local_mem_type cl_device_local_mem_type_var;
+ cl_device_exec_capabilities cl_device_exec_capabilities_var;
+ cl_command_queue_properties cl_command_queue_properties_var;
+ cl_platform_id cl_platform_id_var;
+ size_t work_dims[MAX_WORK_DIMS];
+ };
+
+ switch (param_name)
+ {
+ case CL_DEVICE_TYPE:
+ SIMPLE_ASSIGN(cl_device_type, CL_DEVICE_TYPE_CPU);
+ break;
+
+ case CL_DEVICE_VENDOR_ID:
+ SIMPLE_ASSIGN(cl_uint, 0);
+ break;
+
+ case CL_DEVICE_MAX_COMPUTE_UNITS:
+ SIMPLE_ASSIGN(cl_uint, numCPUs());
+ break;
+
+ case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+ SIMPLE_ASSIGN(cl_uint, MAX_WORK_DIMS);
+ break;
+
+ case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+ SIMPLE_ASSIGN(size_t, ONE_GIGABYTE);
+ break;
+
+ case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+ for (int i=0; i<MAX_WORK_DIMS; ++i)
+ {
+ work_dims[i] = ONE_GIGABYTE;
+ }
+ value_length = MAX_WORK_DIMS * sizeof(size_t);
+ value = &work_dims;
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+ SIMPLE_ASSIGN(cl_uint, 16);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+ SIMPLE_ASSIGN(cl_uint, 8);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+ SIMPLE_ASSIGN(cl_uint, 4);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+ SIMPLE_ASSIGN(cl_uint, 4);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+ SIMPLE_ASSIGN(cl_uint, cpuMhz());
+ break;
+
+ case CL_DEVICE_ADDRESS_BITS:
+ SIMPLE_ASSIGN(cl_uint, 8*sizeof(void *));
+ break;
+
+ case CL_DEVICE_MAX_READ_IMAGE_ARGS:
+ SIMPLE_ASSIGN(cl_uint, 0); //images not supported
+ break;
+
+ case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
+ SIMPLE_ASSIGN(cl_uint, 0); // images not supported
+ break;
+
+ case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+ SIMPLE_ASSIGN(size_t, 0); // images not supported
+ break;
+
+ case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE3D_MAX_WIDTH:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE3D_MAX_DEPTH:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE_SUPPORT:
+ SIMPLE_ASSIGN(cl_bool, CL_FALSE); //images not supported
+ break;
+
+ case CL_DEVICE_MAX_PARAMETER_SIZE:
+ SIMPLE_ASSIGN(size_t, 65536);
+ break;
+
+ case CL_DEVICE_MAX_SAMPLERS:
+ SIMPLE_ASSIGN(cl_uint, 0); //images not supported
+ break;
+
+ case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
+ SIMPLE_ASSIGN(cl_uint, 1024 /* sizeof(long16)*8) */); // 128 byte
+ break;
+
+ case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
+ SIMPLE_ASSIGN(cl_uint, 16);
+ break;
+
+ case CL_DEVICE_SINGLE_FP_CONFIG:
+ // TODO: Check what an x86 SSE engine can support.
+ // Currently not supporting CL_FP_DENORM
+ SIMPLE_ASSIGN(cl_device_fp_config,
+ CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST);
+ break;
+
+ case CL_DEVICE_DOUBLE_FP_CONFIG:
+ // These are minimally required to be supported by the OCL spec:
+ SIMPLE_ASSIGN(cl_device_fp_config,
+ CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
+ CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
+ SIMPLE_ASSIGN(cl_device_mem_cache_type,
+ CL_READ_WRITE_CACHE);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
+ // TODO: Get this information from the processor
+ SIMPLE_ASSIGN(cl_uint, 16);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
+ // TODO: Get this information from the processor
+ SIMPLE_ASSIGN(cl_ulong, 512*1024*1024);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_SIZE:
+ // parse /proc/meminfo to get the value
+ // NOTE(review): the parsed value is multiplied by 1024, so it is
+ // presumably in KiB with 512*1024 as the fallback -- confirm against
+ // parse_file_line_value().
+ SIMPLE_ASSIGN(cl_ulong, parse_file_line_value("/proc/meminfo",
+ "MemTotal:", 512*1024) * 1024);
+ break;
+
+ case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+ case CL_DEVICE_LOCAL_MEM_SIZE:
+ case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+ // TODO: 1 GiB seems to be enough for software acceleration
+ // (512 MiB is reported instead when building for ARM)
+
+#if defined(__arm__)
+ SIMPLE_ASSIGN(cl_ulong, 512*1024*1024);
+#else
+ SIMPLE_ASSIGN(cl_ulong, 1*1024*1024*1024);
+#endif
+ break;
+
+ case CL_DEVICE_MAX_CONSTANT_ARGS:
+ SIMPLE_ASSIGN(cl_uint, 65536);
+ break;
+
+ case CL_DEVICE_LOCAL_MEM_TYPE:
+ SIMPLE_ASSIGN(cl_device_local_mem_type, CL_GLOBAL);
+ break;
+
+
+ case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
+ SIMPLE_ASSIGN(cl_bool, CL_FALSE);
+ break;
+
+ case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
+ // TODO
+ SIMPLE_ASSIGN(size_t, 1000); // 1000 nanoseconds = 1 microsecond
+ break;
+
+ case CL_DEVICE_ENDIAN_LITTLE:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_AVAILABLE:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_COMPILER_AVAILABLE:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_EXECUTION_CAPABILITIES:
+ SIMPLE_ASSIGN(cl_device_exec_capabilities, CL_EXEC_KERNEL |
+ CL_EXEC_NATIVE_KERNEL);
+ break;
+
+ case CL_DEVICE_QUEUE_PROPERTIES:
+ SIMPLE_ASSIGN(cl_command_queue_properties,
+ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
+ CL_QUEUE_PROFILING_ENABLE);
+ break;
+
+ case CL_DEVICE_NAME:
+ // Name read from /proc/cpuinfo by the constructor, including its
+ // terminating NUL
+ value_length = p_device_name.size() + 1;
+ value = const_cast<char*>(p_device_name.c_str());
+ break;
+
+ case CL_DEVICE_VENDOR:
+ STRING_ASSIGN("Generic");
+ break;
+
+ case CL_DRIVER_VERSION:
+ STRING_ASSIGN("" COAL_VERSION);
+ break;
+
+ case CL_DEVICE_PROFILE:
+ STRING_ASSIGN("FULL_PROFILE");
+ break;
+
+ case CL_DEVICE_VERSION:
+ STRING_ASSIGN("OpenCL 1.1 " COAL_VERSION);
+ break;
+
+ case CL_DEVICE_EXTENSIONS:
+ STRING_ASSIGN("cl_khr_global_int32_base_atomics"
+ " cl_khr_global_int32_extended_atomics"
+ " cl_khr_local_int32_base_atomics"
+ " cl_khr_local_int32_extended_atomics"
+ " cl_khr_byte_addressable_store"
+
+ " cl_khr_fp64"
+ " cl_khr_int64_base_atomics"
+ " cl_khr_int64_extended_atomics")
+
+ break;
+
+ case CL_DEVICE_PLATFORM:
+ SIMPLE_ASSIGN(cl_platform_id, 0);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
+ SIMPLE_ASSIGN(cl_uint, 0);
+ break;
+
+ case CL_DEVICE_HOST_UNIFIED_MEMORY:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
+ SIMPLE_ASSIGN(cl_uint, 16);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
+ SIMPLE_ASSIGN(cl_uint, 8);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
+ SIMPLE_ASSIGN(cl_uint, 4);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
+ SIMPLE_ASSIGN(cl_uint, 4);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
+ SIMPLE_ASSIGN(cl_uint, 0);
+ break;
+
+ case CL_DEVICE_OPENCL_C_VERSION:
+ STRING_ASSIGN("OpenCL C 1.1 LLVM " LLVM_VERSION);
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ // A destination buffer was supplied but it is too small for the value
+ if (param_value && param_value_size < value_length)
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+#if !defined(DSPC868X)
+#if 0 // /dev/mem is no longer available
+/**
+ * \brief Compute the ARM clock speed in MHz from a PLL control register
+ *
+ * Maps the page at 0x02620000, reads the 32-bit word at offset 0x370 and
+ * decodes the pre-divider, multiplier and output divider fields from it.
+ * Currently compiled out (see the enclosing \c \#if 0).
+ */
+unsigned arm_speed()
+{
+    //return 1000.0;
+    const unsigned TETRIS_PLL = 125000000;
+    const unsigned pagesize = 0x1000;
+
+    shmem_persistent page;
+    page.configure(0x02620000, pagesize);
+    char *host_msmc = (char*)page.map(0x02620000, pagesize);
+    unsigned SECPLLCTL0 = *(unsigned*)(host_msmc + 0x370);
+    unsigned prediv = 1 + (SECPLLCTL0 & 0x3F);
+    unsigned mult = 1 + ((SECPLLCTL0 >> 6) & 0x1FFF);
+    unsigned output_div = 1 + ((SECPLLCTL0 >> 19) & 0xF);
+
+    // mult can be as large as 8192, so 125 MHz * mult does not fit in 32 bits:
+    // do the multiplication in 64-bit arithmetic before dividing back down.
+    unsigned long long speed =
+        (unsigned long long)TETRIS_PLL * mult / prediv / output_div;
+    page.unmap(host_msmc, pagesize);
+
+    return (unsigned)(speed / 1000000);
+}
+#endif
+#endif
+
diff --git a/src/core/cpu/device.h b/src/core/cpu/device.h
new file mode 100644
index 0000000..a0ad6ef
--- /dev/null
+++ b/src/core/cpu/device.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/device.h
+ * \brief CPU device
+ */
+
+#ifndef __CPU_DEVICE_H__
+#define __CPU_DEVICE_H__
+
+#include "../deviceinterface.h"
+
+#include <pthread.h>
+#include <list>
+#include <string>
+
+namespace Coal
+{
+
+class MemObject;
+class Event;
+class Program;
+class Kernel;
+
+/**
+ * \brief CPU device
+ *
+ * This class is the base of all the CPU-accelerated OpenCL processing. It
+ * creates and manages subclasses such as \c Coal::DeviceBuffer,
+ * \c Coal::DeviceProgram and \c Coal::DeviceKernel.
+ *
+ * This class and the aforementioned ones work together to compile and run
+ * kernels using the LLVM JIT, manage buffers, provide built-in functions
+ * and do all of this in a multithreaded fashion using worker threads.
+ *
+ * \see \ref events
+ */
+class CPUDevice : public DeviceInterface
+{
+ public:
+ /**
+ * \brief Gather information about the host system (core count, clock
+ * speed, processor name); no thread is started here
+ */
+ CPUDevice();
+ /** \brief Stop and join the worker threads if \c init() was called */
+ ~CPUDevice();
+
+ /**
+ * \brief Initialize the CPU device
+ *
+ * This function creates the worker threads (one per CPU returned by
+ * \c numCPUs()) and the locking machinery of the event queue. Calling
+ * it again once the device is initialized does nothing. The host
+ * information returned by \c numCPUs() and \c cpuMhz() is gathered by
+ * the constructor, not here.
+ */
+ void init();
+
+ cl_int info(cl_device_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ DeviceBuffer *createDeviceBuffer(MemObject *buffer, cl_int *rs);
+ DeviceProgram *createDeviceProgram(Program *program);
+ DeviceKernel *createDeviceKernel(Kernel *kernel,
+ llvm::Function *function);
+
+ cl_int initEventDeviceData(Event *event);
+ void freeEventDeviceData(Event *event);
+
+ void pushEvent(Event *event); /*!< \brief Queue an event and wake the workers */
+ Event *getEvent(bool &stop); /*!< \brief Wait for the next event; sets \p stop when the device is shutting down */
+ bool gotEnoughToWorkOn(); /*!< \brief True when at least one event is queued */
+
+ unsigned int numCPUs() const; /*!< \brief Number of logical CPU cores on the system */
+ float cpuMhz() const; /*!< \brief Speed of the CPU in Mhz */
+
+ std::string builtinsHeader(void) const { return "cpu.h"; }
+
+ private:
+ // p_cores: value returned by numCPUs(); p_num_events: number of
+ // entries currently in p_events (kept by hand next to the list)
+ unsigned int p_cores, p_num_events;
+ float p_cpu_mhz; // Value returned by cpuMhz()
+ std::string p_device_name; // Reported for CL_DEVICE_NAME
+ pthread_t *p_workers; // Worker threads, allocated by init()
+
+ std::list<Event *> p_events; // Events waiting for a worker
+ pthread_cond_t p_events_cond; // Signalled on new event or stop
+ pthread_mutex_t p_events_mutex; // Protects p_events, p_num_events, p_stop
+ // p_stop: set by the destructor to make the workers exit;
+ // p_initialized: set once init() has run
+ bool p_stop, p_initialized;
+};
+
+}
+
+#endif
diff --git a/src/core/cpu/kernel.cpp b/src/core/cpu/kernel.cpp
new file mode 100644
index 0000000..ef09f6b
--- /dev/null
+++ b/src/core/cpu/kernel.cpp
@@ -0,0 +1,734 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/kernel.cpp
+ * \brief CPU kernel
+ */
+
+#include "kernel.h"
+#include "device.h"
+#include "buffer.h"
+#include "program.h"
+#include "builtins.h"
+
+#include "../kernel.h"
+#include "../memobject.h"
+#include "../events.h"
+#include "../program.h"
+
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
+#include <llvm/ExecutionEngine/ExecutionEngine.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <sys/mman.h>
+
+using namespace Coal;
+
+/**
+ * \brief Build the LLVM stub used to call \p function with packed arguments
+ *
+ * \param device   CPU device this kernel belongs to
+ * \param kernel   device-independent kernel (provides name and arguments)
+ * \param function LLVM function implementing the kernel
+ */
+CPUKernel::CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function)
+: DeviceKernel(), p_device(device), p_kernel(kernel), p_function(function),
+  p_call_function(0)
+{
+    pthread_mutex_init(&p_call_function_mutex, 0);
+
+    /* Create a stub function in the form of
+     *
+     * void stub(void *args) {
+     *     kernel(*(int *)((char *)args + 0),
+     *            *(float **)((char *)args + sizeof(int)),
+     *            *(sampler_t *)((char *)args + sizeof(int) + sizeof(float *)));
+     * }
+     *
+     * In LLVM, it is expressed in the form of :
+     *
+     * @stub(i8* args) {
+     *     kernel(
+     *         load(i32* bitcast(i8* getelementptr(i8* args, i64 0), i32*)),
+     *         load(float** bitcast(i8* getelementptr(i8* args, i64 4), float**)),
+     *         ...
+     *     );
+     * }
+     */
+    const char *fn_name = kernel->p_name.c_str();
+
+    // Only needed by the disabled JIT lookup below; kept so the calls made by
+    // the original code are preserved.
+    Program *p = (Program *)kernel->parent();
+    CPUProgram *prog = (CPUProgram *)(p->deviceDependentProgram(device));
+    (void)prog;
+    //llvm::Function *t_function = prog->jit()->FindFunctionNamed(fn_name);
+
+    // The stub is called "_stub<kernel name>". A std::string replaces the
+    // former malloc()/sprintf() buffer, which was never freed and whose
+    // allocation was not checked.
+    std::string stub_name("_stub");
+    stub_name += fn_name;
+
+    llvm::FunctionType *kernel_function_type = function->getFunctionType();
+    llvm::FunctionType *stub_function_type = llvm::FunctionType::get(
+        function->getReturnType(),
+        llvm::Type::getInt8PtrTy(
+            function->getContext()),
+        false);
+    llvm::Function *stub_function = llvm::Function::Create(
+        stub_function_type,
+        llvm::Function::InternalLinkage,
+        stub_name,
+        function->getParent());
+
+    // Insert a basic block
+    llvm::BasicBlock *basic_block = llvm::BasicBlock::Create(
+        function->getContext(),
+        "",
+        stub_function);
+
+    // Create the function arguments
+    llvm::Argument &stub_arg = stub_function->getArgumentList().front();
+    llvm::SmallVector<llvm::Value *, 8> args;
+    size_t args_offset = 0;
+
+    for (unsigned int i=0; i<kernel_function_type->getNumParams(); ++i)
+    {
+        llvm::Type *param_type = kernel_function_type->getParamType(i);
+        llvm::Type *param_type_ptr = param_type->getPointerTo(); // We'll use pointers to the value
+        const Kernel::Arg *arg = p_kernel->arg(i);
+
+        // Calculate the size of the arg
+        size_t arg_size = arg->valueSize() * arg->vecDim();
+
+        // Get where to place this argument
+        size_t arg_offset = typeOffset(args_offset, arg_size);
+
+        // %1 = getelementptr(args, $arg_offset);
+        llvm::Value *getelementptr = llvm::GetElementPtrInst::CreateInBounds(
+            &stub_arg,
+            llvm::ConstantInt::get(stub_function->getContext(),
+                                   llvm::APInt(64, arg_offset)),
+            "",
+            basic_block);
+
+        // %2 = bitcast(%1, $param_type_ptr)
+        llvm::Value *bitcast = new llvm::BitCastInst(
+            getelementptr,
+            param_type_ptr,
+            "",
+            basic_block);
+
+        // %3 = load(%2)
+        // NOTE(review): arg_size is passed as the load alignment; confirm it
+        // is always a power of two (e.g. for 3-component vectors).
+        llvm::Value *load = new llvm::LoadInst(
+            bitcast,
+            "",
+            false,
+            arg_size,   // We ensure that an argument is always aligned on its size, it enables things like fast movaps
+            basic_block);
+
+        // We have the value, send it to the function
+        args.push_back(load);
+    }
+
+    // Create the call instruction
+    llvm::CallInst *call_inst = llvm::CallInst::Create(
+        function,
+        args,
+        "",
+        basic_block);
+    call_inst->setCallingConv(function->getCallingConv());
+    call_inst->setTailCall();
+
+    // Create a return instruction to end the stub
+    llvm::ReturnInst::Create(
+        function->getContext(),
+        basic_block);
+
+    // Retain the function so that it can be reused
+    p_call_function = stub_function;
+}
+
+/** \brief Remove the stub function from its module and destroy the mutex */
+CPUKernel::~CPUKernel()
+{
+    llvm::Function *stub = p_call_function;
+
+    if (stub != 0)
+        stub->eraseFromParent();
+
+    pthread_mutex_destroy(&p_call_function_mutex);
+}
+
+/** \brief Maximum work-group size, taken from CL_DEVICE_MAX_WORK_GROUP_SIZE */
+size_t CPUKernel::workGroupSize()
+{
+    size_t max_group_size;
+    size_t reported_length;
+
+    // Ask the device and pass its answer straight through
+    p_device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_group_size),
+                   &max_group_size, &reported_length);
+
+    return max_group_size;
+}
+
+// Local memory used by the kernel; not computed yet, always reports 0.
+cl_ulong CPUKernel::localMemSize() const
+{
+ return 0; // TODO
+}
+
+// Private memory used by the kernel; not computed yet, always reports 0.
+cl_ulong CPUKernel::privateMemSize() const
+{
+ return 0; // TODO
+}
+
+/** \brief Preferred work-group size multiple: the number of CPUs of the device */
+size_t CPUKernel::preferredWorkGroupSizeMultiple() const
+{
+    return p_device->numCPUs();
+}
+
+/**
+ * \brief Integer power: \p base multiplied by itself \p e times
+ *
+ * \param base value to raise
+ * \param e    exponent; 0 yields 1 (it used to yield \p base)
+ * \return \p base to the power \p e; overflow is not detected
+ */
+template<typename T>
+T k_exp(T base, unsigned int e)
+{
+    // Start from the multiplicative identity so that e == 0 is handled too
+    T rs = 1;
+
+    for (unsigned int i=0; i<e; ++i)
+        rs *= base;
+
+    return rs;
+}
+
+// Try to find the size a work group has to have to be executed the fastest on
+// the CPU.
+//
+// \param num_dims number of dimensions of the range being enqueued
+// \param dim dimension being sized (currently unused)
+// \param global_work_size global size of that dimension
+// \return a work-group size that evenly divides \p global_work_size
+size_t CPUKernel::guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
+                                      size_t global_work_size) const
+{
+    unsigned int cpus = p_device->numCPUs();
+
+    // Don't break in too small parts
+    // NOTE(review): the comment above suggests *small* ranges should be kept
+    // whole, yet this test keeps the *large* ones whole. Left unchanged;
+    // confirm the intended direction of the comparison.
+    if (k_exp(global_work_size, num_dims) > 64)
+        return global_work_size;
+
+    // Without a CPU count there is nothing to split on (and the modulo below
+    // would divide by zero): run everything as a single work-group.
+    if (cpus == 0)
+        return global_work_size;
+
+    // Find the divisor of global_work_size the closest to cpus but >= than it
+    unsigned int divisor = cpus;
+
+    while (true)
+    {
+        if ((global_work_size % divisor) == 0)
+            break;
+
+        // Don't let the loop go up to global_work_size, the overhead would be
+        // too huge
+        if (divisor > global_work_size || divisor > cpus * 32)
+        {
+            divisor = 1; // Not parallel but has no CommandQueue overhead
+            break;
+        }
+
+        // Try the next candidate. Without this step the loop never made
+        // progress and spun forever whenever cpus did not divide
+        // global_work_size.
+        ++divisor;
+    }
+
+    // Return the size
+    return global_work_size / divisor;
+}
+
+// Accessor for the llvm::Function this kernel was constructed with.
+llvm::Function *CPUKernel::function() const
+{
+    llvm::Function *kernel_function = p_function;
+
+    return kernel_function;
+}
+
+// Accessor for the device-independent Coal::Kernel object.
+Kernel *CPUKernel::kernel() const
+{
+    Kernel *owner = p_kernel;
+
+    return owner;
+}
+
+// Accessor for the CPU device this kernel is attached to.
+CPUDevice *CPUKernel::device() const
+{
+    CPUDevice *target = p_device;
+
+    return target;
+}
+
+// Round k up to the nearest power of two; 0 is mapped to 1 and a value that
+// already is a power of two is returned unchanged.
+// From Wikipedia : http://www.wikipedia.org/wiki/Power_of_two#Algorithm_to_round_up_to_power_of_two
+template <class T>
+T next_power_of_two(T k)
+{
+    if (k == 0)
+        return 1;
+
+    // Copy the highest set bit of (k - 1) into every lower bit position, so
+    // that adding one gives the next power of two.
+    k--;
+
+    int shift = 1;
+
+    while (shift < sizeof(T)*8)
+    {
+        k = k | (k >> shift);
+        shift <<= 1;
+    }
+
+    return k+1;
+}
+
+// Compute where a value of type_len bytes goes in the argument buffer.
+//
+// \param offset first free byte of the buffer; updated to point just past the
+//               slot reserved for this value
+// \param type_len size in bytes of the value
+// \return aligned offset at which the value must be stored
+size_t CPUKernel::typeOffset(size_t &offset, size_t type_len)
+{
+    // Align on type_len rounded up to a power of two, so that masking the low
+    // bits yields a multiple of it
+    type_len = next_power_of_two(type_len);
+    size_t mask = ~(type_len - 1);
+
+    // Round offset up to the next multiple of type_len. The former loop
+    // tested `rs & mask != rs`, which parses as `rs & (mask != rs)` because
+    // != binds tighter than &; it therefore only ever rounded up to an even
+    // offset instead of aligning on type_len.
+    size_t rs = (offset + type_len - 1) & mask;
+
+    // Where to try to place the next value
+    offset = rs + type_len;
+
+    return rs;
+}
+
+// Return the LLVM stub used to invoke this kernel, building it on first use.
+//
+// The stub takes a single i8* pointing at a packed argument buffer, loads each
+// kernel parameter from it (at the offsets given by typeOffset()) and calls
+// the kernel function looked up by name in the program's execution engine.
+// The stub is cached in p_call_function; the lookup of the cache and the
+// construction both happen with p_call_function_mutex held.
+llvm::Function *CPUKernel::callFunction()
+{
+ const char *fn_name;
+ pthread_mutex_lock(&p_call_function_mutex);
+
+ // If we can reuse the same function between work groups, do it
+ if (p_call_function)
+ {
+ llvm::Function *rs = p_call_function;
+ pthread_mutex_unlock(&p_call_function_mutex);
+
+ return rs;
+ }
+
+ /* Create a stub function in the form of
+ *
+ * void stub(void *args) {
+ * kernel(*(int *)((char *)args + 0),
+ * *(float **)((char *)args + sizeof(int)),
+ * *(sampler_t *)((char *)args + sizeof(int) + sizeof(float *)));
+ * }
+ *
+ * In LLVM, it is expressed in the form of :
+ *
+ * @stub(i8* args) {
+ * kernel(
+ * load(i32* bitcast(i8* getelementptr(i8* args, i64 0), i32*)),
+ * load(float** bitcast(i8* getelementptr(i8* args, i64 4), float**)),
+ * ...
+ * );
+ * }
+ */
+ // Look the kernel up by name through the execution engine of the
+ // device-dependent program.
+ // NOTE(review): t_function is not checked for NULL before it is handed to
+ // CallInst::Create() below -- confirm FindFunctionNamed() cannot fail here.
+ fn_name = kernel()->p_name.c_str();
+ Program *p = (Program *)kernel()->parent();
+ CPUProgram *prog = (CPUProgram *)(p->deviceDependentProgram(device()));
+ llvm::Function *t_function = prog->jit()->FindFunctionNamed(fn_name);
+
+
+ // The stub has the kernel's return type and a single i8* parameter, and is
+ // created in the same module as p_function
+ llvm::FunctionType *kernel_function_type = p_function->getFunctionType();
+ llvm::FunctionType *stub_function_type = llvm::FunctionType::get(
+ p_function->getReturnType(),
+ llvm::Type::getInt8PtrTy(
+ p_function->getContext()),
+ false);
+ llvm::Function *stub_function = llvm::Function::Create(
+ stub_function_type,
+ llvm::Function::InternalLinkage,
+ "stub",
+ p_function->getParent());
+
+ // Insert a basic block
+ llvm::BasicBlock *basic_block = llvm::BasicBlock::Create(
+ p_function->getContext(),
+ "",
+ stub_function);
+
+ // Create the function arguments
+ llvm::Argument &stub_arg = stub_function->getArgumentList().front();
+ llvm::SmallVector<llvm::Value *, 8> args;
+ size_t args_offset = 0;
+
+ // One getelementptr/bitcast/load triple per kernel parameter; args_offset
+ // is advanced by typeOffset() the same way callArgs() lays the buffer out
+ for (unsigned int i=0; i<kernel_function_type->getNumParams(); ++i)
+ {
+ llvm::Type *param_type = kernel_function_type->getParamType(i);
+ llvm::Type *param_type_ptr = param_type->getPointerTo(); // We'll use pointers to the value
+ const Kernel::Arg *arg = p_kernel->arg(i);
+
+ // Calculate the size of the arg
+ size_t arg_size = arg->valueSize() * arg->vecDim();
+
+ // Get where to place this argument
+ size_t arg_offset = typeOffset(args_offset, arg_size);
+
+ // %1 = getelementptr(args, $arg_offset);
+ llvm::Value *getelementptr = llvm::GetElementPtrInst::CreateInBounds(
+ &stub_arg,
+ llvm::ConstantInt::get(stub_function->getContext(),
+ llvm::APInt(64, arg_offset)),
+ "",
+ basic_block);
+
+ // %2 = bitcast(%1, $param_type_ptr)
+ llvm::Value *bitcast = new llvm::BitCastInst(
+ getelementptr,
+ param_type_ptr,
+ "",
+ basic_block);
+
+ // %3 = load(%2)
+ llvm::Value *load = new llvm::LoadInst(
+ bitcast,
+ "",
+ false,
+ arg_size, // We ensure that an argument is always aligned on its size, it enables things like fast movaps
+ basic_block);
+
+ // We have the value, send it to the function
+ args.push_back(load);
+ }
+
+ // Create the call instruction
+ llvm::CallInst *call_inst = llvm::CallInst::Create(
+ t_function,
+ args,
+ "",
+ basic_block);
+ call_inst->setCallingConv(p_function->getCallingConv());
+ call_inst->setTailCall();
+
+ // Create a return instruction to end the stub
+ llvm::ReturnInst::Create(
+ p_function->getContext(),
+ basic_block);
+
+ // Retain the function if it can be reused
+ p_call_function = stub_function;
+
+ pthread_mutex_unlock(&p_call_function_mutex);
+
+ return stub_function;
+}
+
+/*
+ * CPUKernelEvent
+ */
+// Set up the bookkeeping used to hand the work-groups of `event` out one at
+// a time: the lock, the index of the current work-group, the highest
+// work-group index of each dimension and the total work-group count.
+CPUKernelEvent::CPUKernelEvent(CPUDevice *device, KernelEvent *event)
+: p_device(device),
+  p_event(event),
+  p_current_wg(0),
+  p_finished_wg(0),
+  p_kernel_args(0)
+{
+    pthread_mutex_init(&p_mutex, 0);
+
+    // Dispatching starts at work-group (0, 0, ..., 0)
+    std::memset(p_current_work_group, 0, event->work_dim() * sizeof(size_t));
+
+    // Highest index per dimension (0..n-1, not 1..n); the total number of
+    // work-groups is the product of the per-dimension counts
+    p_num_wg = 1;
+
+    for (cl_uint d = 0; d < event->work_dim(); ++d)
+    {
+        const size_t last_index =
+            (event->global_work_size(d) / event->local_work_size(d)) - 1;
+
+        p_max_work_groups[d] = last_index;
+        p_num_wg *= last_index + 1;
+    }
+}
+
+// Destroy the lock and release the cached argument buffer, if any.
+CPUKernelEvent::~CPUKernelEvent()
+{
+    pthread_mutex_destroy(&p_mutex);
+
+    // free() accepts a null pointer, so no guard is needed when nothing was
+    // cached
+    std::free(p_kernel_args);
+}
+
+// Lock the event and tell whether the work-group about to be handed out is
+// the last one. The lock is deliberately kept: takeInstance() releases it.
+bool CPUKernelEvent::reserve()
+{
+    pthread_mutex_lock(&p_mutex);
+
+    // Work-groups are numbered 0..p_num_wg-1
+    const bool is_last = (p_current_wg == p_num_wg - 1);
+
+    return is_last;
+}
+
+// True once as many work-groups have reported completion as the event
+// contains. The counters are read with the event lock held.
+bool CPUKernelEvent::finished()
+{
+    pthread_mutex_lock(&p_mutex);
+    const bool all_done = (p_finished_wg == p_num_wg);
+    pthread_mutex_unlock(&p_mutex);
+
+    return all_done;
+}
+
+// Record, under the event lock, that one more work-group has completed.
+void CPUKernelEvent::workGroupFinished()
+{
+    pthread_mutex_lock(&p_mutex);
+    p_finished_wg += 1;
+    pthread_mutex_unlock(&p_mutex);
+}
+
+// Create the work-group object for the current work-group index, advance to
+// the next index and release the lock taken by reserve().
+CPUKernelWorkGroup *CPUKernelEvent::takeInstance()
+{
+    CPUKernel *device_kernel = (CPUKernel *)p_event->deviceKernel();
+
+    CPUKernelWorkGroup *instance =
+        new CPUKernelWorkGroup(device_kernel, p_event, this,
+                               p_current_work_group);
+
+    // Move on to the next work-group
+    incVec(p_event->work_dim(), p_current_work_group, p_max_work_groups);
+    ++p_current_wg;
+
+    // The event was locked by reserve()
+    pthread_mutex_unlock(&p_mutex);
+
+    return instance;
+}
+
+// Accessor for the cached argument buffer (null until one has been cached).
+void *CPUKernelEvent::kernelArgs() const
+{
+    void *cached = p_kernel_args;
+
+    return cached;
+}
+
+// Remember a pre-built argument buffer. The destructor releases it with
+// free().
+void CPUKernelEvent::cacheKernelArgs(void *args)
+{
+    this->p_kernel_args = args;
+}
+
+/*
+ * CPUKernelWorkGroup
+ */
+// Describe one work-group of `event`: store its index, the highest local id
+// of each dimension, the number of work-items it contains and the global id
+// of its first work-item.
+CPUKernelWorkGroup::CPUKernelWorkGroup(CPUKernel *kernel, KernelEvent *event,
+                                       CPUKernelEvent *cpu_event,
+                                       const size_t *work_group_index)
+: p_kernel(kernel),
+  p_cpu_event(cpu_event),
+  p_event(event),
+  p_work_dim(event->work_dim()),
+  p_contexts(0),
+  p_stack_size(8192 /* TODO */),
+  p_had_barrier(false)
+{
+    // Index of this work-group in the range
+    std::memcpy(p_index, work_group_index, p_work_dim * sizeof(size_t));
+
+    p_num_work_items = 1;
+
+    for (unsigned int d = 0; d < p_work_dim; ++d)
+    {
+        // Local ids run over 0..n-1, not 1..n
+        p_max_local_id[d] = event->local_work_size(d) - 1;
+        p_num_work_items *= event->local_work_size(d);
+
+        // Global id of the first work-item of this group in dimension d
+        p_global_id_start_offset[d] =
+            (p_index[d] * event->local_work_size(d))
+            + event->global_work_offset(d);
+    }
+}
+
+// Destroying a work-group is what reports it as finished to its event.
+CPUKernelWorkGroup::~CPUKernelWorkGroup()
+{
+    CPUKernelEvent *owner = p_cpu_event;
+
+    owner->workGroupFinished();
+}
+
+// Build the packed argument buffer that is handed to the stub function.
+//
+// Each argument is placed at the offset computed by CPUKernel::typeOffset().
+// Non-local buffers are replaced by the pointer returned by
+// CPUBuffer::data(), __local buffers by a fresh malloc() that is also pushed
+// onto locals_to_free, and images are allocated on the device before being
+// copied like any other value. When the kernel has no locals, the buffer is
+// cached on the CPUKernelEvent and returned directly by later calls.
+//
+// Returns NULL if the argument buffer itself cannot be allocated.
+void *CPUKernelWorkGroup::callArgs(std::vector<void *> &locals_to_free)
+{
+ if (p_cpu_event->kernelArgs() && !p_kernel->kernel()->hasLocals())
+ {
+ // We have cached the args and can reuse them
+ return p_cpu_event->kernelArgs();
+ }
+
+ // We need to create them from scratch
+ void *rs;
+
+ size_t args_size = 0;
+
+ // First pass: only accumulate the total size, alignment padding included
+ for (unsigned int i=0; i<p_kernel->kernel()->numArgs(); ++i)
+ {
+ const Kernel::Arg *arg = p_kernel->kernel()->arg(i);
+ CPUKernel::typeOffset(args_size, arg->valueSize() * arg->vecDim());
+ }
+
+ rs = std::malloc(args_size);
+
+ if (!rs)
+ return NULL;
+
+ size_t arg_offset = 0;
+
+ // Second pass: write each argument at its aligned offset
+ for (unsigned int i=0; i<p_kernel->kernel()->numArgs(); ++i)
+ {
+ const Kernel::Arg *arg = p_kernel->kernel()->arg(i);
+ size_t size = arg->valueSize() * arg->vecDim();
+ size_t offset = CPUKernel::typeOffset(arg_offset, size);
+
+ // Where to place the argument
+ unsigned char *target = (unsigned char *)rs;
+ target += offset;
+
+ // We may have to perform some changes in the values (buffers, etc)
+ switch (arg->kind())
+ {
+ case Kernel::Arg::Buffer:
+ {
+ MemObject *buffer = *(MemObject **)arg->data();
+
+ if (arg->file() == Kernel::Arg::Local)
+ {
+ // Alloc a buffer and pass it to the kernel
+ // NOTE(review): the result of this malloc() is not checked
+ void *local_buffer = std::malloc(arg->allocAtKernelRuntime());
+ locals_to_free.push_back(local_buffer);
+ *(void **)target = local_buffer;
+ }
+ else
+ {
+ if (!buffer)
+ {
+ // We can do that, just send NULL
+ *(void **)target = NULL;
+ }
+ else
+ {
+ // Get the CPU buffer, allocate it and get its pointer
+ CPUBuffer *cpubuf =
+ (CPUBuffer *)buffer->deviceBuffer(p_kernel->device());
+ void *buf_ptr = 0;
+
+ buffer->allocate(p_kernel->device());
+ buf_ptr = cpubuf->data();
+
+ *(void **)target = buf_ptr;
+ }
+ }
+
+ break;
+ }
+ case Kernel::Arg::Image2D:
+ case Kernel::Arg::Image3D:
+ {
+ // We need to ensure the image is allocated
+ // NOTE(review): both image kinds are accessed through an Image2D
+ // pointer, and the pointer is not checked for NULL -- confirm that
+ // Image3D derives from Image2D and that a NULL image cannot be set.
+ Image2D *image = *(Image2D **)arg->data();
+ image->allocate(p_kernel->device());
+
+ // Fall through to the memcpy
+ }
+ default:
+ // Simply copy the arg's data into the buffer
+ std::memcpy(target, arg->data(), size);
+ break;
+ }
+ }
+
+ // Cache the arguments if we can do so
+ if (!p_kernel->kernel()->hasLocals())
+ p_cpu_event->cacheKernelArgs(rs);
+
+ return rs;
+}
+
+// Run every work-item of this work-group.
+//
+// Obtains the stub from CPUKernel::callFunction(), resolves its native
+// address through the program's execution engine, builds the argument buffer
+// with callArgs() and then calls the stub repeatedly while the dummy
+// context's local id is advanced with incVec(). If a barrier was hit
+// (p_had_barrier), the remaining work-items are resumed one by one with
+// swapcontext().
+//
+// Returns false only when no stub function could be obtained.
+bool CPUKernelWorkGroup::run()
+{
+ // Get the kernel function to call
+ std::vector<void *> locals_to_free;
+ llvm::Function *kernel_func = p_kernel->callFunction();
+
+ if (!kernel_func)
+ return false;
+
+ Program *p = (Program *)p_kernel->kernel()->parent();
+ CPUProgram *prog = (CPUProgram *)(p->deviceDependentProgram(p_kernel->device()));
+
+ // Make object usable for execution: (only applies to MCJIT):
+ prog->jit()->finalizeObject();
+
+ std::string kname = kernel_func->getName().str();
+
+ // original
+ // NOTE(review): this value is overwritten a few lines below by the result
+ // of getFunctionAddress(); confirm whether this first call is still needed.
+ p_kernel_func_addr =
+ (void(*)(void *))prog->jit()->getPointerToFunction(kernel_func);
+
+ // TAG
+ // llvm::Function *t_func = prog->jit()->FindFunctionNamed(p_kernel->p_kernel->p_name->str());
+// llvm::Function *t_func = prog->jit()->FindFunctionNamed(p_kernel->kernel()->p_name.c_str());
+// p_kernel_func_addr = (void(*)(void *))prog->jit()->getPointerToFunction(t_func);
+ p_kernel_func_addr =(void(*)(void *)) prog->jit()->getFunctionAddress(kname);
+
+ // Get the arguments
+ // NOTE(review): callArgs() returns NULL when its allocation fails; that
+ // case is not checked here.
+ p_args = callArgs(locals_to_free);
+
+ // Tell the builtins this thread will run a kernel work group
+ setThreadLocalWorkGroup(this);
+
+ // Initialize the dummy context used by the builtins before a call to barrier()
+ p_current_work_item = 0;
+ p_current_context = &p_dummy_context;
+
+ std::memset(p_dummy_context.local_id, 0, p_work_dim * sizeof(size_t));
+
+ // The loop stops as soon as a barrier was hit or incVec() returns true
+ // (presumably once the local id has gone past p_max_local_id -- confirm
+ // against incVec())
+ do
+ {
+ // Simply call the "call function", it and the builtins will do the rest
+ p_kernel_func_addr(p_args);
+ } while (!p_had_barrier &&
+ !incVec(p_work_dim, p_dummy_context.local_id, p_max_local_id));
+
+ // If no barrier() call was made, all is fine. If not, only the first
+ // work-item has currently finished. We must let the others run.
+ if (p_had_barrier)
+ {
+ Context *main_context = p_current_context; // After the first swapcontext,
+ // we will not be able to trust
+ // p_current_context anymore.
+
+ // We'll call swapcontext for each remaining work-item. They will
+ // finish, and when they'll do so, this main context will be resumed, so
+ // it's easy (i starts from 1 because the main context already finished)
+ for (unsigned int i=1; i<p_num_work_items; ++i)
+ {
+ Context *ctx = getContextAddr(i);
+ swapcontext(&main_context->context, &ctx->context);
+ }
+ }
+
+ // Free the allocated locals
+ // (without locals, p_args is the buffer cached on the CPUKernelEvent and
+ // must not be freed here)
+ if (p_kernel->kernel()->hasLocals())
+ {
+ for (size_t i=0; i<locals_to_free.size(); ++i)
+ {
+ std::free(locals_to_free[i]);
+ }
+
+ std::free(p_args);
+ }
+
+ return true;
+}
+
+// Address of the index-th Context slot inside p_contexts. Each slot is
+// p_stack_size + sizeof(Context) bytes long.
+CPUKernelWorkGroup::Context *CPUKernelWorkGroup::getContextAddr(unsigned int index)
+{
+    size_t byte_offset = p_stack_size + sizeof(Context);
+    byte_offset *= index;
+
+    char *base = (char *)p_contexts;
+
+    return (Context *)(base + byte_offset);
+}
diff --git a/src/core/cpu/kernel.h b/src/core/cpu/kernel.h
new file mode 100644
index 0000000..ab4d1ac
--- /dev/null
+++ b/src/core/cpu/kernel.h
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/kernel.h
+ * \brief CPU kernel
+ */
+
+#ifndef __CPU_KERNEL_H__
+#define __CPU_KERNEL_H__
+
+#include "../deviceinterface.h"
+#include <core/config.h>
+
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <vector>
+#include <string>
+
+#include <ucontext.h>
+#include <pthread.h>
+#include <stdint.h>
+
+namespace llvm
+{
+ class Function;
+}
+
+namespace Coal
+{
+
+class CPUDevice;
+class Kernel;
+class KernelEvent;
+class Image2D;
+class Image3D;
+
+/**
+ * \brief CPU kernel
+ *
+ * This class holds passive information about a kernel (\c Coal::Kernel object
+ * and device on which it is run) and provides the \c callFunction() function.
+ *
+ * This function is described at the end of \ref llvm .
+ *
+ * \see Coal::CPUKernelWorkGroup
+ */
+class CPUKernel : public DeviceKernel
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param device device on which the kernel will be run
+ * \param kernel \c Coal::Kernel object holding information about this
+ * kernel
+ * \param function \c llvm::Function to run
+ */
+ CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function);
+ ~CPUKernel();
+
+ size_t workGroupSize(); /*!< \brief \c CL_DEVICE_MAX_WORK_GROUP_SIZE as reported by the device */
+ cl_ulong localMemSize() const; /*!< \brief Not computed yet, currently returns 0 */
+ cl_ulong privateMemSize() const; /*!< \brief Not computed yet, currently returns 0 */
+ size_t preferredWorkGroupSizeMultiple() const; /*!< \brief Number of CPUs of the device */
+ /**
+ * \brief Guess a work-group size for one dimension
+ * \param num_dims number of dimensions of the range
+ * \param dim dimension considered
+ * \param global_work_size global work size of this dimension
+ * \return work-group size to use, a divisor of \p global_work_size
+ */
+ size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
+ size_t global_work_size) const;
+
+ Kernel *kernel() const; /*!< \brief \c Coal::Kernel object this kernel will run */
+ CPUDevice *device() const; /*!< \brief device on which the kernel will be run */
+
+ llvm::Function *function() const; /*!< \brief \c llvm::Function representing the kernel but <strong>not to be run</strong> */
+ llvm::Function *callFunction(); /*!< \brief stub function used to run the kernel, see \ref llvm */
+
+ /**
+ * \brief Calculate where to place a value in an array
+ *
+ * This function is used to calculate where to place a value in an
+ * array given its size, properly aligning it.
+ *
+ * This function is called repeatedly to obtain the aligned position of
+ * each value that must be placed in the array
+ *
+ * \code
+ * size_t array_len = 0, array_offset = 0;
+ * void *array;
+ *
+ * // First, get the array size given alignment constraints
+ * typeOffset(array_len, sizeof(int));
+ * typeOffset(array_len, sizeof(float));
+ * typeOffset(array_len, sizeof(void *));
+ *
+ * // Then, allocate memory
+ * array = malloc(array_len);
+ *
+ * // Finally, place the arguments
+ * *(int *)((char *)array + typeOffset(array_offset, sizeof(int))) = 1337;
+ * *(float *)((char *)array + typeOffset(array_offset, sizeof(float))) = 3.1415f;
+ * *(void **)((char *)array + typeOffset(array_offset, sizeof(void *))) = array;
+ * \endcode
+ *
+ * \param offset offset at which the value will be placed. This variable
+ * gets incremented by <tt>type_len + padding</tt>.
+ * \param type_len size in bytes of the value that will be stored
+ * \return offset at which the value will be stored (equal to \p offset
+ * before incrementation, plus any alignment padding).
+ */
+ static size_t typeOffset(size_t &offset, size_t type_len);
+
+ private:
+ CPUDevice *p_device; /*!< \brief device returned by device() */
+ Kernel *p_kernel; /*!< \brief kernel returned by kernel() */
+ llvm::Function *p_function, *p_call_function; /*!< \brief kernel function, and stub cached by callFunction() */
+ pthread_mutex_t p_call_function_mutex; /*!< \brief held by callFunction() while it reads or builds the stub */
+};
+
+class CPUKernelEvent;
+
+/**
+ * \brief CPU kernel work-group
+ *
+ * This class represents a batch of work-items that will be run together. It is
+ * the one that actually runs the kernel for each of its work-items.
+ *
+ * \see \ref llvm
+ * \nosubgrouping
+ */
+class CPUKernelWorkGroup
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param kernel kernel to run
+ * \param event event containing information about the kernel run
+ * \param cpu_event CPU-specific information and cache about \p event
+ * \param work_group_index index of this work-group in the kernel
+ */
+ CPUKernelWorkGroup(CPUKernel *kernel, KernelEvent *event,
+ CPUKernelEvent *cpu_event,
+ const size_t *work_group_index);
+ ~CPUKernelWorkGroup();
+
+ /**
+ * \brief Build a structure of arguments
+ *
+ * As C doesn't support calling functions with variable arguments
+ * unknown at the compilation, this function builds the list of
+ * arguments in memory. This array will then be passed to a LLVM stub
+ * function reading it and passing its values to the actual kernel.
+ *
+ * \see \ref llvm
+ * \param locals_to_free if this kernel takes \c __local arguments, they
+ * must be \c malloc()'ed for every work-group.
+ * They are placed in this vector to be
+ * \c free()'ed at the end of \c run().
+ * \return address of a memory location containing the arguments
+ */
+ void *callArgs(std::vector<void *> &locals_to_free);
+
+ /**
+ * \brief Run the work-group
+ *
+ * This function is the core of CPU-acceleration. It runs the work-items
+ * of this work-group given the correct arguments.
+ *
+ * \see \ref llvm
+ * \see \ref barrier
+ * \see callArgs()
+ * \return true if success, false in case of an error
+ */
+ bool run();
+
+ /**
+ * \name Native implementation of built-in OpenCL C functions
+ * @{
+ */
+ size_t getGlobalId(cl_uint dimindx) const;
+ cl_uint getWorkDim() const;
+ size_t getGlobalSize(cl_uint dimindx) const;
+ size_t getLocalSize(cl_uint dimindx) const;
+ size_t getLocalID(cl_uint dimindx) const;
+ size_t getNumGroups(cl_uint dimindx) const;
+ size_t getGroupID(cl_uint dimindx) const;
+ size_t getGlobalOffset(cl_uint dimindx) const;
+
+ void barrier(unsigned int flags);
+
+ void *getImageData(Image2D *image, int x, int y, int z) const;
+
+ void writeImage(Image2D *image, int x, int y, int z, float *color) const;
+ void writeImage(Image2D *image, int x, int y, int z, int32_t *color) const;
+ void writeImage(Image2D *image, int x, int y, int z, uint32_t *color) const;
+
+ void readImage(float *result, Image2D *image, int x, int y, int z,
+ uint32_t sampler) const;
+ void readImage(int32_t *result, Image2D *image, int x, int y, int z,
+ uint32_t sampler) const;
+ void readImage(uint32_t *result, Image2D *image, int x, int y, int z,
+ uint32_t sampler) const;
+
+ void readImage(float *result, Image2D *image, float x, float y, float z,
+ uint32_t sampler) const;
+ void readImage(int32_t *result, Image2D *image, float x, float y, float z,
+ uint32_t sampler) const;
+ void readImage(uint32_t *result, Image2D *image, float x, float y, float z,
+ uint32_t sampler) const;
+ /**
+ * @}
+ */
+
+ /**
+ * \brief Function called when a built-in name cannot be found
+ */
+ void builtinNotFound(const std::string &name) const;
+
+ private:
+ template<typename T>
+ void writeImageImpl(Image2D *image, int x, int y, int z, T *color) const;
+ template<typename T>
+ void readImageImplI(T *result, Image2D *image, int x, int y, int z,
+ uint32_t sampler) const;
+ template<typename T>
+ void readImageImplF(T *result, Image2D *image, float x, float y, float z,
+ uint32_t sampler) const;
+ template<typename T>
+ void linear3D(T *result, float a, float b, float c,
+ int i0, int j0, int k0, int i1, int j1, int k1,
+ Image3D *image) const;
+ template<typename T>
+ void linear2D(T *result, float a, float b, float c, int i0, int j0,
+ int i1, int j1, Image2D *image) const;
+
+ private:
+ CPUKernel *p_kernel;
+ CPUKernelEvent *p_cpu_event;
+ KernelEvent *p_event;
+ cl_uint p_work_dim; // event->work_dim(), read once in the constructor
+ // p_index: index of this work-group (copied from the constructor argument)
+ // p_max_local_id: local work size minus one, per dimension
+ // p_global_id_start_offset: index * local work size + global work offset
+ size_t p_index[MAX_WORK_DIMS],
+ p_max_local_id[MAX_WORK_DIMS],
+ p_global_id_start_offset[MAX_WORK_DIMS];
+
+ void (*p_kernel_func_addr)(void *); // native address of the stub, set by run()
+ void *p_args; // argument buffer returned by callArgs()
+
+ // Machinery to have barrier() working
+ struct Context
+ {
+ size_t local_id[MAX_WORK_DIMS];
+ ucontext_t context;
+ unsigned int initialized;
+ };
+
+ // Address of the index-th slot of p_contexts; a slot is
+ // p_stack_size + sizeof(Context) bytes long
+ Context *getContextAddr(unsigned int index);
+
+ Context *p_current_context;
+ Context p_dummy_context; // context used by run() before any barrier() call
+ void *p_contexts; // initialized to 0 by the constructor
+ size_t p_stack_size; // set to 8192 by the constructor (marked TODO there)
+ unsigned int p_num_work_items, p_current_work_item;
+ bool p_had_barrier; // false until set; run() switches to swapcontext() when true
+};
+
+/**
+ * \brief CPU-specific information about a kernel event
+ *
+ * This class, stored in the device-data field of a \c Coal::KernelEvent
+ * (see \c Coal::Event::setDeviceData()), is responsible for dispatching the
+ * \c Coal::CPUKernelWorkGroup objects among the CPU worker threads.
+ */
+class CPUKernelEvent
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param device device running the kernel
+ * \param event \c Coal::KernelEvent holding device-agnostic data
+ * about the event
+ */
+ CPUKernelEvent(CPUDevice *device, KernelEvent *event);
+ ~CPUKernelEvent();
+
+ bool reserve(); /*!< \brief Locks the event. Returns true if the next work-group handed out by takeInstance() is the last one */
+ bool finished(); /*!< \brief All the work groups have finished */
+ CPUKernelWorkGroup *takeInstance(); /*!< \brief Must be called exactly one time after reserve(). Unlocks the event */
+
+ void *kernelArgs() const; /*!< \brief Return the cached kernel arguments */
+ void cacheKernelArgs(void *args); /*!< \brief Cache pre-built kernel arguments; they are \c free()'ed by the destructor */
+
+ void workGroupFinished(); /*!< \brief A work-group has just finished */
+
+ private:
+ CPUDevice *p_device;
+ KernelEvent *p_event;
+ // p_current_work_group: index of the next work-group to hand out
+ // p_max_work_groups: highest work-group index of each dimension
+ size_t p_current_work_group[MAX_WORK_DIMS],
+ p_max_work_groups[MAX_WORK_DIMS];
+ // Work-groups handed out so far, work-groups finished so far, total count
+ size_t p_current_wg, p_finished_wg, p_num_wg;
+ pthread_mutex_t p_mutex; // taken by reserve(), released by takeInstance()
+ void *p_kernel_args; // buffer set by cacheKernelArgs(), 0 until then
+};
+
+}
+
+#endif
diff --git a/src/core/cpu/program.cpp b/src/core/cpu/program.cpp
new file mode 100644
index 0000000..7eb632c
--- /dev/null
+++ b/src/core/cpu/program.cpp
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/program.cpp
+ * \brief CPU program
+ */
+
+#include "program.h"
+#include "device.h"
+#include "kernel.h"
+#include "builtins.h"
+
+#include "../program.h"
+
+#include <llvm/PassManager.h>
+#include <llvm/Analysis/Passes.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Transforms/Scalar.h>
+#include <llvm/Transforms/IPO.h>
+#include <llvm/ExecutionEngine/ExecutionEngine.h>
+#include <llvm/ExecutionEngine/MCJIT.h>
+#include <llvm/ExecutionEngine/SectionMemoryManager.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/Support/ErrorHandling.h>
+
+#include <string>
+#include <iostream>
+
+using namespace Coal;
+using namespace llvm;
+
+// Create a custom memory manager for MCJIT
+//
+// It only overrides symbol resolution, so that names the standard lookup
+// cannot resolve are looked up with getBuiltin().
+class ClientMemoryManager : public SectionMemoryManager
+{
+ // Copying is disabled
+ ClientMemoryManager(const ClientMemoryManager&) LLVM_DELETED_FUNCTION;
+ void operator=(const ClientMemoryManager&) LLVM_DELETED_FUNCTION;
+
+public:
+ ClientMemoryManager() {}
+ virtual ~ClientMemoryManager() {}
+
+ /// This method returns the (host) address of the specified function.
+ /// It reports a fatal error when the name cannot be resolved.
+ virtual uint64_t getSymbolAddress(const std::string &Name);
+};
+
+// Resolve Name to a host address: the standard RTDyldMemoryManager lookup is
+// tried first, then the native built-ins. An unresolved name is a fatal
+// error.
+uint64_t ClientMemoryManager::getSymbolAddress(const std::string &Name)
+{
+    uint64_t resolved = RTDyldMemoryManager::getSymbolAddress(Name);
+
+    if (resolved)
+        return resolved;
+
+    // Not a symbol known to the standard resolution, try our built-ins
+    resolved = (uint64_t)getBuiltin(Name);
+
+    if (resolved)
+        return resolved;
+
+    report_fatal_error("OpenCL program references external function '" + Name +
+                       "' which could not be resolved!");
+    return resolved;
+}
+
+// Constructor: remember the device and the program. No JIT and no module
+// exist yet; build() provides the module and initJIT() creates the JIT.
+CPUProgram::CPUProgram(CPUDevice *device, Program *program)
+: DeviceProgram(), p_device(device), p_program(program), p_jit(0), p_module(0)
+{
+    // p_module has to start out as NULL: initJIT() tests it to detect that
+    // build() has not been called yet, and it used to be left uninitialized.
+}
+
+// Destructor: delete the JIT if one was created, after detaching the module
+// from it so that the module is not deleted along with the JIT.
+CPUProgram::~CPUProgram()
+{
+    if (!p_jit)
+        return;
+
+    // Dont delete the module
+    p_jit->removeModule(p_module);
+
+    delete p_jit;
+}
+
+// CPU programs always ask for the standard library to be linked in.
+bool CPUProgram::linkStdLib() const
+{
+    const bool link_needed = true;
+
+    return link_needed;
+}
+
+// Queue the optimization passes for a CPU build on manager. Nothing is added
+// when optimize is false; hasBarrier is not used by this implementation.
+void CPUProgram::createOptimizationPasses(llvm::PassManager *manager,
+                                          bool optimize, bool hasBarrier)
+{
+    if (!optimize)
+        return;
+
+    /*
+     * Inspired by code from "The LLVM Compiler Infrastructure"
+     */
+    llvm::PassManager &pm = *manager;
+
+    pm.add(llvm::createDeadArgEliminationPass());
+    pm.add(llvm::createInstructionCombiningPass());
+    pm.add(llvm::createFunctionInliningPass());
+    pm.add(llvm::createPruneEHPass());              // Remove dead EH info.
+    pm.add(llvm::createGlobalOptimizerPass());
+    pm.add(llvm::createGlobalDCEPass());            // Remove dead functions.
+    pm.add(llvm::createArgumentPromotionPass());
+    pm.add(llvm::createInstructionCombiningPass());
+    pm.add(llvm::createJumpThreadingPass());
+    pm.add(llvm::createScalarReplAggregatesPass());
+    pm.add(llvm::createFunctionAttrsPass());        // Add nocapture.
+    pm.add(llvm::createGlobalsModRefPass());        // IP alias analysis.
+    pm.add(llvm::createLICMPass());                 // Hoist loop invariants.
+    pm.add(llvm::createGVNPass());                  // Remove redundancies.
+    pm.add(llvm::createMemCpyOptPass());            // Remove dead memcpys.
+    pm.add(llvm::createDeadStoreEliminationPass());
+    pm.add(llvm::createInstructionCombiningPass());
+    pm.add(llvm::createJumpThreadingPass());
+    pm.add(llvm::createCFGSimplificationPass());
+}
+
+// "Build" for the CPU: there is nothing to compile here, the module is only
+// stored for initJIT(). binary_str is left untouched. Always succeeds.
+bool CPUProgram::build(llvm::Module *module, std::string *binary_str)
+{
+    this->p_module = module;
+
+    return true;
+}
+
+// Create the MCJIT execution engine for the module given to build(), unless
+// one already exists. Returns false when there is no module or when the
+// engine cannot be created (the error is printed on std::cout).
+bool CPUProgram::initJIT()
+{
+    // Already initialized
+    if (p_jit)
+        return true;
+
+    // build() has not provided a module
+    if (!p_module)
+        return false;
+
+    std::string error_message;
+
+    // MCJIT engine using our memory manager for symbol resolution
+    llvm::EngineBuilder builder(p_module);
+
+    builder.setErrorStr(&error_message);
+    builder.setUseMCJIT(true);
+    builder.setMCJITMemoryManager(new ClientMemoryManager());
+
+    p_jit = builder.create();
+
+    if (p_jit)
+        return true;
+
+    std::cout << "Unable to create a JIT: " << error_message << std::endl;
+    return false;
+}
+
+// Accessor for the execution engine (null until initJIT() has succeeded).
+llvm::ExecutionEngine *CPUProgram::jit() const
+{
+    llvm::ExecutionEngine *engine = p_jit;
+
+    return engine;
+}
diff --git a/src/core/cpu/program.h b/src/core/cpu/program.h
new file mode 100644
index 0000000..0a08d61
--- /dev/null
+++ b/src/core/cpu/program.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/program.h
+ * \brief CPU program
+ */
+
+#ifndef __CPU_PROGRAM_H__
+#define __CPU_PROGRAM_H__
+
+#include "../deviceinterface.h"
+
+namespace llvm
+{
+ class ExecutionEngine;
+ class Module;
+}
+
+namespace Coal
+{
+
+class CPUDevice;
+class Program;
+
+/**
+ * \brief CPU program
+ *
+ * This class implements the \c Coal::DeviceProgram interface for CPU
+ * acceleration.
+ *
+ * Its main purpose is to initialize a \c llvm::JIT object to run LLVM bitcode,
+ * in \c initJIT().
+ */
+class CPUProgram : public DeviceProgram
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param device CPU device to which this program is attached
+ * \param program \c Coal::Program that will be run
+ */
+ CPUProgram(CPUDevice *device, Program *program);
+ ~CPUProgram();
+
+ bool linkStdLib() const; /*!< \brief Always true for the CPU device */
+ /**
+ * \brief Add the optimization passes to \p manager
+ * \param manager pass manager receiving the passes
+ * \param optimize when false, no pass is added
+ * \param hasBarrier not used by the CPU implementation
+ */
+ void createOptimizationPasses(llvm::PassManager *manager,
+ bool optimize, bool hasBarrier=false);
+ /**
+ * \brief Store \p module for later use by \c initJIT()
+ * \return always true, there is nothing to build for the CPU
+ */
+ bool build(llvm::Module *module, std::string *binary_str);
+
+ /**
+ * \brief Initialize an LLVM JIT
+ *
+ * This function creates a \c llvm::JIT object to run this program on
+ * the CPU. A few implementation details :
+ *
+ * - The JIT is set not to resolve unknown symbols using \c dlsym().
+ * This way, a malicious kernel cannot execute arbitrary code on
+ * the host by declaring \c libc functions and calling them.
+ * - All the unknown function names are passed to \c getBuiltin() to
+ * get native built-in implementations.
+ *
+ * NOTE(review): the memory manager installed in program.cpp calls
+ * \c RTDyldMemoryManager::getSymbolAddress() before \c getBuiltin();
+ * confirm that the first point above still holds with MCJIT.
+ *
+ * \return true if success, false otherwise
+ */
+ bool initJIT();
+ llvm::ExecutionEngine *jit() const; /*!< \brief Current LLVM execution engine */
+
+ private:
+ CPUDevice *p_device;
+ Program *p_program;
+
+ llvm::ExecutionEngine *p_jit; /*!< \brief created by initJIT(), deleted by the destructor */
+ llvm::Module *p_module; /*!< \brief module given to build(); detached from the JIT, not deleted, by the destructor */
+};
+
+}
+
+#endif
diff --git a/src/core/cpu/sampler.cpp b/src/core/cpu/sampler.cpp
new file mode 100644
index 0000000..893e66e
--- /dev/null
+++ b/src/core/cpu/sampler.cpp
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/sampler.cpp
+ * \brief OpenCL C image access functions
+ *
+ * It is recommended to compile this file using Clang as it supports the
+ * \c __builtin_shufflevector() built-in function, providing SSE or
+ * NEON-accelerated code.
+ */
+
+#include "../memobject.h"
+#include "../sampler.h"
+#include "kernel.h"
+#include "buffer.h"
+#include "builtins.h"
+
+#include <cstdlib>
+#include <cmath>
+// ASW #include <immintrin.h>
+
+using namespace Coal;
+
+/*
+ * Helper functions
+ */
+
+/**
+ * \brief Restrict \p a to the closed interval [\p b, \p c]
+ * \param a value to restrict
+ * \param b lower bound
+ * \param c upper bound
+ * \return \p b if \p a is below it, \p c if \p a is above it, \p a otherwise
+ */
+static int clamp(int a, int b, int c)
+{
+    if (a < b)
+        return b;
+
+    if (a > c)
+        return c;
+
+    return a;
+}
+
+/**
+ * \brief Smaller of two integers
+ */
+static int min(int a, int b)
+{
+    if (a < b)
+        return a;
+
+    return b;
+}
+
+/**
+ * \brief Larger of two integers
+ */
+static int max(int a, int b)
+{
+    if (a > b)
+        return a;
+
+    return b;
+}
+
+/**
+ * \brief Fractional part of \p x, computed as x - floor(x)
+ *
+ * The result is therefore non-negative even for negative \p x.
+ */
+static float frac(float x)
+{
+    const float whole = std::floor(x);
+
+    return x - whole;
+}
+
+/**
+ * \brief Round \p x to the nearest integral value
+ *
+ * This helper is used by the mirrored-repeat addressing code, whose formula
+ * (OpenCL "s' = 2 * rint(0.5 * s)") needs a round-to-nearest operation.
+ * Truncating toward zero, as a plain (int) cast does, folds coordinates such
+ * as 1.5 onto the wrong side of the mirror.
+ *
+ * Halfway cases are rounded up. For the mirrored-repeat use this gives the
+ * same final coordinate as round-half-to-even, because only
+ * |s - 2 * round(0.5 * s)| is consumed.
+ *
+ * \param x value to round
+ * \return nearest integral value, as a float
+ */
+static float round(float x)
+{
+    return std::floor(x + 0.5f);
+}
+
+/**
+ * \brief Apply the addressing mode of a sampler to integer pixel coordinates
+ *
+ * - \c CLK_ADDRESS_CLAMP_TO_EDGE : coordinates are clamped to [0, size - 1],
+ *   so they always designate a pixel of the image.
+ * - \c CLK_ADDRESS_CLAMP : coordinates are clamped to [-1, size]; the two
+ *   extreme values mean "outside the image" and select the border color.
+ * - Any other mode leaves the coordinates untouched.
+ *
+ * \param image image being accessed
+ * \param x x coordinate, modified in place
+ * \param y y coordinate, modified in place
+ * \param z z coordinate, modified in place (only clamped for 3D images)
+ * \param sampler sampler bit-field, bits 0xf0 hold the addressing mode
+ * \return true if the resulting coordinates are outside the image, in which
+ *         case the caller must use the border color and must not read the
+ *         image memory
+ */
+static bool handle_address_mode(Image2D *image, int &x, int &y, int &z,
+                                uint32_t sampler)
+{
+    bool is_3d = (image->type() == MemObject::Image3D);
+    int w = image->width(),
+        h = image->height(),
+        d = (is_3d ? ((Image3D *)image)->depth() : 1);
+
+    if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP_TO_EDGE)
+    {
+        x = clamp(x, 0, w - 1);
+        y = clamp(y, 0, h - 1);
+        if (is_3d) z = clamp(z, 0, d - 1);
+    }
+    else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP)
+    {
+        // The lower bound is -1 and not 0: a negative coordinate has to
+        // select the border color, not the first pixel of the row.
+        x = clamp(x, -1, w);
+        y = clamp(y, -1, h);
+        if (is_3d) z = clamp(z, -1, d);
+    }
+
+    // Anything outside [0, size - 1] is reported as border, which also keeps
+    // callers from reading before or after the image buffer.
+    bool outside = (x < 0 || x >= w || y < 0 || y >= h);
+
+    if (is_3d)
+        outside = outside || (z < 0 || z >= d);
+    else
+        outside = outside || (z == d); // historical behaviour for 2D images
+
+    return outside;
+}
+
+/*
+ * Macros or functions used to accelerate the functions
+ */
+#ifndef __has_builtin
+ #define __has_builtin(x) 0
+#endif
+
+/**
+ * \brief Portable fallback for a 4-lane vector shuffle
+ *
+ * Each selector picks one lane out of the eight formed by \p a (selectors
+ * 0 to 3) followed by \p b (selectors 4 to 7).
+ *
+ * \param rs destination, 4 lanes
+ * \param a first source, 4 lanes
+ * \param b second source, 4 lanes
+ * \param x selector for rs[0]
+ * \param y selector for rs[1]
+ * \param z selector for rs[2]
+ * \param w selector for rs[3]
+ */
+static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b,
+                          int x, int y, int z, int w)
+{
+    const int selectors[4] = { x, y, z, w };
+
+    for (int lane = 0; lane < 4; ++lane)
+    {
+        const int sel = selectors[lane];
+
+        if (sel < 4)
+            rs[lane] = a[sel];
+        else
+            rs[lane] = b[sel - 4];
+    }
+}
+
+/**
+ * \brief Saturate \p v to [\p lo, \p hi]; NaN is mapped to \p lo
+ */
+static float clamp_channel(float v, float lo, float hi)
+{
+    if (!(v > lo))
+        return lo; // also taken when v is NaN
+
+    return (v > hi) ? hi : v;
+}
+
+/**
+ * \brief Convert float components to the storage format of an image
+ *
+ * Only the first \p channels components of \p data are converted. Normalized
+ * formats are scaled by the largest value of the target type and saturated,
+ * so that 1.0f maps to that largest value instead of overflowing.
+ * Channel types that are not float or normalized integers are ignored.
+ *
+ * \param dest where to write the converted components
+ * \param data components to convert
+ * \param type channel data type of the image
+ * \param channels number of components to convert
+ */
+static void convert_to_format(void *dest, float *data,
+                              cl_channel_type type, unsigned int channels)
+{
+    if (type == CL_FLOAT)
+    {
+        std::memcpy(dest, data, channels * sizeof(float));
+        return;
+    }
+
+    for (unsigned int i=0; i<channels; ++i)
+    {
+        switch (type)
+        {
+            case CL_SNORM_INT8:
+                // 127, not 128: 1.0f * 128 does not fit in an int8_t
+                ((int8_t *)dest)[i] =
+                    (int8_t)clamp_channel(data[i] * 127.0f, -128.0f, 127.0f);
+                break;
+            case CL_SNORM_INT16:
+                ((int16_t *)dest)[i] =
+                    (int16_t)clamp_channel(data[i] * 32767.0f, -32768.0f, 32767.0f);
+                break;
+            case CL_UNORM_INT8:
+                ((uint8_t *)dest)[i] =
+                    (uint8_t)clamp_channel(data[i] * 255.0f, 0.0f, 255.0f);
+                break;
+            case CL_UNORM_INT16:
+                ((uint16_t *)dest)[i] =
+                    (uint16_t)clamp_channel(data[i] * 65535.0f, 0.0f, 65535.0f);
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+/**
+ * \brief Convert stored image components to float
+ *
+ * Only the first \p channels components are converted. Normalized formats
+ * are divided by the largest value of their storage type: 127 and 32767 for
+ * the signed ones (the result is then limited to -1.0f, as the most negative
+ * integer would otherwise yield a value below -1.0f), 255 and 65535 for the
+ * unsigned ones. Channel types that are not float or normalized integers
+ * are ignored.
+ *
+ * \param data where to write the float components
+ * \param source pixel as stored in the image
+ * \param type channel data type of the image
+ * \param channels number of components to convert
+ */
+static void convert_from_format(float *data, void *source,
+                                cl_channel_type type, unsigned int channels)
+{
+    if (type == CL_FLOAT)
+    {
+        std::memcpy(data, source, channels * sizeof(float));
+        return;
+    }
+
+    for (unsigned int i=0; i<channels; ++i)
+    {
+        switch (type)
+        {
+            case CL_SNORM_INT8:
+                data[i] = (float)((int8_t *)source)[i] / 127.0f;
+                if (data[i] < -1.0f) data[i] = -1.0f;
+                break;
+            case CL_SNORM_INT16:
+                data[i] = (float)((int16_t *)source)[i] / 32767.0f;
+                if (data[i] < -1.0f) data[i] = -1.0f;
+                break;
+            case CL_UNORM_INT8:
+                data[i] = (float)((uint8_t *)source)[i] / 255.0f;
+                break;
+            case CL_UNORM_INT16:
+                data[i] = (float)((uint16_t *)source)[i] / 65535.0f;
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+/**
+ * \brief Convert signed integer components to the storage format of an image
+ *
+ * The first \p channels components of \p data are copied as-is for 32-bit
+ * images and narrowed for 8-bit and 16-bit images. Other channel types are
+ * ignored.
+ *
+ * \param dest where to write the converted components
+ * \param data components to convert
+ * \param type channel data type of the image
+ * \param channels number of components to convert
+ */
+static void convert_to_format(void *dest, int *data,
+                              cl_channel_type type, unsigned int channels)
+{
+    if (type == CL_SIGNED_INT32)
+    {
+        std::memcpy(dest, data, channels * sizeof(int32_t));
+    }
+    else if (type == CL_SIGNED_INT8)
+    {
+        int8_t *out = (int8_t *)dest;
+
+        for (unsigned int n = 0; n < channels; ++n)
+            out[n] = data[n];
+    }
+    else if (type == CL_SIGNED_INT16)
+    {
+        int16_t *out = (int16_t *)dest;
+
+        for (unsigned int n = 0; n < channels; ++n)
+            out[n] = data[n];
+    }
+}
+
+/**
+ * \brief Convert stored signed integer components to 32-bit integers
+ *
+ * The first \p channels components are copied as-is for 32-bit images and
+ * sign-extended for 8-bit and 16-bit images. Other channel types are
+ * ignored.
+ *
+ * \param data where to write the 32-bit components
+ * \param source pixel as stored in the image
+ * \param type channel data type of the image
+ * \param channels number of components to convert
+ */
+static void convert_from_format(int32_t *data, void *source,
+                                cl_channel_type type, unsigned int channels)
+{
+    if (type == CL_SIGNED_INT32)
+    {
+        std::memcpy(data, source, channels * sizeof(int32_t));
+    }
+    else if (type == CL_SIGNED_INT8)
+    {
+        const int8_t *in = (int8_t *)source;
+
+        for (unsigned int n = 0; n < channels; ++n)
+            data[n] = in[n];
+    }
+    else if (type == CL_SIGNED_INT16)
+    {
+        const int16_t *in = (int16_t *)source;
+
+        for (unsigned int n = 0; n < channels; ++n)
+            data[n] = in[n];
+    }
+}
+
+/**
+ * \brief Convert unsigned integer components to the storage format of an image
+ *
+ * The first \p channels components of \p data are copied as-is for 32-bit
+ * images and narrowed for 8-bit and 16-bit images. Other channel types are
+ * ignored.
+ *
+ * \param dest where to write the converted components
+ * \param data components to convert
+ * \param type channel data type of the image
+ * \param channels number of components to convert
+ */
+static void convert_to_format(void *dest, uint32_t *data,
+                              cl_channel_type type, unsigned int channels)
+{
+    if (type == CL_UNSIGNED_INT32)
+    {
+        std::memcpy(dest, data, channels * sizeof(uint32_t));
+        return;
+    }
+
+    // Iterate over the real number of channels: a fixed count of 3 dropped
+    // the fourth component of RGBA pixels and wrote past the pixel for images
+    // having fewer than three channels.
+    for (unsigned int i=0; i<channels; ++i)
+    {
+        switch (type)
+        {
+            case CL_UNSIGNED_INT8:
+                ((uint8_t *)dest)[i] = data[i];
+                break;
+            case CL_UNSIGNED_INT16:
+                ((uint16_t *)dest)[i] = data[i];
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+/**
+ * \brief Convert stored unsigned integer components to 32-bit integers
+ *
+ * The first \p channels components are copied as-is for 32-bit images and
+ * zero-extended for 8-bit and 16-bit images. Other channel types are
+ * ignored.
+ *
+ * \param data where to write the 32-bit components
+ * \param source pixel as stored in the image
+ * \param type channel data type of the image
+ * \param channels number of components to convert
+ */
+static void convert_from_format(uint32_t *data, void *source,
+                                cl_channel_type type, unsigned int channels)
+{
+    if (type == CL_UNSIGNED_INT32)
+    {
+        std::memcpy(data, source, channels * sizeof(uint32_t));
+    }
+    else if (type == CL_UNSIGNED_INT8)
+    {
+        const uint8_t *in = (uint8_t *)source;
+
+        for (unsigned int n = 0; n < channels; ++n)
+            data[n] = in[n];
+    }
+    else if (type == CL_UNSIGNED_INT16)
+    {
+        const uint16_t *in = (uint16_t *)source;
+
+        for (unsigned int n = 0; n < channels; ++n)
+            data[n] = in[n];
+    }
+}
+
+/**
+ * \brief Multiply in place the four components of \p vec by \p val
+ *
+ * For integer \c T, each product is computed in float and converted back
+ * to \c T.
+ */
+template<typename T>
+static void vec4_scalar_mul(T *vec, float val)
+{
+    T *const end = vec + 4;
+
+    for (T *component = vec; component != end; ++component)
+        *component *= val;
+}
+
+/**
+ * \brief Add the four components of \p vec2 to those of \p vec1, in place
+ */
+template<typename T>
+static void vec4_add(T *vec1, T *vec2)
+{
+    unsigned int lane = 0;
+
+    while (lane < 4)
+    {
+        vec1[lane] += vec2[lane];
+        ++lane;
+    }
+}
+
+/**
+ * \brief Trilinear interpolation between the eight pixels of a cell
+ *
+ * The eight corners (i0|i1, j0|j1, k0|k1) are read with a sampler value of 0
+ * and blended. A corner on the "1" side along an axis is weighted by the
+ * matching factor (\p a for x, \p b for y, \p c for z), a corner on the "0"
+ * side by one minus that factor.
+ *
+ * \param result receives the four interpolated components
+ * \param a weight along x
+ * \param b weight along y
+ * \param c weight along z
+ * \param i0 x coordinate of the low corners
+ * \param j0 y coordinate of the low corners
+ * \param k0 z coordinate of the low corners
+ * \param i1 x coordinate of the high corners
+ * \param j1 y coordinate of the high corners
+ * \param k1 z coordinate of the high corners
+ * \param image image to read
+ */
+template<typename T>
+void CPUKernelWorkGroup::linear3D(T *result, float a, float b, float c,
+                                  int i0, int j0, int k0, int i1, int j1, int k1,
+                                  Image3D *image) const
+{
+    const float wx[2] = { 1.0f - a, a };
+    const float wy[2] = { 1.0f - b, b };
+    const float wz[2] = { 1.0f - c, c };
+    const int xs[2] = { i0, i1 };
+    const int ys[2] = { j0, j1 };
+    const int zs[2] = { k0, k1 };
+
+    T scratch[4];
+    bool first = true;
+
+    // x varies fastest, then y, then z. The very first corner is read
+    // directly in result, the others are accumulated into it.
+    for (int kz = 0; kz < 2; ++kz)
+    {
+        for (int jy = 0; jy < 2; ++jy)
+        {
+            for (int ix = 0; ix < 2; ++ix)
+            {
+                T *texel = (first ? result : scratch);
+
+                readImageImplI<T>(texel, image, xs[ix], ys[jy], zs[kz], 0);
+                vec4_scalar_mul(texel, wx[ix] * wy[jy] * wz[kz]);
+
+                if (!first)
+                    vec4_add(result, scratch);
+
+                first = false;
+            }
+        }
+    }
+}
+
+/**
+ * \brief Bilinear interpolation between the four pixels of a cell
+ *
+ * The four corners (i0|i1, j0|j1) are read at z = 0 with a sampler value
+ * of 0 and blended using \p a as weight along x and \p b as weight along y.
+ *
+ * \param result receives the four interpolated components
+ * \param a weight along x
+ * \param b weight along y
+ * \param c unused, kept for symmetry with \c linear3D()
+ * \param i0 x coordinate of the low corners
+ * \param j0 y coordinate of the low corners
+ * \param i1 x coordinate of the high corners
+ * \param j1 y coordinate of the high corners
+ * \param image image to read
+ */
+template<typename T>
+void CPUKernelWorkGroup::linear2D(T *result, float a, float b, float c, int i0, int j0,
+                                  int i1, int j1, Image2D *image) const
+{
+    const float wx[2] = { 1.0f - a, a };
+    const float wy[2] = { 1.0f - b, b };
+    const int xs[2] = { i0, i1 };
+    const int ys[2] = { j0, j1 };
+
+    T scratch[4];
+    bool first = true;
+
+    // x varies fastest. The very first corner is read directly in result,
+    // the others are accumulated into it.
+    for (int jy = 0; jy < 2; ++jy)
+    {
+        for (int ix = 0; ix < 2; ++ix)
+        {
+            T *texel = (first ? result : scratch);
+
+            readImageImplI<T>(texel, image, xs[ix], ys[jy], 0, 0);
+            vec4_scalar_mul(texel, wx[ix] * wy[jy]);
+
+            if (!first)
+                vec4_add(result, scratch);
+
+            first = false;
+        }
+    }
+}
+
+/*
+ * shuffle4(rs, a, b, x, y, z, w): rs = lanes x, y, z and w taken from the
+ * eight lanes of a (0..3) followed by b (4..7).
+ *
+ * Uses the compiler built-in when available, slow_shuffle4() otherwise.
+ * The selectors must be compile-time constants for the built-in variant.
+ *
+ * NOTE(review): the built-in variant relies on the __v4sf type, which this
+ * file does not declare itself (the <immintrin.h> include above is commented
+ * out) - confirm it is provided by another header on the targeted compilers.
+ */
+#if __has_builtin(__builtin_shufflevector)
+ #define shuffle4(rs, a, b, x, y, z, w) \
+ *(__v4sf *)rs = __builtin_shufflevector(*(__v4sf *)a, *(__v4sf *)b, \
+ x, y, z, w)
+#else
+ #define shuffle4(rs, a, b, x, y, z, w) \
+ slow_shuffle4(rs, a, b, x, y, z, w)
+#endif
+
+/**
+ * \brief Reorder the components of a pixel according to a channel order
+ *
+ * When \p reading, \p source holds the components in the order they are
+ * stored in the image and \p target receives them as (x, y, z, w) = (R, G,
+ * B, A). Components the image does not store are set to 0, except alpha
+ * which is set to \p t_max for the orders that have no alpha channel.
+ *
+ * When writing, \p source is an (R, G, B, A) color and \p target receives the
+ * components in storage order; trailing components the image does not store
+ * are unspecified.
+ *
+ * \param target 4 components, receives the reordered pixel
+ * \param source 4 components to reorder
+ * \param order channel order of the image
+ * \param reading true when reading from the image, false when writing to it
+ * \param t_max bit pattern of "full" alpha for the component type
+ */
+static void swizzle(uint32_t *target, uint32_t *source,
+                    cl_channel_order order, bool reading, uint32_t t_max)
+{
+    // Selector 4 is special[0] (zero), selector 5 is special[1] (t_max)
+    uint32_t special[4] = {0, t_max, 0, 0 };
+
+    if (reading)
+    {
+        switch (order)
+        {
+            case CL_R:
+            case CL_Rx:
+                // target = {source->x, 0, 0, t_max}
+                shuffle4(target, source, special, 0, 4, 4, 5);
+                break;
+            case CL_A:
+                // target = {0, 0, 0, source->x}
+                shuffle4(target, source, special, 4, 4, 4, 0);
+                break;
+            case CL_INTENSITY:
+                // target = {source->x, source->x, source->x, source->x}
+                shuffle4(target, source, source, 0, 0, 0, 0);
+                break;
+            case CL_LUMINANCE:
+                // target = {source->x, source->x, source->x, t_max}
+                shuffle4(target, source, special, 0, 0, 0, 5);
+                break;
+            case CL_RG:
+            case CL_RGx:
+                // target = {source->x, source->y, 0, t_max}
+                shuffle4(target, source, special, 0, 1, 4, 5);
+                break;
+            case CL_RA:
+                // target = {source->x, 0, 0, source->y}
+                shuffle4(target, source, special, 0, 4, 4, 1);
+                break;
+            case CL_RGB:
+            case CL_RGBx:
+                // target = {source->x, source->y, source->z, t_max}
+                // The image stores no alpha, so the fourth source lane
+                // holds no pixel data and must not be copied.
+                shuffle4(target, source, special, 0, 1, 2, 5);
+                break;
+            case CL_RGBA:
+                // Nothing to do, already the good order
+                std::memcpy(target, source, 16);
+                break;
+            case CL_ARGB:
+                // target = {source->y, source->z, source->w, source->x}
+                shuffle4(target, source, source, 1, 2, 3, 0);
+                break;
+            case CL_BGRA:
+                // target = {source->z, source->y, source->x, source->w}
+                shuffle4(target, source, source, 2, 1, 0, 3);
+                break;
+        }
+    }
+    else
+    {
+        switch (order)
+        {
+            case CL_A:
+                // target = {source->w, undef, undef, undef}
+                shuffle4(target, source, source, 3, 3, 3, 3);
+                break;
+            case CL_RA:
+                // target = {source->x, source->w, undef, undef}
+                shuffle4(target, source, source, 0, 3, 3, 3);
+                break;
+            case CL_ARGB:
+                // target = {source->w, source->x, source->y, source->z}
+                shuffle4(target, source, source, 3, 0, 1, 2);
+                break;
+            case CL_BGRA:
+                // target = {source->z, source->y, source->x, source->w}
+                shuffle4(target, source, source, 2, 1, 0, 3);
+                break;
+            default:
+                // The stored components are the leading ones of the color
+                std::memcpy(target, source, 16);
+        }
+    }
+}
+
+/*
+ * Actual implementation of the built-ins
+ */
+
+/**
+ * \brief Address of the pixel (x, y, z) in the CPU buffer backing \p image
+ *
+ * The address is computed by \c imageData() from the buffer of \p image on
+ * the device of the kernel, using the pitches and pixel size of the image.
+ */
+void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const
+{
+    DeviceInterface *device = (DeviceInterface *)p_kernel->device();
+    CPUBuffer *buffer = (CPUBuffer *)image->deviceBuffer(device);
+    unsigned char *base = (unsigned char *)buffer->data();
+
+    return imageData(base, x, y, z,
+                     image->row_pitch(),
+                     image->slice_pitch(),
+                     image->pixel_size());
+}
+
+/**
+ * \brief Write one pixel of an image
+ *
+ * The color is first reordered to the channel order of the image, then
+ * converted to its channel data type directly at the location of the pixel.
+ *
+ * \param image image to write to
+ * \param x x coordinate of the pixel
+ * \param y y coordinate of the pixel
+ * \param z z coordinate of the pixel
+ * \param color four components to write
+ */
+template<typename T>
+void CPUKernelWorkGroup::writeImageImpl(Image2D *image, int x, int y, int z,
+                                        T *color) const
+{
+    T ordered[4];
+
+    // float, int and uint are all 32-bit wide, so the reordering can be done
+    // on raw 32-bit lanes whatever T is.
+    uint32_t *dst_lanes = (uint32_t *)ordered;
+    uint32_t *src_lanes = (uint32_t *)color;
+
+    swizzle(dst_lanes, src_lanes, image->format().image_channel_order,
+            false, 0);
+
+    // Convert straight into the pixel
+    void *pixel = getImageData(image, x, y, z);
+
+    convert_to_format(pixel, ordered,
+                      image->format().image_channel_data_type,
+                      image->channels());
+}
+
+/**
+ * \brief Write a float color to the pixel (x, y, z) of \p image
+ */
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+                                    float *color) const
+{
+    // T is deduced as float from the type of color
+    writeImageImpl(image, x, y, z, color);
+}
+
+/**
+ * \brief Write a signed integer color to the pixel (x, y, z) of \p image
+ */
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+                                    int32_t *color) const
+{
+    // T is deduced as int32_t from the type of color
+    writeImageImpl(image, x, y, z, color);
+}
+
+/**
+ * \brief Write an unsigned integer color to the pixel (x, y, z) of \p image
+ */
+void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
+                                    uint32_t *color) const
+{
+    // T is deduced as uint32_t from the type of color
+    writeImageImpl(image, x, y, z, color);
+}
+
+/**
+ * \brief Bit pattern used as "full" alpha for components of type \c T
+ *
+ * The generic version returns 0; the specializations below cover the three
+ * component types used by the image built-ins.
+ */
+template<typename T>
+uint32_t type_max_value()
+{
+    return 0;
+}
+
+/** \brief Bit pattern of the float 1.0f */
+template<>
+uint32_t type_max_value<float>()
+{
+    return 0x3f800000;
+}
+
+/** \brief Largest value of a 32-bit signed integer */
+template<>
+uint32_t type_max_value<int32_t>()
+{
+    return 2147483647;
+}
+
+/** \brief Largest value of a 32-bit unsigned integer */
+template<>
+uint32_t type_max_value<uint32_t>()
+{
+    return ~static_cast<uint32_t>(0);
+}
+
+/**
+ * \brief Read one pixel of an image using integer coordinates
+ *
+ * The coordinates first go through \c handle_address_mode(). If it reports
+ * them as being outside the image, the border color is returned: (0, 0, 0)
+ * with an alpha of 1 for the channel orders \c CL_R, \c CL_RG, \c CL_RGB and
+ * \c CL_LUMINANCE, and an alpha of 0 for the others. Otherwise the pixel is
+ * converted from the storage format of the image and reordered to
+ * (R, G, B, A).
+ *
+ * \param result receives the four components
+ * \param image image to read
+ * \param x x coordinate of the pixel
+ * \param y y coordinate of the pixel
+ * \param z z coordinate of the pixel
+ * \param sampler sampler bit-field
+ */
+template<typename T>
+void CPUKernelWorkGroup::readImageImplI(T *result, Image2D *image, int x, int y,
+                                        int z, uint32_t sampler) const
+{
+    const bool use_border = handle_address_mode(image, x, y, z, sampler);
+
+    if (!use_border)
+    {
+        // Regular pixel: convert it, then put its components in RGBA order
+        T raw[4];
+        void *pixel = getImageData(image, x, y, z);
+
+        convert_from_format(raw, pixel,
+                            image->format().image_channel_data_type,
+                            image->channels());
+
+        swizzle((uint32_t *)result, (uint32_t *)raw,
+                image->format().image_channel_order, true,
+                type_max_value<T>());
+        return;
+    }
+
+    // Border color
+    for (int lane = 0; lane < 3; ++lane)
+        result[lane] = 0.0f;
+
+    bool opaque;
+
+    switch (image->format().image_channel_order)
+    {
+        case CL_R:
+        case CL_RG:
+        case CL_RGB:
+        case CL_LUMINANCE:
+            opaque = true;
+            break;
+        default:
+            opaque = false;
+    }
+
+    if (opaque)
+        result[3] = 1.0f;
+    else
+        result[3] = 0.0f;
+}
+
+/**
+ * \brief Read a pixel as floats, using integer coordinates
+ */
+void CPUKernelWorkGroup::readImage(float *result, Image2D *image, int x, int y,
+                                   int z, uint32_t sampler) const
+{
+    // T is deduced as float from the type of result
+    readImageImplI(result, image, x, y, z, sampler);
+}
+
+/**
+ * \brief Read a pixel as signed integers, using integer coordinates
+ */
+void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, int x, int y,
+                                   int z, uint32_t sampler) const
+{
+    // T is deduced as int32_t from the type of result
+    readImageImplI(result, image, x, y, z, sampler);
+}
+
+/**
+ * \brief Read a pixel as unsigned integers, using integer coordinates
+ */
+void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, int x, int y,
+                                   int z, uint32_t sampler) const
+{
+    // T is deduced as uint32_t from the type of result
+    readImageImplI(result, image, x, y, z, sampler);
+}
+
+/**
+ * \brief Read one pixel of an image using floating-point coordinates
+ *
+ * The addressing mode (bits 0xf0 of \p sampler) selects how the coordinates
+ * are mapped to pixels, the filter mode (bits 0xf00) selects between the
+ * nearest pixel and a linear interpolation of the neighbouring ones
+ * (\c linear2D() or \c linear3D()).
+ *
+ * Each filter case ends with a \c break so that exactly one filter is
+ * applied; without them the nearest filter fell through into the linear one,
+ * which overwrote \p result.
+ *
+ * NOTE(review): in the NONE/CLAMP/CLAMP_TO_EDGE linear path, the neighbour
+ * coordinates are handed to linear2D()/linear3D() unclamped; whether they
+ * are bounds-checked depends on readImageImplI() - confirm.
+ *
+ * \param result receives the four components
+ * \param image image to read
+ * \param x x coordinate
+ * \param y y coordinate
+ * \param z z coordinate (only meaningful for 3D images)
+ * \param sampler sampler bit-field
+ */
+template<typename T>
+void CPUKernelWorkGroup::readImageImplF(T *result, Image2D *image, float x,
+                                        float y, float z, uint32_t sampler) const
+{
+    bool is_3d = (image->type() == MemObject::Image3D);
+    Image3D *image3d = (Image3D *)image;
+
+    int w = image->width(),
+        h = image->height(),
+        d = (is_3d ? image3d->depth() : 1);
+
+    switch (sampler & 0xf0)
+    {
+        case CLK_ADDRESS_NONE:
+        case CLK_ADDRESS_CLAMP:
+        case CLK_ADDRESS_CLAMP_TO_EDGE:
+            /* De-normalize coordinates */
+            if ((sampler & 0xf) == CLK_NORMALIZED_COORDS_TRUE)
+            {
+                x *= (float)w;
+                y *= (float)h;
+                if (is_3d) z *= (float)d;
+            }
+
+            switch (sampler & 0xf00)
+            {
+                case CLK_FILTER_NEAREST:
+                {
+                    readImageImplI<T>(result, image, std::floor(x),
+                                      std::floor(y), std::floor(z), sampler);
+                    break;
+                }
+                case CLK_FILTER_LINEAR:
+                {
+                    float a, b, c;
+
+                    a = frac(x - 0.5f);
+                    b = frac(y - 0.5f);
+                    c = frac(z - 0.5f);
+
+                    if (is_3d)
+                    {
+                        linear3D<T>(result, a, b, c,
+                                    std::floor(x - 0.5f),
+                                    std::floor(y - 0.5f),
+                                    std::floor(z - 0.5f),
+                                    std::floor(x - 0.5f) + 1,
+                                    std::floor(y - 0.5f) + 1,
+                                    std::floor(z - 0.5f) + 1,
+                                    image3d);
+                    }
+                    else
+                    {
+                        linear2D<T>(result, a, b, c,
+                                    std::floor(x - 0.5f),
+                                    std::floor(y - 0.5f),
+                                    std::floor(x - 0.5f) + 1,
+                                    std::floor(y - 0.5f) + 1,
+                                    image);
+                    }
+                    break;
+                }
+            }
+            break;
+        case CLK_ADDRESS_REPEAT:
+            switch (sampler & 0xf00)
+            {
+                case CLK_FILTER_NEAREST:
+                {
+                    // k stays 0 for 2D images, which have a single slice
+                    int i, j, k = 0;
+
+                    x = (x - std::floor(x)) * (float)w;
+                    i = std::floor(x);
+                    if (i > w - 1)
+                        i = i - w;
+
+                    y = (y - std::floor(y)) * (float)h;
+                    j = std::floor(y);
+                    if (j > h - 1)
+                        j = j - h;
+
+                    if (is_3d)
+                    {
+                        z = (z - std::floor(z)) * (float)d;
+                        k = std::floor(z);
+                        if (k > d - 1)
+                            k = k - d;
+                    }
+
+                    readImageImplI<T>(result, image, i, j, k, sampler);
+                    break;
+                }
+                case CLK_FILTER_LINEAR:
+                {
+                    float a, b, c;
+                    int i0, i1, j0, j1, k0 = 0, k1 = 0;
+
+                    x = (x - std::floor(x)) * (float)w;
+                    i0 = std::floor(x - 0.5f);
+                    i1 = i0 + 1;
+                    if (i0 < 0)
+                        i0 = w + i0;
+                    if (i1 > w - 1)
+                        i1 = i1 - w;
+
+                    y = (y - std::floor(y)) * (float)h;
+                    j0 = std::floor(y - 0.5f);
+                    j1 = j0 + 1;
+                    if (j0 < 0)
+                        j0 = h + j0;
+                    if (j1 > h - 1)
+                        j1 = j1 - h;
+
+                    if (is_3d)
+                    {
+                        z = (z - std::floor(z)) * (float)d;
+                        k0 = std::floor(z - 0.5f);
+                        k1 = k0 + 1;
+                        if (k0 < 0)
+                            k0 = d + k0;
+                        if (k1 > d - 1)
+                            k1 = k1 - d;
+                    }
+
+                    a = frac(x - 0.5f);
+                    b = frac(y - 0.5f);
+                    c = frac(z - 0.5f);
+
+                    if (is_3d)
+                    {
+                        linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1,
+                                    image3d);
+                    }
+                    else
+                    {
+                        linear2D<T>(result, a, b, c, i0, j0, i1, j1, image);
+                    }
+                    break;
+                }
+            }
+            break;
+        case CLK_ADDRESS_MIRRORED_REPEAT:
+            switch (sampler & 0xf00)
+            {
+                case CLK_FILTER_NEAREST:
+                {
+                    x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w;
+                    y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h;
+                    if (is_3d)
+                        z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d;
+
+                    readImageImplI<T>(result, image,
+                                      min(std::floor(x), w - 1),
+                                      min(std::floor(y), h - 1),
+                                      min(std::floor(z), d - 1),
+                                      sampler);
+                    break;
+                }
+                case CLK_FILTER_LINEAR:
+                {
+                    float a, b, c;
+                    int i0, i1, j0, j1, k0 = 0, k1 = 0;
+
+                    x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w;
+                    i0 = std::floor(x - 0.5f);
+                    i1 = i0 + 1;
+                    i0 = max(i0, 0);
+                    i1 = min(i1, w - 1);
+
+                    y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h;
+                    j0 = std::floor(y - 0.5f);
+                    j1 = j0 + 1;
+                    j0 = max(j0, 0);
+                    j1 = min(j1, h - 1);
+
+                    if (is_3d)
+                    {
+                        z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d;
+                        k0 = std::floor(z - 0.5f);
+                        k1 = k0 + 1;
+                        k0 = max(k0, 0);
+                        k1 = min(k1, d - 1);
+                    }
+
+                    a = frac(x - 0.5f);
+                    b = frac(y - 0.5f);
+                    c = frac(z - 0.5f);
+
+                    if (is_3d)
+                    {
+                        linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1,
+                                    image3d);
+                    }
+                    else
+                    {
+                        linear2D<T>(result, a, b, c, i0, j0, i1, j1, image);
+                    }
+                    break;
+                }
+            }
+            break;
+    }
+}
+
+/**
+ * \brief Read a pixel as floats, using floating-point coordinates
+ */
+void CPUKernelWorkGroup::readImage(float *result, Image2D *image, float x,
+                                   float y, float z, uint32_t sampler) const
+{
+    // T is deduced as float from the type of result
+    readImageImplF(result, image, x, y, z, sampler);
+}
+
+/**
+ * \brief Read a pixel as signed integers, using floating-point coordinates
+ */
+void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, float x,
+                                   float y, float z, uint32_t sampler) const
+{
+    // T is deduced as int32_t from the type of result
+    readImageImplF(result, image, x, y, z, sampler);
+}
+
+/**
+ * \brief Read a pixel as unsigned integers, using floating-point coordinates
+ */
+void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, float x,
+                                   float y, float z, uint32_t sampler) const
+{
+    // T is deduced as uint32_t from the type of result
+    readImageImplF(result, image, x, y, z, sampler);
+}
diff --git a/src/core/cpu/worker.cpp b/src/core/cpu/worker.cpp
new file mode 100644
index 0000000..e5251f2
--- /dev/null
+++ b/src/core/cpu/worker.cpp
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file cpu/worker.cpp
+ * \brief Code running in the worker threads launched by \c Coal::CPUDevice
+ * \sa builtins.cpp
+ */
+
+#include "worker.h"
+#include "device.h"
+#include "buffer.h"
+#include "kernel.h"
+#include "builtins.h"
+
+#include "../commandqueue.h"
+#include "../events.h"
+#include "../memobject.h"
+#include "../kernel.h"
+
+#include <sys/mman.h>
+
+#include <cstring>
+#include <iostream>
+
+using namespace Coal;
+
+/**
+ * \brief Main loop of a CPU worker thread
+ *
+ * Repeatedly takes an event from the device with \c CPUDevice::getEvent(),
+ * executes the action matching its type, then updates its status (and its
+ * profiling timings if the queue of the event has profiling enabled). The
+ * loop ends when \c getEvent() sets its \c stop flag.
+ *
+ * \param data the \c Coal::CPUDevice this worker serves
+ * \return always 0
+ */
+void *worker(void *data)
+{
+ CPUDevice *device = (CPUDevice *)data;
+ bool stop = false;
+ cl_int errcode;
+ Event *event;
+
+ // Initialize TLS
+ setWorkItemsData(0, 0);
+
+ while (true)
+ {
+ event = device->getEvent(stop);
+
+ // Ensure we have a good event and we don't have to stop
+ if (stop) break;
+ if (!event) continue;
+
+ // Get info about the event and its command queue
+ Event::Type t = event->type();
+ CommandQueue *queue = 0;
+ cl_command_queue_properties queue_props = 0;
+
+ errcode = CL_SUCCESS;
+
+ event->info(CL_EVENT_COMMAND_QUEUE, sizeof(CommandQueue *), &queue, 0);
+
+ if (queue)
+ queue->info(CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties),
+ &queue_props, 0);
+
+ if (queue_props & CL_QUEUE_PROFILING_ENABLE)
+ event->updateTiming(Event::Start);
+
+ // Execute the action
+ switch (t)
+ {
+ case Event::ReadBuffer:
+ case Event::WriteBuffer:
+ {
+ // Plain copy between the device buffer (at the event's offset)
+ // and the host pointer of the event
+ ReadWriteBufferEvent *e = (ReadWriteBufferEvent *)event;
+ CPUBuffer *buf = (CPUBuffer *)e->buffer()->deviceBuffer(device);
+ char *data = (char *)buf->data();
+
+ data += e->offset();
+
+ if (t == Event::ReadBuffer)
+ std::memcpy(e->ptr(), data, e->cb());
+ else std::memcpy(data, e->ptr(), e->cb());
+
+ break;
+ }
+ case Event::CopyBuffer:
+ {
+ CopyBufferEvent *e = (CopyBufferEvent *)event;
+ CPUBuffer *src = (CPUBuffer *)e->source()->deviceBuffer(device);
+ CPUBuffer *dst = (CPUBuffer *)e->destination()->deviceBuffer(device);
+
+ std::memcpy((char*)dst->data() + e->dst_offset(),
+ (char*)src->data() + e->src_offset(), e->cb());
+ break;
+ }
+ case Event::ReadBufferRect:
+ case Event::WriteBufferRect:
+ case Event::CopyBufferRect:
+ case Event::ReadImage:
+ case Event::WriteImage:
+ case Event::CopyImage:
+ case Event::CopyBufferToImage:
+ case Event::CopyImageToBuffer:
+ {
+ // src = device buffer; dst = host memory unless this is a copy
+ ReadWriteCopyBufferRectEvent *e = (ReadWriteCopyBufferRectEvent *)event;
+ CPUBuffer *src_buf = (CPUBuffer *)e->source()->deviceBuffer(device);
+
+ unsigned char *src = (unsigned char *)src_buf->data();
+ unsigned char *dst;
+
+ switch (t)
+ {
+ case Event::CopyBufferRect:
+ case Event::CopyImage:
+ case Event::CopyImageToBuffer:
+ case Event::CopyBufferToImage:
+ {
+ // dst = device buffer of the destination memory object
+ CopyBufferRectEvent *cbre = (CopyBufferRectEvent *)e;
+ CPUBuffer *dst_buf =
+ (CPUBuffer *)cbre->destination()->deviceBuffer(device);
+
+ dst = (unsigned char *)dst_buf->data();
+ break;
+ }
+ default:
+ {
+ // dst = host memory location
+ ReadWriteBufferRectEvent *rwbre = (ReadWriteBufferRectEvent *)e;
+
+ dst = (unsigned char *)rwbre->ptr();
+ }
+ }
+
+ // Iterate over the lines to copy and use memcpy
+ for (size_t z=0; z<e->region(2); ++z)
+ {
+ for (size_t y=0; y<e->region(1); ++y)
+ {
+ unsigned char *s;
+ unsigned char *d;
+
+ // Start of the current line in dst and in src; the element
+ // size passed is 1, so origin(0) and region(0) are used as
+ // byte counts here
+ d = imageData(dst,
+ e->dst_origin(0),
+ y + e->dst_origin(1),
+ z + e->dst_origin(2),
+ e->dst_row_pitch(),
+ e->dst_slice_pitch(),
+ 1);
+
+ s = imageData(src,
+ e->src_origin(0),
+ y + e->src_origin(1),
+ z + e->src_origin(2),
+ e->src_row_pitch(),
+ e->src_slice_pitch(),
+ 1);
+
+ // Copying between an image and a buffer may need to add an
+ // offset to the buffer address (its rectangular origin is
+ // always (0, 0, 0)).
+ if (t == Event::CopyBufferToImage)
+ {
+ CopyBufferToImageEvent *cptie = (CopyBufferToImageEvent *)e;
+ s += cptie->offset();
+ }
+ else if (t == Event::CopyImageToBuffer)
+ {
+ CopyImageToBufferEvent *citbe = (CopyImageToBufferEvent *)e;
+ d += citbe->offset();
+ }
+
+ if (t == Event::WriteBufferRect || t == Event::WriteImage)
+ std::memcpy(s, d, e->region(0)); // Write dest (memory) in src
+ else
+ std::memcpy(d, s, e->region(0)); // Write src (buffer) in dest (memory), or copy the buffers
+ }
+ }
+
+ break;
+ }
+ case Event::MapBuffer:
+ case Event::MapImage:
+ // All was already done in CPUBuffer::initEventDeviceData()
+ break;
+
+ case Event::NativeKernel:
+ {
+ // Call the host function of the event with its arguments
+ NativeKernelEvent *e = (NativeKernelEvent *)event;
+ void (*func)(void *) = (void (*)(void *))e->function();
+ void *args = e->args();
+
+ func(args);
+
+ break;
+ }
+ case Event::NDRangeKernel:
+ case Event::TaskKernel:
+ {
+ KernelEvent *e = (KernelEvent *)event;
+ CPUKernelEvent *ke = (CPUKernelEvent *)e->deviceData();
+
+ // Take an instance
+ CPUKernelWorkGroup *instance = ke->takeInstance();
+ ke = 0; // Unlocked, don't use anymore
+
+ if (!instance->run())
+ errcode = CL_INVALID_PROGRAM_EXECUTABLE;
+
+ delete instance;
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Cleanups
+ if (errcode == CL_SUCCESS)
+ {
+ bool finished = true;
+
+ // A kernel event is complete only once CPUKernelEvent::finished()
+ // reports it (presumably when all its work-groups have run)
+ if (event->type() == Event::NDRangeKernel ||
+ event->type() == Event::TaskKernel)
+ {
+ CPUKernelEvent *ke = (CPUKernelEvent *)event->deviceData();
+ finished = ke->finished();
+ }
+
+ if (finished)
+ {
+ // an event may be released once it is Complete
+ if (queue_props & CL_QUEUE_PROFILING_ENABLE)
+ event->updateTiming(Event::End);
+ event->setStatus(Event::Complete);
+ }
+ }
+ else
+ {
+ // an event may be released once it is Complete
+ if (queue_props & CL_QUEUE_PROFILING_ENABLE)
+ event->updateTiming(Event::End);
+ // The event failed
+ event->setStatus((Event::Status)errcode);
+ }
+ }
+
+ // Free mmapped() data if needed
+ size_t mapped_size;
+ void *mapped_data = getWorkItemsData(mapped_size);
+
+ if (mapped_data)
+ munmap(mapped_data, mapped_size);
+
+ return 0;
+}
diff --git a/src/core/cpu/worker.h b/src/core/cpu/worker.h
new file mode 100644
index 0000000..43ddd03
--- /dev/null
+++ b/src/core/cpu/worker.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file worker.h
+ * \brief Function run by the CPU worker threads
+ */
+
+#ifndef __CPU_WORKER_H__
+#define __CPU_WORKER_H__
+
+/**
+ * \brief Main loop of the CPU worker threads
+ *
+ * This function is run by as many threads as there are CPU cores on the host
+ * system. As explained by \ref events , this function waits until there
+ * are \c Coal::Event objects to process and handles them.
+ */
+void *worker(void *data);
+
+#endif
diff --git a/src/core/deviceinterface.h b/src/core/deviceinterface.h
new file mode 100644
index 0000000..a321a9e
--- /dev/null
+++ b/src/core/deviceinterface.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file deviceinterface.h
+ * \brief Abstraction layer between Clover core and the devices
+ */
+
+#ifndef __DEVICEINTERFACE_H__
+#define __DEVICEINTERFACE_H__
+
+#include <CL/cl.h>
+#include <string>
+#include "object.h"
+
+/* This pulls in legacy::PassManager when LLVM >= 3.4 */
+#include <llvm/PassManager.h>
+
+namespace Coal
+{
+
+class DeviceBuffer;
+class DeviceProgram;
+class DeviceKernel;
+
+class MemObject;
+class Event;
+class Program;
+class Kernel;
+
+/**
+ * \brief Abstraction layer between core Clover objects and the devices
+ *
+ * This interface is used by the core Clover classes to communicate with the
+ * devices, which must reimplement all the pure virtual functions described
+ * here.
+ */
+class DeviceInterface : public Object
+{
+ public:
+ /** \brief Pass \c Object::T_Device and 0 to the \c Object base class */
+ DeviceInterface() : Object(Object::T_Device, 0) {}
+ virtual ~DeviceInterface() {}
+
+ /**
+ * \brief Retrieve information about the device
+ *
+ * This function is used to retrieve information about an object.
+ * Sometimes, the size of the data retrieved is unknown (for example, a
+ * string). The application can call this function twice: the first time
+ * to get the size, then it allocates a buffer, and finally gets the data.
+ *
+ * \code
+ * char *string = 0;
+ * size_t len;
+ *
+ * object->info(FOO_PROPERTY_STRING, 0, 0, &len);
+ * string = (char *)std::malloc(len);
+ * object->info(FOO_PROPERTY_STRING, len, string, 0);
+ * \endcode
+ *
+ * \param param_name Name of the property to retrieve
+ * \param param_value_size Size of the application-allocated buffer
+ * in which to put the value.
+ * \param param_value Pointer to an application-allocated buffer
+ * where the property data will be stored. Ignored
+ * if NULL.
+ * \param param_value_size_ret Size of the value retrieved, ignored if
+ * NULL.
+ * \return CL_SUCCESS in case of success, otherwise a CL error code.
+ */
+ virtual cl_int info(cl_device_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const = 0;
+
+ /**
+ * \brief Create a \c Coal::DeviceBuffer object for this device
+ * \param buffer Memory object for which the buffer has to be created
+ * \param rs Error code (\c CL_SUCCESS if no error)
+ * \return a \c Coal::DeviceBuffer object, undefined if there is an error
+ */
+ virtual DeviceBuffer *createDeviceBuffer(MemObject *buffer, cl_int *rs) = 0;
+
+ /**
+ * \brief Create a \c Coal::DeviceProgram object for this device
+ * \param program \c Coal::Program containing the device-independent
+ * program data
+ * \return a \c Coal::DeviceProgram object
+ */
+ virtual DeviceProgram *createDeviceProgram(Program *program) = 0;
+
+ /**
+ * \brief Create a \c Coal::DeviceKernel object for this device
+ * \param kernel \c Coal::Kernel containing the device-independent kernel
+ * data
+ * \param function device-specific \c llvm::Function to be used
+ * \return a \c Coal::DeviceKernel object
+ */
+ virtual DeviceKernel *createDeviceKernel(Kernel *kernel,
+ llvm::Function *function) = 0;
+
+ /**
+ * \brief Push an event on the device
+ * \sa the end of \ref events
+ * \param event the event to be pushed
+ */
+ virtual void pushEvent(Event *event) = 0;
+
+ /**
+ * \brief Initialize device-specific event data
+ *
+ * This call allows a device to initialize device-specific event data,
+ * by using \c Coal::Event::setDeviceData(). For instance, a
+ * hardware-accelerated device can associate a device command with an
+ * event, and use it to manage the event when it gets pushed.
+ *
+ * @note This function has one obligation: it must call
+ * \c Coal::MapBufferEvent::setPtr() and
+ * \c Coal::MapImageEvent::setPtr() (and the other functions
+ * described in its documentation)
+ *
+ * \param event the event for which data can be set
+ * \return CL_SUCCESS in case of success
+ */
+ virtual cl_int initEventDeviceData(Event *event) = 0;
+
+ /**
+ * \brief Free device-specific event data
+ *
+ * This function is called just before \p event gets deleted. It allows
+ * a device to free device-specific data of this event, if any.
+ *
+ * \param event the event that will be destroyed
+ */
+ virtual void freeEventDeviceData(Event *event) = 0;
+
+ /**
+ * \brief Built-ins header of the device, returned as a string
+ *
+ * NOTE(review): presumably the text of the header declaring the
+ * device's OpenCL C built-ins -- confirm against the implementations.
+ */
+ virtual std::string builtinsHeader(void) const = 0;
+
+ /**
+ * \brief Device initialization hook
+ *
+ * NOTE(review): when this is called relative to construction is not
+ * visible from this header -- confirm against the callers.
+ */
+ virtual void init() = 0;
+
+ /**
+ * \brief Ask device if it has enough work in its queue
+ * \return the default implementation always returns false
+ */
+ virtual bool gotEnoughToWorkOn() { return false; }
+};
+
+/**
+ * \brief Device-specific memory buffer
+ *
+ * This class is the backing-store used on a device for a \c Coal::MemObject. It
+ * is created by \c Coal::DeviceInterface::createDeviceBuffer().
+ *
+ * The class holds no state of its own; every operation is pure virtual and
+ * must be supplied by the device back-end.
+ */
+class DeviceBuffer
+{
+ public:
+ DeviceBuffer() {}
+ virtual ~DeviceBuffer() {}
+
+ /**
+ * \brief Allocate the buffer on the device
+ * \return true on success, false otherwise
+ */
+ virtual bool allocate() = 0;
+
+ /**
+ * \brief \c Coal::DeviceInterface of this buffer
+ * \return parent \c Coal::DeviceInterface
+ */
+ virtual DeviceInterface *device() const = 0;
+
+ /**
+ * \brief Allocation status
+ * \return true if already allocated, false otherwise
+ */
+ virtual bool allocated() const = 0;
+
+ /**
+ * \brief Host-accessible memory pointer
+ *
+ * This function returns what is passed as arguments to native kernels
+ * (\c clEnqueueNativeKernel(), \c Coal::NativeKernelEvent) in place of
+ * \c Coal::MemObject pointers.
+ *
+ * For \c Coal::CPUDevice, it's simply a pointer in RAM, but
+ * hardware-accelerated devices may need to do some copying or mapping.
+ *
+ * \warning Beware that this data may get written to by the native kernel.
+ *
+ * \return A memory pointer usable by a host native kernel
+ */
+ virtual void *nativeGlobalPointer() const = 0;
+};
+
+/**
+ * \brief Device-specific program data
+ */
+class DeviceProgram
+{
+ public:
+ DeviceProgram() {}
+ virtual ~DeviceProgram() {}
+
+ /**
+ * \brief Whether \b stdlib should be linked with this program
+ *
+ * \b stdlib is an LLVM bitcode file containing some implementations of
+ * OpenCL C built-ins. This function allows a device to tell
+ * \c Coal::Program::build() if it wants \b stdlib to be linked or not.
+ *
+ * Linking the library may allow inlining of functions like \c ceil(),
+ * \c floor(), \c clamp(), etc. So, if these functions are not better
+ * handled by the device itself than by \b stdlib, it's a good thing
+ * to link it.
+ *
+ * But if the device provides instructions for these functions, then
+ * it could be better not to link \b stdlib and to replace the LLVM
+ * calls to these functions with device-specific instructions.
+ *
+ * \warning \b Stdlib currently only works for \c Coal::CPUDevice, as
+ * it contains host-specific code (LLVM IR is not meant to be
+ * portable, pointer size changes for example).
+ *
+ * \return true if \b stdlib must be linked with the program
+ */
+ virtual bool linkStdLib() const = 0;
+
+ /**
+ * \brief Create device-specific optimization passes
+ *
+ * This hook allows a device to add LLVM optimization passes to a
+ * \c llvm::PassManager . This way, devices needing function flattening
+ * or special analysis passes can have them run on the module.
+ *
+ * \param manager \c llvm::PassManager to which the passes are added
+ * \param optimize false if \c -cl-opt-disable was given at compilation
+ * time.
+ * \param hasBarrier defaults to false. NOTE(review): presumably tells
+ * the device whether the program uses barriers -- confirm
+ * against the implementations.
+ */
+ virtual void createOptimizationPasses(llvm::PassManager *manager,
+ bool optimize, bool hasBarrier=false) = 0;
+
+ /**
+ * \brief Build a device-specific representation of the program
+ *
+ * This function is called by \c Coal::Program::build() when the module
+ * is compiled and linked. It can be used by the device to build a
+ * device-specific representation of the program.
+ *
+ * \param module \c llvm::Module containing the program's LLVM IR
+ * \param binary_str \c std::string containing dep.unlinked_binary
+ * \return true in case of success, false otherwise
+ */
+ virtual bool build(llvm::Module *module, std::string* binary_str) = 0;
+
+ /**
+ * \brief Extract binaries from MIXED binary
+ *
+ * This function is called to extract the LLVM bitcode and/or the native
+ * binary contained in a MIXED binary.
+ *
+ * The default implementation extracts nothing and returns false.
+ *
+ * \param binary_str \c std::string containing mixed binary
+ * \param bitcode \c std::string returns LLVM bitcode if not NULL
+ * \param native \c std::string returns native binary if not NULL
+ * \return true if the binary is indeed mixed
+ */
+ virtual bool ExtractMixedBinary(std::string *binary_str,
+ std::string *bitcode, std::string *native)
+ { return false; }
+};
+
+/**
+ * \brief Device-specific kernel data
+ */
+class DeviceKernel
+{
+ public:
+ DeviceKernel() {}
+ virtual ~DeviceKernel() {}
+
+ /**
+ * \brief Maximum work-group size of a kernel
+ * \return Maximum work-group size of the kernel, based on device-specific
+ * data (such as memory usage, register pressure, etc.)
+ */
+ virtual size_t workGroupSize() = 0;
+
+ /**
+ * \brief Local memory used by the kernel
+ * \return Local memory used by the kernel, in bytes
+ */
+ virtual cl_ulong localMemSize() const = 0;
+
+ /**
+ * \brief Private memory used by the kernel
+ * \return Private memory used by the kernel, in bytes
+ */
+ virtual cl_ulong privateMemSize() const = 0;
+
+ /**
+ * \brief Preferred work-group size multiple
+ * \return The size multiple a work-group can have to work the best and
+ * the fastest on the device
+ */
+ virtual size_t preferredWorkGroupSizeMultiple() const = 0;
+
+ /**
+ * \brief Optimal work-group size
+ *
+ * This function allows a device to calculate the optimal work-group size
+ * for this kernel, using its memory usage, SIMD dimension, etc.
+ *
+ * \c Coal::CPUDevice tries to split the kernel into a number of
+ * work-groups as close as possible to the number of CPU cores.
+ *
+ * \param num_dims Number of working dimensions
+ * \param dim Dimension for which the size is being calculated
+ * \param global_work_size Total number of work-items to split into
+ * work-groups
+ * \return optimal size of a work-group, for the \p dim dimension.
+ */
+ virtual size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
+ size_t global_work_size) const = 0;
+};
+
+}
+
+/**
+ * \brief Definition of the struct behind the OpenCL \c cl_device_id handle
+ *
+ * The struct adds no members of its own; it only derives from
+ * \c Coal::DeviceInterface .
+ */
+struct _cl_device_id : public Coal::DeviceInterface
+{};
+
+#endif
diff --git a/src/core/dsp/buffer.cpp b/src/core/dsp/buffer.cpp
new file mode 100644
index 0000000..72c5419
--- /dev/null
+++ b/src/core/dsp/buffer.cpp
@@ -0,0 +1,149 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "buffer.h"
+#include "device.h"
+#include "driver.h"
+
+#include "CL/cl_ext.h"
+#include "../memobject.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+using namespace Coal;
+
+/******************************************************************************
+* DSPBuffer::DSPBuffer()
+*
+* Bind a device buffer to its DSP device and to the memory object it backs.
+* No device memory is obtained here; that is the job of allocate().
+*
+*   device  DSP device that owns this buffer
+*   buffer  memory object backed by this buffer
+*   rs      error-code out-parameter; set to CL_SUCCESS when non-NULL, since
+*           nothing in this constructor can fail
+******************************************************************************/
+DSPBuffer::DSPBuffer(DSPDevice *device, MemObject *buffer, cl_int *rs)
+    : DeviceBuffer(), p_device(device), p_buffer(buffer), p_data(0),
+      p_data_malloced(false), p_buffer_idx(0)
+{
+    /*---------------------------------------------------------------------
+    * Honour the "CL_SUCCESS if no error" contract of the rs out-parameter
+    * so that callers never read an indeterminate value.
+    *--------------------------------------------------------------------*/
+    if (rs) *rs = CL_SUCCESS;
+
+    if (buffer->type() != MemObject::SubBuffer &&
+        (buffer->flags() & CL_MEM_USE_HOST_PTR))
+    {
+        /*-----------------------------------------------------------------
+        * We use the host ptr, we are already allocated
+        *----------------------------------------------------------------*/
+        p_data = (DSPDevicePtr64)(uint64_t)buffer->host_ptr();
+    }
+}
+
+DSPBuffer::~DSPBuffer()
+{
+    /*---------------------------------------------------------------------
+    * Only storage flagged as obtained by this object is handed back
+    *--------------------------------------------------------------------*/
+    if (!p_data_malloced) return;
+
+    const bool in_msmc = (p_buffer->flags() & CL_MEM_USE_MSMC_TI) != 0;
+
+    if (in_msmc)
+    {
+        p_device->free_msmc(p_data);
+    }
+    else
+    {
+        p_device->free_global(p_data);
+    }
+}
+
+DSPDevicePtr64 DSPBuffer::data() const
+{
+    /*---------------------------------------------------------------------
+    * An address is already known: nothing to derive
+    *--------------------------------------------------------------------*/
+    if (p_data) return p_data;
+
+    /*---------------------------------------------------------------------
+    * No address and not a sub-buffer: report the (null) address as is
+    *--------------------------------------------------------------------*/
+    if (p_buffer->type() != MemObject::SubBuffer) return p_data; // ERROR();
+
+    /*---------------------------------------------------------------------
+    * Sub-buffer: the address is the parent's DSPBuffer address plus the
+    * sub-buffer offset; the parent is allocated on demand.
+    *--------------------------------------------------------------------*/
+    SubBuffer *sub        = (SubBuffer *)p_buffer;
+    MemObject *owner      = sub->parent();
+    DSPBuffer *owner_dbuf = (DSPBuffer *)owner->deviceBuffer(p_device);
+
+    if (!owner_dbuf->data()) owner_dbuf->allocate();
+
+    const DSPDevicePtr64 owner_base = owner_dbuf->data();
+    if (!owner_base) return 0; //ERROR()
+
+    return owner_base + sub->offset();
+}
+
+void *DSPBuffer::nativeGlobalPointer() const
+{
+    /*---------------------------------------------------------------------
+    * Reinterpret the address reported by data() as a host pointer
+    *--------------------------------------------------------------------*/
+    const uint64_t raw_addr = (uint64_t) data();
+    return (void*) raw_addr;
+}
+
+bool DSPBuffer::allocate()
+{
+    const size_t bytes = p_buffer->size();
+
+    /*-------------------------------------------------------------------------
+    * A zero-sized memory object cannot be backed by anything
+    *------------------------------------------------------------------------*/
+    if (bytes == 0) return false;
+
+    const bool is_sub_buffer = (p_buffer->type() == MemObject::SubBuffer);
+
+    if (is_sub_buffer && !p_data)
+    {
+        /*---------------------------------------------------------------------
+        * Address is derived from the DSPBuffer of the parent buffer, which
+        * is allocated on demand
+        *--------------------------------------------------------------------*/
+        SubBuffer *sub        = (SubBuffer *)p_buffer;
+        MemObject *owner      = sub->parent();
+        DSPBuffer *owner_dbuf = (DSPBuffer *)owner->deviceBuffer(p_device);
+
+        if (!owner_dbuf->data()) owner_dbuf->allocate();
+
+        const DSPDevicePtr64 owner_base = owner_dbuf->data();
+        if (!owner_base) return false;
+
+        p_data = owner_base + sub->offset();
+        return true;
+    }
+
+    /*-------------------------------------------------------------------------
+    * No address yet (we are not using a host ptr): obtain device memory,
+    * from MSMC when the memory object asks for it, else from global memory
+    *------------------------------------------------------------------------*/
+    if (!p_data)
+    {
+        const bool in_msmc = (p_buffer->flags() & CL_MEM_USE_MSMC_TI) != 0;
+
+        if (in_msmc)
+        {
+            p_data = (DSPDevicePtr64) p_device->malloc_msmc(bytes);
+        }
+        else
+        {
+            p_data = (DSPDevicePtr64) p_device->malloc_global(bytes, false);
+        }
+
+        if (!p_data) return false;
+
+        p_data_malloced = true;
+    }
+
+    /*-------------------------------------------------------------------------
+    * CL_MEM_COPY_HOST_PTR on a non sub-buffer: push the host data to the DSP
+    *------------------------------------------------------------------------*/
+    if (!is_sub_buffer && (p_buffer->flags() & CL_MEM_COPY_HOST_PTR))
+    {
+        Driver::instance()->write(p_device->dspID(), p_data,
+                                  (uint8_t*)p_buffer->host_ptr(), bytes);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Tell the memobject that we are allocated
+    *------------------------------------------------------------------------*/
+    p_buffer->deviceAllocated(this);
+
+    return true;
+}
+
+/*-----------------------------------------------------------------------------
+* Return the DSP device this buffer was constructed with
+*----------------------------------------------------------------------------*/
+DeviceInterface *DSPBuffer::device() const
+{
+ return p_device;
+}
+
+bool DSPBuffer::allocated() const
+{
+    /*---------------------------------------------------------------------
+    * The buffer counts as allocated as soon as it has a non-null address
+    *--------------------------------------------------------------------*/
+    if (p_data != 0) return true;
+    return false;
+}
diff --git a/src/core/dsp/buffer.h b/src/core/dsp/buffer.h
new file mode 100644
index 0000000..b8cb860
--- /dev/null
+++ b/src/core/dsp/buffer.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#ifndef __DSP_BUFFER_H__
+#define __DSP_BUFFER_H__
+
+#include "../deviceinterface.h"
+#include "device.h"
+
+namespace Coal
+{
+
+class DSPDevice;
+class MemObject;
+
+/**
+ * \brief \c Coal::DeviceBuffer implementation for the DSP device
+ *
+ * Holds the DSP address backing one \c Coal::MemObject on one
+ * \c Coal::DSPDevice .
+ */
+class DSPBuffer : public DeviceBuffer
+{
+ public:
+ /**
+ * \param device DSP device owning the buffer
+ * \param buffer memory object backed by this buffer
+ * \param rs error-code out-parameter (see buffer.cpp)
+ */
+ DSPBuffer(DSPDevice *device, MemObject *buffer, cl_int *rs);
+ ~DSPBuffer();
+
+ bool allocate();
+ DeviceInterface *device() const;
+ /** \brief DSP address of the buffer, 0 when none is available */
+ DSPDevicePtr64 data() const ;
+ void *nativeGlobalPointer() const ;
+ bool allocated() const;
+
+ private:
+ DSPDevice * p_device; /* device given at construction */
+ MemObject * p_buffer; /* memory object given at construction */
+ DSPDevicePtr64 p_data; /* DSP address, 0 until one is known */
+ bool p_data_malloced; /* true when allocate() obtained p_data itself */
+ unsigned int p_buffer_idx; /* set to 0 at construction, unused in buffer.cpp */
+};
+}
+#endif
diff --git a/src/core/dsp/cmem.cpp b/src/core/dsp/cmem.cpp
new file mode 100644
index 0000000..ee0f938
--- /dev/null
+++ b/src/core/dsp/cmem.cpp
@@ -0,0 +1,271 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "cmem.h"
+#include <deque>
+#include <iostream>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <sys/stat.h>
+#include <string>
+
+#define ERR(status, msg) if (status) { printf("ERROR: %s\n", msg); exit(-1); }
+
+/* Singleton pointer; stays null until Cmem::instance() creates the object. */
+Cmem* Cmem::pInstance = 0;
+
+/*=============================================================================
+* C M E M
+*============================================================================*/
+/*-----------------------------------------------------------------------------
+* Integer division of x by y, rounded up. Both arguments are fully
+* parenthesized so that expression arguments (e.g. a + b) divide correctly.
+* Note that x and y are evaluated more than once.
+*----------------------------------------------------------------------------*/
+#define CEIL_DIVIDE(x,y) (((x) + (y) - 1) / (y))
+
+/******************************************************************************
+* Thread safe instance function for singleton behavior
+*
+* Double-checked locking: pInstance is read once without the lock; only when
+* it is still null is the mutex taken, pInstance re-read, and the object
+* created. The __sync_synchronize() full barriers order the construction of
+* the Cmem object before the store that publishes it, and the unlocked read
+* before any use of the returned pointer.
+*
+* Returns the single Cmem object, creating it on first call.
+******************************************************************************/
+Cmem* Cmem::instance()
+{
+ static Mutex Cmem_instance_mutex;
+ Cmem* tmp = pInstance;
+
+ __sync_synchronize();
+
+ if (tmp == 0)
+ {
+ /* NOTE(review): ScopedLock presumably unlocks at end of scope -- confirm
+ * against u_lockable.h */
+ ScopedLock lck(Cmem_instance_mutex);
+
+ /* Re-check under the lock: another thread may have created it */
+ tmp = pInstance;
+ if (tmp == 0)
+ {
+ tmp = new Cmem;
+ /* Publish only after the object is fully constructed */
+ __sync_synchronize();
+ pInstance = tmp;
+ }
+ }
+ return tmp;
+}
+
+/******************************************************************************
+* Cmem::open()
+*
+* Open the contiguous-memory driver, allocate the pool of host buffers used
+* for DMA and wrap it in a buffer manager. Any failing step terminates the
+* process through ERR().
+******************************************************************************/
+void Cmem::open()
+{
+    const int open_rc = cmem_drv_open();
+    ERR(open_rc, "DMA Contiguous Memory Driver Open Error");
+
+    /*---------------------------------------------------------------------
+    * NOTE(review): a free with a buffer count of 0 is issued before the
+    * allocation; its purpose is not visible here -- confirm against the
+    * cmem driver documentation.
+    *--------------------------------------------------------------------*/
+    const int free_rc = cmem_drv_free(0, HOST_BUF_TYPE_DYNAMIC, buf_desc);
+    ERR(free_rc, "DMA Contiguous Memory Free Error");
+
+    const int alloc_rc = cmem_drv_alloc(MAX_NUM_HOST_DSP_BUFFERS,
+                                        HOST_CMEM_BUFFER_SIZE,
+                                        HOST_BUF_TYPE_DYNAMIC, buf_desc);
+    ERR(alloc_rc, "DMA Contiguous Memory Alloc Error");
+
+    const int pool_rc = bufmgrCreate(&DmaBufPool, MAX_NUM_HOST_DSP_BUFFERS,
+                                     buf_desc);
+    ERR(pool_rc, "DMA Buffer manager Create Error");
+}
+
+/******************************************************************************
+* Cmem::close()
+*
+* Tear down in the reverse order of open(): delete the buffer manager, free
+* the host buffers, then close the driver. A failing driver call terminates
+* the process through ERR().
+******************************************************************************/
+void Cmem::close()
+{
+    bufmgrDelete(&DmaBufPool);
+
+    const int free_rc = cmem_drv_free(MAX_NUM_HOST_DSP_BUFFERS,
+                                      HOST_BUF_TYPE_DYNAMIC, buf_desc);
+    ERR(free_rc, "DMA Contiguous Memory Driver Free Error");
+
+    const int close_rc = cmem_drv_close();
+    ERR(close_rc, "DMA Contiguous Memory Driver Close Error");
+}
+
+
+/******************************************************************************
+* Cmem::dma_write()
+*
+* The dma to the dsp memory system can only occur from contiguous memory, i.e.
+* cmem. The caller's buffer is therefore staged through CMEM buffers in
+* chunks of HOST_CMEM_BUFFER_SIZE bytes: each chunk is memcpy'd into a CMEM
+* buffer and one non-blocking DMA is initiated for it. At most simul_dmas (4)
+* CMEM buffers are allocated and reused round-robin; once simul_dmas
+* transfers are outstanding the oldest one is waited for, which guarantees a
+* CMEM buffer is idle again before it is overwritten.
+*
+* To make it concrete, a 48M write with 4M CMEM buffers results in:
+*    12 memcpy calls of 4M each,
+*    12 dma initiates of one 4M buffer each,
+*     4 CMEM buffers allocated and reused in rotation.
+*
+*   dsp_id  dsp the data is written to
+*   addr    destination address on the dsp
+*   buf     host source buffer, at least size bytes
+*   size    number of bytes to write; 0 is a no-op
+*
+* Any driver or buffer manager failure terminates the process through ERR().
+******************************************************************************/
+void Cmem::dma_write(int32_t dsp_id, uint32_t addr, uint8_t *buf, uint32_t size)
+{
+    /*---------------------------------------------------------------------
+    * Nothing to transfer. Returning here also keeps the chunk arithmetic
+    * below free of unsigned underflow.
+    *--------------------------------------------------------------------*/
+    if (size == 0) return;
+
+    /*---------------------------------------------------------------------
+    * NOTE(review): trans_id is function-static and therefore shared by all
+    * callers; kept as in the original -- confirm whether the driver needs
+    * the value to persist across calls.
+    *--------------------------------------------------------------------*/
+    static uint32_t      trans_id = 0;
+    int32_t              ret_val;
+    std::deque<uint32_t> dma_ids;
+
+    const uint32_t simul_dmas       = 4;
+    const uint32_t cmem_buffer_size = HOST_CMEM_BUFFER_SIZE;
+
+    /*---------------------------------------------------------------------
+    * Ceiling division written so that it cannot wrap for sizes close to
+    * UINT32_MAX
+    *--------------------------------------------------------------------*/
+    const uint32_t tot_buffers      = size / cmem_buffer_size +
+                                      ((size % cmem_buffer_size) ? 1 : 0);
+    const uint32_t circ_buffers     = std::min(simul_dmas, tot_buffers);
+    const uint32_t last_buffer_size = size -
+                                      ((tot_buffers - 1) * cmem_buffer_size);
+
+    cmem_host_buf_desc_t *host_buf_desc =
+                                   new cmem_host_buf_desc_t[circ_buffers];
+
+    cmem_host_frame_desc_t *host_frame_desc =
+                                   new cmem_host_frame_desc_t[circ_buffers];
+
+    /*---------------------------------------------------------------------
+    * Allocate Host CMEM buffers
+    *--------------------------------------------------------------------*/
+    for (uint32_t i = 0; i < circ_buffers; ++i)
+    {
+        ret_val = bufmgrAlloc(DmaBufPool, 1, &host_buf_desc[i]);
+        ERR(ret_val, "dma buffer allocation failed");
+        host_frame_desc[i].bufDescP         = &host_buf_desc[i];
+        host_frame_desc[i].numBuffers       = 1;
+        host_frame_desc[i].frameStartOffset = 0;
+        host_frame_desc[i].frameSize        = cmem_buffer_size;
+    }
+
+    /*-------------------------------------------------------------------------
+    * Initiate one transfer per chunk, keeping at most simul_dmas in flight
+    *------------------------------------------------------------------------*/
+    for (uint32_t i = 0; i < tot_buffers; ++i)
+    {
+        const uint32_t circ_i = i % simul_dmas;
+        const uint32_t offset = i * cmem_buffer_size;
+
+        /*---------------------------------------------------------------------
+        * Named chunk_desc so that it does not hide the buf_desc used by
+        * open() and close().
+        * NOTE(review): the copy size is taken from the descriptor length,
+        * which is assumed to equal HOST_CMEM_BUFFER_SIZE -- confirm.
+        *--------------------------------------------------------------------*/
+        cmem_host_buf_desc_t &chunk_desc = host_buf_desc[circ_i];
+        uint32_t              cpy_size   = chunk_desc.length;
+
+        if (i == tot_buffers - 1)
+            host_frame_desc[circ_i].frameSize = cpy_size = last_buffer_size;
+
+        memcpy(chunk_desc.userAddr, buf + offset, cpy_size);
+
+        /*---------------------------------------------------------------------
+        * Initiate DMA
+        *--------------------------------------------------------------------*/
+        ret_val = pciedrv_dma_write_initiate(dsp_id, addr + offset,
+                                             &host_frame_desc[circ_i],
+                                             PCIEDRV_DMA_XFER_NON_BLOCKING,
+                                             &trans_id);
+        ERR(ret_val, "DMA initiate failed");
+
+        dma_ids.push_back(trans_id);
+
+        /*---------------------------------------------------------------------
+        * Window full: wait for the oldest transfer so that its CMEM buffer
+        * is free before the next chunk reuses it
+        *--------------------------------------------------------------------*/
+        if (dma_ids.size() >= simul_dmas)
+        {
+            while (pciedrv_dma_check(dsp_id, dma_ids.front()));
+            dma_ids.pop_front();
+        }
+    }
+
+    /*---------------------------------------------------------------------
+    * Wait for all dmas to complete
+    *--------------------------------------------------------------------*/
+    for (size_t i = 0; i < dma_ids.size(); ++i)
+        while (pciedrv_dma_check(dsp_id, dma_ids[i]));
+
+    /*---------------------------------------------------------------------
+    * Free host CMEM buffers
+    *--------------------------------------------------------------------*/
+    for (uint32_t i = 0; i < circ_buffers; ++i)
+    {
+        ret_val = bufmgrFreeDesc(DmaBufPool, &host_buf_desc[i]);
+        ERR(ret_val, "dma buffer free failed");
+    }
+
+    delete [] host_buf_desc;
+    delete [] host_frame_desc;
+}
+
+/******************************************************************************
+* Cmem::dma_read()
+*
+* Read size bytes from dsp address addr into buf. A single CMEM host buffer
+* is used as a bounce buffer: the data is fetched in chunks of
+* HOST_CMEM_BUFFER_SIZE bytes with blocking DMA reads, and each chunk is
+* copied out to buf before the next one is requested. Any driver or buffer
+* manager failure terminates the process through ERR().
+******************************************************************************/
+void Cmem::dma_read(int32_t dsp_id, uint32_t addr, uint8_t *buf, uint32_t size)
+{
+    cmem_host_buf_desc_t   bounce_desc;
+    cmem_host_frame_desc_t bounce_frame;
+
+    uint32_t chunk_bytes = HOST_CMEM_BUFFER_SIZE;
+    uint32_t bytes_left  = size;
+    uint32_t bytes_done  = 0;
+    uint32_t trans_id;
+    int32_t  rc;
+
+    /*---------------------------------------------------------------------
+    * Allocate the bounce buffer
+    *--------------------------------------------------------------------*/
+    rc = bufmgrAlloc(DmaBufPool, 1, &bounce_desc);
+    ERR(rc, "dma buffer allocation failed");
+
+    /*---------------------------------------------------------------------
+    * Describe the bounce buffer as a one-buffer frame of a full chunk
+    *--------------------------------------------------------------------*/
+    bounce_frame.bufDescP         = &bounce_desc;
+    bounce_frame.numBuffers       = 1;
+    bounce_frame.frameStartOffset = 0;
+    bounce_frame.frameSize        = chunk_bytes;
+
+    /*-------------------------------------------------------------------------
+    * One blocking transfer per chunk; the final chunk carries whatever
+    * is left over
+    *------------------------------------------------------------------------*/
+    for (uint32_t chunks_left = CEIL_DIVIDE(size, HOST_CMEM_BUFFER_SIZE);
+         chunks_left != 0; --chunks_left)
+    {
+        if (chunks_left == 1)
+        {
+            chunk_bytes            = bytes_left;
+            bounce_frame.frameSize = chunk_bytes;
+        }
+
+        rc = pciedrv_dma_read_initiate(dsp_id, addr + bytes_done,
+                      &bounce_frame, PCIEDRV_DMA_XFER_BLOCKING, &trans_id);
+        ERR(rc, "DMA initiate failed");
+
+        /*---------------------------------------------------------------------
+        * Copy the chunk out of the bounce buffer into the caller's buffer
+        *--------------------------------------------------------------------*/
+        memcpy (buf + bytes_done, bounce_desc.userAddr, chunk_bytes);
+
+        bytes_done += chunk_bytes;
+        bytes_left -= chunk_bytes;
+    }
+
+    /*---------------------------------------------------------------------
+    * Give the bounce buffer back to the pool
+    *--------------------------------------------------------------------*/
+    rc = bufmgrFreeDesc(DmaBufPool, &bounce_desc);
+    ERR(rc, "dma buffer free failed");
+}
diff --git a/src/core/dsp/cmem.h b/src/core/dsp/cmem.h
new file mode 100644
index 0000000..24a6de0
--- /dev/null
+++ b/src/core/dsp/cmem.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _CMEM_H
+#define _CMEM_H
+#include "u_lockable.h"
+
+extern "C"
+{
+ #include "pciedrv.h"
+ #include "cmem_drv.h"
+ #include "bufmgr.h"
+}
+
+#define HOST_CMEM_BUFFER_SIZE 0x400000 // 4M
+#define MAX_NUM_HOST_DSP_BUFFERS 128
+
+// Cmem: host-side helper for DMA transfers to and from a DSP.
+// The constructor is private and the class exposes a static instance()
+// accessor, so it is presumably used as a singleton -- confirm in cmem.cpp.
+class Cmem : public Lockable_off
+{
+ public:
+ // Destruction calls close().
+ ~Cmem() { close(); }
+ // Static accessor for the Cmem object (defined outside this header).
+ static Cmem* instance ();
+
+ // open()/close() are defined outside this header; open() is invoked by the
+ // constructor and close() by the destructor.
+ void open();
+ void close();
+ // Transfer size bytes between host buffer buf and DSP address addr on
+ // device dsp_id. NOTE(review): direction inferred from the names
+ // (write = host to DSP, read = DSP to host) -- confirm in cmem.cpp.
+ void dma_write(int32_t dsp_id, uint32_t addr, uint8_t *buf, uint32_t size);
+ void dma_read (int32_t dsp_id, uint32_t addr, uint8_t *buf, uint32_t size);
+
+ private:
+ // Pointer returned by instance(); defined in a source file.
+ static Cmem* pInstance;
+
+ // Descriptor table sized for MAX_NUM_HOST_DSP_BUFFERS host buffers.
+ cmem_host_buf_desc_t buf_desc[MAX_NUM_HOST_DSP_BUFFERS];
+ // Opaque buffer-pool handle; NULL until set (presumably by open()).
+ void * DmaBufPool;
+
+ // Private constructor: clears the pool handle, then calls open().
+ Cmem() : DmaBufPool(NULL) { open(); }
+ Cmem(const Cmem&); // copy ctor disallowed
+ Cmem& operator=(const Cmem&); // assignment disallowed
+};
+
+#endif // _CMEM_H
diff --git a/src/core/dsp/core_scheduler.h b/src/core/dsp/core_scheduler.h
new file mode 100644
index 0000000..58d0555
--- /dev/null
+++ b/src/core/dsp/core_scheduler.h
@@ -0,0 +1,62 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "u_lockable.h"
+#ifndef _CORE_SCHEDULER_H
+#define _CORE_SCHEDULER_H
+
+/*-----------------------------------------------------------------------------
+* CoreScheduler: hands out core numbers 0..7 from an availability bitmask.
+* Bit i of p_avail set means core i is free. All accesses to the mask are made
+* while holding a Lock on this object.
+*----------------------------------------------------------------------------*/
+class CoreScheduler : public Lockable
+{
+  public:
+    /*---------------------------------------------------------------------
+    * All eight cores start out available
+    *--------------------------------------------------------------------*/
+    CoreScheduler() : p_avail(0xff) {}
+
+    /*---------------------------------------------------------------------
+    * Mark 'core' as available again and wake one thread waiting in
+    * allocate().
+    *--------------------------------------------------------------------*/
+    void free(int core)
+    {
+        Lock lock(this);
+        p_avail |= (1 << core);
+        CV.notify_one();
+    }
+
+    /*---------------------------------------------------------------------
+    * Block until at least one core is available, claim the lowest numbered
+    * one and return its index (0..7).
+    *--------------------------------------------------------------------*/
+    int allocate()
+    {
+        Lock lock(this);
+
+        /*-----------------------------------------------------------------
+        * Wait in a loop in case the condvar is falsely signalled
+        *----------------------------------------------------------------*/
+        while (!p_avail) CV.wait(lock.raw());
+
+        for (int i = 0, mask = 1; i < 8; ++i, mask <<= 1)
+        {
+            if (p_avail & mask)
+            {
+                p_avail &= ~mask;
+                return i;
+            }
+        }
+
+        /*-----------------------------------------------------------------
+        * Not expected to be reached: p_avail was non-zero above, so the
+        * scan finds a bit. The explicit return keeps every path of this
+        * non-void function well defined.
+        *----------------------------------------------------------------*/
+        return -1;
+    }
+
+  private:
+    unsigned char p_avail;   // bit i set => core i is free
+    CondVar       CV;        // signalled by free(), waited on by allocate()
+};
+
+#endif //_CORE_SCHEDULER_H
diff --git a/src/core/dsp/database.h b/src/core/dsp/database.h
new file mode 100644
index 0000000..ca4d69e
--- /dev/null
+++ b/src/core/dsp/database.h
@@ -0,0 +1,112 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __DATABASE_H__
+#define __DATABASE_H__
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include <sqlite3.h>
+
+using namespace std;
+
+/*-----------------------------------------------------------------------------
+* Database: small wrapper around one sqlite3 connection. The connection is
+* opened by the constructor and closed by the destructor. query() runs a
+* single SQL statement and returns all result rows as strings.
+*----------------------------------------------------------------------------*/
+class Database
+{
+  public:
+    /*---------------------------------------------------------------------
+    * Open the database file. The result of open() is not reported to the
+    * caller; a failure shows up later as an error printed by query().
+    *--------------------------------------------------------------------*/
+    Database(const char* filename) : database(NULL) { open(filename); }
+    ~Database() { close(); }
+
+    /*---------------------------------------------------------------------
+    * Close the connection if one is held. Safe to call more than once.
+    *--------------------------------------------------------------------*/
+    void close()
+    {
+        if (database) sqlite3_close(database);
+        database = NULL;
+    }
+
+    /*---------------------------------------------------------------------
+    * Run 'query' and return one vector<string> per result row, with one
+    * string per column. SQL NULL values are returned as empty strings.
+    * If sqlite reports an error afterwards, the query text and the error
+    * message are printed to stdout; the rows gathered so far are returned.
+    *--------------------------------------------------------------------*/
+    vector<vector<string> > query(const char* query)
+    {
+        sqlite3_stmt           *statement   = NULL;
+        vector<vector<string> > results;
+        const int               retry_limit = 20;
+        int                     retries     = 0;
+
+        int rc = sqlite3_prepare_v2(database, query, -1, &statement, 0);
+
+        /*-----------------------------------------------------------------
+        * Retry a bounded number of times while the database is busy/locked
+        *----------------------------------------------------------------*/
+        while ((rc == SQLITE_BUSY || rc == SQLITE_LOCKED) &&
+               ++retries <= retry_limit)
+        {
+            sqlite3_finalize(statement);
+            usleep(100);
+            rc = sqlite3_prepare_v2(database, query, -1, &statement, 0);
+        }
+
+        if (rc == SQLITE_OK)
+        {
+            const int cols = sqlite3_column_count(statement);
+
+            while (sqlite3_step(statement) == SQLITE_ROW)
+            {
+                vector<string> values;
+                for (int col = 0; col < cols; col++)
+                {
+                    /*-----------------------------------------------------
+                    * sqlite3_column_text() returns NULL for SQL NULL; a
+                    * std::string must not be constructed from NULL.
+                    *----------------------------------------------------*/
+                    const unsigned char *text =
+                                    sqlite3_column_text(statement, col);
+                    values.push_back(text ? string((const char*)text)
+                                          : string());
+                }
+                results.push_back(values);
+            }
+
+            sqlite3_finalize(statement);
+        }
+
+        string error = sqlite3_errmsg(database);
+        if (error != "not an error")
+            std::cout << query << " " << error << std::endl;
+
+        return results;
+    }
+
+  private:
+    sqlite3 *database;
+
+    /*---------------------------------------------------------------------
+    * Open 'filename' and set a 1000 ms busy timeout on the connection.
+    * Returns true on success, false otherwise.
+    *--------------------------------------------------------------------*/
+    bool open(const char* filename)
+    {
+        if (sqlite3_open(filename, &database) == SQLITE_OK)
+        {
+            sqlite3_busy_timeout(database, 1000);
+            return true;
+        }
+        return false;
+    }
+
+};
+
+#endif
diff --git a/src/core/dsp/device.cpp b/src/core/dsp/device.cpp
new file mode 100644
index 0000000..32cd9b0
--- /dev/null
+++ b/src/core/dsp/device.cpp
@@ -0,0 +1,1135 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "../platform.h"
+#include "device.h"
+#include "buffer.h"
+#include "kernel.h"
+#include "program.h"
+#include <cstdlib>
+#include <algorithm>
+#include <limits.h>
+#include "CL/cl_ext.h"
+
+#include <core/config.h>
+#include "../propertylist.h"
+#include "../commandqueue.h"
+#include "../events.h"
+#include "../memobject.h"
+#include "../kernel.h"
+#include "../program.h"
+#include "../util.h"
+
+#include "driver.h"
+#include "mailbox.h"
+
+extern "C"
+{
+ #include "dload_api.h"
+ #include <ti/runtime/mmap/include/mmap_resource.h>
+
+}
+
+#include <cstring>
+#include <cstdlib>
+#include <unistd.h>
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+using namespace Coal;
+
+Mailbox* Mailbox::pInstance = 0;
+
+/******************************************************************************
+* On DSPC868X the mailboxes are remote on the device DDR. On Hawking the
+* mailboxes are in shared DDR
+******************************************************************************/
+#ifdef DSPC868X
+#define MAILBOX_LOCATION MPM_MAILBOX_MEMORY_LOCATION_REMOTE
+#else
+#define MAILBOX_LOCATION MPM_MAILBOX_MEMORY_LOCATION_LOCAL
+
+#include "shmem.h"
+/*-----------------------------------------------------------------------------
+* dsp_speed: compute the DSP clock in MHz from three 32-bit values read out of
+* two mapped pages (0x02620000 and 0x02310000):
+*     speed = DSP_PLL * mult / prediv / output_div
+* NOTE(review): the register names suggest main PLL control registers --
+* confirm against the device data manual.
+*----------------------------------------------------------------------------*/
+unsigned dsp_speed()
+{
+    const unsigned DSP_PLL  = 122880000;
+    const unsigned pagesize = 0x1000;
+
+    shmem_persistent bootcfg_page;
+    shmem_persistent clock_page;
+
+    bootcfg_page.configure(0x02620000, pagesize);
+    clock_page.configure  (0x02310000, pagesize);
+
+    char *BOOTCFG_BASE_ADDR = (char*)bootcfg_page.map(0x02620000, pagesize);
+    char *CLOCK_BASE_ADDR   = (char*)clock_page.map  (0x02310000, pagesize);
+
+    int MAINPLLCTL0 = (*(int*)(BOOTCFG_BASE_ADDR + 0x350));
+    int MULT        = (*(int*)(CLOCK_BASE_ADDR   + 0x110));
+    int OUTDIV      = (*(int*)(CLOCK_BASE_ADDR   + 0x108));
+
+    /*-------------------------------------------------------------------------
+    * Each field is stored as (value - 1). The multiplier is 13 bits wide: low
+    * 6 bits from MULT, upper 7 bits from MAINPLLCTL0[18:12].
+    *------------------------------------------------------------------------*/
+    unsigned mult       = 1 + ((MULT & 0x3F) | ((MAINPLLCTL0 & 0x7F000) >> 6));
+    unsigned prediv     = 1 + (MAINPLLCTL0 & 0x3F);
+    unsigned output_div = 1 + ((OUTDIV >> 19) & 0xF);
+
+    /*-------------------------------------------------------------------------
+    * DSP_PLL * mult exceeds 32 bits once mult reaches 35, so the product is
+    * formed in 64 bits before dividing.
+    *------------------------------------------------------------------------*/
+    unsigned long long hz = (unsigned long long)DSP_PLL * mult
+                                                / prediv / output_div;
+
+    bootcfg_page.unmap(BOOTCFG_BASE_ADDR, pagesize);
+    clock_page.unmap  (CLOCK_BASE_ADDR,   pagesize);
+
+    return (unsigned)(hz / 1000000);
+}
+#endif
+
+/*-----------------------------------------------------------------------------
+* Declare our threaded dsp handler function
+*----------------------------------------------------------------------------*/
+void *dsp_worker(void* data);
+void HOSTwait (unsigned char dsp_id);
+
+/******************************************************************************
+* DSPDevice::DSPDevice(unsigned char dsp_id)
+******************************************************************************/
+/*-----------------------------------------------------------------------------
+* Construct the device object for DSP number dsp_id. In order, this:
+*   1. resets the DSP and loads its image through the Driver,
+*   2. reads addresses/sizes of monitor symbols from that image,
+*   3. queries the memory regions via cmem_init() and splits the DDR region,
+*   4. registers every region with the driver's shared memory layer,
+*   5. configures the device heaps,
+*   6. creates and opens the host->DSP and DSP->host mailboxes,
+*   7. determines the DSP clock frequency.
+* The worker thread is NOT started here; see init().
+*----------------------------------------------------------------------------*/
+DSPDevice::DSPDevice(unsigned char dsp_id)
+    : DeviceInterface   (),
+      p_cores           (8),
+      p_num_events      (0),
+      p_dsp_mhz         (1000), // 1.00 GHz
+      p_worker          (0),
+      p_rx_mbox         (0),
+      p_tx_mbox         (0),
+      p_stop            (false),
+      p_initialized     (false),
+      p_dsp_id          (dsp_id),
+      p_device_msmc_heap(),
+      p_device_ddr_heap1(),
+      p_device_ddr_heap2(),
+      p_device_ddr_heap3(),
+      p_device_l2_heap  (),
+      p_dload_handle    (0),
+      p_complete_pending(),
+      p_mpax_default_res(NULL)
+{
+    Driver *driver = Driver::instance();
+
+    void *hdl = driver->reset_and_load(dsp_id);
+
+    p_addr_kernel_config = driver->get_symbol(hdl, "kernel_config_l2");
+    p_addr_local_mem     = driver->get_symbol(hdl, "ocl_local_mem_start");
+    p_addr_mbox_d2h_phys = driver->get_symbol(hdl, "mbox_d2h_phys");
+    p_addr_mbox_h2d_phys = driver->get_symbol(hdl, "mbox_h2d_phys");
+    p_size_local_mem     = driver->get_symbol(hdl, "ocl_local_mem_size");
+    p_size_mbox_d2h      = driver->get_symbol(hdl, "mbox_d2h_size");
+    p_size_mbox_h2d      = driver->get_symbol(hdl, "mbox_h2d_size");
+
+    /*-------------------------------------------------------------------------
+    * These 4 variables were previously retrieved from the monitor out file.
+    * They are now determined by query of the CMEM system.
+    *------------------------------------------------------------------------*/
+    //p_addr_global_mem    = driver->get_symbol(hdl, "ocl_global_mem_start");
+    //p_addr_msmc_mem      = driver->get_symbol(hdl, "ocl_msmc_mem_start");
+    //p_size_global_mem    = driver->get_symbol(hdl, "ocl_global_mem_size");
+    //p_size_msmc_mem      = driver->get_symbol(hdl, "ocl_msmc_mem_size");
+
+#if 0
+    // Adjust p_size_global_mem for PG1.0 board, monitor takes 2MB
+    #define MONITOR_MEM 2
+    uint32_t mem_reserve = parse_file_line_value("/proc/cmdline",
+                                                 "mem_reserve=", 0);
+    if (mem_reserve > 0 && mem_reserve*1024*1024 < p_size_global_mem)
+        p_size_global_mem = (mem_reserve - MONITOR_MEM) * 1024 * 1024;
+
+    char *dsp_global_mem_size = getenv("TI_OCL_DSP_GLOBAL_MEM_SIZE");
+    if (dsp_global_mem_size)
+        p_size_global_mem = atol(dsp_global_mem_size);
+
+    // Ordering is important: global in CMEM block 0, msmc in CMEM block 1
+    driver->cmem_init(p_addr_global_mem, p_size_global_mem,
+                      p_addr_msmc_mem,   p_size_msmc_mem);
+#endif
+    /*-------------------------------------------------------------------------
+    * Ask the driver for the global, msmc and third-region addresses/sizes
+    *------------------------------------------------------------------------*/
+    p_addr64_global_mem = 0;
+    p_size64_global_mem = 0;
+    p_addr_msmc_mem     = 0;
+    p_size_msmc_mem     = 0;
+    DSPDevicePtr64 global3 = 0;
+    uint64_t       gsize3  = 0;
+    driver->cmem_init(&p_addr64_global_mem, &p_size64_global_mem,
+                      &p_addr_msmc_mem,     &p_size_msmc_mem,
+                      &global3,             &gsize3);
+
+    /*-------------------------------------------------------------------------
+    * Let the driver split the global region into up to two pieces
+    *------------------------------------------------------------------------*/
+    DSPDevicePtr64 global1 = p_addr64_global_mem;
+    DSPDevicePtr64 global2 = 0;
+    uint64_t       gsize1  = p_size64_global_mem;
+    uint64_t       gsize2  = 0;
+    driver->split_ddr_memory(p_addr64_global_mem, p_size64_global_mem,
+                             global1, gsize1, global2, gsize2, gsize3);
+
+    /*-------------------------------------------------------------------------
+    * Register every region the host will access, including the local memory
+    * window of each of the 8 cores at ((0x10 + core) << 24) + local address.
+    *------------------------------------------------------------------------*/
+    driver->shmem_configure(global1, gsize1, 0);
+    if (gsize2 > 0) driver->shmem_configure(global2, gsize2, 0);
+    if (gsize3 > 0) driver->shmem_configure(global3, gsize3, 0);
+    driver->shmem_configure(p_addr_msmc_mem,      p_size_msmc_mem, 1);
+    driver->shmem_configure(p_addr_mbox_d2h_phys, p_size_mbox_d2h);
+    driver->shmem_configure(p_addr_mbox_h2d_phys, p_size_mbox_h2d);
+    for (int core = 0; core < 8; core++)
+        driver->shmem_configure(((0x10 + core) << 24) + p_addr_local_mem,
+                                p_size_local_mem);
+
+    driver->free_image_handle(hdl);
+
+    /*-------------------------------------------------------------------------
+    * Setup the DSP heaps for memory allocation
+    *------------------------------------------------------------------------*/
+    p_device_ddr_heap1.configure(global1, gsize1);
+    p_device_ddr_heap2.configure(global2, gsize2, true);
+    p_device_ddr_heap3.configure(global3, gsize3, true);
+    p_device_l2_heap.configure  (p_addr_local_mem, p_size_local_mem);
+    p_device_msmc_heap.configure(p_addr_msmc_mem,  p_size_msmc_mem);
+
+    /*-------------------------------------------------------------------------
+    * initialize the mailboxes on the cores, so they can receive an exit cmd
+    *------------------------------------------------------------------------*/
+    Mailbox* mb_instance = Mailbox::instance();
+
+    uint32_t mailboxallocsize = mpm_mailbox_get_alloc_size();
+
+    p_tx_mbox = (void*)malloc(mailboxallocsize);
+    p_rx_mbox = (void*)malloc(mailboxallocsize);
+
+    mpm_mailbox_config_t mbConfig;
+    mbConfig.mem_start_addr =
+                 (uint32_t)driver->map(p_addr_mbox_h2d_phys, p_size_mbox_h2d);
+
+    mbConfig.mem_size         = p_size_mbox_h2d;
+    mbConfig.max_payload_size = mbox_payload;
+
+    int tx_status = mb_instance->create(p_tx_mbox,
+                                        NULL,
+                                        MAILBOX_LOCATION,
+                                        MPM_MAILBOX_DIRECTION_SEND, &mbConfig);
+
+    mbConfig.mem_start_addr =
+                 (uint32_t)driver->map(p_addr_mbox_d2h_phys, p_size_mbox_d2h);
+    mbConfig.mem_size       = p_size_mbox_d2h;
+
+    int rx_status = mb_instance->create(p_rx_mbox,
+                                        NULL,
+                                        MAILBOX_LOCATION,
+                                        MPM_MAILBOX_DIRECTION_RECEIVE, &mbConfig);
+
+    tx_status |= mb_instance->open(p_tx_mbox);
+    rx_status |= mb_instance->open(p_rx_mbox);
+
+    /*-------------------------------------------------------------------------
+    * p_dsp_id is an unsigned char; stream it as an integer so the message
+    * shows the device number rather than a raw (often unprintable) character
+    *------------------------------------------------------------------------*/
+    if (tx_status != 0 || rx_status != 0)
+        std::cout << "Could not create mailboxes for dsp "
+                  << (int)p_dsp_id << std::endl;
+
+
+#ifdef DSPC868X
+    char *ghz1 = getenv("TI_OCL_DSP_1_25GHZ");
+    if (ghz1) p_dsp_mhz = 1250; // 1.25 GHz
+#else
+    /*-------------------------------------------------------------------------
+    * Ask the DSP for its frequency. Busy-wait for mail and keep reading until
+    * mail_from() returns something other than -1 (it returns -1 after
+    * handling an ERROR or PRINT message); that value is the speed in MHz.
+    *------------------------------------------------------------------------*/
+    mail_to(frequencyMsg);
+
+    int ret = 0;
+    do
+    {
+        while (!mail_query())  ;
+        ret = mail_from();
+    } while (ret == -1);
+
+    p_dsp_mhz = ret;
+#endif
+
+}
+
+
+/******************************************************************************
+* void DSPDevice::init()
+******************************************************************************/
+void DSPDevice::init()
+{
+    /*-------------------------------------------------------------------------
+    * Only the first call does any work; later calls fall straight through.
+    * Set up the event queue's condition variable and mutex, then start the
+    * worker thread running dsp_worker on this device.
+    *------------------------------------------------------------------------*/
+    if (!p_initialized)
+    {
+        pthread_cond_init (&p_events_cond,  0);
+        pthread_mutex_init(&p_events_mutex, 0);
+        pthread_create    (&p_worker, 0, &dsp_worker, this);
+
+        p_initialized = true;
+    }
+}
+
+/******************************************************************************
+* DSPDevice::~DSPDevice()
+******************************************************************************/
+DSPDevice::~DSPDevice()
+{
+    /*-------------------------------------------------------------------------
+    * Send the exit message to the device, then release the mailbox storage
+    * allocated by the constructor.
+    * NOTE(review): this happens before the worker thread is joined below;
+    * confirm the worker no longer touches the mailboxes at this point.
+    *------------------------------------------------------------------------*/
+    mail_to(exitMsg);
+
+    free(p_tx_mbox);
+    free(p_rx_mbox);
+
+    /*-------------------------------------------------------------------------
+    * The driver is closed once, by the device with id 0
+    *------------------------------------------------------------------------*/
+    if (p_dsp_id == 0) Driver::instance()->close();
+
+    /*-------------------------------------------------------------------------
+    * If init() ran, raise the stop flag under the mutex, wake the worker,
+    * wait for it to finish and tear down the synchronization objects.
+    *------------------------------------------------------------------------*/
+    if (p_initialized)
+    {
+        pthread_mutex_lock(&p_events_mutex);
+        p_stop = true;
+        pthread_cond_broadcast(&p_events_cond);
+        pthread_mutex_unlock(&p_events_mutex);
+
+        pthread_join(p_worker, 0);
+
+        pthread_mutex_destroy(&p_events_mutex);
+        pthread_cond_destroy(&p_events_cond);
+    }
+}
+
+/******************************************************************************
+* DeviceBuffer *DSPDevice::createDeviceBuffer(MemObject *buffer, cl_int *rs)
+******************************************************************************/
+DeviceBuffer *DSPDevice::createDeviceBuffer(MemObject *buffer, cl_int *rs)
+{
+    // Allocate the DSP flavour of a device buffer; rs is handed to the
+    // DSPBuffer constructor unchanged.
+    DSPBuffer *dsp_buffer = new DSPBuffer(this, buffer, rs);
+    return (DeviceBuffer *)dsp_buffer;
+}
+
+/******************************************************************************
+* DeviceProgram *DSPDevice::createDeviceProgram(Program *program)
+******************************************************************************/
+DeviceProgram *DSPDevice::createDeviceProgram(Program *program)
+{
+    // Allocate the DSP flavour of a device program for this device.
+    DSPProgram *dsp_program = new DSPProgram(this, program);
+    return (DeviceProgram *)dsp_program;
+}
+
+/******************************************************************************
+* DeviceKernel *DSPDevice::createDeviceKernel(Kernel *kernel,
+*                                             llvm::Function *function)
+******************************************************************************/
+DeviceKernel *DSPDevice::createDeviceKernel(Kernel *kernel,
+                                            llvm::Function *function)
+{
+    // Allocate the DSP flavour of a device kernel. The 'function' argument
+    // is not used by this implementation.
+    DSPKernel *dsp_kernel = new DSPKernel(this, kernel);
+    return (DeviceKernel *)dsp_kernel;
+}
+
+/******************************************************************************
+* cl_int DSPDevice::initEventDeviceData(Event *event)
+******************************************************************************/
+/*-----------------------------------------------------------------------------
+* Prepare device specific state for an event before it is queued.
+*   MapBuffer          : store the host pointer the map will expose.
+*   NDRange/TaskKernel : load the program if needed, pre-allocate the kernel's
+*                        buffers and attach a new DSPKernelEvent.
+*   anything else      : nothing to do.
+* Returns CL_SUCCESS, CL_MEM_OBJECT_ALLOCATION_FAILURE when the program cannot
+* be loaded, or the error returned by preAllocBuffers().
+*----------------------------------------------------------------------------*/
+cl_int DSPDevice::initEventDeviceData(Event *event)
+{
+    switch (event->type())
+    {
+        case Event::MapBuffer:
+        {
+            MapBufferEvent *map_ev = (MapBufferEvent*) event;
+
+            if (map_ev->buffer()->flags() & CL_MEM_USE_HOST_PTR)
+            {
+                /*---------------------------------------------------------
+                * The application owns the storage: point into it directly
+                *--------------------------------------------------------*/
+                map_ev->setPtr((char*)map_ev->buffer()->host_ptr() +
+                               map_ev->offset());
+            }
+            else
+            {
+                DSPBuffer *dev_buf =
+                           (DSPBuffer*) map_ev->buffer()->deviceBuffer(this);
+                DSPDevicePtr64 dev_addr = dev_buf->data() + map_ev->offset();
+
+                // DO NOT INVALIDATE! This only initializes the host address;
+                // it cannot be used before the MapBuffer event has been
+                // scheduled and processed!
+                void *mapped = Driver::instance()->map(dev_addr, map_ev->cb(),
+                                                       false);
+                map_ev->setPtr(mapped);
+            }
+            break;
+        }
+
+        case Event::NDRangeKernel:
+        case Event::TaskKernel:
+        {
+            KernelEvent *kernel_ev = (KernelEvent *)event;
+            Program     *parent    = (Program *)kernel_ev->kernel()->parent();
+            DSPProgram  *dsp_prog  =
+                           (DSPProgram *)parent->deviceDependentProgram(this);
+
+            /*-------------------------------------------------------------
+            * Just in time loading
+            *------------------------------------------------------------*/
+            if (!dsp_prog->is_loaded() && !dsp_prog->load())
+                return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+
+            DSPKernel *dsp_kernel = (DSPKernel*)kernel_ev->deviceKernel();
+
+            cl_int status = dsp_kernel->preAllocBuffers();
+            if (status != CL_SUCCESS) return status;
+
+            // ASW TODO do something
+
+            // Attach the device-specific data to the event
+            DSPKernelEvent *dsp_ev = new DSPKernelEvent(this, kernel_ev);
+            kernel_ev->setDeviceData((void *)dsp_ev);
+            break;
+        }
+
+        case Event::MapImage:
+        default:
+            break;
+    }
+
+    return CL_SUCCESS;
+}
+
+/******************************************************************************
+* void DSPDevice::freeEventDeviceData(Event *event)
+******************************************************************************/
+void DSPDevice::freeEventDeviceData(Event *event)
+{
+    /*-------------------------------------------------------------------------
+    * Only kernel events carry device data (a DSPKernelEvent created in
+    * initEventDeviceData). Deleting a null pointer is a no-op, so no
+    * explicit null test is needed.
+    *------------------------------------------------------------------------*/
+    switch (event->type())
+    {
+        case Event::NDRangeKernel:
+        case Event::TaskKernel:
+            delete (DSPKernelEvent *)event->deviceData();
+            break;
+
+        default:
+            break;
+    }
+}
+
+/******************************************************************************
+* void DSPDevice::pushEvent(Event *event)
+******************************************************************************/
+void DSPDevice::pushEvent(Event *event)
+{
+    /*-------------------------------------------------------------------------
+    * Append the event under the queue mutex and wake every waiter. The
+    * element count is tracked by hand in p_num_events instead of calling
+    * list::size().
+    *------------------------------------------------------------------------*/
+    pthread_mutex_lock(&p_events_mutex);
+
+    p_events.push_back(event);
+    ++p_num_events;
+
+    pthread_cond_broadcast(&p_events_cond);
+    pthread_mutex_unlock(&p_events_mutex);
+}
+
+// True once the destructor has raised the stop flag (read without locking).
+bool DSPDevice::stop()
+{
+    return p_stop;
+}
+
+// True when at least one event is waiting in the device queue (read without
+// locking).
+bool DSPDevice::availableEvent()
+{
+    return p_num_events > 0;
+}
+
+/******************************************************************************
+* Event *DSPDevice::getEvent(bool &stop)
+******************************************************************************/
+Event *DSPDevice::getEvent(bool &stop)
+{
+    /*-------------------------------------------------------------------------
+    * Block until an event is queued or the device is stopping. On stop, set
+    * 'stop' to true and return 0; otherwise remove and return the event at
+    * the front of the queue. 'stop' is left untouched when an event is
+    * returned.
+    *------------------------------------------------------------------------*/
+    Event *next = 0;
+
+    pthread_mutex_lock(&p_events_mutex);
+
+    while (!p_stop && p_num_events == 0)
+        pthread_cond_wait(&p_events_cond, &p_events_mutex);
+
+    const bool stopping = p_stop;
+
+    if (!stopping)
+    {
+        next = p_events.front();
+        --p_num_events;
+        p_events.pop_front();
+    }
+
+    pthread_mutex_unlock(&p_events_mutex);
+
+    if (stopping) stop = true;
+
+    return next;
+}
+
+// Record 'data' under key 'idx' in the complete-pending container.
+void DSPDevice::push_complete_pending(uint32_t idx, Event* const data)
+{
+    p_complete_pending.push(idx, data);
+}
+
+// Try to take the entry stored under 'idx'; returns whatever try_pop reports
+// and passes 'data' through to it.
+bool DSPDevice::get_complete_pending(uint32_t idx, Event*& data)
+{
+    return p_complete_pending.try_pop(idx, data);
+}
+
+// Forward to the container's dump().
+void DSPDevice::dump_complete_pending()
+{
+    p_complete_pending.dump();
+}
+
+// True while the complete-pending container is not empty.
+bool DSPDevice::any_complete_pending()
+{
+    if (p_complete_pending.empty()) return false;
+    return true;
+}
+
+/******************************************************************************
+* Device's decision about whether CommandQueue should push more events over
+* This number could be tuned (e.g. using ooo example). Note that p_num_events
+* are in device's queue, but not yet executed.
+******************************************************************************/
+bool DSPDevice::gotEnoughToWorkOn()
+{
+    // One queued, not yet executed event is currently considered enough.
+    return p_num_events > 0;
+}
+
+/******************************************************************************
+* Getter functions
+******************************************************************************/
+// Number of cores recorded for this device (p_cores).
+unsigned int DSPDevice::numDSPs() const
+{
+    return p_cores;
+}
+
+// Clock frequency in MHz as determined by the constructor.
+float DSPDevice::dspMhz() const
+{
+    return p_dsp_mhz;
+}
+
+// Identifier passed to the constructor.
+unsigned char DSPDevice::dspID() const
+{
+    return p_dsp_id;
+}
+
+// Dynamic loader handle; 0 until load() has created it.
+DLOAD_HANDLE DSPDevice::dload_handle() const
+{
+    return p_dload_handle;
+}
+
+
+/*-----------------------------------------------------------------------------
+* Load the program file 'filename' through the dynamic loader and return the
+* handle DLOAD_load produces. The loader instance is created on first use.
+* The process exits with status 1 if the file cannot be opened.
+*----------------------------------------------------------------------------*/
+int DSPDevice::load(const char *filename)
+{
+    if (!p_dload_handle)
+    {
+        p_dload_handle = DLOAD_create((void*)this);
+        DLOAD_initialize(p_dload_handle);
+    }
+
+    FILE *image = fopen(filename, "rb");
+    if (image == NULL)
+    {
+        printf("can't open OpenCL Program file\n");
+        exit(1);
+    }
+
+    const int prog_handle = DLOAD_load(p_dload_handle, image);
+    fclose(image);
+
+    return prog_handle;
+}
+
+bool DSPDevice::unload(int file_handle)
+{
+    // Nothing was ever loaded if the loader instance does not exist.
+    if (!p_dload_handle) return false;
+
+    return DLOAD_unload(p_dload_handle, file_handle);
+}
+
+/*-----------------------------------------------------------------------------
+* Query the L2 heap's max_block_size() and hand the result back narrowed to
+* 32 bits: the returned address as DSPDevicePtr, the size through 'size'.
+* 'block_size' is filled in by the heap.
+*----------------------------------------------------------------------------*/
+DSPDevicePtr DSPDevice::get_local_scratch(uint32_t &size, uint32_t &block_size)
+{
+    uint64_t       wide_size;
+    DSPDevicePtr64 wide_addr;
+
+    wide_addr = p_device_l2_heap.max_block_size(wide_size, block_size);
+    size      = (uint32_t) wide_size;
+
+    return (DSPDevicePtr) wide_addr;
+}
+
+// Allocate 'size' bytes from the L2 (local) heap.
+DSPDevicePtr DSPDevice::malloc_local(size_t size)
+{
+    return p_device_l2_heap.malloc(size, true);
+}
+
+// Return a block obtained from malloc_local() to the L2 heap.
+void DSPDevice::free_local(DSPDevicePtr addr)
+{
+    p_device_l2_heap.free(addr);
+}
+
+// Allocate 'size' bytes from the MSMC heap.
+DSPDevicePtr DSPDevice::malloc_msmc(size_t size)
+{
+    return p_device_msmc_heap.malloc(size, true);
+}
+
+// Return a block obtained from malloc_msmc() to the MSMC heap.
+void DSPDevice::free_msmc(DSPDevicePtr addr)
+{
+    p_device_msmc_heap.free(addr);
+}
+
+// TODO: examine the flag, the logic, etc
+#define FRACTION_PERSISTENT_FOR_BUFFER 8
+/*-----------------------------------------------------------------------------
+* Allocate 'size' bytes of global (DDR) memory and return its device address,
+* or 0 if every heap refused.
+*
+* prefer_32bit : allocate from heap1 only.
+* otherwise    : use heap1 only while its largest free block is more than
+*                FRACTION_PERSISTENT_FOR_BUFFER times the request, then try
+*                heap2, heap3, and finally heap1 once more.
+*----------------------------------------------------------------------------*/
+DSPDevicePtr64 DSPDevice::malloc_global(size_t size, bool prefer_32bit)
+{
+    if (prefer_32bit) return p_device_ddr_heap1.malloc(size, true);
+
+    DSPDevicePtr64 addr       = 0;
+    uint64_t       size64     = 0;
+    uint32_t       block_size = 0;
+    p_device_ddr_heap1.max_block_size(size64, block_size);
+
+    /*-------------------------------------------------------------------------
+    * A zero-sized request used to divide by zero here; it now goes straight
+    * to heap1. Non-zero sizes are handled exactly as before.
+    *------------------------------------------------------------------------*/
+    if (size == 0 || size64 / size > FRACTION_PERSISTENT_FOR_BUFFER)
+        addr = p_device_ddr_heap1.malloc(size, true);
+
+    if (!addr)
+        addr = p_device_ddr_heap2.malloc(size, true);
+
+    if (!addr)
+        addr = p_device_ddr_heap3.malloc(size, true);
+
+    if (!addr)
+        addr = p_device_ddr_heap1.malloc(size, true); // give it another chance
+
+    return addr;
+}
+
+/*-----------------------------------------------------------------------------
+* Release a block obtained from malloc_global(). Addresses below
+* DSP_36BIT_ADDR are returned to heap1. Anything else is offered to heap2
+* first and, if heap2's free() reports -1, to heap3.
+*----------------------------------------------------------------------------*/
+void DSPDevice::free_global(DSPDevicePtr64 addr)
+{
+    if (addr < DSP_36BIT_ADDR)
+    {
+        p_device_ddr_heap1.free(addr);
+        return;
+    }
+
+    if (p_device_ddr_heap2.free(addr) == -1)
+    {
+        p_device_ddr_heap3.free(addr);
+    }
+}
+
+void DSPDevice::mail_to(Msg_t &msg)
+{
+    // Transaction ids count up from 0xC0DE0000; the counter is a function
+    // static, so it is shared by every DSPDevice object.
+    static unsigned trans_id = 0xC0DE0000;
+
+    const unsigned this_id = trans_id++;
+
+    Mailbox::instance()->write(p_tx_mbox, (uint8_t*)&msg, sizeof(Msg_t),
+                               this_id);
+}
+
+bool DSPDevice::mail_query()
+{
+    // Ask the mailbox layer about the receive mailbox and pass the answer on.
+    Mailbox *mailbox = Mailbox::instance();
+    return mailbox->query(p_rx_mbox);
+}
+
+/*-----------------------------------------------------------------------------
+* Read one message from the receive mailbox.
+*   ERROR message : print its text, return -1.
+*   PRINT message : print "[core X] text", where X is the first byte of the
+*                   message, return -1.
+*   anything else : return the transaction id delivered with the message.
+*----------------------------------------------------------------------------*/
+int DSPDevice::mail_from()
+{
+    uint32_t size_rx;
+    uint32_t trans_id_rx;
+    Msg_t    rxmsg;
+
+    Mailbox::instance()->read(p_rx_mbox, (uint8_t*)&rxmsg, &size_rx,
+                              &trans_id_rx);
+
+    if (rxmsg.command == ERROR)
+    {
+        printf("%s", rxmsg.u.message);
+        return -1;
+    }
+    else if (rxmsg.command == PRINT)
+    {
+        printf("[core %c] %s", rxmsg.u.message[0], rxmsg.u.message+1);
+        return -1;
+    }
+
+    return trans_id_rx;
+}
+
+/******************************************************************************
+* void* DSPDevice::get_mpax_default_res, only needs to be computed once
+******************************************************************************/
+/*-----------------------------------------------------------------------------
+* Return the default keystone_mmap_resources_t for this device, building it
+* on the first call and caching it in p_mpax_default_res. Returns NULL if
+* the structure cannot be allocated (a later call will try again).
+*----------------------------------------------------------------------------*/
+void* DSPDevice::get_mpax_default_res()
+{
+    if (p_mpax_default_res != NULL) return p_mpax_default_res;
+
+    /*-------------------------------------------------------------------------
+    * Do not hand a failed allocation to memset / the resource initializer
+    *------------------------------------------------------------------------*/
+    void *res = malloc(sizeof(keystone_mmap_resources_t));
+    if (res == NULL) return NULL;
+    memset(res, 0, sizeof(keystone_mmap_resources_t));
+
+#define NUM_VIRT_HEAPS  2
+    uint32_t xmc_regs[MAX_XMCSES_MPAXS];
+    uint32_t ses_regs[MAX_XMCSES_MPAXS];
+    uint32_t heap_base[NUM_VIRT_HEAPS] = {0x80000000, 0xC0000000};
+    uint32_t heap_size[NUM_VIRT_HEAPS] = {0x20000000, 0x40000000};
+
+    /*-------------------------------------------------------------------------
+    * Register numbers run consecutively from the first free XMC / SES MPAX
+    *------------------------------------------------------------------------*/
+    for (int i = 0; i < MAX_XMCSES_MPAXS; i++)
+    {
+        xmc_regs[i] = FIRST_FREE_XMC_MPAX + i;
+        ses_regs[i] = FIRST_FREE_SES_MPAX + i;
+    }
+
+    keystone_mmap_resource_init(MAX_XMCSES_MPAXS, xmc_regs, ses_regs,
+                                NUM_VIRT_HEAPS, heap_base, heap_size,
+                                (keystone_mmap_resources_t *) res);
+
+    p_mpax_default_res = res;
+    return p_mpax_default_res;
+}
+
+/******************************************************************************
+* cl_int DSPDevice::info
+*
+* Answers one device-info query for the DSP device.
+*
+* param_name           - which property is being queried
+* param_value_size     - size in bytes of the buffer at param_value
+* param_value          - destination buffer, may be NULL (size query only)
+* param_value_size_ret - if non-NULL, receives the size in bytes of the value
+*
+* Returns CL_INVALID_VALUE when param_name is not handled by the switch below
+* or when param_value is non-NULL but param_value_size is smaller than the
+* value; returns CL_SUCCESS otherwise.
+******************************************************************************/
+cl_int DSPDevice::info(cl_device_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ // Each case leaves 'value' pointing at the answer and 'value_length' holding
+ // its size; the common tail after the switch does the size check and copy.
+ void *value = 0;
+ size_t value_length = 0;
+
+ // Scratch storage for the answer, one member per result type.
+ // NOTE(review): SIMPLE_ASSIGN / STRING_ASSIGN are defined outside this view;
+ // presumably SIMPLE_ASSIGN(type, v) stores v in <type>_var and points 'value'
+ // at it -- confirm against the macro definitions.
+ union
+ {
+ cl_device_type cl_device_type_var;
+ cl_uint cl_uint_var;
+ size_t size_t_var;
+ cl_ulong cl_ulong_var;
+ cl_bool cl_bool_var;
+ cl_device_fp_config cl_device_fp_config_var;
+ cl_device_mem_cache_type cl_device_mem_cache_type_var;
+ cl_device_local_mem_type cl_device_local_mem_type_var;
+ cl_device_exec_capabilities cl_device_exec_capabilities_var;
+ cl_command_queue_properties cl_command_queue_properties_var;
+ cl_platform_id cl_platform_id_var;
+ size_t work_dims[MAX_WORK_DIMS];
+ };
+
+ // Outputs of dspheap::max_block_size() used by the *_MAX_ALLOC_TI queries;
+ // only 'maxblock' is reported, 'dummy' is discarded.
+ uint64_t maxblock;
+ uint32_t dummy;
+
+ switch (param_name)
+ {
+ case CL_DEVICE_TYPE:
+ SIMPLE_ASSIGN(cl_device_type, CL_DEVICE_TYPE_ACCELERATOR);
+ break;
+
+ case CL_DEVICE_VENDOR_ID:
+ SIMPLE_ASSIGN(cl_uint, 0);
+ break;
+
+ case CL_DEVICE_MAX_COMPUTE_UNITS:
+ SIMPLE_ASSIGN(cl_uint, numDSPs());
+ break;
+
+ case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+ SIMPLE_ASSIGN(cl_uint, MAX_WORK_DIMS);
+ break;
+
+ /*-----------------------------------------------------------------
+ * The value currently reported is 0xffffffff. The earlier limit of
+ * local mem size / 128 (chosen so that conf basic/local_kernel_def
+ * could allocate a long16 per work-item in local mem) is left
+ * commented out at the end of the line below.
+ *----------------------------------------------------------------*/
+ case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+ SIMPLE_ASSIGN(size_t, 0xffffffff); //p_size_local_mem / 128);
+ break;
+
+ case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+ // Same 0xffffffff limit as above, reported once per dimension.
+ for (int i=0; i<MAX_WORK_DIMS; ++i)
+ {
+ work_dims[i] = 0xffffffff; //p_size_local_mem / 128;
+ }
+ value_length = MAX_WORK_DIMS * sizeof(size_t);
+ value = &work_dims;
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+ SIMPLE_ASSIGN(cl_uint, 8);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+ SIMPLE_ASSIGN(cl_uint, 4);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+ SIMPLE_ASSIGN(cl_uint, 1);
+ break;
+
+ case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+ SIMPLE_ASSIGN(cl_uint, dspMhz());
+ break;
+
+ case CL_DEVICE_ADDRESS_BITS:
+ SIMPLE_ASSIGN(cl_uint, 32);
+ break;
+
+ case CL_DEVICE_MAX_READ_IMAGE_ARGS:
+ SIMPLE_ASSIGN(cl_uint, 0); //images not supported
+ break;
+
+ case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
+ SIMPLE_ASSIGN(cl_uint, 0); // images not supported
+ break;
+
+ case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+ // Size of DDR heap 1, capped at 1 GiB (1 << 30).
+ SIMPLE_ASSIGN(cl_ulong, std::min(p_device_ddr_heap1.size(), (cl_ulong)1ul << 30));
+ break;
+
+ case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+ SIMPLE_ASSIGN(size_t, 0); // images not supported
+ break;
+
+ case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE3D_MAX_WIDTH:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE3D_MAX_DEPTH:
+ SIMPLE_ASSIGN(size_t, 0); //images not supported
+ break;
+
+ case CL_DEVICE_IMAGE_SUPPORT:
+ SIMPLE_ASSIGN(cl_bool, CL_FALSE); //images not supported
+ break;
+
+ case CL_DEVICE_MAX_PARAMETER_SIZE:
+ SIMPLE_ASSIGN(size_t, 116); // ASW TODO - needs to be 1024
+ break;
+
+ case CL_DEVICE_MAX_SAMPLERS:
+ SIMPLE_ASSIGN(cl_uint, 0); //images not supported
+ break;
+
+ case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
+ SIMPLE_ASSIGN(cl_uint, 1024); // reported in bits: 1024 bits = 128 byte aligned
+ break;
+
+ case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
+ SIMPLE_ASSIGN(cl_uint, 128);
+ break;
+
+ case CL_DEVICE_SINGLE_FP_CONFIG:
+ // Currently don't support CL_FP_DENORM
+ // ASW TODO: Investigate others
+ SIMPLE_ASSIGN(cl_device_fp_config,
+ CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST);
+ break;
+
+ case CL_DEVICE_DOUBLE_FP_CONFIG:
+ SIMPLE_ASSIGN(cl_device_fp_config,
+ CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
+ CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
+ SIMPLE_ASSIGN(cl_device_mem_cache_type, CL_READ_WRITE_CACHE);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
+ SIMPLE_ASSIGN(cl_uint, 128);
+ break;
+
+ case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
+ SIMPLE_ASSIGN(cl_ulong, 128*1024);
+ break;
+
+ // Heap sizes: DDR heap 1 is the standard global memory; heaps 2 and 3 and
+ // the MSMC heap are exposed through TI-specific query names.
+ case CL_DEVICE_GLOBAL_MEM_SIZE:
+ SIMPLE_ASSIGN(cl_ulong, p_device_ddr_heap1.size());
+ break;
+
+ case CL_DEVICE_GLOBAL_EXT1_MEM_SIZE_TI:
+ SIMPLE_ASSIGN(cl_ulong, p_device_ddr_heap2.size());
+ break;
+
+ case CL_DEVICE_GLOBAL_EXT2_MEM_SIZE_TI:
+ SIMPLE_ASSIGN(cl_ulong, p_device_ddr_heap3.size());
+ break;
+
+ case CL_DEVICE_MSMC_MEM_SIZE_TI:
+ SIMPLE_ASSIGN(cl_ulong, p_device_msmc_heap.size());
+ break;
+
+ // Largest block each heap reports through max_block_size().
+ case CL_DEVICE_GLOBAL_MEM_MAX_ALLOC_TI:
+ p_device_ddr_heap1.max_block_size(maxblock, dummy);
+ SIMPLE_ASSIGN(cl_ulong, maxblock);
+ break;
+
+ case CL_DEVICE_GLOBAL_EXT1_MEM_MAX_ALLOC_TI:
+ p_device_ddr_heap2.max_block_size(maxblock, dummy);
+ SIMPLE_ASSIGN(cl_ulong, maxblock);
+ break;
+
+ case CL_DEVICE_GLOBAL_EXT2_MEM_MAX_ALLOC_TI:
+ p_device_ddr_heap3.max_block_size(maxblock, dummy);
+ SIMPLE_ASSIGN(cl_ulong, maxblock);
+ break;
+
+ case CL_DEVICE_MSMC_MEM_MAX_ALLOC_TI:
+ p_device_msmc_heap.max_block_size(maxblock, dummy);
+ SIMPLE_ASSIGN(cl_ulong, maxblock);
+ break;
+
+ case CL_DEVICE_LOCAL_MEM_MAX_ALLOC_TI:
+ p_device_l2_heap.max_block_size(maxblock, dummy);
+ SIMPLE_ASSIGN(cl_ulong, maxblock);
+ break;
+
+ case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+ SIMPLE_ASSIGN(cl_ulong, 64<<10);
+ break;
+
+ case CL_DEVICE_MAX_CONSTANT_ARGS:
+ SIMPLE_ASSIGN(cl_uint, 8);
+ break;
+
+ case CL_DEVICE_LOCAL_MEM_TYPE:
+ SIMPLE_ASSIGN(cl_device_local_mem_type, CL_LOCAL);
+ break;
+
+ case CL_DEVICE_LOCAL_MEM_SIZE:
+ SIMPLE_ASSIGN(cl_ulong, p_device_l2_heap.size());
+ break;
+
+ case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
+ // ASW TODO - check answer
+ SIMPLE_ASSIGN(cl_bool, CL_FALSE);
+ break;
+
+ case CL_DEVICE_HOST_UNIFIED_MEMORY:
+ SIMPLE_ASSIGN(cl_bool, CL_FALSE);
+ break;
+
+ case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
+ SIMPLE_ASSIGN(size_t, 1000); // 1000 nanoseconds = 1 microsecond
+ break;
+
+ case CL_DEVICE_ENDIAN_LITTLE:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_AVAILABLE:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_COMPILER_AVAILABLE:
+ SIMPLE_ASSIGN(cl_bool, CL_TRUE);
+ break;
+
+ case CL_DEVICE_EXECUTION_CAPABILITIES:
+ SIMPLE_ASSIGN(cl_device_exec_capabilities, CL_EXEC_KERNEL);
+ break;
+
+ case CL_DEVICE_QUEUE_PROPERTIES:
+ SIMPLE_ASSIGN(cl_command_queue_properties,
+ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
+ CL_QUEUE_PROFILING_ENABLE);
+ break;
+
+ case CL_DEVICE_NAME:
+ // ASW TODO add device number suffix
+#ifdef DSPC868X
+ STRING_ASSIGN("TI TMS320C6678 DSP");
+#else
+ STRING_ASSIGN("TI K2H DSP (8x C66)");
+#endif
+ break;
+
+ case CL_DEVICE_VENDOR:
+ STRING_ASSIGN("Texas Instruments, Inc.");
+ break;
+
+ case CL_DRIVER_VERSION:
+ STRING_ASSIGN("" COAL_VERSION);
+ break;
+
+ case CL_DEVICE_PROFILE:
+ STRING_ASSIGN("FULL_PROFILE");
+ break;
+
+ case CL_DEVICE_VERSION:
+ STRING_ASSIGN("OpenCL 1.1 TI " COAL_VERSION);
+ break;
+
+ case CL_DEVICE_EXTENSIONS:
+ // NOTE(review): unlike every other STRING_ASSIGN use in this function, this
+ // one has no trailing ';' -- it relies on how the macro is defined.
+ STRING_ASSIGN("cl_khr_byte_addressable_store"
+ " cl_khr_global_int32_base_atomics"
+ " cl_khr_global_int32_extended_atomics"
+ " cl_khr_local_int32_base_atomics"
+ " cl_khr_local_int32_extended_atomics"
+ " cl_khr_fp64"
+ " cl_ti_msmc_buffers")
+ break;
+
+ case CL_DEVICE_PLATFORM:
+ SIMPLE_ASSIGN(cl_platform_id, &the_platform);
+ break;
+
+ case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
+ SIMPLE_ASSIGN(cl_uint, 0);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
+ SIMPLE_ASSIGN(cl_uint, 8);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
+ SIMPLE_ASSIGN(cl_uint, 4);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
+ SIMPLE_ASSIGN(cl_uint, 2);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
+ SIMPLE_ASSIGN(cl_uint, 1);
+ break;
+
+ case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
+ SIMPLE_ASSIGN(cl_uint, 0);
+ break;
+
+ case CL_DEVICE_OPENCL_C_VERSION:
+ STRING_ASSIGN("OpenCL C 1.1 LLVM " LLVM_VERSION);
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ // A destination buffer was supplied but it is too small for the value.
+ if (param_value && param_value_size < value_length)
+ return CL_INVALID_VALUE;
+
+ // The required size is reported even when no destination buffer was given.
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+/******************************************************************************
+* Call back functions from the target loader
+******************************************************************************/
+extern "C"
+{
+
+/*****************************************************************************/
+/* DLIF_ALLOCATE() - Loader callback: obtain device memory for the segment   */
+/*    described by targ_req and store the resulting address back into the   */
+/*    segment's target_address field.                                       */
+/*                                                                           */
+/*    client_handle is the DSPDevice that owns the heaps. The heap is picked */
+/*    from the segment's requested address: local, MSMC or global.          */
+/*    Returns 1 when an address was obtained, 0 when the allocator gave 0.   */
+/*****************************************************************************/
+BOOL DLIF_allocate(void* client_handle, struct DLOAD_MEMORY_REQUEST *targ_req)
+{
+    DSPDevice* dsp = (DSPDevice*) client_handle;
+    struct DLOAD_MEMORY_SEGMENT* seg = targ_req->segment;
+
+    /*------------------------------------------------------------------------*/
+    /* Classify the requested address by its upper bits.                      */
+    /*------------------------------------------------------------------------*/
+    const bool in_local = ((seg->target_address >> 20) == 0x008);
+    const bool in_msmc  = ((seg->target_address >> 24) == 0x0C);
+
+    uint32_t placed_at;
+    if (in_local)
+    {
+        placed_at = (uint32_t)dsp->malloc_local(seg->memsz_in_bytes);
+    }
+    else if (in_msmc)
+    {
+        placed_at = (uint32_t)dsp->malloc_msmc(seg->memsz_in_bytes);
+    }
+    else
+    {
+        placed_at = (uint32_t)dsp->malloc_global(seg->memsz_in_bytes);
+    }
+
+#if DEBUG
+    printf("DLIF_allocate: %d bytes starting at 0x%x (relocated from 0x%x)\n",
+           seg->memsz_in_bytes, (uint32_t)placed_at,
+           (uint32_t)seg->target_address);
+#endif
+
+    /*------------------------------------------------------------------------*/
+    /* The segment is updated unconditionally, even when placed_at is 0.      */
+    /*------------------------------------------------------------------------*/
+    seg->target_address = (TARGET_ADDRESS) placed_at;
+
+    if (placed_at == 0) return 0;
+    return 1;
+}
+
+/*****************************************************************************/
+/* DLIF_RELEASE() - Loader callback: return the memory of a segment that was */
+/*    obtained through DLIF_allocate(). The heap is chosen from the upper    */
+/*    bits of the segment address (local, MSMC, otherwise global).           */
+/*    Always returns 1.                                                      */
+/*****************************************************************************/
+BOOL DLIF_release(void* client_handle, struct DLOAD_MEMORY_SEGMENT* ptr)
+{
+    DSPDevice* dsp = (DSPDevice*) client_handle;
+
+    const bool in_local = ((ptr->target_address >> 20) == 0x008);
+    const bool in_msmc  = ((ptr->target_address >> 24) == 0x0C);
+
+    if (in_local)
+    {
+        dsp->free_local((DSPDevicePtr)ptr->target_address);
+    }
+    else if (in_msmc)
+    {
+        dsp->free_msmc((DSPDevicePtr)ptr->target_address);
+    }
+    else
+    {
+        dsp->free_global((DSPDevicePtr)ptr->target_address);
+    }
+
+#if DEBUG
+    printf("DLIF_free: %d bytes starting at 0x%x\n",
+           ptr->memsz_in_bytes, (uint32_t)ptr->target_address);
+#endif
+
+    return 1;
+}
+
+/*****************************************************************************/
+/* DLIF_WRITE() - Loader callback: copy a relocated segment image from host  */
+/*    memory (req->host_address) to the segment's target address through    */
+/*    the Driver, then record the segment in the global 'segments' list when */
+/*    that list pointer is set. Always returns 1.                            */
+/*****************************************************************************/
+BOOL DLIF_write(void* client_handle, struct DLOAD_MEMORY_REQUEST* req)
+{
+    DSPDevice* dsp = (DSPDevice*) client_handle;
+    struct DLOAD_MEMORY_SEGMENT* seg = req->segment;
+    int core_id = dsp->dspID();
+
+    /*------------------------------------------------------------------------*/
+    /* The driver's return value is not examined.                             */
+    /*------------------------------------------------------------------------*/
+    Driver::instance()->write(core_id,
+                              (uint32_t)seg->target_address,
+                              (uint8_t*)req->host_address,
+                              seg->memsz_in_bytes);
+
+#if DEBUG
+    printf("DLIF_write (dsp:%d): %d bytes starting at 0x%x\n",
+           core_id, seg->memsz_in_bytes,
+           (uint32_t)seg->target_address);
+#endif
+
+    /*------------------------------------------------------------------------*/
+    /* 'segments' is defined in another translation unit.                     */
+    /*------------------------------------------------------------------------*/
+    extern DSPProgram::segment_list *segments;
+
+    if (segments != 0)
+    {
+        segments->push_back(
+            DSPProgram::seg_desc((DSPDevicePtr)seg->target_address,
+                                 seg->memsz_in_bytes, req->flags));
+    }
+
+    return 1;
+}
+
+/******************************************************************************
+* DLIF_LOAD_DEPENDENT() - Loader callback: open the dependent file so_name and
+*    load it through DLOAD_load() using the device's loader handle.
+*    Returns the value from DLOAD_load(), or 0 when the file cannot be opened.
+******************************************************************************/
+int DLIF_load_dependent(void* client_handle, const char* so_name)
+{
+    FILE* dep_file = fopen(so_name, "rb");
+    if (dep_file == 0)
+    {
+        DLIF_error(DLET_FILE, "Can't open dependent file '%s'.\n", so_name);
+        return 0;
+    }
+
+    DSPDevice* dsp = (DSPDevice*) client_handle;
+    int load_result = DLOAD_load(dsp->dload_handle(), dep_file);
+    if (load_result == 0)
+    {
+        DLIF_error(DLET_MISC, "Failed load of dependent file '%s'.\n", so_name);
+    }
+
+    /* The file is closed whether or not the load succeeded. */
+    fclose(dep_file);
+    return load_result;
+}
+
+/******************************************************************************
+* DLIF_UNLOAD_DEPENDENT() - Loader callback: unload the file identified by
+*    file_handle through DLOAD_unload() using the device's loader handle.
+******************************************************************************/
+void DLIF_unload_dependent(void* client_handle, uint32_t file_handle)
+{
+    DSPDevice* dsp = (DSPDevice*) client_handle;
+    DLOAD_HANDLE loader = dsp->dload_handle();
+
+    DLOAD_unload(loader, file_handle);
+}
+
+}
+
+/******************************************************************************
+* dump_hex - print 'bytes' bytes starting at 'addr' to stdout as two-digit
+*    hex values, 16 per line. A blank line is printed first. Nothing else is
+*    printed when bytes <= 0.
+******************************************************************************/
+void dump_hex(char *addr, int bytes)
+{
+    printf("\n");
+
+    int offset = 0;
+    while (offset < bytes)
+    {
+        /* End of the current row: 16 bytes further on, or the end of the data. */
+        const int remaining = bytes - offset;
+        const int row_end   = (remaining > 16) ? offset + 16 : bytes;
+
+        for (; offset < row_end; ++offset)
+        {
+            /* Mask so a negative char does not print as a sign-extended value. */
+            printf("%02x ", addr[offset] & 0xff);
+        }
+        printf("\n");
+    }
+}
+
diff --git a/src/core/dsp/device.h b/src/core/dsp/device.h
new file mode 100644
index 0000000..4a6f32a
--- /dev/null
+++ b/src/core/dsp/device.h
@@ -0,0 +1,151 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __DSP_DEVICE_H__
+#define __DSP_DEVICE_H__
+
+extern "C" {
+#include "dload_api.h"
+}
+
+#include "../deviceinterface.h"
+#include "dspheap.h"
+#include "message.h"
+#include "u_concurrent_map.h"
+#include "kernel.h"
+#include <pthread.h>
+#include <string>
+#include <list>
+
+namespace Coal
+{
+
+class MemObject;
+class Event;
+class Program;
+class Kernel;
+
+/*-----------------------------------------------------------------------------
+* DSPDevice: the DeviceInterface implementation for a TI DSP device.
+* It owns the device memory heaps, a list of pending events guarded by a
+* mutex/condition variable, the dynamic loader handle, and the mail (message)
+* entry points declared below.
+*----------------------------------------------------------------------------*/
+class DSPDevice : public DeviceInterface
+{
+ public:
+ DSPDevice(unsigned char dsp_id);
+ ~DSPDevice();
+
+ void init();
+
+ // Device-info query: param_value may be NULL for a size-only query and
+ // param_value_size_ret may be NULL when the size is not wanted.
+ cl_int info(cl_device_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ // Factories for the device-specific counterparts of core objects.
+ DeviceBuffer *createDeviceBuffer(MemObject *buffer, cl_int *rs);
+ DeviceProgram *createDeviceProgram(Program *program);
+ DeviceKernel *createDeviceKernel(Kernel *kernel,
+ llvm::Function *function);
+
+ cl_int initEventDeviceData(Event *event);
+ void freeEventDeviceData(Event *event);
+
+ // Event queue interface.
+ // NOTE(review): presumably backed by p_events / p_events_mutex /
+ // p_events_cond below -- the definitions are outside this header.
+ void pushEvent(Event *event);
+ bool stop();
+ bool availableEvent();
+ Event *getEvent(bool &stop);
+
+ // Simple accessors for device properties.
+ unsigned int numDSPs() const;
+ float dspMhz() const;
+ unsigned char dspID() const;
+ DLOAD_HANDLE dload_handle() const;
+
+ // Load / unload a file on the device by name / file handle.
+ int load(const char *filename);
+ bool unload(int file_handle);
+
+ /*---------------------------------------------------------------------
+ * These malloc routines return a uint32_t instead of a pointer
+ * Because the target memory space is 32 bit and is independent of the
+ * size of a host pointer (ie. 32bit vs 64 bit)
+ * Device/Target global memory could be 36-bit.
+ * get_local_scratch returns max local free block for per kernel use.
+ *--------------------------------------------------------------------*/
+ DSPDevicePtr get_local_scratch(uint32_t &size, uint32_t &block_size);
+ DSPDevicePtr malloc_local (size_t size);
+ void free_local (DSPDevicePtr add);
+ DSPDevicePtr malloc_msmc (size_t size);
+ void free_msmc (DSPDevicePtr add);
+ DSPDevicePtr64 malloc_global(size_t size, bool prefer_32bit=true);
+ void free_global (DSPDevicePtr64 add);
+
+ // Message (mail) interface to the device.
+ void mail_to (Msg_t& msg);
+ bool mail_query();
+ int mail_from ();
+
+ // Bookkeeping of events keyed by a uint32_t index (see p_complete_pending).
+ void push_complete_pending(uint32_t idx, class Event* const data);
+ bool get_complete_pending(uint32_t idx, class Event* &data);
+ void dump_complete_pending();
+ bool any_complete_pending();
+ bool gotEnoughToWorkOn();
+
+ // Name of the builtins header for this device.
+ std::string builtinsHeader(void) const { return "dsp.h"; }
+
+ DSPDevicePtr get_addr_kernel_config() { return p_addr_kernel_config; }
+ // Returned as void*; the concrete resource type is not named in this header.
+ void* get_mpax_default_res();
+
+ private:
+ unsigned int p_cores;
+ unsigned int p_num_events;
+ float p_dsp_mhz;
+ pthread_t p_worker;
+ void* p_rx_mbox; // int
+ void* p_tx_mbox;
+ std::list<Event *> p_events;
+ pthread_cond_t p_events_cond;
+ pthread_mutex_t p_events_mutex;
+ bool p_stop;
+ bool p_initialized;
+ unsigned char p_dsp_id;
+ dspheap p_device_ddr_heap1; // persistently mapped memory
+ dspheap p_device_ddr_heap2; // ondemand mapped memory
+ dspheap p_device_ddr_heap3; // addl ondemand mapped memory
+ dspheap p_device_l2_heap;
+ dspheap p_device_msmc_heap;
+ DLOAD_HANDLE p_dload_handle;
+ concurrent_map<uint32_t, class Event*> p_complete_pending;
+
+ // Base addresses and sizes of the device memory regions and mailboxes.
+ DSPDevicePtr p_addr_kernel_config;
+ DSPDevicePtr64 p_addr64_global_mem;
+ DSPDevicePtr p_addr_local_mem;
+ DSPDevicePtr p_addr_msmc_mem;
+ DSPDevicePtr p_addr_mbox_d2h_phys;
+ DSPDevicePtr p_addr_mbox_h2d_phys;
+ uint64_t p_size64_global_mem;
+ uint32_t p_size_local_mem;
+ uint32_t p_size_msmc_mem;
+ uint32_t p_size_mbox_d2h;
+ uint32_t p_size_mbox_h2d;
+ void* p_mpax_default_res; // returned by get_mpax_default_res()
+};
+}
+#endif
diff --git a/src/core/dsp/driver.cpp b/src/core/dsp/driver.cpp
new file mode 100644
index 0000000..08e97f7
--- /dev/null
+++ b/src/core/dsp/driver.cpp
@@ -0,0 +1,34 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifdef DSPC868X
+#include "driver_shannon.cpp"
+#include "cmem.cpp"
+#else
+#include "driver_hawking.cpp"
+#include "shmem.cpp"
+#endif
diff --git a/src/core/dsp/driver.h b/src/core/dsp/driver.h
new file mode 100644
index 0000000..1e41a28
--- /dev/null
+++ b/src/core/dsp/driver.h
@@ -0,0 +1,100 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _DRIVER_H
+#define _DRIVER_H
+#include <vector>
+#include "u_lockable.h"
+#include "device.h"
+
+#ifdef DSPC868X
+extern "C"
+{
+ #include "pciedrv.h"
+ #include "dnldmgr.h"
+ #include "cmem_drv.h"
+ #include "bufmgr.h"
+}
+#else
+#include "shmem.h"
+#endif
+
+/*-----------------------------------------------------------------------------
+* Driver: host-side access layer for the DSP device(s).
+* The constructor is private and copying is disallowed; the single object is
+* obtained through Driver::instance(). Construction calls open() and
+* destruction calls close().
+*----------------------------------------------------------------------------*/
+class Driver : public Lockable_off
+{
+ public:
+ ~Driver() { close(); }
+ int32_t num_dsps() const { return pNum_dsps; }
+ int32_t close();
+
+ // Transfer sz bytes between host buffer 'buf' and device address 'addr'.
+ int32_t write(int32_t dsp, DSPDevicePtr64 addr, uint8_t *buf, uint32_t sz);
+ int32_t read (int32_t dsp, DSPDevicePtr64 addr, uint8_t *buf, uint32_t sz);
+
+ // reset_and_load returns an opaque image handle; it is the handle type
+ // accepted by free_image_handle() and get_symbol().
+ void* reset_and_load (int chip);
+ void free_image_handle(void *handle);
+ void cmem_init(DSPDevicePtr64 *addr1, uint64_t *size1,
+ DSPDevicePtr *addr2, uint32_t *size2,
+ DSPDevicePtr64 *addr3, uint64_t *size3);
+ void cmem_exit();
+ DSPDevicePtr64 cmem_ondemand_malloc(uint64_t size);
+ void cmem_ondemand_free (DSPDevicePtr64 addr);
+ // Splits the region [addr, addr+size) into chunk 1 (addr1/size1) and
+ // chunk 2 (addr2/size2); size3 is passed by reference as well.
+ void split_ddr_memory (DSPDevicePtr64 addr, uint64_t size,
+ DSPDevicePtr64& addr1, uint64_t& size1,
+ DSPDevicePtr64& addr2, uint64_t& size2,
+ uint64_t& size3);
+ void shmem_configure (DSPDevicePtr64 addr, uint64_t size,
+ int cmem_block = -1);
+ // map/unmap a device address range to/from a host pointer.
+ void* map (DSPDevicePtr64 addr, uint32_t sz,
+ bool is_read = false);
+ int32_t unmap (void *host_addr, DSPDevicePtr64 buf_addr,
+ uint32_t sz, bool is_write = false);
+ DSPDevicePtr get_symbol(void* image_handle, const char *name);
+
+ static Driver* instance ();
+
+ private:
+ static Driver* pInstance; // the single Driver object, 0 until created
+ int32_t pNum_dsps;
+
+ // Backend-specific state, selected at build time by DSPC868X.
+#ifdef DSPC868X
+ pciedrv_open_config_t config;
+ pciedrv_device_info_t *pDevices_info;
+#else
+ std::vector<shmem*> pShmem_areas;
+ shmem* get_memory_region(DSPDevicePtr64 addr);
+#endif
+
+ int32_t open ();
+ bool wait_for_ready(int chip);
+ int32_t write_core(int32_t dsp, DSPDevicePtr64 addr, uint8_t *buf,
+ uint32_t sz);
+
+ Driver() { open(); }
+ Driver(const Driver&); // copy ctor disallowed
+ Driver& operator=(const Driver&); // assignment disallowed
+};
+
+#endif // _DRIVER_H
diff --git a/src/core/dsp/driver_hawking.cpp b/src/core/dsp/driver_hawking.cpp
new file mode 100644
index 0000000..7cb2857
--- /dev/null
+++ b/src/core/dsp/driver_hawking.cpp
@@ -0,0 +1,451 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "driver.h"
+#include <deque>
+#include <iostream>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <sys/stat.h>
+#include <string>
+#include <bfd.h>
+
+extern "C"
+{
+ #include "mpmclient.h"
+};
+
+
+#define ERR(status, msg) if (status) { printf("ERROR: %s\n", msg); exit(-1); }
+#define BOOT_ENTRY_LOCATION_ADDR 0x87FFFC
+#define BOOT_MAGIC_ADDR(core) (0x10000000 | (core << 24) | 0x87FFFC)
+
+Driver* Driver::pInstance = 0;
+
+/******************************************************************************
+* Thread safe instance function for singleton behavior
+*
+* Returns the single Driver object, creating it on the first call. The fast
+* path reads pInstance without taking the mutex; the mutex is taken only when
+* that read sees 0, and pInstance is re-checked under the lock so that only one
+* caller constructs the object (double-checked locking).
+******************************************************************************/
+Driver* Driver::instance ()
+{
+ static Mutex Driver_instance_mutex;
+ Driver* tmp = pInstance;
+
+ // Full memory barrier after the unlocked read of pInstance.
+ __sync_synchronize();
+
+ if (tmp == 0)
+ {
+ ScopedLock lck(Driver_instance_mutex);
+
+ // Re-read under the lock: another thread may have created the instance
+ // between the unlocked check above and acquiring the mutex.
+ tmp = pInstance;
+ if (tmp == 0)
+ {
+ tmp = new Driver;
+ // Barrier between constructing the object and publishing the pointer,
+ // so the pointer is not made visible before the construction.
+ __sync_synchronize();
+ pInstance = tmp;
+ }
+ }
+ return tmp;
+}
+
+/******************************************************************************
+* Map a switch device id to the board name string for that device.
+* 0x8624 -> "dspc8681", 0x8748 -> "dspc8682". Any other id is reported through
+* ERR(), which prints the message and exits.
+******************************************************************************/
+const char *get_board(unsigned switch_device)
+{
+    if (switch_device == 0x8624) return "dspc8681";
+    if (switch_device == 0x8748) return "dspc8682";
+
+    ERR(1, "Unsupported device");
+    return "unknown";
+}
+
+#define TOTAL_NUM_CORES_PER_CHIP 8
+
+/******************************************************************************
+* wait_for_ready - in this driver variant the function performs no wait and
+* reports ready for every chip.
+******************************************************************************/
+bool Driver::wait_for_ready(int chip)
+{
+    return true;
+}
+
+/******************************************************************************
+* report_core_state - diagnostic helper that would query the state of the core
+* named by curr_core (e.g. "dsp0") and print it. The whole body is compiled out
+* with "#if 0", so the function currently does nothing.
+******************************************************************************/
+static void report_core_state(const char *curr_core)
+{
+#if 0
+ char state[50];
+ int ret;
+ mpm_slave_state_e core_state;
+
+ ret = mpm_state(curr_core, &core_state);
+ if ( ret < 0)
+ printf("state query failed, %s\n", curr_core);
+
+ // Translate the state enum into a short description for the message below.
+ switch (core_state)
+ {
+ case mpm_slave_state_idle: sprintf(state, "idle"); break;
+ case mpm_slave_state_loaded: sprintf(state, "loaded"); break;
+ case mpm_slave_state_running: sprintf(state, "running"); break;
+ case mpm_slave_state_crashed: sprintf(state, "crashed"); break;
+ case mpm_slave_state_error: sprintf(state, "in error"); break;
+
+ default: sprintf(state, "in undefined state"); break;
+ }
+
+ printf("DSP core state: %s is %s\n", curr_core, state);
+#endif
+}
+
+/******************************************************************************
+* Driver::reset_and_load
+*
+* Resets every DSP core, loads the monitor image "<ocl dsp dir>/dsp.out" onto
+* each of them, starts them, and finally opens the same image with BFD.
+*
+* chip    - not used by this driver variant.
+* returns - the opened bfd* for the monitor image, as a void* handle.
+*
+* Any reset/load/run failure, a failure to open the image, or an ambiguously
+* recognized image format terminates the process through exit(-1).
+******************************************************************************/
+void *Driver::reset_and_load(int chip)
+{
+    int  ret;
+    int  error_code = 0;
+    char curr_core[10];
+
+    std::string get_ocl_dsp();
+    std::string monitor = get_ocl_dsp() + "/dsp.out";
+
+    /*-------------------------------------------------------------------------
+    * Reset the devices
+    *------------------------------------------------------------------------*/
+    for (int core=0; core< TOTAL_NUM_CORES_PER_CHIP; core++)
+    {
+        snprintf(curr_core, sizeof(curr_core), "dsp%d", core);
+
+        ret = mpm_reset(curr_core, &error_code);
+        if ( ret < 0)
+            printf("reset failed, core %d (retval: %d, error: %d)\n",
+                   core, ret, error_code);
+// JKN Update ERR to handle error_code
+        ERR (ret, "DSP out of reset failed");
+
+        report_core_state(curr_core);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Load monitor on the devices
+    *------------------------------------------------------------------------*/
+    for (int core=0; core< TOTAL_NUM_CORES_PER_CHIP; core++)
+    {
+        snprintf(curr_core, sizeof(curr_core), "dsp%d", core);
+        ret = mpm_load(curr_core, const_cast<char*>(monitor.c_str()),
+                       &error_code);
+        if ( ret < 0)
+            printf("load failed, core %d (retval: %d, error: %d)\n",
+                   core, ret, error_code);
+        ERR(ret, "Download image failed");
+
+        report_core_state(curr_core);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Run monitor on the devices
+    *------------------------------------------------------------------------*/
+    for (int core=0; core< TOTAL_NUM_CORES_PER_CHIP; core++)
+    {
+        snprintf(curr_core, sizeof(curr_core), "dsp%d", core);
+        ret = mpm_run(curr_core, &error_code);
+        if ( ret < 0)
+            printf("run failed, core %d (retval: %d, error: %d)\n",
+                   core, ret, error_code);
+        ERR(ret, "DSP run failed");
+
+        report_core_state(curr_core);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Open the monitor image so that symbols can be looked up in it later
+    *------------------------------------------------------------------------*/
+    bfd *dsp_bfd = bfd_openr(monitor.c_str(), NULL);
+    char** matching = NULL;
+
+    if(dsp_bfd == NULL)
+    {
+        printf("\nERROR:driver: %s Error Open image %s\n",
+               bfd_errmsg(bfd_get_error()), monitor.c_str());
+        exit(-1);
+    }
+    /* Check format with matching */
+    if (!bfd_check_format_matches (dsp_bfd, bfd_object, &matching))
+    {
+        fprintf(stderr, "\nERROR:driver %s: %s\n", monitor.c_str(),
+                bfd_errmsg(bfd_get_error()));
+        if (bfd_get_error () == bfd_error_file_ambiguously_recognized)
+        {
+            /*-----------------------------------------------------------------
+            * 'matching' is a NULL-terminated array of target names. Walk the
+            * array (not the characters of its first entry), print every
+            * candidate, release the array, and only then terminate.
+            *----------------------------------------------------------------*/
+            if (matching != NULL)
+            {
+                for (char **candidate = matching; *candidate != NULL;
+                     ++candidate)
+                    printf("%s: \n", *candidate);
+                free (matching);
+            }
+            exit(-1);
+        }
+        /* NOTE(review): any other format failure is only reported; the    */
+        /* handle is still returned to the caller, as before.              */
+    }
+
+    return (void *)dsp_bfd;
+}
+
+/******************************************************************************
+* Driver::open - called from the constructor. Under the object lock it records
+* that one DSP device is present. Always returns 0.
+******************************************************************************/
+int32_t Driver::open()
+{
+    Lock lock(this);
+
+    /* This driver variant always reports exactly one DSP device. */
+    pNum_dsps = 1;
+    return 0;
+}
+
+/******************************************************************************
+* Driver::close() - called from the destructor. Under the object lock it
+* deletes every configured shared-memory area, last one first, and then calls
+* cmem_exit(). Always returns 0.
+******************************************************************************/
+int32_t Driver::close()
+{
+    Lock lock(this);
+
+    while (!pShmem_areas.empty())
+    {
+        delete pShmem_areas.back();
+        pShmem_areas.pop_back();
+    }
+
+    cmem_exit();
+    return 0;
+}
+
+// Forwards all six in/out pointers unchanged to
+// shmem_cmem_persistent::cmem_init().
+// NOTE(review): presumably the callee fills in the three address/size pairs --
+// confirm against shmem.cpp.
+void Driver::cmem_init(DSPDevicePtr64 *addr1, uint64_t *size1,
+ DSPDevicePtr *addr2, uint32_t *size2,
+ DSPDevicePtr64 *addr3, uint64_t *size3)
+{
+ shmem_cmem_persistent::cmem_init(addr1, size1, addr2, size2, addr3, size3);
+}
+
+// Forwards to shmem_cmem_persistent::cmem_exit(). Called from Driver::close().
+void Driver::cmem_exit()
+{
+ shmem_cmem_persistent::cmem_exit();
+}
+
+// Forwards to shmem_cmem_ondemand::cmem_malloc(size) and returns its result
+// unchanged.
+DSPDevicePtr64 Driver::cmem_ondemand_malloc(uint64_t size)
+{
+ return shmem_cmem_ondemand::cmem_malloc(size);
+}
+
+// Forwards to shmem_cmem_ondemand::cmem_free(addr).
+void Driver::cmem_ondemand_free(DSPDevicePtr64 addr)
+{
+ shmem_cmem_ondemand::cmem_free(addr);
+}
+
+/******************************************************************************
+* Driver::split_ddr_memory: partition DDR to persistent mapping part (heap1)
+* and on demand mapping part (heap2)
+*
+* addr, size   - (in)     the DDR region to partition
+* addr1, size1 - (out)    first chunk; defaults to the whole region
+* addr2, size2 - (out)    second chunk; 0/0 when the region is not split
+* size3        - (in/out) read to decide whether to split, and forced to 0
+*                         when the TI_OCL_DSP_NOMAP environment variable is set
+******************************************************************************/
+void Driver::split_ddr_memory(DSPDevicePtr64 addr, uint64_t size,
+ DSPDevicePtr64& addr1, uint64_t& size1,
+ DSPDevicePtr64& addr2, uint64_t& size2,
+ uint64_t& size3)
+{
+ // Default result: everything in chunk 1, chunk 2 empty.
+ addr1 = addr;
+ size1 = size;
+ addr2 = 0;
+ size2 = 0;
+
+
+ // split ddr memory 1 into two chunks
+ if (getenv("TI_OCL_DSP_NOMAP") != NULL)
+ {
+ // With TI_OCL_DSP_NOMAP set the region is not split and size3 is dropped.
+ size3 = 0;
+ }
+ else if (addr + size > ALL_PERSISTENT_MAX_DSP_ADDR ||
+ (size3 > 0 && addr + size > MPAX_USER_MAPPED_DSP_ADDR))
+ {
+ // Everything from MPAX_USER_MAPPED_DSP_ADDR to the end of the region
+ // becomes chunk 2; chunk 1 keeps the part below it.
+ size2 = addr + size - MPAX_USER_MAPPED_DSP_ADDR;
+ size1 = size - size2;
+ addr2 = addr + size1;
+ }
+
+ // translate first chunk to using 32-bit aliased physical addresses
+ if (addr > DSP_36BIT_ADDR)
+ {
+ // Rebase: offset of addr from 0x820000000 applied to base 0xA0000000.
+ addr1 = addr + 0xA0000000 - 0x820000000ULL;
+ /*---------------------------------------------------------------------
+ * if the ddr size is greater than we can currently support, limit it
+ *--------------------------------------------------------------------*/
+ //const int ddr_size_limit = (1.5 * 1024*1024*1024) - (48 *1024*1024);
+ const uint64_t ddr_size_limit = ALL_PERSISTENT_MAX_DSP_ADDR - addr;
+ if (size1 > ddr_size_limit)
+ size1 = ddr_size_limit;
+ }
+}
+
+/******************************************************************************
+* Driver::shmem_configure: create a shared-memory area object for the range
+* (addr, size), configure it, and append it to pShmem_areas.
+*
+* The concrete area type is chosen as follows:
+*   addr >= MPAX_USER_MAPPED_DSP_ADDR -> shmem_cmem_ondemand
+*   otherwise, cmem_block >= 0        -> shmem_cmem_persistent(cmem_block)
+*   otherwise                         -> shmem_persistent
+* A zero size is ignored.
+******************************************************************************/
+void Driver::shmem_configure(DSPDevicePtr64 addr, uint64_t size, int cmem_block)
+{
+    if (size == 0) return;   // size is unsigned, so "<= 0" means "== 0"
+
+    shmem *new_area;
+    if (addr >= MPAX_USER_MAPPED_DSP_ADDR)
+    {
+        new_area = new shmem_cmem_ondemand();
+    }
+    else if (cmem_block >= 0)
+    {
+        new_area = new shmem_cmem_persistent(cmem_block);
+    }
+    else
+    {
+        new_area = new shmem_persistent();
+    }
+
+    new_area->configure(addr, size);
+    pShmem_areas.push_back(new_area);
+}
+
+/******************************************************************************
+* Driver::get_memory_region
+*
+* Returns the registered shared-memory area whose half-open range
+* [start, start + size) contains addr.  Areas are searched in registration
+* order and the first match wins.  If no area contains addr, a message is
+* printed and the process exits with status -1.
+******************************************************************************/
+shmem* Driver::get_memory_region(DSPDevicePtr64 addr)
+{
+    for (size_t i = 0; i < pShmem_areas.size(); ++i)
+    {
+        shmem* area = pShmem_areas[i];
+
+        const uint64_t begin         = (uint64_t)area->start();
+        const uint64_t end_exclusive = begin + area->size();
+
+        if (addr >= begin && addr < end_exclusive)
+            return area;
+    }
+
+    // uint64_t is not necessarily "unsigned long long"; cast so the argument
+    // matches the %llx conversion.
+    printf("Illegal memory region: addr = 0x%llx\n", (unsigned long long)addr);
+    exit(-1);
+}
+
+
+/******************************************************************************
+* Driver::write
+*
+* Writes size bytes from buf to DSP address addr.  If the write is to L2
+* (addr >> 20 == 0x008), the same data is written once per core, at
+* ((0x10 + core) << 24) + addr for each core in the chip.  Any other address
+* is written once, as given.
+*
+* Returns 0 on success; otherwise the last non-zero status reported by
+* write_core.
+******************************************************************************/
+int32_t Driver::write(int32_t dsp_id, DSPDevicePtr64 addr, uint8_t *buf,
+                      uint32_t size)
+{
+    if ((addr >> 20) != 0x008)
+        return write_core(dsp_id, addr, buf, size);
+
+    int32_t status = 0;
+    for (int core = 0; core < TOTAL_NUM_CORES_PER_CHIP; core++)
+    {
+        int32_t rc = write_core(dsp_id, ((0x10 + core) << 24) + addr,
+                                buf, size);
+        if (rc != 0) status = rc;
+    }
+
+    // The function is declared int32_t; flowing off the end without a
+    // return value is undefined behaviour.
+    return status;
+}
+
+/******************************************************************************
+* Driver::write_core
+*
+* Under the driver lock: looks up the memory region containing addr, maps
+* size bytes of it, copies buf into the mapping, and unmaps again.  A failed
+* mapping is reported through ERR.  Returns 0.
+******************************************************************************/
+int32_t Driver::write_core(int32_t dsp_id, DSPDevicePtr64 addr, uint8_t *buf,
+                           uint32_t size)
+{
+    Lock lock(this);
+
+    shmem* area     = get_memory_region(addr);
+    void*  host_dst = area->map(addr, size, false);
+
+    if (host_dst != NULL)
+    {
+        memcpy((char*)host_dst, buf, size);
+    }
+    else
+    {
+        ERR(1, "Unable to map dsp addr for write");
+    }
+
+    area->unmap(host_dst, size, true);
+    return 0;
+}
+
+/******************************************************************************
+* Driver::map
+*
+* Under the driver lock, maps sz bytes at DSP address addr through the memory
+* region that contains it and returns the resulting host pointer.  A NULL
+* mapping is reported through ERR.
+******************************************************************************/
+void* Driver::map(DSPDevicePtr64 addr, uint32_t sz, bool is_read)
+{
+    Lock lock(this);
+
+    void* host_addr = get_memory_region(addr)->map(addr, sz, is_read);
+    if (!host_addr)
+    {
+        ERR(1, "Unable to map a dsp address");
+    }
+    return host_addr;
+}
+
+/******************************************************************************
+* Driver::unmap
+*
+* Under the driver lock, asks the memory region containing buf_addr to unmap
+* host_addr (sz bytes).  Always returns 0.
+******************************************************************************/
+int32_t Driver::unmap(void *host_addr, DSPDevicePtr64 buf_addr, uint32_t sz,
+                      bool is_write)
+{
+    Lock lock(this);
+
+    get_memory_region(buf_addr)->unmap(host_addr, sz, is_write);
+    return 0;
+}
+
+/******************************************************************************
+* Driver::read
+*
+* Under the driver lock: looks up the memory region containing addr, maps
+* size bytes of it, copies the mapped bytes into buf, and unmaps again.  A
+* failed mapping is reported through ERR.  Returns 0.
+******************************************************************************/
+int32_t Driver::read(int32_t dsp_id, DSPDevicePtr64 addr, uint8_t *buf,
+                     uint32_t size)
+{
+    Lock lock(this);
+
+    shmem* area     = get_memory_region(addr);
+    void*  host_src = area->map(addr, size, true);
+
+    if (host_src != NULL)
+    {
+        memcpy(buf, (char*)host_src, size);
+    }
+    else
+    {
+        ERR(1, "Unable to map dsp addr for read");
+    }
+
+    area->unmap(host_src, size, false);
+    return 0;
+}
+
+/******************************************************************************
+* Driver::free_image_handle
+*
+* Treats handle as a bfd* and closes it with bfd_close.  The result of
+* bfd_close is not examined.
+******************************************************************************/
+void Driver::free_image_handle(void *handle)
+{
+    bfd* image = static_cast<bfd*>(handle);
+    bfd_close(image);
+}
+
+/******************************************************************************
+* Driver::get_symbol
+*
+* Looks up the symbol called name in the BFD image behind image_handle and
+* returns its value as a DSP address.
+*
+* Every failure (NULL handle, symbol table cannot be sized or read,
+* allocation failure, symbol not present) prints an error and exits the
+* process with status -1; the function never returns an error code.
+******************************************************************************/
+DSPDevicePtr Driver::get_symbol(void* image_handle, const char *name)
+{
+    if (!image_handle)
+    {
+        std::cout << "ERROR: Failed to get image handle" << std::endl;
+        exit(-1);
+    }
+
+    bfd* dsp_bfd = (bfd *)image_handle;
+
+    /*-------------------------------------------------------------------------
+    * Both BFD symbol-table calls return a signed long and use a negative
+    * value to signal an error, so keep the results signed and check them
+    * before they are used as an allocation size or a loop bound.
+    *------------------------------------------------------------------------*/
+    long nsize = bfd_get_symtab_upper_bound(dsp_bfd);
+    if (nsize < 0)
+    {
+        std::cout << "ERROR: Failed to size symbol table in get_symbol"
+                  << std::endl;
+        exit(-1);
+    }
+
+    asymbol** symtab = (asymbol**)malloc(nsize);
+    if (symtab == NULL)
+    {
+        std::cout << "ERROR: Failed to malloc memory in get_symbol" << std::endl;
+        exit(-1);
+    }
+
+    long nsyms = bfd_canonicalize_symtab(dsp_bfd, symtab);
+    if (nsyms < 0)
+    {
+        free(symtab);
+        std::cout << "ERROR: Failed to read symbol table in get_symbol"
+                  << std::endl;
+        exit(-1);
+    }
+
+    for (long i = 0; i < nsyms; i++)
+    {
+        if (strcmp(symtab[i]->name, name) == 0)
+        {
+            symbol_info syminfo;
+            bfd_symbol_info(symtab[i], &syminfo);
+            DSPDevicePtr addr = syminfo.value;
+            free(symtab);
+
+            return addr;
+        }
+    }
+
+    free(symtab);
+    std::cout << "ERROR: Get symbol failed" << std::endl;
+    exit(-1);
+}
diff --git a/src/core/dsp/driver_shannon.cpp b/src/core/dsp/driver_shannon.cpp
new file mode 100644
index 0000000..b428dbb
--- /dev/null
+++ b/src/core/dsp/driver_shannon.cpp
@@ -0,0 +1,313 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "driver.h"
+#include "cmem.h"
+#include <deque>
+#include <iostream>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <sys/stat.h>
+#include <string>
+
+#define ERR(status, msg) if (status) { printf("ERROR: %s\n", msg); exit(-1); }
+#define BOOT_ENTRY_LOCATION_ADDR 0x87FFFC
+#define BOOT_MAGIC_ADDR(core) (0x10000000 | (core << 24) | 0x87FFFC)
+
+Driver* Driver::pInstance = 0;
+
+/******************************************************************************
+* Driver::instance: singleton accessor.
+*
+* Reads pInstance, and only if it is still null takes a function-local static
+* mutex, re-reads pInstance and creates the Driver if nobody else has.
+* __sync_synchronize() barriers are issued after the first read and between
+* constructing the object and publishing it through pInstance.
+******************************************************************************/
+Driver* Driver::instance ()
+{
+    static Mutex Driver_instance_mutex;
+    Driver* current = pInstance;
+
+    __sync_synchronize();
+
+    if (current != 0) return current;
+
+    ScopedLock guard(Driver_instance_mutex);
+
+    current = pInstance;            // re-check now that the lock is held
+    if (current == 0)
+    {
+        current = new Driver;
+        __sync_synchronize();       // barrier before the pointer is published
+        pInstance = current;
+    }
+    return current;
+}
+
+/******************************************************************************
+* get_board: map a PCI switch device id to a board name.
+*
+*   0x8624 -> "dspc8681"
+*   0x8748 -> "dspc8682"
+* Any other id is reported through ERR as "Unsupported device"; "unknown" is
+* the fall-back return value after that.
+******************************************************************************/
+const char *get_board(unsigned switch_device)
+{
+    if (switch_device == 0x8624) return "dspc8681";
+    if (switch_device == 0x8748) return "dspc8682";
+
+    ERR(1, "Unsupported device");
+    return "unknown";
+}
+
+#define TOTAL_NUM_CORES_PER_CHIP 8
+
+/******************************************************************************
+* Driver::wait_for_ready
+*
+* Polls the 4-byte word at BOOT_ENTRY_LOCATION_ADDR of each core (read at
+* ((0x10 + core) << 24) + BOOT_ENTRY_LOCATION_ADDR) until every core reads
+* back zero.  A poll pass stops at the first core with a non-zero word.
+*
+* Returns true once a full pass sees all zeros, false after more than 1000
+* unsuccessful passes; usleep(1000) is called between passes.  A failed read
+* is reported through ERR.
+******************************************************************************/
+bool Driver::wait_for_ready(int chip)
+{
+    for (int pass = 1; ; ++pass)
+    {
+        bool all_clear = true;
+
+        for (int core = 0;
+             all_clear && core < TOTAL_NUM_CORES_PER_CHIP;
+             core++)
+        {
+            uint32_t boot_entry_value;
+            int ret = pciedrv_dsp_read(chip,
+                          ((0x10 + core) << 24) + BOOT_ENTRY_LOCATION_ADDR,
+                          (unsigned char *) &boot_entry_value, 4);
+            ERR(ret, "pciedrv_dsp_read failed");
+
+            all_clear = (boot_entry_value == 0);
+        }
+
+        if (all_clear)   return true;
+        if (pass > 1000) return false;
+
+        usleep(1000);
+    }
+}
+
+char *get_ocl_install();
+
+/******************************************************************************
+* Driver::reset_and_load
+*
+* Sequence performed for the given chip (each step is checked with ERR):
+*   1. put the DSP in reset;
+*   2. fetch <install>/lib/init_<board>.out and take the DSP out of reset with
+*      it, passing a boot config of { 0xBABEFACE, pll_multiplier } at 0x86FF00;
+*   3. free that image and wait for wait_for_ready() to succeed;
+*   4. fetch <install>/lib/dsp.out and load it with dnldmgr_load_image.
+*
+* pll_multiplier is 0x14 (1.00 Ghz) unless the environment variable
+* TI_OCL_DSP_1_25GHZ is set, in which case it is 0x19 (1.25 Ghz).
+*
+* Returns the image handle of dsp.out; it is not freed here.
+******************************************************************************/
+void *Driver::reset_and_load(int chip)
+{
+    char *installation = get_ocl_install();
+
+    const uint32_t pll_multiplier =
+        getenv("TI_OCL_DSP_1_25GHZ") ? 0x00000019   // 1.25 Ghz
+                                     : 0x00000014;  // 1.00 Ghz by default
+
+    uint32_t   bootcfg_words[] = { 0xBABEFACE, pll_multiplier };
+    boot_cfg_t bootcfg = { 0x86FF00, sizeof(bootcfg_words), bootcfg_words};
+
+    // Step 1: hold the device in reset.
+    int ret = dnldmgr_reset_dsp(chip, 0, NULL, 0 , NULL);
+    ERR (ret, "DSP putting in reset failed");
+
+    // Step 2: release it from reset running the board-specific init image.
+    const char *board = get_board(pDevices_info[chip].switch_device);
+    std::string init_path(installation);
+    init_path.append("/lib/init_").append(board).append(".out");
+
+    void    *image_handle;
+    uint32_t entry;
+
+    ret = dnldmgr_get_image(init_path.c_str(), &image_handle, &entry);
+    ERR(ret, "Get reset image failed");
+
+    ret = dnldmgr_reset_dsp(chip, 1, image_handle, entry, &bootcfg);
+    ERR (ret, "DSP out of reset failed");
+
+    dnldmgr_free_image(image_handle);
+
+    // Step 3: wait for reset to complete.
+    ERR(!wait_for_ready(chip), "Reset Failed due to timeout");
+
+    // Step 4: load the monitor image.
+    std::string monitor_path(installation);
+    monitor_path.append("/lib/dsp.out");
+
+    ret = dnldmgr_get_image(monitor_path.c_str(), &image_handle, &entry);
+    ERR(ret, "Get DSP image failed");
+
+    ret = dnldmgr_load_image(chip, 0xFFFF, image_handle, entry, NULL);
+    ERR(ret, "Download image failed");
+
+    return image_handle;
+}
+
+/******************************************************************************
+* Driver::open
+*
+* Under the driver lock: fills in the PCIe open configuration, opens the PCIe
+* driver, records the number of devices in pNum_dsps, allocates pDevices_info
+* with one entry per device and fills it via pciedrv_get_pci_info, then calls
+* Cmem::instance() once.  Each step is checked with ERR.  Returns 0.
+******************************************************************************/
+int32_t Driver::open()
+{
+    Lock lock(this);
+
+    // Zero the whole configuration, then set the fields explicitly.
+    memset((void*)&config, 0, sizeof(pciedrv_open_config_t));
+    config.dsp_outbound_reserved_mem_size = 0;
+    config.start_dma_chan_num             = 0;
+    config.num_dma_channels               = 4;
+    config.start_param_set_num            = 0;
+    config.num_param_sets                 = 32;
+    config.dsp_outbound_block_size        = 0x400000;
+    config.max_dma_transactions           = 256;
+
+    int status = pciedrv_open(&config);
+    ERR(status, "PCIe Driver Open Error");
+
+    pNum_dsps = pciedrv_get_num_devices();
+
+    // One pciedrv_device_info_t per device reported by the driver.
+    const size_t info_bytes = pNum_dsps * sizeof(pciedrv_device_info_t);
+    pDevices_info = (pciedrv_device_info_t*) malloc(info_bytes);
+    ERR (!pDevices_info, "malloc failed pciedrv_devices_info_t");
+
+    int ret = pciedrv_get_pci_info(pDevices_info);
+    ERR(ret, "get pci info failed");
+
+    Cmem::instance();   // Prime the setup of cmem
+    return 0;
+}
+
+/******************************************************************************
+* Driver::close()
+*
+* Under the driver lock, releases the device-info array allocated by open()
+* and closes the PCIe driver (checked with ERR).  Returns 0.
+******************************************************************************/
+int32_t Driver::close()
+{
+    Lock lock(this);
+
+    free (pDevices_info);
+    pDevices_info = NULL;   // no dangling pointer; a repeated close() is then
+                            // a harmless free(NULL) instead of a double free
+
+    int status = pciedrv_close();
+    ERR(status, "PCIe Driver Close Error");
+    return 0;
+}
+
+
+/******************************************************************************
+* Driver::write
+*
+* Writes size bytes from buf to DSP address addr.  If the write is to L2
+* (addr >> 20 == 0x008), the same data is written once per core, at
+* ((0x10 + core) << 24) + addr for each core in the chip.  Any other address
+* is written once, as given.
+*
+* Returns 0 on success; otherwise the last non-zero status reported by
+* write_core.
+******************************************************************************/
+int32_t Driver::write(int32_t dsp_id, DSPDevicePtr addr, uint8_t *buf,
+                      uint32_t size)
+{
+    if ((addr >> 20) != 0x008)
+        return write_core(dsp_id, addr, buf, size);
+
+    int32_t status = 0;
+    for (int core = 0; core < TOTAL_NUM_CORES_PER_CHIP; core++)
+    {
+        int32_t rc = write_core(dsp_id, ((0x10 + core) << 24) + addr,
+                                buf, size);
+        if (rc != 0) status = rc;
+    }
+
+    // The function is declared int32_t; flowing off the end without a
+    // return value is undefined behaviour.
+    return status;
+}
+
+
+/******************************************************************************
+* Driver::write_core
+*
+* Writes size bytes from buf to addr on device dsp_id.
+*   size <  24k : direct pciedrv_dsp_write (checked with ERR), no driver lock
+*   size >= 24k : Cmem DMA write, performed under the driver lock
+* Regular writes under 24k are faster than DMA writes (may change).
+* Returns 0.
+******************************************************************************/
+int32_t Driver::write_core(int32_t dsp_id, DSPDevicePtr addr, uint8_t *buf,
+                           uint32_t size)
+{
+    const bool use_dma = !(size < 24 * 1024);
+
+    if (use_dma)
+    {
+        Lock lock(this);
+        Cmem::instance()->dma_write(dsp_id, addr, buf, size);
+        return 0;
+    }
+
+    int status = pciedrv_dsp_write(dsp_id, addr, buf, size);
+    ERR(status, "PCIe Driver Write Error");
+    return 0;
+}
+
+/******************************************************************************
+* Driver::map: no mapping is performed in this driver; the device address is
+* widened to 64 bits and returned as a pointer value.  sz and is_read are not
+* used.
+******************************************************************************/
+void* Driver::map(DSPDevicePtr addr, uint32_t sz, bool is_read)
+{
+    const uint64_t widened = addr;
+    return (void*) widened;
+}
+
+/******************************************************************************
+* Driver::unmap: nothing to undo in this driver, because map() here only
+* converts the address.  All arguments are ignored.  Returns 0 so that the
+* declared int32_t result is always defined.
+******************************************************************************/
+int32_t Driver::unmap(void *host_addr, DSPDevicePtr buf_addr,
+                      uint32_t sz, bool is_write)
+{
+    return 0;
+}
+
+/******************************************************************************
+* Driver::read
+*
+* Reads size bytes from addr on device dsp_id into buf using the Cmem DMA
+* read.  The result of dma_read is not examined.  Returns 0.
+******************************************************************************/
+int32_t Driver::read(int32_t dsp_id, DSPDevicePtr addr, uint8_t *buf,
+                     uint32_t size)
+{
+    Cmem* cmem = Cmem::instance();
+    cmem->dma_read(dsp_id, addr, buf, size);
+    return 0;
+}
+
+/******************************************************************************
+* Driver::get_symbol
+*
+* Asks the download manager for the address of symbol name in the image
+* behind image_handle and returns it.  If the lookup reports a non-zero
+* status, prints an error and exits the process with status -1.
+******************************************************************************/
+DSPDevicePtr Driver::get_symbol(void* image_handle, const char *name)
+{
+    DSPDevicePtr symbol_addr;
+
+    int ret = dnldmgr_get_symbol_address(image_handle, name, &symbol_addr);
+    if (ret != 0)
+    {
+        printf("ERROR: Get symbol failed\n");
+        exit(-1);
+    }
+
+    return symbol_addr;
+}
+
+/******************************************************************************
+* Driver::free_image_handle
+*
+* Releases an image handle by passing it to dnldmgr_free_image.
+******************************************************************************/
+void Driver::free_image_handle(void *handle)
+{
+    void *image = handle;
+    dnldmgr_free_image(image);
+}
+
+/******************************************************************************
+* Driver::cmem_init
+* Driver::cmem_exit
+* Driver::shmem_configure
+*
+* These three have empty bodies in this driver.
+******************************************************************************/
+void Driver::cmem_init(DSPDevicePtr64 *addr1, uint64_t *size1,
+                       DSPDevicePtr   *addr2, uint32_t *size2)
+{
+    /* intentionally empty: none of the arguments is touched */
+}
+
+void Driver::cmem_exit()
+{
+    /* empty body: nothing is done here */
+}
+
+void Driver::shmem_configure(DSPDevicePtr addr, uint32_t size, int cmem_block)
+{
+    /* empty body: no shared-memory area is created; arguments are unused */
+}
+
diff --git a/src/core/dsp/dspheap.h b/src/core/dsp/dspheap.h
new file mode 100644
index 0000000..0668647
--- /dev/null
+++ b/src/core/dsp/dspheap.h
@@ -0,0 +1,200 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**************************************************************************//**
+* @file dspheap.h
+*
+* @brief Define a dsp device heap manager run on the host.
+*
+* @version 1.00.00
+*
+******************************************************************************/
+#ifndef _DSPHEAP_H
+#define _DSPHEAP_H
+#include <map>
+#include <assert.h>
+#include <cstdio>
+#include <cstdlib>
+#include "u_lockable.h"
+#include "dspmem.h"
+
+#define ROUNDUP(val, pow2) (((val) + (pow2) - 1) & ~((pow2) - 1)) // round val up to a multiple of pow2; the mask form is only correct when pow2 is a power of two
+#define MIN_BLOCK_SIZE 128 // block granularity dspheap::configure selects for a regular heap
+#define MIN_CMEM_ONDEMAND_BLOCK_SIZE 4096 // block granularity dspheap::configure selects when is_cmem_ondemand_heap is true
+
+/******************************************************************************
+* dspheap: host-side manager for a range of DSP device addresses.
+*
+* Two address-ordered maps (block address -> block size in bytes) are kept:
+* free_list for available blocks and alloc_list for blocks handed out by
+* malloc().  Requests are rounded up to a multiple of the heap's block size.
+* malloc(), free(), max_block_size() and the free-list seeding in configure()
+* take the object's Lock.
+******************************************************************************/
+class dspheap : public Lockable
+{
+    typedef std::map<DSPDevicePtr64, uint64_t> block_list;
+    typedef block_list::iterator               block_iter;
+    typedef block_list::value_type             block_descriptor;
+
+  public:
+    /*-------------------------------------------------------------------------
+    * Construct a heap covering [start_addr, start_addr + length).
+    *------------------------------------------------------------------------*/
+    dspheap(DSPDevicePtr64 start_addr, uint64_t length)
+        : p_start_addr(0), p_length(0), p_block_size(MIN_BLOCK_SIZE)
+    {
+        configure(start_addr, length);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Construct an empty heap; configure() must be called before use.  The
+    * members are given defined values so that size() and max_block_size()
+    * are well-behaved (both report 0) on an unconfigured heap.
+    *------------------------------------------------------------------------*/
+    dspheap() : p_start_addr(0), p_length(0), p_block_size(MIN_BLOCK_SIZE) { }
+
+    /*-------------------------------------------------------------------------
+    * Set the managed range and block granularity.  The free list is seeded
+    * with a single block covering the whole range only if it is currently
+    * empty; an already populated free list is left untouched.
+    *
+    * NOTE: start_addr and length are meant to be multiples of 16M (the
+    * granularity of a memory region controlled by a C6x MAR register), but
+    * that is not enforced here.
+    *------------------------------------------------------------------------*/
+    void configure(DSPDevicePtr64 start_addr, uint64_t length,
+                   bool is_cmem_ondemand_heap = false)
+    {
+        p_start_addr = start_addr;
+        p_length     = length;
+        p_block_size = is_cmem_ondemand_heap ? MIN_CMEM_ONDEMAND_BLOCK_SIZE
+                                             : MIN_BLOCK_SIZE;
+
+        Lock lock(this);
+        if (free_list.empty())
+            free_list[start_addr] = length;
+    }
+
+    ~dspheap() { }
+
+    /*-------------------------------------------------------------------------
+    * Allocate size bytes (rounded up to the block size; a zero-size request
+    * is rounded up to one block so that every allocation owns a distinct
+    * address).  The lowest-addressed free block that is large enough is
+    * used (first fit).
+    *
+    * Returns the block address.  On failure returns 0 if allow_fail is true,
+    * otherwise prints a diagnostic and aborts.
+    *------------------------------------------------------------------------*/
+    DSPDevicePtr64 malloc(uint32_t size, bool allow_fail=false)
+    {
+        const uint64_t request = min_block_size(size);
+
+        Lock lock(this);
+        for (block_iter it = free_list.begin(); it != free_list.end(); ++it)
+        {
+            const DSPDevicePtr64 block_addr = it->first;
+            const uint64_t       block_size = it->second;
+
+            if (block_size < request) continue;
+
+            free_list.erase(it);
+            alloc_list[block_addr] = request;
+
+            /*-----------------------------------------------------------------
+            * if we only use a portion of the free block, the remainder goes
+            * back on the free list
+            *----------------------------------------------------------------*/
+            if (block_size > request)
+                free_list[block_addr + request] = block_size - request;
+
+            return block_addr;
+        }
+
+        if (!allow_fail)
+        {
+            printf("Malloc failed for size 0x%llx from range (0x%08llx, 0x%08llx)\n",
+                   (unsigned long long)request,
+                   (unsigned long long)p_start_addr,
+                   (unsigned long long)(p_start_addr + p_length - 1));
+            abort();
+        }
+
+        return 0;
+    }
+
+    /*-------------------------------------------------------------------------
+    * Return the block at addr to the free list, merging it with the free
+    * blocks that immediately precede and follow it.
+    *
+    * Returns 0 on success, -1 if addr is not an allocated block address.
+    *------------------------------------------------------------------------*/
+    int free(DSPDevicePtr64 addr)
+    {
+        Lock lock(this);
+        block_iter it = alloc_list.find(addr);
+        if (it == alloc_list.end()) return -1;
+
+        // 64-bit on purpose: a merged free block can exceed 4GB, and a
+        // 32-bit size would silently truncate it.
+        uint64_t size = it->second;
+        alloc_list.erase(it);
+
+        /*---------------------------------------------------------------------
+        * The map is ordered by address, so the only merge candidates are the
+        * free blocks directly below and directly above the freed block.
+        *--------------------------------------------------------------------*/
+        block_iter above = free_list.lower_bound(addr);
+
+        if (above != free_list.begin())
+        {
+            block_iter below = above;
+            --below;
+            if (below->first + below->second == addr)
+            {
+                addr  = below->first;
+                size += below->second;
+                free_list.erase(below);   // does not invalidate 'above'
+            }
+        }
+
+        if (above != free_list.end() && addr + size == above->first)
+        {
+            size += above->second;
+            free_list.erase(above);
+        }
+
+        free_list[addr] = size;
+        return 0;
+    }
+
+    /*-------------------------------------------------------------------------
+    * Total length of the managed range, as given to configure().
+    *------------------------------------------------------------------------*/
+    DSPDevicePtr64 size() const { return p_length; }
+
+    /*-------------------------------------------------------------------------
+    * Report the largest free block.  block_size receives the heap's block
+    * granularity; size receives the largest free block's size; the return
+    * value is that block's address.
+    *
+    * If the heap is shorter than one block, size and the return value are 0.
+    * NOTE(review): when no free block of at least one block size exists,
+    * the return value is 0 but size is reported as one block size; this
+    * original behaviour is preserved -- confirm that callers expect it.
+    *------------------------------------------------------------------------*/
+    DSPDevicePtr64 max_block_size(uint64_t &size, uint32_t &block_size)
+    {
+        if (p_length < p_block_size)
+        {
+            block_size = p_block_size;
+            size       = 0;
+            return 0;
+        }
+
+        DSPDevicePtr64 best_addr = 0;
+        uint64_t       best_size = p_block_size;
+
+        Lock lock(this);
+        for (block_iter it = free_list.begin(); it != free_list.end(); ++it)
+        {
+            // ">=" so that, among equally large blocks, the highest-addressed
+            // one is reported.
+            if (it->second >= best_size)
+            {
+                best_addr = it->first;
+                best_size = it->second;
+            }
+        }
+
+        block_size = p_block_size;
+        size       = best_size;
+        return best_addr;
+    }
+
+  private:
+    block_list     free_list;     // free blocks:      address -> size
+    block_list     alloc_list;    // allocated blocks: address -> size
+    DSPDevicePtr64 p_start_addr;  // start of the managed range
+    uint64_t       p_length;      // length of the managed range
+    uint32_t       p_block_size;  // allocation granularity
+
+    /*-------------------------------------------------------------------------
+    * Round a request up to a whole number of blocks.  Computed in 64 bits so
+    * that a size close to UINT32_MAX cannot wrap around to 0, and never less
+    * than one block.
+    *------------------------------------------------------------------------*/
+    uint64_t min_block_size(uint32_t size) const
+    {
+        const uint64_t bytes = size ? size : 1;
+        return ROUNDUP(bytes, (uint64_t)p_block_size);
+    }
+};
+
+#endif // _DSPHEAP_H
diff --git a/src/core/dsp/dspmem.h b/src/core/dsp/dspmem.h
new file mode 100644
index 0000000..f6c7c64
--- /dev/null
+++ b/src/core/dsp/dspmem.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ * Copyright (c) 2013, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <stdint.h>
+#ifndef _DSPMEM_H
+#define _DSPMEM_H
+
+
+typedef uint32_t DSPDevicePtr;   // 32-bit DSP device pointer value
+typedef uint64_t DSPDevicePtr64; // 64-bit DSP device pointer value (wide enough for the 0x8:xxxx_xxxx addresses below)
+typedef uint32_t DSPVirtPtr;     // 32-bit DSP virtual pointer value
+// typedef uint64_t DSPVirtPtr64; // for future C7x?
+
+/*****************************************************************************
+ * DSP Device Memory Physical Address (8GB)
+ * 0x8:0000_0000 - 0x8:1FFF_FFFF: Linux reserved
+ * 0x8:2000_0000 - 0x8:21FF_FFFF: OCL runtime reserved
+ * using default MPAX translation, map to
+ * DSP virtual address 0xA000_0000 - 0xA1FF_FFFF
+ * 0x8:2200_0000 - 0x8:3FFF_FFFF: using default MPAX translation, map to
+ * DSP virtual address 0xA200_0000 - 0xBFFF_FFFF
+ * used for kernel code, user app small buffers
+ * 0x8:4000_0000 - 0x9:FFFF_FFFF: using custom MPAX translation settings, map
+ * to unused DSP virtual address spaces
+ * used for user app big buffers
+ *****************************************************************************/
+#define DSP_36BIT_ADDR 0x800000000ULL // 0x8:0000_0000, the lowest address in the map described above
+#define MPAX_USER_MAPPED_DSP_ADDR 0x840000000ULL // 0x8:4000_0000, where the custom MPAX translation range described above begins
+#define ALL_PERSISTENT_MAX_DSP_ADDR 0x880000000ULL // 0x8:8000_0000
+
+#define MSMC_OCL_START_ADDR 0x0C040000 // NOTE(review): presumably the start of the MSMC range used by OpenCL -- confirm
+#define MSMC_OCL_END_ADDR 0x0C500000 // NOTE(review): presumably the end of that range; confirm whether it is inclusive
+
+
+#endif // _DSPMEM_H
diff --git a/src/core/dsp/genfile_cache.cpp b/src/core/dsp/genfile_cache.cpp
new file mode 100644
index 0000000..c9b2472
--- /dev/null
+++ b/src/core/dsp/genfile_cache.cpp
@@ -0,0 +1,94 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "genfile_cache.h"
+
+/******************************************************************************
+* genfile_cache::lookup
+*
+* Looks up the file name stored in the programs table under the CRC of
+* (module, options).
+*
+* Returns the stored file name if there is one and stat() succeeds on it.
+* If the entry exists but the file cannot be stat'ed, the entry is deleted
+* from the database and an empty string is returned.  An empty string is also
+* returned when there is no entry.
+******************************************************************************/
+std::string genfile_cache::lookup(llvm::Module *module, std::string options)
+{
+    std::vector<std::vector<std::string> > rows;
+
+    const uint32_t    hash      = convert_mod2crc(module, options);
+    const std::string hash_text = boost::lexical_cast<std::string>(hash);
+
+    std::string select_sql("select value from programs where hash = " +
+                           hash_text);
+    rows = p_database.query(select_sql.c_str());
+
+    if (rows.empty()) return std::string();
+
+    std::string &filename = rows[0][0];
+
+    struct stat statbuf;
+    if (stat(filename.c_str(), &statbuf) == 0)
+        return filename;
+
+    /*---------------------------------------------------------------------
+    * the cached filename no longer exists, remove it from the DB
+    *--------------------------------------------------------------------*/
+    std::string delete_sql("delete from programs where hash = " + hash_text);
+    p_database.query(delete_sql.c_str());
+
+    return std::string();
+}
+
+/******************************************************************************
+* genfile_cache::remember
+*
+* Inserts a row into the programs table that associates the CRC of
+* (module, options) with the file name outfile.  The file name is embedded in
+* the statement between double quotes, exactly as given.
+******************************************************************************/
+void genfile_cache::remember(const char *outfile, llvm::Module *module,
+                             std::string options)
+{
+    const uint32_t hash = convert_mod2crc(module, options);
+
+    std::string insert_sql("insert into programs(hash, value) values(");
+    insert_sql += boost::lexical_cast<std::string>(hash);
+    insert_sql += ", \"";
+    insert_sql += outfile;
+    insert_sql += "\");";
+
+    p_database.query(insert_sql.c_str());
+}
+
+/******************************************************************************
+* genfile_cache::convert_mod2crc
+*
+* Serializes module as LLVM bitcode into a string, appends options to it, and
+* returns the CRC (see get_crc) of the combined bytes.
+******************************************************************************/
+uint32_t genfile_cache::convert_mod2crc(llvm::Module *module,
+                                        std::string options)
+{
+    std::string key_bytes;
+
+    llvm::raw_string_ostream bitcode_stream(key_bytes);
+    llvm::WriteBitcodeToFile(module, bitcode_stream);
+    bitcode_stream.str();   // called for its flush into key_bytes; result unused
+
+    key_bytes.append(options);
+
+    return get_crc(key_bytes);
+}
+
+/******************************************************************************
+* genfile_cache::get_crc
+*
+* Returns the boost::crc_32_type checksum over all bytes of my_string.
+******************************************************************************/
+uint32_t genfile_cache::get_crc(std::string& my_string)
+{
+    boost::crc_32_type crc;
+    crc.process_bytes(my_string.data(), my_string.size());
+    return crc.checksum();
+}
diff --git a/src/core/dsp/genfile_cache.h b/src/core/dsp/genfile_cache.h
new file mode 100644
index 0000000..46b27f2
--- /dev/null
+++ b/src/core/dsp/genfile_cache.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _genfile_cache_
+#define _genfile_cache_
+
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/crc.hpp>
+
+#include <sys/stat.h>
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <stdint.h>
+#include "u_locks_pthread.h"
+#include "database.h"
+
+/*-----------------------------------------------------------------------------
+* genfile_cache
+*
+* Singleton that maps a CRC of (LLVM module bitcode + build options) to the
+* name of a previously generated output file. The mapping is stored in a
+* Database opened on "/tmp/opencl_ofdb_<user>", in a table named 'programs'
+* with columns (hash integer, value string).
+*----------------------------------------------------------------------------*/
+class genfile_cache
+{
+  public:
+    /*-------------------------------------------------------------------------
+    * lookup   : returns the remembered file name for module + options, or an
+    *            empty string when nothing usable is cached.
+    * remember : stores 'outfile' as the file name for module + options.
+    *------------------------------------------------------------------------*/
+    std::string lookup  (llvm::Module *module, std::string options);
+    void        remember(const char *outfile, llvm::Module *module,
+                         std::string options);
+
+    /*-------------------------------------------------------------------------
+    * Thread safe instance function for singleton behavior.
+    *
+    * pInstance is read once without the lock; only when it is still 0 is the
+    * mutex taken and pInstance re-checked before the object is created. The
+    * __sync_synchronize() calls are full memory barriers placed after the
+    * unlocked read and before the new pointer is published.
+    *------------------------------------------------------------------------*/
+    static genfile_cache* instance ()
+    {
+        static Mutex Cache_instance_mutex;
+        genfile_cache* tmp = pInstance;
+
+        __sync_synchronize();
+
+        if (tmp == 0)
+        {
+            ScopedLock lck(Cache_instance_mutex);
+
+            tmp = pInstance;
+            if (tmp == 0)
+            {
+                /*-------------------------------------------------------------
+                * getenv() returns a null pointer when USER is not set (e.g.
+                * under some daemons or containers). Building a std::string
+                * from a null pointer is undefined, so fall back to a fixed
+                * suffix in that case.
+                *------------------------------------------------------------*/
+                const char *user = getenv("USER");
+                if (user == 0) user = "unknown";
+
+                tmp = new genfile_cache(std::string("/tmp/opencl_ofdb_") + user);
+                __sync_synchronize();
+                pInstance = tmp;
+            }
+        }
+        return tmp;
+    }
+
+
+  private:
+    static genfile_cache* pInstance;   // the single instance, 0 until created
+    std::string           p_dbname;    // path the database was opened with
+    Database              p_database;  // handle used for all queries
+
+  private:
+    /*-------------------------------------------------------------------------
+    * Opens the database and makes sure the 'programs' table exists.
+    *------------------------------------------------------------------------*/
+    genfile_cache(std::string db_name) : p_dbname(db_name), p_database(db_name.c_str())
+    {
+        p_database.query("create table if not exists "
+                         "programs(hash integer, value string);");
+    }
+
+    uint32_t convert_mod2crc (llvm::Module *module, std::string options);
+    uint32_t get_crc         (std::string& my_string);
+
+    genfile_cache(const genfile_cache&);            // copy ctor disallowed
+    genfile_cache& operator=(const genfile_cache&); // assignment disallowed
+};
+
+#endif // _genfile_cache_
diff --git a/src/core/dsp/kernel.cpp b/src/core/dsp/kernel.cpp
new file mode 100644
index 0000000..291673a
--- /dev/null
+++ b/src/core/dsp/kernel.cpp
@@ -0,0 +1,718 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "kernel.h"
+#include "device.h"
+#include "buffer.h"
+#include "program.h"
+#include "utils.h"
+#include "u_locks_pthread.h"
+#include "mailbox.h"
+
+#include "../kernel.h"
+#include "../memobject.h"
+#include "../events.h"
+#include "../program.h"
+
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
+#include <llvm/ExecutionEngine/ExecutionEngine.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <unistd.h>
+#include <sys/mman.h>
+
+extern "C"
+{
+ #include <ti/runtime/mmap/include/mmap_resource.h>
+}
+
+
+/* Round val up to a multiple of pow2. The result is formed by masking with
+ * ~(pow2 - 1), so pow2 has to be a power of two for this to be a multiple. */
+#define ROUNDUP(val, pow2) (((val) + (pow2) - 1) & ~((pow2) - 1))
+/* Return retcode from the ENCLOSING function. msg is written to std::cerr
+ * first, but only when the TI_OCL_VERBOSE_ERROR environment variable is set
+ * (getenv is called on every use). Note the hidden 'return'. */
+#define QERR(msg, retcode) do {if (getenv("TI_OCL_VERBOSE_ERROR")) std::cerr << msg << std::endl; return retcode; } while(0)
+/* Unconditionally write x and a newline to std::cerr; execution continues. */
+#define ERR(x) std::cerr << x << std::endl
+/* Write a fixed "unknown error" message to std::cerr; execution continues. */
+#define ERROR() std::cerr << "Unknown error in dsp/kernel.cpp" << std::endl
+
+using namespace Coal;
+
+/*-----------------------------------------------------------------------------
+* DSPKernel constructor / destructor
+*
+* Stores the device and kernel pointers. The entry point starts at 0 and the
+* data page pointer at 0xffffffff; device_entry_pt() and data_page_ptr() treat
+* those two values as "not looked up yet".
+*----------------------------------------------------------------------------*/
+DSPKernel::DSPKernel(DSPDevice *device, Kernel *kernel)
+    : DeviceKernel(),
+      p_device         (device),
+      p_kernel         (kernel),
+      p_device_entry_pt((DSPDevicePtr)0),
+      p_data_page_ptr  ((DSPDevicePtr)0xffffffff)
+{ }
+
+DSPKernel::~DSPKernel() { }
+
+
+/*-----------------------------------------------------------------------------
+* k_exp: integer power, base raised to the e-th power by repeated
+* multiplication.
+*
+* Returns 1 when e == 0 (the empty product). For e >= 1 the result is
+* base * base * ... (e factors). No overflow detection is performed; the
+* arithmetic is whatever T provides.
+*----------------------------------------------------------------------------*/
+template<typename T>
+T k_exp(T base, unsigned int e)
+{
+    T rs = 1;
+    for (unsigned int i = 0; i < e; ++i) rs *= base;
+    return rs;
+}
+
+/*-----------------------------------------------------------------------------
+* This and the next function are called from multiple worker threads. They
+* may all enter the section that looks up and stores the cached value, but
+* they will all store the same value, so even though there is a race, there
+* is no race error. When work group division is pushed down to the DSP, the
+* race will go away.
+*----------------------------------------------------------------------------*/
+DSPDevicePtr DSPKernel::device_entry_pt()
+{
+    /*-------------------------------------------------------------------------
+    * Returns the device address of this kernel's function, looked up by name
+    * in the device-dependent program and cached in p_device_entry_pt. A
+    * cached value of 0 means "not looked up yet", so the lookup is repeated
+    * on each call until it yields a non-zero address.
+    *------------------------------------------------------------------------*/
+    if (!p_device_entry_pt)
+    {
+        /*---------------------------------------------------------------------
+        * First query only the size of the name, then fetch the name itself.
+        * name_length is pre-set so a failing query cannot leave it undefined.
+        *--------------------------------------------------------------------*/
+        size_t name_length = 0;
+        p_kernel->info(CL_KERNEL_FUNCTION_NAME, 0, 0, &name_length);
+
+        char *name = (name_length > 0) ? (char *)malloc(name_length) : 0;
+        if (!name)
+        {
+            /* No name buffer: report and leave the entry point unresolved */
+            ERROR();
+            return p_device_entry_pt;
+        }
+        p_kernel->info(CL_KERNEL_FUNCTION_NAME, name_length, name, 0);
+
+        Program    *p    = (Program *)p_kernel->parent();
+        DSPProgram *prog = (DSPProgram *)(p->deviceDependentProgram(p_device));
+
+        /* A program that is not loaded is only reported; the query still runs */
+        if (!prog->is_loaded()) ERROR();
+        p_device_entry_pt = prog->query_symbol(name);
+        free (name);
+    }
+    return p_device_entry_pt;
+}
+
+/******************************************************************************
+* DSPKernel::data_page_ptr()
+*
+* Returns the program's data page pointer, fetched from the device-dependent
+* program on first use and cached afterwards. Because the data page pointer
+* can frequently be 0, the "not fetched yet" marker is 0xffffffff rather
+* than 0.
+******************************************************************************/
+DSPDevicePtr DSPKernel::data_page_ptr()
+{
+    const DSPDevicePtr not_fetched = (DSPDevicePtr)0xffffffff;
+
+    if (p_data_page_ptr != not_fetched) return p_data_page_ptr;
+
+    Program    *parent   = (Program *)p_kernel->parent();
+    DSPProgram *dsp_prog = (DSPProgram *)(parent->deviceDependentProgram(p_device));
+
+    /* A program that is not loaded is only reported; the fetch still runs */
+    if (!dsp_prog->is_loaded()) ERROR();
+
+    /* Former alternative: dsp_prog->query_symbol("__TI_STATIC_BASE") */
+    p_data_page_ptr = dsp_prog->data_page_ptr();
+
+    return p_data_page_ptr;
+}
+
+/******************************************************************************
+* cl_int DSPKernel::preAllocBuffers()
+*
+* Walks the kernel arguments and calls allocate() on this device for every
+* non-null buffer argument that is not a local buffer. Returns
+* CL_MEM_OBJECT_ALLOCATION_FAILURE as soon as one allocate() call reports
+* failure, CL_SUCCESS otherwise.
+******************************************************************************/
+cl_int DSPKernel::preAllocBuffers()
+{
+    for (unsigned int idx = 0; idx < kernel()->numArgs(); ++idx)
+    {
+        const Kernel::Arg &cur = kernel()->arg(idx);
+
+        /* Only buffer arguments outside local memory are pre-allocated */
+        if (cur.kind() != Kernel::Arg::Buffer) continue;
+        if (cur.file() == Kernel::Arg::Local)  continue;
+
+        MemObject *mem = *(MemObject **)cur.data();
+        if (!mem) continue;
+
+        if (!mem->allocate(device()))
+            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+    }
+    return CL_SUCCESS;
+}
+
+
+/******************************************************************************
+* DSPKernel::guessWorkGroupSize
+*
+* Pick a work group size for one dimension when the caller supplied none.
+*
+*   - If global_work_size raised to the num_dims-th power exceeds 64, the
+*     whole global_work_size is returned (one group in this dimension).
+*     NOTE(review): this keeps LARGE ranges whole and splits only small ones;
+*     confirm that is the intended direction.
+*   - Otherwise the number of groups starts at the DSP count (or 1 when that
+*     is 0) and is decremented until it divides global_work_size evenly. If
+*     a candidate is larger than global_work_size or than 32 * DSP count, a
+*     single group is used instead.
+*
+* The 'dim' parameter is not used.
+******************************************************************************/
+size_t DSPKernel::guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
+                                     size_t global_work_size) const
+{
+    const unsigned int dsp_count = p_device->numDSPs();
+
+    if (k_exp(global_work_size, num_dims) > 64) return global_work_size;
+
+    /*-------------------------------------------------------------------------
+    * Search downward from the DSP count for a group count that divides the
+    * global size. The search always stops by the time it reaches 1.
+    *------------------------------------------------------------------------*/
+    unsigned int groups = (dsp_count == 0) ? 1 : dsp_count;
+
+    while ((global_work_size % groups) != 0)
+    {
+        const bool out_of_range = groups > global_work_size ||
+                                  groups > dsp_count * 32;
+        if (out_of_range)
+        {
+            groups = 1;   // Not parallel but has no CommandQueue overhead
+            break;
+        }
+        groups -= 1;
+    }
+
+    return global_work_size / groups;
+}
+
+/******************************************************************************
+* DSPKernel::localMemSize()
+*
+* Returns the sum of allocAtKernelRuntime() over every kernel argument that
+* is a buffer residing in local memory.
+******************************************************************************/
+cl_ulong DSPKernel::localMemSize() const
+{
+    cl_ulong local_total = 0;
+
+    for (int idx = 0; idx < kernel()->numArgs(); ++idx)
+    {
+        const Kernel::Arg &cur = kernel()->arg(idx);
+
+        if (cur.kind() != Kernel::Arg::Buffer) continue;
+        if (cur.file() != Kernel::Arg::Local)  continue;
+
+        local_total += cur.allocAtKernelRuntime();
+    }
+
+    return local_total;
+}
+
+/* Accessor: the Kernel this device kernel was created for. */
+Kernel *DSPKernel::kernel() const
+{
+    return p_kernel;
+}
+
+/* Accessor: the DSP device this device kernel was created for. */
+DSPDevice *DSPKernel::device() const
+{
+    return p_device;
+}
+
+// Smallest power of two that is >= k (returns 1 for k == 0).
+// Bit-smearing method from Wikipedia:
+// http://www.wikipedia.org/wiki/Power_of_two#Algorithm_to_round_up_to_power_of_two
+template <class T>
+T next_power_of_two(T k)
+{
+    if (k == 0) return 1;
+
+    // Work on k - 1 so that an exact power of two maps to itself.
+    T smeared = k;
+    smeared--;
+
+    // Copy the highest set bit into every lower position by OR-ing in
+    // right shifts of 1, 2, 4, ... up to the bit width of T.
+    int shift = 1;
+    while (shift < sizeof(T)*8)
+    {
+        smeared = smeared | smeared >> shift;
+        shift <<= 1;
+    }
+
+    return smeared+1;
+}
+
+/*-----------------------------------------------------------------------------
+* DSPKernel::typeOffset
+*
+* Computes where a value of type_len bytes goes in an argument area.
+*
+*   offset   : in  - first free byte position
+*              out - position just past the placed value
+*   type_len : size of the value; rounded up to a power of two and capped
+*              at 8 to obtain the alignment
+*
+* Returns the aligned position at which the value is placed.
+*
+* The alignment test used to be written 'rs & mask != rs'. Since '!=' binds
+* tighter than '&', that only ever tested the low bit, so positions were made
+* even at best. The position is now rounded up to a multiple of the alignment.
+*----------------------------------------------------------------------------*/
+size_t DSPKernel::typeOffset(size_t &offset, size_t type_len)
+{
+    // Alignment is type_len rounded up to a power of two
+    type_len = next_power_of_two(type_len);
+    if (type_len > 8) type_len = 8; // The c66 has no alignment need > 8 bytes
+
+    // type_len is a power of two, so (type_len - 1) is the low-bit mask
+    const size_t low_bits = type_len - 1;
+    const size_t rs       = (offset + low_bits) & ~low_bits;
+
+    // Where to try to place the next value
+    offset = rs + type_len;
+
+    return rs;
+}
+
+/* Source of kernel ids: each DSPKernelEvent takes the current value and
+ * increments it (plain int, no locking). */
+static int kernelID = 0;
+
+/*=============================================================================
+* DSPKernelEvent
+*
+* The constructor records the device, the event and the event's device
+* kernel, assigns the next kernel id, enables debug mode when the
+* TI_OCL_DEBUG_KERNEL environment variable is set, and then marshals the
+* kernel arguments into the message via callArgs().
+*============================================================================*/
+DSPKernelEvent::DSPKernelEvent(DSPDevice *device, KernelEvent *event)
+    : p_device         (device),
+      p_event          (event),
+      p_kernel         ((DSPKernel*)event->deviceKernel()),
+      p_kernel_id      (kernelID++),
+      p_debug_kernel   (false),
+      p_num_arg_words  (0),
+      p_WG_alloca_start(0)
+{
+    if (getenv("TI_OCL_DEBUG_KERNEL") != 0)
+        p_debug_kernel = true;
+
+    callArgs(MAX_ARG_BUF_SIZE);
+}
+
+DSPKernelEvent::~DSPKernelEvent()
+{
+}
+
+/* Non-zero when the memory object was created with the given access flag. */
+#define READ_ONLY_BUFFER(buffer)  ((buffer)->flags() & CL_MEM_READ_ONLY)
+#define WRITE_ONLY_BUFFER(buffer) ((buffer)->flags() & CL_MEM_WRITE_ONLY)
+
+/*-----------------------------------------------------------------------------
+* SETARG(val): append one word to args_in_mem[] at index arg_words, or print
+* an error when arg_words has reached args_in_mem_size. Relies on the locals
+* arg_words, args_in_mem and args_in_mem_size of the calling function.
+* Wrapped in do/while(0) so it behaves as a single statement under if/else.
+*----------------------------------------------------------------------------*/
+#define SETARG(val) do \
+    { \
+        if (arg_words < args_in_mem_size) args_in_mem[arg_words++] = (val); \
+        else std::cerr << "To many argument bytes are needed" << std::endl; \
+    } while(0)
+
+/*-----------------------------------------------------------------------------
+* SETMOREARG(sz, pval): copy sz bytes from pval into more_args_in_mem at
+* more_arg_offset rounded up to sz, then advance more_arg_offset past them.
+* When the value would not fit in p_msg.u.k.flush.buffers an error is printed
+* and NOTHING is copied, so the message buffer is never written past its end.
+* Relies on the locals more_arg_offset and more_args_in_mem of the caller.
+*----------------------------------------------------------------------------*/
+#define SETMOREARG(sz, pval) do \
+    { \
+        more_arg_offset = ROUNDUP(more_arg_offset, sz); \
+        if (ROUNDUP(more_arg_offset + sz, 8) > sizeof(p_msg.u.k.flush.buffers))\
+            std::cerr << "Too many arguments, does not fit" << std::endl; \
+        else \
+        { \
+            memcpy(more_args_in_mem+more_arg_offset, pval, sz); \
+            more_arg_offset += sz; \
+        } \
+    } while(0)
+
+//#define SETMOREARG(sz,psrc)
+
+/******************************************************************************
+* DSPKernelEvent::callArgs
+*
+* Marshals the kernel's arguments into the message p_msg.
+*
+*   - Arguments with index 0..9 are appended to p_msg.u.k.kernel.argBuf as
+*     words via SETARG: first a size word, then the value word(s). The list
+*     is closed with a single 0 word.
+*   - Arguments with index >= 10 ("more args") are copied via SETMOREARG into
+*     p_msg.u.k.flush.buffers, starting at byte offset 4.
+*   - For buffer arguments a pointer to the word that will hold the device
+*     address is remembered in p_local_bufs, p_hostptr_tmpbufs, p_64bit_bufs
+*     and/or p_flush_bufs so that the word can be patched later.
+*
+* args_in_mem_size is the limit that SETARG compares the word index against.
+* NOTE(review): the caller passes MAX_ARG_BUF_SIZE; confirm that constant is
+* a count of words and not of bytes.
+******************************************************************************/
+void DSPKernelEvent::callArgs(unsigned args_in_mem_size)
+{
+ int arg_words = 0;
+ unsigned *args_in_mem = (unsigned*)p_msg.u.k.kernel.argBuf;
+ char *more_args_in_mem = (char *)p_msg.u.k.flush.buffers;
+ // "more args" start at byte offset 4, not 0
+ int more_arg_offset = 4;
+ bool is_more_arg = false;
+
+ /*-------------------------------------------------------------------------
+ * Write Arguments
+ *------------------------------------------------------------------------*/
+ for (int i = 0; i < p_kernel->kernel()->numArgs(); ++i)
+ {
+ // the 11th and later arguments go to the "more args" area
+ is_more_arg = (i >= 10);
+
+ const Kernel::Arg & arg = p_kernel->kernel()->arg(i);
+ size_t size = arg.valueSize() * arg.vecDim();
+
+ // Unexpected sizes are only reported; processing continues regardless
+ if (size == 0) ERR("Kernel Argument has size == 0");
+ if (size != 1 && size != 2 && size != 4 && size != 8)
+ ERR("Invalid Kernel Argument size");
+
+ /*---------------------------------------------------------------------
+ * We may have to perform some changes in the values (buffers, etc)
+ *--------------------------------------------------------------------*/
+ switch (arg.kind())
+ {
+ case Kernel::Arg::Buffer:
+ {
+ MemObject *buffer = 0;
+ DSPDevicePtr buf_ptr = 0;
+ if (arg.data()) buffer = *(MemObject **)arg.data();
+ // size word for the pointer (argBuf arguments only)
+ if (!is_more_arg) SETARG(sizeof(DSPVirtPtr));
+
+ // Address of the word that the value written at the end of this
+ // case will occupy; it is remembered below so the word can be
+ // overwritten after this function has returned.
+ DSPVirtPtr *buf_dspvirtptr = (!is_more_arg) ?
+ (&args_in_mem[arg_words]) :
+ (DSPVirtPtr *)(more_args_in_mem+ROUNDUP(more_arg_offset,4));
+
+ /*-------------------------------------------------------------
+ * Alloc a buffer and pass it to the kernel
+ *------------------------------------------------------------*/
+ if (arg.file() == Kernel::Arg::Local)
+ {
+ uint32_t lbufsz = arg.allocAtKernelRuntime();
+ p_local_bufs.push_back(LocalPair(buf_dspvirtptr, lbufsz));
+
+ /*-----------------------------------------------------
+ * Since the only reader and writer of local memory (L2)
+ * will be the core itself, I do not believe we need
+ * to flush local buffers for correctness.
+ *----------------------------------------------------*/
+ //p_flush_bufs->push_back(DSPMemRange(lbuf, lbufsz));
+ }
+ else if (buffer != NULL)
+ {
+ /*---------------------------------------------------------
+ * Get the DSP buffer, allocate it and get its pointer
+ *--------------------------------------------------------*/
+ if (buffer->flags() & CL_MEM_USE_HOST_PTR)
+ {
+ // device address is left as 0 here; only the slot is recorded
+ p_hostptr_tmpbufs.push_back(
+ HostptrPair(buffer, DSPPtrPair(0, buf_dspvirtptr)));
+ }
+ else
+ {
+ DSPBuffer *dspbuf = (DSPBuffer *)buffer->deviceBuffer(p_device);
+ // NOTE(review): the result of allocate() is not checked here
+ buffer->allocate(p_device);
+ DSPDevicePtr64 addr64 = dspbuf->data();
+ // Addresses below 0xFFFFFFFF are passed as-is; anything else is
+ // queued in p_64bit_bufs and the argument word stays 0 for now
+ if (addr64 < 0xFFFFFFFF)
+ buf_ptr = addr64;
+ else
+ p_64bit_bufs.push_back(DSPMemRange(DSPPtrPair(
+ addr64, buf_dspvirtptr), buffer->size()));
+
+ // every buffer not flagged write-only is queued for flushing
+ if (! WRITE_ONLY_BUFFER(buffer))
+ p_flush_bufs.push_back(DSPMemRange(DSPPtrPair(
+ addr64, buf_dspvirtptr), buffer->size()));
+ }
+ }
+
+ /*---------------------------------------------------------
+ * Use 0 for local buffer address here, it will be overwritten
+ * with allocated local buffer address at kernel dispatch time.
+ * Same for allocating temporary buffer for use_host_ptr.
+ *--------------------------------------------------------*/
+ if (!is_more_arg) SETARG(buf_ptr);
+ else { SETMOREARG(4, &buf_ptr); }
+
+ break;
+ }
+
+ // Image arguments are reported and contribute nothing to the message
+ case Kernel::Arg::Image2D:
+ case Kernel::Arg::Image3D: ERR("Images not yet supported"); break;
+
+ /*-----------------------------------------------------------------
+ * Non-Buffers
+ *----------------------------------------------------------------*/
+ default:
+ if (!is_more_arg)
+ {
+ // size word: values smaller than 4 bytes are declared as 4
+ SETARG((size < 4 ? 4 : size));
+ // Cast to (int) to avoid a codegen bug
+ // ZEXT will happen in LLVM and ICODE, so don't worry
+ if (size == 1) SETARG(((int) *((signed char*)arg.data())));
+ else if (size == 2) SETARG(((int) *((short*)arg.data())));
+ else SETARG(*((unsigned*) arg.data()));
+ // 8-byte values take a second word
+ if (size == 8) { SETARG(*(((unsigned*)arg.data()) + 1)); }
+ }
+ else { SETMOREARG(size, arg.data()); }
+ break;
+ }
+ }
+ SETARG(0); // 0 terminator for args area
+
+ p_num_arg_words = arg_words;
+ // sizeMoreArgs is 0 when nothing was added beyond the initial offset of 4,
+ // otherwise the used size rounded up to a multiple of 8
+ p_msg.u.k.flush.sizeMoreArgs = (more_arg_offset > 4) ?
+ ROUNDUP(more_arg_offset, 8) : 0;
+}
+
+/******************************************************************************
+* debug_pause
+*
+* Prints the kernel name, DSP id, entry address and symbol file to stdout and
+* then blocks until one character can be extracted from std::cin.
+******************************************************************************/
+static void debug_pause(uint32_t entry, uint32_t dsp_id,
+                        const char* outfile, char *name)
+{
+    printf("[OCL] Launching kernel %s on DSP %d\n", name, dsp_id);
+    printf("[OCL] Connect debugger and set breakpoint at 0x%08x\n", entry);
+    printf("[OCL] Load symbols from file %s\n", outfile);
+    printf("[OCL] Press any key, then enter to continue\n");
+
+    char key_pressed;
+    std::cin >> key_pressed;
+}
+
+
+
+/******************************************************************************
+* cl_int DSPKernelEvent::run(Event::Type evtype)
+*
+* Completes the message prepared by callArgs() and mails it to the device.
+* In order:
+*   1. fills in the kernel configuration (task or ND-range),
+*   2. assigns local scratch addresses to the local buffer arguments,
+*   3. allocates global memory for per-work-group alloca data,
+*   4. allocates and fills temporary global buffers for CL_MEM_USE_HOST_PTR
+*      arguments,
+*   5. computes MPAX mappings for every address recorded in p_64bit_bufs and
+*      patches the resulting virtual addresses into the argument words,
+*   6. optionally prints debug information and pauses,
+*   7. sends the message; completion is NOT waited for.
+*
+* Returns CL_SUCCESS after the message was mailed, otherwise one of
+* CL_MEM_OBJECT_ALLOCATION_FAILURE, CL_OUT_OF_RESOURCES or
+* CL_OUT_OF_HOST_MEMORY. Buffers obtained before a failure are not released
+* here; free_tmp_bufs() is the routine that releases them.
+******************************************************************************/
+cl_int DSPKernelEvent::run(Event::Type evtype)
+{
+    Program    *p    = (Program *)p_kernel->kernel()->parent();
+    DSPProgram *prog = (DSPProgram *)(p->deviceDependentProgram(p_device));
+
+    // TODO perhaps ensure that prog is loaded.
+
+    int dim = p_event->work_dim();
+
+    /*-------------------------------------------------------------------------
+    * Create a message for the DSP
+    *------------------------------------------------------------------------*/
+    Msg_t           &msg = p_msg;
+    kernel_config_t *cfg = &msg.u.k.kernel.config;
+
+    if (evtype == Event::TaskKernel)
+    {
+        msg.command    = TASK;
+        cfg->Kernel_id = p_kernel_id;
+
+        /* global_sz_0 carries the in-order / out-of-order marker for tasks */
+        CommandQueue *q = (CommandQueue *) p_event->parent();
+        cl_command_queue_properties q_prop = 0;
+        q->info(CL_QUEUE_PROPERTIES, sizeof(q_prop), &q_prop, NULL);
+        cfg->global_sz_0 = (q_prop & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ?
+                           OUT_OF_ORDER_TASK_SIZE : IN_ORDER_TASK_SIZE;
+        cfg->local_sz_0  = 1;
+        cfg->local_sz_1  = 1;
+        cfg->local_sz_2  = 1;
+    }
+    else
+    {
+        msg.command = NDRKERNEL;
+
+        /* Unused dimensions get a size of 1 */
+        cfg->num_dims       = dim;
+        cfg->global_sz_0    = p_event->global_work_size(0);
+        cfg->global_sz_1    = dim > 1 ? p_event->global_work_size(1) : 1;
+        cfg->global_sz_2    = dim > 2 ? p_event->global_work_size(2) : 1;
+        cfg->local_sz_0     = p_event->local_work_size(0);
+        cfg->local_sz_1     = dim > 1 ? p_event->local_work_size(1) : 1;
+        cfg->local_sz_2     = dim > 2 ? p_event->local_work_size(2) : 1;
+        cfg->global_off_0   = p_event->global_work_offset(0);
+        cfg->global_off_1   = p_event->global_work_offset(1);
+        cfg->global_off_2   = p_event->global_work_offset(2);
+        cfg->WG_gid_start_0 = 0;
+        cfg->WG_gid_start_1 = 0;
+        cfg->WG_gid_start_2 = 0;
+        cfg->Kernel_id      = p_kernel_id;
+        cfg->WG_id          = 0;
+        cfg->stats          = 0;
+    }
+
+    msg.u.k.kernel.entry_point   = (unsigned)p_kernel->device_entry_pt();
+    msg.u.k.kernel.data_page_ptr = (unsigned)p_kernel->data_page_ptr();
+
+    /*-------------------------------------------------------------------------
+    * Allocating local buffer in L2 per kernel run instance
+    *------------------------------------------------------------------------*/
+    uint32_t     total_sz, block_sz;
+    DSPDevicePtr local_scratch = p_device->get_local_scratch(total_sz, block_sz);
+    for (size_t i = 0; i < p_local_bufs.size(); ++i)
+    {
+        DSPVirtPtr *p_arg_word     = p_local_bufs[i].first;
+        unsigned    local_buf_size = p_local_bufs[i].second;
+
+        /* Each local buffer takes a whole number of blocks from the scratch */
+        uint32_t rounded_sz = ROUNDUP(local_buf_size, block_sz);
+        if (rounded_sz > total_sz)
+        {
+            QERR("Total local buffer size exceeds available local size",
+                 CL_MEM_OBJECT_ALLOCATION_FAILURE);
+        }
+        *p_arg_word    = local_scratch;
+        local_scratch += rounded_sz;
+        total_sz      -= rounded_sz;
+    }
+
+    /*-------------------------------------------------------------------------
+    * Allocating temporary space in global memory for kernel alloca'ed data
+    *------------------------------------------------------------------------*/
+#define NUM_CORES_PER_CHIP 8
+    cfg->WG_alloca_size = p_kernel->kernel()->get_wi_alloca_size() *
+                          cfg->local_sz_0 * cfg->local_sz_1 * cfg->local_sz_2;
+    if (cfg->WG_alloca_size > 0)
+    {
+        cfg->WG_alloca_size += 4096; // 4K bytes padding between WGs' allocas
+        uint32_t chip_alloca_size = cfg->WG_alloca_size * NUM_CORES_PER_CHIP;
+        p_WG_alloca_start = p_device->malloc_global(   // malloc abort if fail
+                                                    chip_alloca_size, true);
+        if (!p_WG_alloca_start)
+        {
+            QERR("Alloca size exceeds available global memory",
+                 CL_OUT_OF_RESOURCES);
+        }
+
+        if (p_WG_alloca_start < 0xFFFFFFFF)
+            cfg->WG_alloca_start = (DSPVirtPtr) p_WG_alloca_start;
+        else
+            p_64bit_bufs.push_back(DSPMemRange(DSPPtrPair(
+                   p_WG_alloca_start, &cfg->WG_alloca_start), chip_alloca_size));
+    }
+
+    /*-------------------------------------------------------------------------
+    * Allocating temporary global buffer for use_host_ptr
+    *------------------------------------------------------------------------*/
+    for (int i = 0; i < p_hostptr_tmpbufs.size(); ++i)
+    {
+        MemObject      *buffer     = p_hostptr_tmpbufs[i].first;
+        DSPDevicePtr64 *p_addr64   = &p_hostptr_tmpbufs[i].second.first;
+        DSPVirtPtr     *p_arg_word = p_hostptr_tmpbufs[i].second.second;
+
+        *p_addr64 = p_device->malloc_global(buffer->size(), false);
+
+        /*---------------------------------------------------------------------
+        * Test the ADDRESS that came back, not the location it is stored in:
+        * p_addr64 itself points into the vector and is never null.
+        *--------------------------------------------------------------------*/
+        if (!*p_addr64)
+        {
+            QERR("Temporary memory for CL_MEM_USE_HOST_PTR buffer exceeds available global memory",
+                 CL_MEM_OBJECT_ALLOCATION_FAILURE);
+        }
+
+        if (*p_addr64 < 0xFFFFFFFF)
+            *p_arg_word = *p_addr64;
+        else
+            p_64bit_bufs.push_back(DSPMemRange(DSPPtrPair(
+                                     *p_addr64, p_arg_word), buffer->size()));
+
+        /* Buffers the kernel may read get the host contents copied in */
+        if (! WRITE_ONLY_BUFFER(buffer))
+        {
+            void *mapped_tmpbuf = Driver::instance()->map(*p_addr64,
+                                                        buffer->size(), false);
+            memcpy(mapped_tmpbuf, buffer->host_ptr(), buffer->size());
+            p_flush_bufs.push_back(DSPMemRange(DSPPtrPair(
+                                     *p_addr64, p_arg_word), buffer->size()));
+            Driver::instance()->unmap(mapped_tmpbuf, *p_addr64,
+                                      buffer->size(), true);
+        }
+    }
+
+    /*-------------------------------------------------------------------------
+    * Compute MPAX mappings from DSPDevicePtr64 to DSPVirtPtr in p_64bit_bufs
+    *------------------------------------------------------------------------*/
+    msg.u.k.flush.num_mpaxs = 0;
+    uint32_t num_64bit_bufs = p_64bit_bufs.size();
+    if (num_64bit_bufs > 0)
+    {
+        /*---------------------------------------------------------------------
+        * Scratch arrays for the allocator call. Vectors release themselves on
+        * every exit path, including the early return taken on failure below.
+        *--------------------------------------------------------------------*/
+        std::vector<uint64_t> phys_addrs(num_64bit_bufs);
+        std::vector<uint32_t> lengths   (num_64bit_bufs);
+        std::vector<uint32_t> prots     (num_64bit_bufs);
+        std::vector<uint32_t> virt_addrs(num_64bit_bufs);
+        for (int i = 0; i < p_64bit_bufs.size(); ++i)
+        {
+            phys_addrs[i] = p_64bit_bufs[i].first.first;
+            lengths[i]    = p_64bit_bufs[i].second;
+            prots[i]      = 0;  // don't care yet
+        }
+
+        /* Work on a copy of the device's default MPAX resources */
+        keystone_mmap_resources_t mpax_res;
+        memcpy(&mpax_res, p_device->get_mpax_default_res(),
+               sizeof(keystone_mmap_resources_t));
+        if (keystone_mmap_resource_alloc(num_64bit_bufs, &phys_addrs[0],
+                 &lengths[0], &prots[0], &virt_addrs[0], &mpax_res)
+            != KEYSTONE_MMAP_RESOURCE_NOERR)
+        {
+            QERR("MPAX allocation failed!",
+                 CL_OUT_OF_RESOURCES);
+        }
+
+        // set the MPAX settings in the message
+        // (the mapping list ends at the first entry with segsize_power2 == 0)
+        uint32_t mpax_used = 0;
+        for (; mpax_res.mapping[mpax_used].segsize_power2 > 0; mpax_used += 1)
+        {
+            msg.u.k.flush.mpax_settings[2*mpax_used  ] = (uint32_t)
+                      (mpax_res.mapping[mpax_used].raddr >> 12); // e.g. 0x822004
+            msg.u.k.flush.mpax_settings[2*mpax_used+1] =         // e.g. 0xC000000D
+                       mpax_res.mapping[mpax_used].baddr
+                    | (mpax_res.mapping[mpax_used].segsize_power2-1);
+        }
+        msg.u.k.flush.num_mpaxs = mpax_used;
+
+        // set the virtual address in arguments
+        for (int i = 0; i < p_64bit_bufs.size(); ++i)
+        {
+            *(p_64bit_bufs[i].first.second) = virt_addrs[i];
+            if (p_debug_kernel)
+                printf("Virtual = 0x%x, physical = 0x%llx\n",
+                       virt_addrs[i], p_64bit_bufs[i].first.first);
+        }
+    }
+
+    /*-------------------------------------------------------------------------
+    * Helpful information for debugging a kernel
+    *------------------------------------------------------------------------*/
+    if (p_debug_kernel)
+    {
+        for (int i = 0; i < msg.u.k.flush.num_mpaxs; i++)
+            printf("mpax %d: l=0x%x, h=0x%x\n", i,
+                   msg.u.k.flush.mpax_settings[2*i],
+                   msg.u.k.flush.mpax_settings[2*i+1]);
+
+        uint32_t *args    = msg.u.k.kernel.argBuf;
+        int       arg_num = 1;
+        // TODO: print more args properly
+        // Each argument is a size word (4 or 8) followed by its value word(s)
+        for (int i=0; i < p_num_arg_words; i++)
+        {
+            if (args[i] == 4)
+            {
+                i++;
+                printf("[OCL] Kernel argument %d = 0x%08x\n", arg_num, args[i]);
+            }
+            else if (args[i] == 8)
+            {
+                printf("[OCL] Kernel argument %d = 0x%08x 0x%08x\n",
+                       arg_num, args[i+1], args[i+2]);
+                i+=2;
+            }
+            arg_num++;
+        }
+    }
+
+    /*-------------------------------------------------------------------------
+    * Make sure we do not overflow the number of commands a mailbox can handle
+    *------------------------------------------------------------------------*/
+    if (p_flush_bufs.size() > MAX_KERNEL_ARGUMENTS)
+    {
+        QERR("To many buffers to flush", CL_OUT_OF_RESOURCES);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Populate Flush commands for any buffers that are read by the DSP
+    *------------------------------------------------------------------------*/
+    msg.u.k.flush.numBuffers = p_flush_bufs.size();
+
+#if 0  // YUAN: flush buffers used for more arguments (for now)
+    for (int i=0; i < p_flush_bufs.size(); ++i)
+    {
+        msg.u.k.flush.buffers[2*i]   = p_flush_bufs[i].first;
+        msg.u.k.flush.buffers[2*i+1] = p_flush_bufs[i].second;
+    }
+#endif
+
+    /*-------------------------------------------------------------------------
+    * Feedback to user for debug
+    *------------------------------------------------------------------------*/
+    if (p_debug_kernel)
+    {
+        size_t name_length;
+        p_kernel->kernel()->info(CL_KERNEL_FUNCTION_NAME, 0, 0, &name_length);
+        char *name = (char*)malloc(name_length);
+        if (!name) return CL_OUT_OF_HOST_MEMORY;
+        p_kernel->kernel()->info(CL_KERNEL_FUNCTION_NAME, name_length, name, 0);
+
+        debug_pause(p_kernel->device_entry_pt(), p_device->dspID(),
+                    prog->outfile_name(), name);
+        free (name);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Dispatch the commands through the mailbox
+    *------------------------------------------------------------------------*/
+    p_device->mail_to(msg);
+
+    /*-------------------------------------------------------------------------
+    * Do not wait for completion
+    *------------------------------------------------------------------------*/
+    return CL_SUCCESS;
+}
+
+/******************************************************************************
+* DSPKernelEvent::free_tmp_bufs
+*
+* Releases the global memory obtained for kernel allocas and for the
+* temporary CL_MEM_USE_HOST_PTR buffers. For every temporary buffer whose
+* memory object is not flagged read-only, the buffer contents are copied
+* back to the object's host pointer before the memory is released.
+******************************************************************************/
+void DSPKernelEvent::free_tmp_bufs()
+{
+    const bool have_alloca_mem = (p_WG_alloca_start > 0);
+    if (have_alloca_mem) p_device->free_global(p_WG_alloca_start);
+
+    for (int idx = 0; idx < p_hostptr_tmpbufs.size(); ++idx)
+    {
+        HostptrPair   &entry    = p_hostptr_tmpbufs[idx];
+        MemObject     *host_buf = entry.first;
+        DSPDevicePtr64 dev_addr = entry.second.first;
+
+        if (! READ_ONLY_BUFFER(host_buf))
+        {
+            /* Copy what the kernel produced back into the host memory */
+            void *mapped = Driver::instance()->map(dev_addr,
+                                                   host_buf->size(), true);
+            memcpy(host_buf->host_ptr(), mapped, host_buf->size());
+            Driver::instance()->unmap(mapped, dev_addr,
+                                      host_buf->size(), false);
+        }
+
+        p_device->free_global(dev_addr);
+    }
+}
+
+
diff --git a/src/core/dsp/kernel.h b/src/core/dsp/kernel.h
new file mode 100644
index 0000000..850941d
--- /dev/null
+++ b/src/core/dsp/kernel.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __DSP_KERNEL_H__
+#define __DSP_KERNEL_H__
+
+#include "../events.h"
+#include "../memobject.h"
+#include "../deviceinterface.h"
+#include "message.h"
+#include "device.h"
+#include <core/config.h>
+
+#include <vector>
+#include <string>
+#include <pthread.h>
+#include <stdint.h>
+
+/* Forward declaration only; DSPKernel::function() returns a pointer to it. */
+namespace llvm
+{
+ class Function;
+}
+
+/* (64-bit device address, pointer to the argument word that receives the
+ * address the kernel will use) */
+typedef std::pair<DSPDevicePtr64, DSPVirtPtr *> DSPPtrPair;
+/* (address/argument-word pair, size of the memory range) */
+typedef std::pair<DSPPtrPair, uint32_t> DSPMemRange;
+/* (pointer to the argument word of a local buffer, size of that buffer) */
+typedef std::pair<DSPVirtPtr *, uint32_t> LocalPair;
+/* (memory object created with CL_MEM_USE_HOST_PTR, address/argument-word
+ * pair of its temporary device buffer) */
+typedef std::pair<Coal::MemObject *, DSPPtrPair> HostptrPair;
+
+
+namespace Coal
+{
+class DSPDevice;
+class Kernel;
+class KernelEvent;
+
+/*-----------------------------------------------------------------------------
+* DSPKernel: the DSP device's view of a Kernel. Caches the kernel's entry
+* point and the program's data page pointer and answers work-group queries.
+*----------------------------------------------------------------------------*/
+class DSPKernel : public DeviceKernel
+{
+ public:
+ DSPKernel(DSPDevice *device, Kernel *kernel);
+ ~DSPKernel();
+
+ // Fixed answers for the work-group queries
+ size_t workGroupSize() const { return 128; }
+ // Sum of the runtime-allocated sizes of all local buffer arguments
+ cl_ulong localMemSize() const ;
+ cl_ulong privateMemSize() const { return 0; }
+ size_t preferredWorkGroupSizeMultiple() const { return 0; }
+
+ // Chooses a work group size for one dimension; 'dim' is not used by the
+ // implementation in dsp/kernel.cpp
+ size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
+ size_t global_work_size) const;
+ // Looked up on first use and cached afterwards
+ DSPDevicePtr device_entry_pt();
+ DSPDevicePtr data_page_ptr();
+ // Calls allocate() for every non-local buffer argument
+ cl_int preAllocBuffers();
+
+ Kernel * kernel() const;
+ DSPDevice * device() const;
+
+ // NOTE(review): no definition of function() appears in dsp/kernel.cpp;
+ // confirm it is defined elsewhere or unused.
+ llvm::Function *function() const;
+ // Aligns 'offset' for a value of type_len bytes, returns the aligned
+ // position and advances 'offset' past the value
+ static size_t typeOffset(size_t &offset, size_t type_len);
+
+ private:
+ DSPDevice * p_device;
+ Kernel * p_kernel;
+ DSPDevicePtr p_device_entry_pt; // 0 until looked up
+ DSPDevicePtr p_data_page_ptr; // 0xffffffff until looked up
+};
+
+/*-----------------------------------------------------------------------------
+* DSPKernelEvent: one launch of a kernel on a DSP device. The constructor
+* marshals the arguments into p_msg; run() completes and mails the message;
+* free_tmp_bufs() releases the temporary device memory taken by run().
+*----------------------------------------------------------------------------*/
+class DSPKernelEvent
+{
+ public:
+ DSPKernelEvent (DSPDevice *device, KernelEvent *event);
+ ~DSPKernelEvent ();
+
+ cl_int run (Event::Type evtype);
+ // rs_size: limit on the number of words written to the argument buffer
+ void callArgs (unsigned rs_size);
+
+ DSPDevice* device() { return p_device; }
+ uint32_t kernel_id() { return p_kernel_id; }
+
+ void free_tmp_bufs();
+
+ private:
+ // The constructor's initializer list follows this declaration order.
+ DSPDevice * p_device;
+ KernelEvent * p_event;
+ DSPKernel * p_kernel;
+ uint32_t p_kernel_id; // taken from a file-level counter
+ bool p_debug_kernel; // set when TI_OCL_DEBUG_KERNEL is in the environment
+ int p_num_arg_words; // words written to the argument buffer
+ Msg_t p_msg; // message mailed to the device by run()
+ DSPDevicePtr64 p_WG_alloca_start; // global memory for kernel allocas, 0 if none
+ std::vector<DSPMemRange> p_flush_bufs; // buffers not flagged write-only
+ std::vector<LocalPair> p_local_bufs; // local buffer args awaiting an address
+ std::vector<HostptrPair> p_hostptr_tmpbufs; // CL_MEM_USE_HOST_PTR args
+ std::vector<DSPMemRange> p_64bit_bufs; // addresses that need an MPAX mapping
+};
+}
+#endif
diff --git a/src/core/dsp/mailbox.h b/src/core/dsp/mailbox.h
new file mode 100644
index 0000000..f87c08c
--- /dev/null
+++ b/src/core/dsp/mailbox.h
@@ -0,0 +1,114 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _MAILBOX_H_
+#define _MAILBOX_H_
+#include "u_locks_pthread.h"
+#include "driver.h"
+
+extern "C"
+{
+ #include "mpm_mailbox.h"
+}
+
+/*-----------------------------------------------------------------------------
+* Singleton wrapper around the mpm_mailbox C API. Each member forwards its
+* arguments to the mpm_mailbox_* function of the same name; the class itself
+* holds no per-mailbox state (callers pass the mailbox handle on every call).
+*----------------------------------------------------------------------------*/
+class Mailbox
+{
+ public:
+
+ /* Forwards to mpm_mailbox_create() and returns its result unchanged. */
+ int32_t create(void* mbox_handle, char *slave_node_name,
+ uint32_t mem_location, uint32_t direction,
+ mpm_mailbox_config_t *mbox_config)
+ {
+ int32_t result = mpm_mailbox_create(mbox_handle, slave_node_name,
+ mem_location, direction, mbox_config);
+ return result;
+ }
+
+ /* Forwards to mpm_mailbox_open() and returns its result unchanged. */
+ int32_t open(void* mbox_handle)
+ {
+ int32_t result = mpm_mailbox_open(mbox_handle);
+ return result;
+ }
+
+ /*-------------------------------------------------------------------------
+ * Calls mpm_mailbox_write() repeatedly for as long as it reports
+ * MPM_MAILBOX_ERR_MAIL_BOX_FULL (a busy-wait with no back-off).
+ * NOTE(review): the function returns true (1) unconditionally, so any
+ * other result code from mpm_mailbox_write() is discarded -- confirm that
+ * no caller needs to see write failures.
+ *------------------------------------------------------------------------*/
+ int32_t write (void* mbox_handle, uint8_t *buf, uint32_t size,
+ uint32_t trans_id)
+ {
+ int result;
+
+ do result = mpm_mailbox_write (mbox_handle, buf, size, trans_id);
+ while (result == MPM_MAILBOX_ERR_MAIL_BOX_FULL);
+
+ return true;
+ }
+
+ /* Forwards to mpm_mailbox_read() and returns its result unchanged. */
+ int32_t read (void* mbox_handle, uint8_t *buf, uint32_t *size,
+ uint32_t *trans_id)
+ {
+ int32_t result = mpm_mailbox_read (mbox_handle, buf, size, trans_id);
+ return result;
+ }
+
+ /* Forwards to mpm_mailbox_query() and returns its result unchanged. */
+ int32_t query (void* mbox_handle)
+ {
+ int32_t result = mpm_mailbox_query (mbox_handle);
+ return result;
+ }
+
+ /*-------------------------------------------------------------------------
+ * Thread safe instance function for singleton behavior
+ *
+ * Double-checked locking: pInstance is read once without the lock; only
+ * when it is null is the function-local mutex taken and the pointer
+ * re-checked before a Mailbox is allocated. __sync_synchronize() issues a
+ * full memory barrier after the unlocked read and again between
+ * constructing the object and publishing it through pInstance.
+ * The instance is never deleted in this header.
+ *------------------------------------------------------------------------*/
+ static Mailbox* instance ()
+ {
+ static Mutex Mailbox_instance_mutex;
+ Mailbox* tmp = pInstance;
+
+ __sync_synchronize();
+
+ if (tmp == 0)
+ {
+ ScopedLock lck(Mailbox_instance_mutex);
+
+ tmp = pInstance;
+ if (tmp == 0)
+ {
+ tmp = new Mailbox;
+ __sync_synchronize();
+ pInstance = tmp;
+ }
+ }
+ return tmp;
+ }
+
+ private:
+ /* Declared here only; the definition must be provided in a source file. */
+ static Mailbox* pInstance;
+
+ Mailbox() { } // ctor private
+ Mailbox(const Mailbox&); // copy ctor disallowed
+ Mailbox& operator=(const Mailbox&); // assignment disallowed
+};
+
+#endif // _MAILBOX_H_
diff --git a/src/core/dsp/memmap.h b/src/core/dsp/memmap.h
new file mode 100644
index 0000000..503540e
--- /dev/null
+++ b/src/core/dsp/memmap.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#define ERR(status, msg) if (status) { printf("ERROR: %s\n", msg); exit(-1); }
+
+class DSP_MappedMem
+{
+ public:
+ DSP_MappedMem(uint32_t dsp_id, uint32_t size)
+ : p_size(size), p_dsp_id(dsp_id), p_dsp_addr(0)
+ p_num_buffers(CEIL_DIVIDE(size, HOST_CMEM_BUFFER_SIZE))
+ {
+ p_buffers = new [p_num_buffers] cmem_host_buf_desc_t;
+ ERR(!p_buffers, "Cannot allocate host memory for a DSP Mapped Region");
+
+ int status
+ for (int i = 0; i< num_buffers; i++)
+ {
+ status = bufmgrAlloc(DmaBufPool, 1, &p_buffers[i]);
+ ERR(status, "Cannot allocate CMEM pool for a DSP Mapped Region");
+ }
+
+ /*---------------------------------------------------------------------
+ * Allocate DSP range
+ *--------------------------------------------------------------------*/
+ status = pciedrv_dsp_memrange_alloc(dsp_id, size, p_dsp_addr);
+ ERR(status, "PCIe driver dsp memrange alloc failed");
+
+ /*---------------------------------------------------------------------
+ * Map Input buffers to dsp range
+ *--------------------------------------------------------------------*/
+ status = pciedrv_map_bufs_to_dsp_memrange(dsp_id, num_buffers,
+ p_buffers, (uint32_t) p_dsp_addr);
+ ERR(status, "PCIe driver dsp map bufs to memrange failed");
+ }
+
+ ~DSP_MappedMem()
+ {
+ /*---------------------------------------------------------------------
+ * Free DSP range
+ *--------------------------------------------------------------------*/
+ int status = pciedrv_dsp_memrange_free(dsp_id, size, p_dsp_addr);
+ ERR(status, "PCIe driver dsp memrange free failed");
+
+ for (int i = 0; i< num_buffers; i++)
+ {
+ status = bufmgrFreeDesc(DmaBufPool, &p_buffers[i]);
+ ERR(status, "Cannot free CMEM pool for a DSP Mapped Region");
+ }
+
+ delete [p_num_buffers] p_buffers;
+ }
+
+ void copy_in(void* p, uint32_t size)
+ {
+ ERR(size > p_size, "DSP Mapped region input overflow");
+
+ uint32_t remaining_size = size;
+ uint32_t offset = 0;
+
+ for (int i = 0; remaining_size; i++)
+ {
+ int chunk_size = std::min(remaining_size, p_buffers[i].length);
+
+ memcpy(p_buffers[i].user_addr, p + offset, chunk_size);
+
+ remaining_size -= chunk_size;
+ offset += chunk_size;
+ }
+ }
+
+ void copy_out(void* p, uint32_t size)
+ {
+ ERR(size > p_size, "DSP Mapped region output underrflow");
+
+ uint32_t remaining_size = size;
+ uint32_t offset = 0;
+
+ for (int i = 0; remaining_size; i++)
+ {
+ int chunk_size = std::min(remaining_size, p_buffers[i].length);
+
+ memcpy(p + offset, p_buffers[i].user_addr, chunk_size);
+
+ remaining_size -= chunk_size;
+ offset += chunk_size;
+ }
+ }
+
+ private:
+ uint32_t p_size;
+ uint32_t p_dsp_id;
+ uint32_t p_dsp_addr;
+ uint32_t p_num_buffers;
+ cmem_host_buf_desc_t *p_buffers;
+};
diff --git a/src/core/dsp/message.h b/src/core/dsp/message.h
new file mode 100644
index 0000000..d93fe1e
--- /dev/null
+++ b/src/core/dsp/message.h
@@ -0,0 +1,115 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __MESSAGE_H_
+#define __MESSAGE_H_
+
+#include <stdint.h>
+
+/* Values stored in Msg_t::command. Enumerators are numbered implicitly from  */
+/* 0 (READY) to 9 (PRINT), so their order must not change while host and     */
+/* device are built from different copies of this header.                    */
+typedef enum { READY, EXIT, TASK, NDRKERNEL, WORKGROUP, CACHEINV, FREQUENCY, SUCCESS, ERROR, PRINT } command_codes;
+
+/* Limits that size the argBuf[] and buffers[] arrays in the message structs. */
+/* Each expansion is fully parenthesized so it stays intact inside a larger   */
+/* expression (e.g. 2*MAX_ARG_BUF_SIZE). The values themselves are unchanged: */
+/* 10, 31 and 20.                                                             */
+#define MAX_KERNEL_ARGUMENTS 10
+#define MAX_ARG_BUF_SIZE ((MAX_KERNEL_ARGUMENTS*3)+1)
+#define MAX_FLUSH_BUF_SIZE (MAX_KERNEL_ARGUMENTS*2)
+
+#define MAX_XMCSES_MPAXS 7
+#define FIRST_FREE_XMC_MPAX 3 // XMC MPAXs available: 3 - F
+#define FIRST_FREE_SES_MPAX 1 // SES MPAXs available: 1 - 7
+
+/******************************************************************************
+* Need to ensure that the alignments and therefore the offsets of all fields
+* are consistent between the host and the device.
+******************************************************************************/
+/* Kernel launch configuration: 18 consecutive uint32_t fields. The _0/_1/_2  */
+/* suffixes give one value per dimension. Field order and width are part of  */
+/* the host/device message format, so do not reorder or resize them.         */
+typedef struct
+{
+ uint32_t num_dims;
+
+ uint32_t global_sz_0;
+ uint32_t global_sz_1;
+ uint32_t global_sz_2;
+ uint32_t local_sz_0;
+ uint32_t local_sz_1;
+ uint32_t local_sz_2;
+ uint32_t global_off_0;
+ uint32_t global_off_1;
+ uint32_t global_off_2;
+ uint32_t WG_gid_start_0;
+ uint32_t WG_gid_start_1;
+ uint32_t WG_gid_start_2;
+ uint32_t Kernel_id;
+ uint32_t WG_id;
+ uint32_t stats;
+ /* presumably the start address and size of the work-group alloca area -- */
+ /* confirm against the code that fills this struct */
+ uint32_t WG_alloca_start;
+ uint32_t WG_alloca_size;
+} kernel_config_t;
+
+/* Flush portion of a kernel message: two 8-bit counts and a 16-bit size,     */
+/* followed by a fixed array of MAX_FLUSH_BUF_SIZE buffer words and           */
+/* 2*MAX_XMCSES_MPAXS MPAX register words.                                    */
+typedef struct
+{
+ uint8_t numBuffers;
+ uint8_t num_mpaxs; // TODO: XMC only mpax for kernel alloca memory
+ uint16_t sizeMoreArgs;
+ uint32_t buffers[MAX_FLUSH_BUF_SIZE];
+ uint32_t mpax_settings[2*MAX_XMCSES_MPAXS]; // (MPAXL, MPAXH) pair
+} flush_msg_t;
+
+/* Kernel portion of a message: the launch configuration, two 32-bit words    */
+/* named entry_point and data_page_ptr, and the argument buffer of            */
+/* MAX_ARG_BUF_SIZE 32-bit words.                                             */
+typedef struct
+{
+ kernel_config_t config;
+ uint32_t entry_point;
+ uint32_t data_page_ptr;
+ uint32_t argBuf[MAX_ARG_BUF_SIZE]; // NULL size terminated
+} kernel_msg_t;
+
+/* The message type: a command code followed by a payload union. The payload  */
+/* is either the structured kernel+flush pair (u.k) or a raw byte view        */
+/* (u.message). The byte array is sized as sizeof(kernel_msg_t) +             */
+/* sizeof(flush_msg_t).                                                       */
+typedef struct
+{
+ command_codes command;
+ union
+ {
+ struct
+ {
+ kernel_msg_t kernel;
+ flush_msg_t flush;
+ } k;
+ char message[sizeof(kernel_msg_t) + sizeof(flush_msg_t)];
+ } u;
+} Msg_t;
+
+/* Pre-built messages that carry only a command code; the payload is          */
+/* zero-initialized by the aggregate initializer. They are 'static' objects   */
+/* defined in a header, so every translation unit that includes this file     */
+/* gets its own private, writable copy.                                       */
+static Msg_t exitMsg = {EXIT};
+static Msg_t successMsg = {SUCCESS};
+static Msg_t readyMsg = {READY};
+static Msg_t errorMsg = {ERROR};
+static Msg_t frequencyMsg = {FREQUENCY};
+// static far Msg_t printMsg = {PRINT}; // moved to L2 in monitor
+
+/* Size in bytes of one complete Msg_t. */
+static const uint32_t mbox_payload = sizeof(Msg_t);
+
+#define MBOX_SIZE 0x2000
+
+#define IN_ORDER_TASK_SIZE 1
+#define OUT_OF_ORDER_TASK_SIZE (IN_ORDER_TASK_SIZE+1)
+
+#endif
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.c b/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.c
new file mode 100644
index 0000000..545ba92
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.c
@@ -0,0 +1,200 @@
+/*
+* c60_dynamic.c
+*
+* C6x-specific dynamic loader functionality
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifdef C60_TARGET
+#include "c60_elf32.h"
+#include <inttypes.h>
+#include "dload.h"
+
+/*****************************************************************************/
+/* DLDYN_c60_process_dynamic_tag() */
+/* */
+/* Process C6x specific dynamic tags. */
+/*****************************************************************************/
+BOOL DLDYN_c60_process_dynamic_tag(DLIMP_Dynamic_Module* dyn_module, int i)
+{
+   /*------------------------------------------------------------------------*/
+   /* Record the C6x-specific dynamic tag at index i in the module object.   */
+   /* Returns TRUE when the tag is one of the five handled below, and FALSE  */
+   /* for any other tag.                                                     */
+   /*------------------------------------------------------------------------*/
+   switch (dyn_module->dyntab[i].d_tag)
+   {
+      /*---------------------------------------------------------------------*/
+      /* DSBT base: the tag value is an address that gets relocated once     */
+      /* sections are allocated, so only the tag's index is remembered here. */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_DSBT_BASE:
+         dyn_module->dsbt_base_tagidx = i;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* DSBT index: a specific index requested by the module. References    */
+      /* to it carry no relocation entries, so the load fails if the module  */
+      /* does not get the index it asked for.                                */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_DSBT_INDEX:
+         dyn_module->dsbt_index = dyn_module->dyntab[i].d_un.d_val;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* DSBT size: size of the DSBT allocated for this module; it must be   */
+      /* large enough to hold the content of the master DSBT.                */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_DSBT_SIZE:
+         dyn_module->dsbt_size = dyn_module->dyntab[i].d_un.d_val;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Global symbol offset: where the global symbols start within the     */
+      /* dynamic symbol table (locals come first).                           */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_GSYM_OFFSET:
+         dyn_module->gsymtab_offset = dyn_module->dyntab[i].d_un.d_val;
+#if LOADER_DEBUG
+         if (debugging_on)
+            DLIF_trace("Found global symbol table: %d\n",
+                       dyn_module->gsymtab_offset);
+#endif
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Global string offset: where the global symbol names start within    */
+      /* the dynamic string table.                                           */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_GSTR_OFFSET:
+         dyn_module->gstrtab_offset = dyn_module->dyntab[i].d_un.d_val;
+#if LOADER_DEBUG
+         if (debugging_on)
+            DLIF_trace("Found global string table: %d\n",
+                       dyn_module->gstrtab_offset);
+#endif
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Not a tag this target handles.                                      */
+      /*---------------------------------------------------------------------*/
+      default:
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* DLDYN_c60_relocate_dynamic_tag_info() */
+/* */
+/* Update any target specific dynamic tag values that are associated with */
+/* a section address. Return TRUE if the tag value is successfully */
+/* updated or if the tag is not associated with a section address, and */
+/* FALSE if we can't find the section associated with the tag or if the */
+/* tag type is not recognized. */
+/* */
+/*****************************************************************************/
+BOOL DLDYN_c60_relocate_dynamic_tag_info(DLIMP_Dynamic_Module *dyn_module,
+                                         int32_t i)
+{
+   switch (dyn_module->dyntab[i].d_tag)
+   {
+      /*---------------------------------------------------------------------*/
+      /* The DSBT base tag holds the address of the .dsbt section, so it is  */
+      /* the only C6x tag that must be rewritten with the section's run      */
+      /* address. The shared helper does the lookup and the update.          */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_DSBT_BASE:
+         return DLIMP_update_dyntag_section_address(dyn_module, i);
+
+      /*---------------------------------------------------------------------*/
+      /* Plain values, not section addresses: nothing to relocate.           */
+      /*---------------------------------------------------------------------*/
+      case DT_C6000_DSBT_SIZE:
+      case DT_C6000_DSBT_INDEX:
+      case DT_C6000_GSTR_OFFSET:
+      case DT_C6000_GSYM_OFFSET:
+         return TRUE;
+
+      default:
+         break;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Any other tag is reported as an error.                                 */
+   /*------------------------------------------------------------------------*/
+   DLIF_error(DLET_MISC, "Invalid dynamic tag encountered, %d\n",
+              (int)dyn_module->dyntab[i].d_tag);
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLDYN_c60_process_eiosabi() */
+/* */
+/* Process the EI_OSABI value. Verify that the OSABI is supported and set */
+/* any variables which depend on the OSABI. */
+/*****************************************************************************/
+BOOL DLDYN_c60_process_eiosabi(DLIMP_Dynamic_Module* dyn_module)
+{
+   const uint8_t abi = dyn_module->fhdr.e_ident[EI_OSABI];
+
+   /*------------------------------------------------------------------------*/
+   /* Relocatable modules must carry the C6x bare-metal OSABI                */
+   /* (ELFOSABI_C6000_ELFABI). The C6x Linux OSABI (ELFOSABI_C6000_LINUX)    */
+   /* is presently unsupported and is therefore rejected as well.            */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->relocatable)
+      return (abi == ELFOSABI_C6000_ELFABI) ? TRUE : FALSE;
+
+   /*------------------------------------------------------------------------*/
+   /* Static executables should have an OSABI of NONE.                       */
+   /*------------------------------------------------------------------------*/
+   return (abi == ELFOSABI_NONE) ? TRUE : FALSE;
+}
+
+#endif
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.h b/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.h
new file mode 100644
index 0000000..da99604
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_dynamic.h
@@ -0,0 +1,53 @@
+/*
+* c60_dynamic.h
+*
+* Interface into C6x-specific dynamic loader functionality
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef DLOAD_C60_H
+#define DLOAD_C60_H
+
+#include "dload.h"
+
+/* C6x hooks for the dynamic loader. All three return BOOL (TRUE/FALSE);     */
+/* 'i' is an index into dyn_module->dyntab.                                  */
+/* Record a C6x-specific dynamic tag; FALSE if the tag is not a handled one. */
+BOOL DLDYN_c60_process_dynamic_tag(DLIMP_Dynamic_Module* dyn_module, int i);
+/* Check that the file header's EI_OSABI value is one the loader accepts.    */
+BOOL DLDYN_c60_process_eiosabi(DLIMP_Dynamic_Module* dyn_module);
+/* Update a tag whose value is a section address; other C6x tags pass as-is. */
+BOOL DLDYN_c60_relocate_dynamic_tag_info(DLIMP_Dynamic_Module *dyn_module, int32_t i);
+
+/* Target size constants. Presumably int width, char width, addressable-unit */
+/* width and pointer width, all in bits -- TODO confirm against their users. */
+#define T_INTSZ 32
+#define T_CHARSZ 8
+#define MEM_INC 8
+#define PTR_SZ 32
+
+#endif
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_elf32.h b/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_elf32.h
new file mode 100644
index 0000000..418db17
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_DYN/c60_elf32.h
@@ -0,0 +1,160 @@
+/*
+* c60_elf32.h
+*
+* C6x-specific data structures for 32-bit ELF object format files.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef C60_ELF32_H
+#define C60_ELF32_H
+
+#include "elf32.h"
+
+/*---------------------------------------------------------------------------*/
+/* C6x specific EI_OSABI values */
+/*---------------------------------------------------------------------------*/
+/* Of these two values, DLDYN_c60_process_eiosabi() accepts only             */
+/* ELFOSABI_C6000_ELFABI, and only for relocatable modules.                  */
+enum
+{
+ ELFOSABI_C6000_ELFABI = 64, /* C6X Baremetal OSABI */
+ ELFOSABI_C6000_LINUX = 65 /* C6X Linux OSABI */
+};
+
+/*---------------------------------------------------------------------------*/
+/* File Header Flags (value of "e_flags") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ EF_C6000_REL = 0x01 /* Contains static relocations. An ET_EXEC or */
+ /* ET_DYN file with this flag set can be */
+ /* treated as ET_REL during static linking. */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Segment Types (value of "p_type") */
+/*---------------------------------------------------------------------------*/
+/* The only C6x-specific segment type. Its description matches that of the   */
+/* SHT_TI_PHATTRS section type declared further down in this header.         */
+enum
+{
+ PT_C6000_PHATTRS = 0x70000000 /* Extended Program Header Attributes*/
+};
+
+/*---------------------------------------------------------------------------*/
+/* C6x specific section types */
+/*---------------------------------------------------------------------------*/
+enum
+{
+
+ /*------------------------------------------------------------------------*/
+ /* Section types defined by the C6x ELFABI. */
+ /* Note: ABI defined section type should be named SHT_C6000_xxx */
+ /*------------------------------------------------------------------------*/
+ SHT_C6000_UNWIND = 0x70000001, /* Exception Index Table */
+ SHT_C6000_PREEMPTMAP = 0x70000002, /* Pre-emption Map */
+
+ SHT_C6000_ATTRIBUTES = 0x70000003, /* Obj File Compatibility Attributes */
+
+ /*------------------------------------------------------------------------*/
+ /* The following section types are not part of C6x ABI. As per the ABI, */
+ /* the processor specific values not defined in the ABI are reserved for */
+ /* future use. Here we reserve the range 0x7F000000 through 0x7FFFFFFF */
+ /* for the TI specific processor section types. */
+ /* Note: TI specific section type should be named SHT_TI_xxx */
+ /*------------------------------------------------------------------------*/
+ SHT_TI_ICODE = 0x7F000000, /* ICODE representation */
+ SHT_TI_XREF = 0x7F000001, /* Symbol cross reference */
+ SHT_TI_HANDLER = 0x7F000002, /* Handler function table */
+ SHT_TI_INITINFO = 0x7F000003, /* Info for C auto-init of variables */
+ SHT_TI_PHATTRS = 0x7F000004 /* Extended Program Header Attributes*/
+};
+
+/*****************************************************************************/
+/* C6x-Specific Dynamic Array Tags (C6x ELF ABI Section ??? - AEGUPD) */
+/* NOTE: */
+/* As per the GABI, a tag whose value is an even number indicates a dynamic */
+/* tag that uses d_ptr. An odd number indicates a tag that uses d_val, or */
+/* that uses neither d_val nor d_ptr. */
+/*****************************************************************************/
+enum
+{
+ /*------------------------------------------------------------------------*/
+ /* OSABI specific tags: */
+ /* From 0x6000000D thru 0x6FFFF000 */
+ /*------------------------------------------------------------------------*/
+ DT_C6000_GSYM_OFFSET = 0x6000000D, /* d_val -- OSABI Specific -- */
+ DT_C6000_GSTR_OFFSET = 0x6000000F, /* d_val -- OSABI Specific -- */
+
+ /*------------------------------------------------------------------------*/
+ /* Processor specific tags: */
+ /* From 0x70000000 thru 0x7FFFFFFF */
+ /* Note: DT_C6000_PREEMPTMAP has no case in the DLDYN_c60_* tag handlers */
+ /* in c60_dynamic.c; the other three processor specific tags do. */
+ /*------------------------------------------------------------------------*/
+ DT_C6000_DSBT_BASE = 0x70000000, /* d_ptr -- Platform Specific -- */
+ DT_C6000_DSBT_SIZE = 0x70000001, /* d_val -- Platform Specific -- */
+ DT_C6000_PREEMPTMAP = 0x70000002, /* d_ptr -- Platform Specific -- */
+ DT_C6000_DSBT_INDEX = 0x70000003 /* d_val -- Platform Specific -- */
+};
+
+/*---------------------------------------------------------------------------*/
+/* C6x Dynamic Relocation Types */
+/*---------------------------------------------------------------------------*/
+/* Relocation type codes, numbered contiguously from 0 (R_C6000_NONE) to 26  */
+/* (R_C6000_COPY). The numeric values are spelled out explicitly and must    */
+/* not be renumbered.                                                        */
+typedef enum
+{
+ R_C6000_NONE = 0,
+ R_C6000_ABS32 = 1,
+ R_C6000_ABS16 = 2,
+ R_C6000_ABS8 = 3,
+ R_C6000_PCR_S21 = 4,
+ R_C6000_PCR_S12 = 5,
+ R_C6000_PCR_S10 = 6,
+ R_C6000_PCR_S7 = 7,
+ R_C6000_ABS_S16 = 8,
+ R_C6000_ABS_L16 = 9,
+ R_C6000_ABS_H16 = 10,
+ R_C6000_SBR_U15_B = 11,
+ R_C6000_SBR_U15_H = 12,
+ R_C6000_SBR_U15_W = 13,
+ R_C6000_SBR_S16 = 14,
+ R_C6000_SBR_L16_B = 15,
+ R_C6000_SBR_L16_H = 16,
+ R_C6000_SBR_L16_W = 17,
+ R_C6000_SBR_H16_B = 18,
+ R_C6000_SBR_H16_H = 19,
+ R_C6000_SBR_H16_W = 20,
+ R_C6000_SBR_GOT_U15_W = 21,
+ R_C6000_SBR_GOT_L16_W = 22,
+ R_C6000_SBR_GOT_H16_W = 23,
+ R_C6000_DSBT_INDEX = 24,
+ R_C6000_PREL31 = 25,
+ R_C6000_COPY = 26
+}C60_RELOC_TYPE;
+
+#endif /* C60_ELF32_H */
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.c b/src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.c
new file mode 100644
index 0000000..3c79e35
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.c
@@ -0,0 +1,1101 @@
+/*
+* c60_reloc.c
+*
+* Process C6x-specific dynamic relocations for core dynamic loader.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <limits.h>
+#include "relocate.h"
+#include "symtab.h"
+#include "c60_elf32.h"
+#include "dload_api.h"
+#include "util.h"
+#include "dload_endian.h"
+#include "c60_reloc.h"
+
+/*---------------------------------------------------------------------------*/
+/* MASK(n,s) - Mask of n contiguous one-bits whose lowest bit is bit s.      */
+/*                                                                           */
+/* The mask is built from an unsigned 1 so that a 31-bit wide field can be   */
+/* described without overflowing a signed int, and both arguments are        */
+/* parenthesized so that expressions may be passed safely.                   */
+/*---------------------------------------------------------------------------*/
+#define MASK(n,s) (((1u << (n)) - 1u) << (s))
+
+/*---------------------------------------------------------------------------*/
+/* C6x Relocations Supported */
+/* */
+/* See the C6000 ELF ABI Specification for more details. */
+/* */
+/* R_C6000_ABS32 | .field X,32 */
+/* R_C6000_ABS16 | .field X,16 */
+/* R_C6000_ABS8 | .field X,8 */
+/* R_C6000_PCR_S21 | B foo */
+/* CALLP foo, B3 */
+/* R_C6000_PCR_S12 | BNOP foo */
+/* R_C6000_PCR_S10 | BPOS foo, A10 */
+/* BDEC foo, A1 */
+/* R_C6000_PCR_S7 | ADDKPC foo, B3, 4 */
+/* R_C6000_ABS_S16 | MVK sym, A0 */
+/* R_C6000_ABS_L16 | MVKL sym, A0 */
+/* MVKLH sym, A0 */
+/* R_C6000_ABS_H16 | MVKH sym, A0 */
+/* R_C6000_SBR_U15_B | LDB *+B14(sym), A1 */
+/* ADDAB B14, sym, A1 */
+/* R_C6000_SBR_U15_H | LDH *+B14(sym), A1 */
+/* ADDAH B14, sym, A1 */
+/* R_C6000_SBR_U15_W | LDW *+B14(sym), A1 */
+/* ADDAW B14, sym, A1 */
+/* R_C6000_SBR_S16 | MVK sym-$bss, A0 */
+/* R_C6000_SBR_L16_B | MVKL (sym-$bss), A0 */
+/* R_C6000_SBR_L16_H | MVKL (sym-$bss)/2,A0 */
+/* R_C6000_SBR_L16_W | MVKL (sym-$bss)/4,A0 */
+/* R_C6000_SBR_H16_B | MVKH (sym-$bss), A0 */
+/* R_C6000_SBR_H16_H | MVKH (sym-$bss)/2,A0 */
+/* R_C6000_SBR_H16_W | MVKH (sym-$bss)/4,A0 */
+/* R_C6000_SBR_GOT_U15_W | LDW *+B14[GOT(sym)],A0 */
+/* R_C6000_SBR_GOT_L16_W | MVKL $DPR_GOT(sym), A0 */
+/* R_C6000_SBR_GOT_H16_W | MVKH $DPR_GOT(sym), A0 */
+/* R_C6000_DSBT_INDEX | LDW *+B14[$DSBT_index()], DP */
+/* R_C6000_PREL31 | 31-bit place-relative offset in a data word */
+/* */
+/*---------------------------------------------------------------------------*/
+
+/*****************************************************************************/
+/* WRITE_RELOC_R() - Perform a relocation into a buffered segment.           */
+/*                                                                           */
+/*    buffered_segment - host copy of the segment being relocated            */
+/*    segment_offset   - byte offset of the relocation field in that copy    */
+/*    r_type           - C60_RELOC_TYPE value selecting the field layout     */
+/*    r                - value to store; it is expected to be already packed */
+/*                       and masked to the field width, because it is OR-ed  */
+/*                       into the field without further masking              */
+/*                                                                           */
+/* For instruction fields only the bits of the relocation field are replaced */
+/* and all other bits of the 32-bit word are preserved.  Unsupported types   */
+/* are reported through DLIF_error() and nothing is stored.                  */
+/*****************************************************************************/
+static void write_reloc_r(uint8_t* buffered_segment,
+                          uint32_t segment_offset,
+                          int r_type, uint32_t r)
+{
+   uint32_t* rel_field_ptr = (uint32_t*)(buffered_segment + segment_offset);
+
+#if LOADER_DEBUG
+   /*------------------------------------------------------------------------*/
+   /* Print some details about the relocation we are about to process.       */
+   /*------------------------------------------------------------------------*/
+   if(debugging_on)
+   {
+      DLIF_trace("RWRT: segment_offset: %d\n", segment_offset);
+      DLIF_trace("RWRT: buffered_segment: 0x%x\n",
+                 (uint32_t)buffered_segment);
+      DLIF_trace("RWRT: rel_field_ptr: 0x%x\n", (uint32_t)rel_field_ptr);
+      DLIF_trace("RWRT: result: 0x%x\n", r);
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Given the relocation type, carry out relocation into a 4 byte packet   */
+   /* within the buffered segment.                                           */
+   /*------------------------------------------------------------------------*/
+   switch(r_type)
+   {
+      /*---------------------------------------------------------------------*/
+      /* Whole-field data relocations.                                       */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS32:
+         *rel_field_ptr = r;
+         break;
+      case R_C6000_ABS16:
+         *((uint16_t*)(buffered_segment + segment_offset)) = r;
+         break;
+      case R_C6000_ABS8:
+         *((uint8_t*)(buffered_segment + segment_offset)) = r;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* The PREL31 field is 31 bits wide (mask_result() keeps bits 0..30),  */
+      /* so all 31 bits must be cleared before the new value is inserted;    */
+      /* only bit 31 of the original word is preserved.                      */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_PREL31:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(31,0)) | r;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* PC-relative displacement fields.                                    */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_PCR_S21:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(21,7)) | (r << 7);
+         break;
+      case R_C6000_PCR_S12:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(12,16)) | (r << 16);
+         break;
+      case R_C6000_PCR_S10:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(10,13)) | (r << 13);
+         break;
+      case R_C6000_PCR_S7:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(7,16)) | (r << 16);
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* 16-bit constant field at bits 7..22.                                */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS_S16:
+      case R_C6000_ABS_L16:
+      case R_C6000_ABS_H16:
+      case R_C6000_SBR_S16:
+      case R_C6000_SBR_L16_B:
+      case R_C6000_SBR_L16_H:
+      case R_C6000_SBR_L16_W:
+      case R_C6000_SBR_H16_B:
+      case R_C6000_SBR_H16_H:
+      case R_C6000_SBR_H16_W:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(16,7)) | (r << 7);
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* 15-bit unsigned offset field at bits 8..22.                         */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_SBR_U15_B:
+      case R_C6000_SBR_U15_H:
+      case R_C6000_SBR_U15_W:
+      case R_C6000_DSBT_INDEX:
+         *rel_field_ptr = (*rel_field_ptr & ~MASK(15,8)) | (r << 8);
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Linux "import-as-own" copy relocations are not yet supported.       */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_COPY:
+
+      default:
+         DLIF_error(DLET_RELOC,
+                    "write_reloc_r called with invalid relocation type!\n");
+   }
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("reloc_field 0x%x\n", *rel_field_ptr);
+#endif
+}
+
+/*****************************************************************************/
+/* PACK_RESULT() - Pack the result of a relocation calculation for storage */
+/* in the relocation field. */
+/*****************************************************************************/
+static int32_t pack_result(int32_t unpacked_result, int r_type)
+{
+   /*------------------------------------------------------------------------*/
+   /* Number of low-order bits dropped from the computed value before it is  */
+   /* stored in the relocation field for the given relocation type.          */
+   /*------------------------------------------------------------------------*/
+   int scale_shift;
+
+   switch(r_type)
+   {
+      /*---------------------------------------------------------------------*/
+      /* Value is stored unscaled.                                           */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS32:
+      case R_C6000_ABS16:
+      case R_C6000_ABS8:
+      case R_C6000_ABS_S16:
+      case R_C6000_ABS_L16:
+      case R_C6000_SBR_U15_B:
+      case R_C6000_SBR_S16:
+      case R_C6000_SBR_L16_B:
+         scale_shift = 0;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Value is stored divided by 2.                                       */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_SBR_U15_H:
+      case R_C6000_SBR_L16_H:
+      case R_C6000_PREL31:
+         scale_shift = 1;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Value is stored divided by 4.                                       */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_PCR_S21:
+      case R_C6000_PCR_S12:
+      case R_C6000_PCR_S10:
+      case R_C6000_PCR_S7:
+      case R_C6000_SBR_U15_W:
+      case R_C6000_SBR_L16_W:
+      case R_C6000_DSBT_INDEX:
+         scale_shift = 2;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Upper-half relocations: 16 bits, plus the access-size scaling.      */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS_H16:
+      case R_C6000_SBR_H16_B:
+         scale_shift = 16;
+         break;
+
+      case R_C6000_SBR_H16_H:
+         scale_shift = 17;
+         break;
+
+      case R_C6000_SBR_H16_W:
+         scale_shift = 18;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Copy relocations are not supported; they are rejected together with */
+      /* every unrecognized relocation type.                                 */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_COPY:
+      default:
+         DLIF_error(DLET_RELOC,
+                    "pack_result called with invalid relocation type!\n");
+         return 0;
+   }
+
+   return unpacked_result >> scale_shift;
+}
+
+/*****************************************************************************/
+/* MASK_RESULT() - Mask the result of a relocation calculation so that it */
+/* fits the size of the relocation type's field. */
+/*****************************************************************************/
+static int32_t mask_result(int32_t unmasked_result, int r_type)
+{
+   /*------------------------------------------------------------------------*/
+   /* Bits of the packed value that fit in the relocation field.             */
+   /*------------------------------------------------------------------------*/
+   int32_t field_bits;
+
+   switch(r_type)
+   {
+      /*---------------------------------------------------------------------*/
+      /* A full 32-bit field keeps every bit of the value.                   */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS32:
+         return unmasked_result;
+
+      case R_C6000_PREL31:
+         field_bits = 0x7FFFFFFF;
+         break;
+
+      case R_C6000_PCR_S21:
+         field_bits = 0x1FFFFF;
+         break;
+
+      case R_C6000_ABS16:
+      case R_C6000_ABS_S16:
+      case R_C6000_ABS_L16:
+      case R_C6000_ABS_H16:
+      case R_C6000_SBR_S16:
+      case R_C6000_SBR_L16_B:
+      case R_C6000_SBR_L16_H:
+      case R_C6000_SBR_L16_W:
+      case R_C6000_SBR_H16_B:
+      case R_C6000_SBR_H16_H:
+      case R_C6000_SBR_H16_W:
+         field_bits = 0xFFFF;
+         break;
+
+      case R_C6000_SBR_U15_B:
+      case R_C6000_SBR_U15_H:
+      case R_C6000_SBR_U15_W:
+      case R_C6000_DSBT_INDEX:
+         field_bits = 0x7FFF;
+         break;
+
+      case R_C6000_PCR_S12:
+         field_bits = 0xFFF;
+         break;
+
+      case R_C6000_PCR_S10:
+         field_bits = 0x3FF;
+         break;
+
+      case R_C6000_ABS8:
+         field_bits = 0xFF;
+         break;
+
+      case R_C6000_PCR_S7:
+         field_bits = 0x7F;
+         break;
+
+      /*---------------------------------------------------------------------*/
+      /* Copy relocations are not supported; they are rejected together with */
+      /* every unrecognized relocation type.                                 */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_COPY:
+      default:
+         DLIF_error(DLET_RELOC,
+                    "mask_result called with invalid relocation type!\n");
+         return 0;
+   }
+
+   return unmasked_result & field_bits;
+}
+
+/*****************************************************************************/
+/* REL_OVERFLOW() */
+/* */
+/* Check relocation value against the range associated with a given */
+/* relocation type field size and signedness. */
+/* */
+/*****************************************************************************/
+static BOOL rel_overflow(C60_RELOC_TYPE r_type, int32_t reloc_value)
+{
+   /*------------------------------------------------------------------------*/
+   /* For the signed, symmetric ranges the switch only selects the bound;    */
+   /* the comparison itself is shared at the bottom of the function.         */
+   /*------------------------------------------------------------------------*/
+   const uint32_t as_unsigned  = (uint32_t)reloc_value;
+   int32_t        signed_bound = 0;
+
+   switch(r_type)
+   {
+      /*---------------------------------------------------------------------*/
+      /* Never checked: 32-bit wide fields, and the L16/H16 flavours for     */
+      /* which overflow checking is suppressed.                              */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS32:
+      case R_C6000_PREL31:
+      case R_C6000_ABS_L16:
+      case R_C6000_ABS_H16:
+      case R_C6000_SBR_L16_B:
+      case R_C6000_SBR_L16_H:
+      case R_C6000_SBR_L16_W:
+      case R_C6000_SBR_H16_B:
+      case R_C6000_SBR_H16_H:
+      case R_C6000_SBR_H16_W:
+         return 0;
+
+      /*---------------------------------------------------------------------*/
+      /* Data fields accept both the signed and the unsigned interpretation. */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_ABS16:
+         return ((reloc_value < -32768) || (reloc_value > 65535));
+      case R_C6000_ABS8:
+         return ((reloc_value < -128) || (reloc_value > 255));
+
+      /*---------------------------------------------------------------------*/
+      /* Unsigned static-base offsets, checked before scaling.               */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_SBR_U15_B:
+         return (as_unsigned >= 0x8000);
+      case R_C6000_SBR_U15_H:
+         return (as_unsigned >= 0xFFFF);
+      case R_C6000_SBR_U15_W:
+      case R_C6000_DSBT_INDEX:
+         return (as_unsigned >= 0x1FFFD);
+
+      /*---------------------------------------------------------------------*/
+      /* Signed ranges: valid values satisfy -bound <= value < bound.        */
+      /*---------------------------------------------------------------------*/
+      case R_C6000_PCR_S21: signed_bound = 0x400000; break;
+      case R_C6000_PCR_S12: signed_bound = 0x2000;   break;
+      case R_C6000_PCR_S10: signed_bound = 0x800;    break;
+      case R_C6000_PCR_S7:  signed_bound = 0x100;    break;
+      case R_C6000_ABS_S16:
+      case R_C6000_SBR_S16: signed_bound = 0x8000;   break;
+
+      /*---------------------------------------------------------------------*/
+      /* Anything else was not expected here; report it and flag overflow.   */
+      /*---------------------------------------------------------------------*/
+      default:
+         DLIF_error(DLET_RELOC,
+                    "rel_overflow called with invalid relocation type!\n");
+         return 1;
+   }
+
+   return ((reloc_value >= signed_bound) || (reloc_value < -signed_bound));
+}
+
+#if LOADER_DEBUG || LOADER_PROFILE
+extern int DLREL_relocations;
+extern time_t DLREL_total_reloc_time;
+#endif
+
+/*****************************************************************************/
+/* RELOC_DO() - Process a single relocation entry. */
+/*****************************************************************************/
+static void reloc_do(C60_RELOC_TYPE r_type,
+ uint32_t segment_vaddr,
+ uint8_t *segment_buffer,
+ uint32_t addend,
+ uint32_t symval,
+ uint32_t spc,
+ int wrong_endian,
+ uint32_t base_pointer,
+ int32_t dsbt_index)
+{
+ /*------------------------------------------------------------------------*/
+ /* Computes the value for one relocation, checks it for overflow, packs */
+ /* and masks it to the field layout, and writes it into the host copy of */
+ /* the segment. */
+ /* */
+ /* r_type - relocation type; selects the formula used below */
+ /* segment_vaddr - added to spc to form the address of the field that is */
+ /* used by the PC-relative and place-relative formulas */
+ /* segment_buffer - host buffer; the field is at segment_buffer + spc */
+ /* addend - addend of the relocation entry */
+ /* symval - value of the referenced symbol */
+ /* spc - byte offset of the relocation field in the segment */
+ /* wrong_endian - if non-zero, the 32-bit word at the field is byte */
+ /* swapped before the write and swapped back after it */
+ /* base_pointer - static base subtracted for SBR_* and DSBT_INDEX */
+ /* dsbt_index - this module's DSBT index (only for DSBT_INDEX) */
+ /*------------------------------------------------------------------------*/
+ int32_t reloc_value = 0;
+
+#if LOADER_DEBUG || LOADER_PROFILE
+ /*------------------------------------------------------------------------*/
+ /* In debug mode, keep a count of the number of relocations processed. */
+ /* In profile mode, start the clock on a given relocation. */
+ /*------------------------------------------------------------------------*/
+ int start_time = 0;
+ if (debugging_on || profiling_on)
+ {
+ DLREL_relocations++;
+ if (profiling_on) start_time = clock();
+ }
+#endif
+
+ /*------------------------------------------------------------------------*/
+ /* Calculate the relocation value according to the rules associated with */
+ /* the given relocation type. */
+ /*------------------------------------------------------------------------*/
+ switch(r_type)
+ {
+ /*---------------------------------------------------------------------*/
+ /* Straight-Up Address relocations (address references). */
+ /*---------------------------------------------------------------------*/
+ case R_C6000_ABS32:
+ case R_C6000_ABS16:
+ case R_C6000_ABS8:
+ case R_C6000_ABS_S16:
+ case R_C6000_ABS_L16:
+ case R_C6000_ABS_H16:
+ reloc_value = symval + addend;
+ break;
+
+ /*---------------------------------------------------------------------*/
+ /* PC-Relative relocations (calls and branches). */
+ /*---------------------------------------------------------------------*/
+ case R_C6000_PCR_S21:
+ case R_C6000_PCR_S12:
+ case R_C6000_PCR_S10:
+ case R_C6000_PCR_S7:
+ {
+ /*------------------------------------------------------------------*/
+ /* Add SPC to segment address to get the PC. Mask for exec-packet */
+ /* boundary. */
+ /*------------------------------------------------------------------*/
+ /* The 0xffffffe0 mask rounds the address down to a multiple of 32. */
+ int32_t opnd_p = (spc + segment_vaddr) & 0xffffffe0;
+ reloc_value = symval + addend - opnd_p;
+ break;
+ }
+
+ /*---------------------------------------------------------------------*/
+ /* "Place"-relative relocations (TDEH). */
+ /*---------------------------------------------------------------------*/
+ /* These relocations occur in data and refer to a label that occurs */
+ /* at some signed 32-bit offset from the place where the relocation */
+ /* occurs. */
+ /*---------------------------------------------------------------------*/
+ case R_C6000_PREL31:
+ {
+ /*------------------------------------------------------------------*/
+ /* Compute location of relocation entry and subtract it from the */
+ /* address of the location being referenced (it is computed very */
+ /* much like a PC-relative relocation, but it occurs in data and */
+ /* is called a "place"-relative relocation). */
+ /*------------------------------------------------------------------*/
+ /* If this is an Elf32_Rel type relocation, then addend is assumed */
+ /* to have been scaled when it was unpacked (field << 1). */
+ /*------------------------------------------------------------------*/
+ /* For Elf32_Rela type relocations the addend is assumed to be a */
+ /* signed 32-bit integer value. */
+ /*------------------------------------------------------------------*/
+ /* Offset is not fetch-packet relative; doesn't need to be masked. */
+ /*------------------------------------------------------------------*/
+ int32_t opnd_p = (spc + segment_vaddr);
+ reloc_value = symval + addend - opnd_p;
+ break;
+ }
+
+ /*---------------------------------------------------------------------*/
+ /* Static-Base Relative relocations (near-DP). */
+ /*---------------------------------------------------------------------*/
+ case R_C6000_SBR_U15_B:
+ case R_C6000_SBR_U15_H:
+ case R_C6000_SBR_U15_W:
+ case R_C6000_SBR_S16:
+ case R_C6000_SBR_L16_B:
+ case R_C6000_SBR_L16_H:
+ case R_C6000_SBR_L16_W:
+ case R_C6000_SBR_H16_B:
+ case R_C6000_SBR_H16_H:
+ case R_C6000_SBR_H16_W:
+ reloc_value = symval + addend - base_pointer;
+ break;
+
+ /*---------------------------------------------------------------------*/
+ /* R_C6000_DSBT_INDEX - uses value assigned by the dynamic loader to */
+ /* be the DSBT index for this module as a scaled offset when */
+ /* referencing the DSBT. The DSBT base address is in symval and the */
+ /* static base is in base_pointer. DP-relative offset to slot in */
+ /* DSBT is the offset of the DSBT relative to the DP plus the */
+ /* scaled DSBT index into the DSBT. */
+ /*---------------------------------------------------------------------*/
+ case R_C6000_DSBT_INDEX:
+ reloc_value = ((symval + addend) - base_pointer) + (dsbt_index << 2);
+ break;
+
+ /*---------------------------------------------------------------------*/
+ /* Linux "import-as-own" copy relocation: after DSO initialization, */
+ /* copy the named object from the DSO into the executable's BSS */
+ /*---------------------------------------------------------------------*/
+ /* Linux "import-as-own" copy relocations are not yet supported. */
+ /*---------------------------------------------------------------------*/
+ case R_C6000_COPY:
+
+ /*---------------------------------------------------------------------*/
+ /* Unrecognized relocation type. */
+ /*---------------------------------------------------------------------*/
+ default:
+ DLIF_error(DLET_RELOC,
+ "reloc_do called with invalid relocation type!\n");
+ break;
+ }
+
+ /*------------------------------------------------------------------------*/
+ /* Overflow checking. Is relocation value out of range for the size and */
+ /* type of the current relocation? */
+ /*------------------------------------------------------------------------*/
+ /* NOTE(review): there is no early return after the error report, so the */
+ /* (truncated) value is still packed and written below if DLIF_error() */
+ /* returns -- confirm this is intended. */
+ /*------------------------------------------------------------------------*/
+ if (rel_overflow(r_type, reloc_value))
+ DLIF_error(DLET_RELOC, "relocation overflow!\n");
+
+ /*------------------------------------------------------------------------*/
+ /* Move relocation value to appropriate offset for relocation field's */
+ /* location. */
+ /*------------------------------------------------------------------------*/
+ reloc_value = pack_result(reloc_value, r_type);
+
+ /*------------------------------------------------------------------------*/
+ /* Mask packed result to the size of the relocation field. */
+ /*------------------------------------------------------------------------*/
+ reloc_value = mask_result(reloc_value, r_type);
+
+ /*------------------------------------------------------------------------*/
+ /* If necessary, Swap endianness of data at relocation address. */
+ /*------------------------------------------------------------------------*/
+ /* The swap, the write and the swap back must stay in this order: the */
+ /* write performs a read-modify-write of the word at segment_buffer+spc. */
+ /*------------------------------------------------------------------------*/
+ if (wrong_endian)
+ DLIMP_change_endian32((int32_t*)(segment_buffer + spc));
+
+ /*------------------------------------------------------------------------*/
+ /* Write the relocated 4-byte packet back to the segment buffer. */
+ /*------------------------------------------------------------------------*/
+ write_reloc_r(segment_buffer, spc, r_type, reloc_value);
+
+ /*------------------------------------------------------------------------*/
+ /* Change endianness of segment address back to original. */
+ /*------------------------------------------------------------------------*/
+ if (wrong_endian)
+ DLIMP_change_endian32((int32_t*)(segment_buffer + spc));
+
+#if LOADER_DEBUG || LOADER_PROFILE
+ /*------------------------------------------------------------------------*/
+ /* In profile mode, add elapsed time for this relocation to total time */
+ /* spent doing relocations. */
+ /*------------------------------------------------------------------------*/
+ if (profiling_on)
+ DLREL_total_reloc_time += (clock() - start_time);
+ if (debugging_on)
+ DLIF_trace("reloc_value = 0x%x\n", reloc_value);
+#endif
+}
+
+/*****************************************************************************/
+/* REL_UNPACK_ADDEND() */
+/* */
+/* Unpack addend value from the relocation field. */
+/* */
+/*****************************************************************************/
+static void rel_unpack_addend(C60_RELOC_TYPE r_type,
+ uint8_t *address,
+ uint32_t *addend)
+{
+ /*------------------------------------------------------------------------*/
+ /* C6000 does not support Elf32_Rel type relocations in the dynamic */
+ /* loader core. We will emit an internal error and abort until this */
+ /* support is added. I abort here because this is necessarily a target- */
+ /* specific part of the relocation infrastructure. */
+ /*------------------------------------------------------------------------*/
+ /* r_type and address are currently unused. *addend is set to zero */
+ /* before the error is reported and DLIF_exit(1) is called. */
+ /*------------------------------------------------------------------------*/
+ *addend = 0;
+
+ DLIF_error(DLET_RELOC,
+ "Internal Error: unpacking addend values from the relocation "
+ "field is not supported in the C6000 dynamic loader at this "
+ "time; aborting\n");
+ DLIF_exit(1);
+}
+
+/*****************************************************************************/
+/* REL_SWAP_ENDIAN() */
+/* */
+/* Return TRUE if we should change the endianness of a relocation field. */
+/* */
+/*****************************************************************************/
+static BOOL rel_swap_endian(DLIMP_Dynamic_Module *dyn_module,
+                            C60_RELOC_TYPE r_type)
+{
+   /*------------------------------------------------------------------------*/
+   /* The decision depends only on the module's wrong_endian flag; the       */
+   /* relocation type is not consulted.                                      */
+   /*------------------------------------------------------------------------*/
+   return (dyn_module->wrong_endian) ? TRUE : FALSE;
+}
+
+/*****************************************************************************/
+/* REL_CHANGE_ENDIAN() */
+/* */
+/* Change the endianness of the relocation field at the specified address */
+/* in the segment's data. */
+/* */
+/*****************************************************************************/
+static void rel_change_endian(C60_RELOC_TYPE r_type, uint8_t *address)
+{
+ /*------------------------------------------------------------------------*/
+ /* On C6000, all instructions are 32-bits wide. */
+ /*------------------------------------------------------------------------*/
+ /* The 32-bit word at address is therefore swapped in place, whatever the */
+ /* relocation type is; r_type is unused. */
+ /*------------------------------------------------------------------------*/
+ DLIMP_change_endian32((int32_t *)address);
+}
+
+/*****************************************************************************/
+/* READ_REL_TABLE() */
+/* */
+/* Read in an Elf32_Rel type relocation table. This function allocates */
+/* host memory for the table. */
+/* */
+/*****************************************************************************/
+static void read_rel_table(struct Elf32_Rel **rel_table,
+                           int32_t table_offset,
+                           uint32_t relnum, uint32_t relent,
+                           LOADER_FILE_DESC *fd, BOOL wrong_endian)
+{
+   /*------------------------------------------------------------------------*/
+   /* rel_table    - out: newly allocated table, or NULL if the table is     */
+   /*                empty or could not be allocated                         */
+   /* table_offset - file offset of the table                                */
+   /* relnum       - number of entries in the table                          */
+   /* relent       - size in bytes of one entry                              */
+   /* fd           - file to read the table from                             */
+   /* wrong_endian - if set, every entry is byte swapped after reading       */
+   /*------------------------------------------------------------------------*/
+   uint32_t i;
+
+   *rel_table = NULL;
+   if (relnum == 0) return;
+
+   /*------------------------------------------------------------------------*/
+   /* Both counts come from the object file.  Reject a zero entry size and   */
+   /* a table size that does not fit in 32 bits, so that the buffer cannot   */
+   /* end up smaller than the data read and swapped below.                   */
+   /*------------------------------------------------------------------------*/
+   if (relent == 0 || relnum > 0xFFFFFFFFu / relent)
+   {
+      DLIF_error(DLET_RELOC, "Invalid Elf32_Rel relocation table size\n");
+      return;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Assumes DLIF_malloc() reports failure by returning NULL, as malloc()   */
+   /* does -- confirm against the client implementation.                     */
+   /*------------------------------------------------------------------------*/
+   *rel_table = (struct Elf32_Rel *)DLIF_malloc(relnum * relent);
+   if (*rel_table == NULL)
+   {
+      DLIF_error(DLET_RELOC,
+                 "Could not allocate memory for Elf32_Rel relocation table\n");
+      return;
+   }
+
+   DLIF_fseek(fd, table_offset, LOADER_SEEK_SET);
+   DLIF_fread(*rel_table, relnum, relent, fd);
+
+   if (wrong_endian)
+   {
+      for (i = 0; i < relnum; i++)
+         DLIMP_change_rel_endian(*rel_table + i);
+   }
+}
+
+/*****************************************************************************/
+/* PROCESS_REL_TABLE() */
+/* */
+/* Process table of Elf32_Rel type relocations. */
+/* */
+/*****************************************************************************/
+static void process_rel_table(DLOAD_HANDLE handle,
+ DLIMP_Loaded_Segment* seg,
+ struct Elf32_Rel *rel_table,
+ uint32_t relnum,
+ int32_t *start_relidx,
+ uint32_t ti_static_base,
+ DLIMP_Dynamic_Module* dyn_module)
+{
+ /*------------------------------------------------------------------------*/
+ /* Applies to seg every entry of rel_table whose r_offset lies in the */
+ /* range [seg->input_vaddr, seg->input_vaddr + seg->phdr.p_memsz). */
+ /* */
+ /* The scan starts at *start_relidx and stops at the first entry outside */
+ /* the segment that follows at least one entry inside it, so the entries */
+ /* of one segment are expected to be contiguous in the table. */
+ /* */
+ /* NOTE(review): *start_relidx is only read here, never written back. */
+ /*------------------------------------------------------------------------*/
+ Elf32_Addr seg_start_addr = seg->input_vaddr;
+ Elf32_Addr seg_end_addr = seg_start_addr + seg->phdr.p_memsz;
+ BOOL found = FALSE;
+ int32_t relidx = *start_relidx;
+
+ /*------------------------------------------------------------------------*/
+ /* If the given start reloc index is out of range, then start from the */
+ /* beginning of the given table. */
+ /*------------------------------------------------------------------------*/
+ if (relidx >= relnum) relidx = 0;
+
+ /*------------------------------------------------------------------------*/
+ /* Spin through Elf32_Rel type relocation table. */
+ /*------------------------------------------------------------------------*/
+ for ( ; relidx < relnum; relidx++)
+ {
+ /*---------------------------------------------------------------------*/
+ /* If the relocation offset falls within the segment, process it. */
+ /*---------------------------------------------------------------------*/
+ if (rel_table[relidx].r_offset >= seg_start_addr &&
+ rel_table[relidx].r_offset < seg_end_addr)
+ {
+ Elf32_Addr r_symval = 0;
+ C60_RELOC_TYPE r_type =
+ (C60_RELOC_TYPE)ELF32_R_TYPE(rel_table[relidx].r_info);
+ int32_t r_symid = ELF32_R_SYM(rel_table[relidx].r_info);
+
+ uint8_t *reloc_address = NULL;
+ uint32_t pc = 0;
+ uint32_t addend = 0;
+
+ BOOL change_endian = FALSE;
+
+ found = TRUE;
+
+ /*------------------------------------------------------------------*/
+ /* If symbol definition is not found, don't do the relocation. */
+ /* An error is generated by the lookup function. */
+ /*------------------------------------------------------------------*/
+ if (!DLSYM_canonical_lookup(handle, r_symid, dyn_module, &r_symval))
+ continue;
+
+ /*------------------------------------------------------------------*/
+ /* Addend value is stored in the relocation field. */
+ /* We'll need to unpack it from the data for the segment that is */
+ /* currently being relocated. */
+ /*------------------------------------------------------------------*/
+ pc = rel_table[relidx].r_offset - seg->input_vaddr;
+ reloc_address = (uint8_t *)seg->host_address + pc;
+
+ /*------------------------------------------------------------------*/
+ /* NOTE(review): the field is swapped here and never swapped back, */
+ /* and reloc_do() below swaps the same word again when */
+ /* dyn_module->wrong_endian is set. It looks like the field would */
+ /* be in the wrong byte order when it is written -- confirm before */
+ /* Elf32_Rel support is enabled. */
+ /*------------------------------------------------------------------*/
+ change_endian = rel_swap_endian(dyn_module, r_type);
+ if (change_endian)
+ rel_change_endian(r_type, reloc_address);
+
+ /*------------------------------------------------------------------*/
+ /* rel_unpack_addend() currently reports an internal error and */
+ /* calls DLIF_exit(1), because Elf32_Rel addends are unsupported. */
+ /*------------------------------------------------------------------*/
+ rel_unpack_addend(
+ (C60_RELOC_TYPE)ELF32_R_TYPE(rel_table[relidx].r_info),
+ reloc_address, &addend);
+
+ /*------------------------------------------------------------------*/
+ /* Perform actual relocation. This is a really wide function */
+ /* interface and could do with some encapsulation. */
+ /*------------------------------------------------------------------*/
+ reloc_do(r_type,
+ seg->phdr.p_vaddr,
+ seg->host_address,
+ addend,
+ r_symval,
+ pc,
+ dyn_module->wrong_endian,
+ ti_static_base,
+ dyn_module->dsbt_index);
+
+ }
+
+ else if (found)
+ break;
+ }
+}
+
+/*****************************************************************************/
+/* READ_RELA_TABLE() */
+/* */
+/* Read in an Elf32_Rela type relocation table. This function allocates */
+/* host memory for the table. */
+/* */
+/*****************************************************************************/
+static void read_rela_table(struct Elf32_Rela **rela_table,
+                            int32_t table_offset,
+                            uint32_t relanum, uint32_t relaent,
+                            LOADER_FILE_DESC *fd, BOOL wrong_endian)
+{
+   /*------------------------------------------------------------------------*/
+   /* rela_table   - out: newly allocated table, or NULL if the table is     */
+   /*                empty or could not be allocated                         */
+   /* table_offset - file offset of the table                                */
+   /* relanum      - number of entries in the table                          */
+   /* relaent      - size in bytes of one entry                              */
+   /* fd           - file to read the table from                             */
+   /* wrong_endian - if set, every entry is byte swapped after reading       */
+   /*------------------------------------------------------------------------*/
+   uint32_t i;
+
+   *rela_table = NULL;
+   if (relanum == 0) return;
+
+   /*------------------------------------------------------------------------*/
+   /* Both counts come from the object file.  Reject a zero entry size and   */
+   /* a table size that does not fit in 32 bits, so that the buffer cannot   */
+   /* end up smaller than the data read and swapped below.                   */
+   /*------------------------------------------------------------------------*/
+   if (relaent == 0 || relanum > 0xFFFFFFFFu / relaent)
+   {
+      DLIF_error(DLET_RELOC, "Invalid Elf32_Rela relocation table size\n");
+      return;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Assumes DLIF_malloc() reports failure by returning NULL, as malloc()   */
+   /* does -- confirm against the client implementation.                     */
+   /*------------------------------------------------------------------------*/
+   *rela_table = (struct Elf32_Rela *)DLIF_malloc(relanum * relaent);
+   if (*rela_table == NULL)
+   {
+      DLIF_error(DLET_RELOC,
+                 "Could not allocate memory for Elf32_Rela relocation table\n");
+      return;
+   }
+
+   DLIF_fseek(fd, table_offset, LOADER_SEEK_SET);
+   DLIF_fread(*rela_table, relanum, relaent, fd);
+
+   if (wrong_endian)
+   {
+      for (i = 0; i < relanum; i++)
+         DLIMP_change_rela_endian(*rela_table + i);
+   }
+}
+
+/*****************************************************************************/
+/* PROCESS_RELA_TABLE() */
+/* */
+/* Process a table of Elf32_Rela type relocations. */
+/* */
+/*****************************************************************************/
+static void process_rela_table(DLOAD_HANDLE handle,
+                               DLIMP_Loaded_Segment *seg,
+                               struct Elf32_Rela *rela_table,
+                               uint32_t relanum,
+                               int32_t *start_relidx,
+                               uint32_t ti_static_base,
+                               DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* Applies to seg every entry of rela_table whose r_offset lies in the    */
+   /* range [seg->input_vaddr, seg->input_vaddr + seg->phdr.p_memsz).        */
+   /*                                                                        */
+   /* The scan starts at *start_relidx and stops at the first entry outside  */
+   /* the segment that follows at least one entry inside it, so the entries  */
+   /* of one segment are expected to be contiguous in the table.             */
+   /*                                                                        */
+   /* NOTE(review): *start_relidx is only read here, never written back.     */
+   /*------------------------------------------------------------------------*/
+   Elf32_Addr seg_start_addr = seg->input_vaddr;
+   Elf32_Addr seg_end_addr   = seg_start_addr + seg->phdr.p_memsz;
+   BOOL       found          = FALSE;
+   int32_t    relidx         = *start_relidx;
+
+   /*------------------------------------------------------------------------*/
+   /* If the given start reloc index is out of range, then start from the    */
+   /* beginning of the given table.  An index equal to relanum is already    */
+   /* out of range, so the test is ">=", as in process_rel_table().          */
+   /*------------------------------------------------------------------------*/
+   if (relidx >= relanum) relidx = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Spin through RELA relocation table.                                    */
+   /*------------------------------------------------------------------------*/
+   for ( ; relidx < relanum; relidx++)
+   {
+      /*---------------------------------------------------------------------*/
+      /* If the relocation offset falls within the segment, process it.      */
+      /*---------------------------------------------------------------------*/
+      if (rela_table[relidx].r_offset >= seg_start_addr &&
+          rela_table[relidx].r_offset < seg_end_addr)
+      {
+         Elf32_Addr     r_symval = 0;
+         C60_RELOC_TYPE r_type   =
+                     (C60_RELOC_TYPE)ELF32_R_TYPE(rela_table[relidx].r_info);
+         int32_t        r_symid  = ELF32_R_SYM(rela_table[relidx].r_info);
+
+         found = TRUE;
+
+         /*------------------------------------------------------------------*/
+         /* If symbol definition is not found, don't do the relocation.      */
+         /* An error is generated by the lookup function.                    */
+         /*------------------------------------------------------------------*/
+         if (!DLSYM_canonical_lookup(handle, r_symid, dyn_module, &r_symval))
+            continue;
+
+         /*------------------------------------------------------------------*/
+         /* Perform actual relocation.  This is a really wide function       */
+         /* interface and could do with some encapsulation.                  */
+         /*------------------------------------------------------------------*/
+         reloc_do(r_type,
+                  seg->phdr.p_vaddr,
+                  seg->host_address,
+                  rela_table[relidx].r_addend,
+                  r_symval,
+                  rela_table[relidx].r_offset - seg->input_vaddr,
+                  dyn_module->wrong_endian,
+                  ti_static_base,
+                  dyn_module->dsbt_index);
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* First entry past this segment's run of relocations: we are done.    */
+      /*---------------------------------------------------------------------*/
+      else if (found)
+         break;
+   }
+}
+
+/*****************************************************************************/
+/* PROCESS_GOT_RELOCS() */
+/* */
+/* Process all GOT relocations. It is possible to have both Elf32_Rel */
+/* and Elf32_Rela type relocations in the same file, so we handle them */
+/* both. */
+/* */
+/*****************************************************************************/
+static void process_got_relocs(DLOAD_HANDLE handle,
+                               struct Elf32_Rel* rel_table, uint32_t relnum,
+                               struct Elf32_Rela* rela_table, uint32_t relanum,
+                               DLIMP_Dynamic_Module* dyn_module)
+{
+   DLIMP_Loaded_Segment *segments =
+      (DLIMP_Loaded_Segment*)(dyn_module->loaded_module->loaded_segments.buf);
+   uint32_t segment_count = dyn_module->loaded_module->loaded_segments.size;
+   uint32_t static_base   = 0;
+   int32_t  rela_start    = 0;
+   int32_t  rel_start     = 0;
+   uint32_t n;
+
+   /*------------------------------------------------------------------------*/
+   /* The table processing functions need the static base, which is the      */
+   /* value of __TI_STATIC_BASE in the module's local symbol table.  If the  */
+   /* lookup fails an error is reported and processing carries on.           */
+   /*------------------------------------------------------------------------*/
+   if (!DLSYM_lookup_local_symtab("__TI_STATIC_BASE", dyn_module->symtab,
+                                  dyn_module->symnum, &static_base))
+      DLIF_error(DLET_RELOC, "Could not resolve value of __TI_STATIC_BASE\n");
+
+   /*------------------------------------------------------------------------*/
+   /* Visit the loaded segments in order.  Segments without file contents    */
+   /* are uninitialized and should not carry relocations, so only the others */
+   /* are handed to the table walkers: the RELA table first, then REL.       */
+   /*------------------------------------------------------------------------*/
+   for (n = 0; n < segment_count; n++)
+   {
+      DLIMP_Loaded_Segment *current = &segments[n];
+
+      if (current->phdr.p_filesz)
+      {
+         if (rela_table)
+            process_rela_table(handle, current,
+                               rela_table, relanum, &rela_start,
+                               static_base, dyn_module);
+
+         if (rel_table)
+            process_rel_table(handle, current,
+                              rel_table, relnum, &rel_start,
+                              static_base, dyn_module);
+      }
+   }
+}
+
+/*****************************************************************************/
+/* PROCESS_PLTGOT_RELOCS() */
+/* */
+/* Process all PLTGOT relocation entries. The PLTGOT relocation table */
+/* can be either Elf32_Rel or Elf32_Rela type (selected by reltype). All */
+/* PLTGOT relocations are guaranteed to belong to the same segment, so */
+/* the r_offset of the first entry alone is used to pick that segment. */
+/*****************************************************************************/
+static void process_pltgot_relocs(DLOAD_HANDLE handle,
+ void* plt_reloc_table,
+ int reltype,
+ uint32_t pltnum,
+ DLIMP_Dynamic_Module* dyn_module)
+{
+ Elf32_Addr r_offset = (reltype == DT_REL) ?
+ ((struct Elf32_Rel *)plt_reloc_table)->r_offset :
+ ((struct Elf32_Rela *)plt_reloc_table)->r_offset;
+
+ DLIMP_Loaded_Segment* seg =
+ (DLIMP_Loaded_Segment*)(dyn_module->loaded_module->loaded_segments.buf);
+
+ uint32_t num_segs = dyn_module->loaded_module->loaded_segments.size;
+ int32_t plt_relidx = 0;
+ uint32_t seg_idx = 0;
+ uint32_t ti_static_base = 0;
+
+ /*------------------------------------------------------------------------*/
+ /* Look up __TI_STATIC_BASE for the table processing functions. A failed */
+ /* lookup is reported via DLIF_error(); there is no early return here. */
+ /*------------------------------------------------------------------------*/
+ if (!DLSYM_lookup_local_symtab("__TI_STATIC_BASE", dyn_module->symtab,
+ dyn_module->symnum, &ti_static_base))
+ DLIF_error(DLET_RELOC, "Could not resolve value of __TI_STATIC_BASE\n");
+
+ /*------------------------------------------------------------------------*/
+ /* For each segment s, check if the relocation falls within s. If so, */
+ /* then all other relocations are guaranteed to fall within s. Process */
+ /* all relocations and then stop searching. */
+ /*------------------------------------------------------------------------*/
+ for (seg_idx = 0; seg_idx < num_segs; seg_idx++)
+ {
+ Elf32_Addr seg_start_addr = seg[seg_idx].input_vaddr;
+ Elf32_Addr seg_end_addr = seg_start_addr + seg[seg_idx].phdr.p_memsz;
+
+ /*---------------------------------------------------------------------*/
+ /* Skip segments with p_filesz == 0 (uninitialized): no relocations. */
+ /*---------------------------------------------------------------------*/
+ if(!seg[seg_idx].phdr.p_filesz) continue;
+
+ if (r_offset >= seg_start_addr &&
+ r_offset < seg_end_addr)
+ {
+ if (reltype == DT_REL)
+ process_rel_table(handle, (seg + seg_idx),
+ (struct Elf32_Rel *)plt_reloc_table,
+ pltnum, &plt_relidx,
+ ti_static_base, dyn_module);
+ else
+ process_rela_table(handle, (seg + seg_idx),
+ (struct Elf32_Rela *)plt_reloc_table,
+ pltnum, &plt_relidx,
+ ti_static_base, dyn_module);
+
+ break;
+ }
+ }
+}
+
+/*****************************************************************************/
+/* RELOCATE() - Perform RELA and REL type relocations for given ELF object   */
+/*      file that we are in the process of loading and relocating.           */
+/*                                                                           */
+/*      handle     - loader handle, forwarded to the relocation processors.  */
+/*      fd         - file the relocation tables are read from.               */
+/*      dyn_module - module being relocated; supplies the dynamic table,     */
+/*                   the wrong_endian flag and the DSBT index.               */
+/*                                                                           */
+/*      Tag values are compared against INT_MAX before use; presumably       */
+/*      DLIMP_get_first_dyntag() returns INT_MAX for an absent tag (TODO     */
+/*      confirm).  An entry size of zero is reported via DLIF_error() and    */
+/*      that table is skipped, rather than dividing by zero.                 */
+/*****************************************************************************/
+void DLREL_c60_relocate(DLOAD_HANDLE handle,
+                        LOADER_FILE_DESC *fd, DLIMP_Dynamic_Module *dyn_module)
+{
+   struct Elf32_Dyn  *dyn_nugget     = dyn_module->dyntab;
+   struct Elf32_Rela *rela_table     = NULL;
+   struct Elf32_Rel  *rel_table      = NULL;
+   struct Elf32_Rela *rela_plt_table = NULL;
+   struct Elf32_Rel  *rel_plt_table  = NULL;
+
+   /*------------------------------------------------------------------------*/
+   /* Read the size of the relocation table (DT_RELASZ) and the size per     */
+   /* relocation (DT_RELAENT) from the dynamic segment.                      */
+   /*------------------------------------------------------------------------*/
+   uint32_t relasz  = DLIMP_get_first_dyntag(DT_RELASZ, dyn_nugget);
+   uint32_t relaent = DLIMP_get_first_dyntag(DT_RELAENT, dyn_nugget);
+   uint32_t relanum = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Read the size of the relocation table (DT_RELSZ) and the size per      */
+   /* relocation (DT_RELENT) from the dynamic segment.                       */
+   /*------------------------------------------------------------------------*/
+   uint32_t relsz   = DLIMP_get_first_dyntag(DT_RELSZ, dyn_nugget);
+   uint32_t relent  = DLIMP_get_first_dyntag(DT_RELENT, dyn_nugget);
+   uint32_t relnum  = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Read the size of the relocation table (DT_PLTRELSZ) and the type of    */
+   /* the PLTGOT relocation table (DT_PLTREL): one of DT_REL or DT_RELA.     */
+   /*------------------------------------------------------------------------*/
+   uint32_t pltrelsz  = DLIMP_get_first_dyntag(DT_PLTRELSZ, dyn_nugget);
+   int      pltreltyp = DLIMP_get_first_dyntag(DT_PLTREL, dyn_nugget);
+   uint32_t pltnum    = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Find/record DSBT index associated with this module.                    */
+   /*------------------------------------------------------------------------*/
+   if (is_dsbt_module(dyn_module) &&
+       (dyn_module->dsbt_index == DSBT_INDEX_INVALID))
+      dyn_module->dsbt_index =
+                 DLIF_get_dsbt_index(dyn_module->loaded_module->file_handle);
+
+   /*------------------------------------------------------------------------*/
+   /* Read the PLTGOT relocation table from the file.                        */
+   /* The PLTGOT table is a subsection at the end of either the DT_REL or    */
+   /* DT_RELA table.  The size of the table it belongs to, DT_REL(A)SZ,      */
+   /* includes the size of the PLTGOT table.  So it must be adjusted so that */
+   /* the GOT relocation tables only contain actual GOT relocations.         */
+   /* NOTE(review): the subtraction below is applied even if DT_REL(A)SZ was */
+   /* absent or smaller than DT_PLTRELSZ - confirm producers always nest     */
+   /* the PLTGOT table inside the REL/RELA table.                            */
+   /*------------------------------------------------------------------------*/
+   if (pltrelsz != INT_MAX && pltrelsz != 0)
+   {
+      if (pltreltyp == DT_REL)
+      {
+         /*------------------------------------------------------------------*/
+         /* A zero entry size would make the entry count a divide-by-zero.   */
+         /*------------------------------------------------------------------*/
+         if (relent == 0)
+            DLIF_error(DLET_RELOC,
+                       "DT_RELENT is zero; cannot size PLTGOT relocations\n");
+         else
+         {
+            pltnum = pltrelsz/relent;
+            relsz -= pltrelsz;
+            read_rel_table((&rel_plt_table),
+                           DLIMP_get_first_dyntag(DT_JMPREL, dyn_nugget),
+                           pltnum, relent, fd, dyn_module->wrong_endian);
+         }
+      }
+
+      else if (pltreltyp == DT_RELA)
+      {
+         if (relaent == 0)
+            DLIF_error(DLET_RELOC,
+                       "DT_RELAENT is zero; cannot size PLTGOT relocations\n");
+         else
+         {
+            pltnum = pltrelsz/relaent;
+            relasz -= pltrelsz;
+            read_rela_table((&rela_plt_table),
+                            DLIMP_get_first_dyntag(DT_JMPREL, dyn_nugget),
+                            pltnum, relaent, fd, dyn_module->wrong_endian);
+         }
+      }
+
+      else
+      {
+         DLIF_error(DLET_RELOC,
+                    "DT_PLTREL is invalid: must be either %d or %d\n",
+                    DT_REL, DT_RELA);
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Read the DT_RELA GOT relocation table from the file.                   */
+   /*------------------------------------------------------------------------*/
+   if (relasz != INT_MAX && relasz != 0)
+   {
+      if (relaent == 0)
+         DLIF_error(DLET_RELOC,
+                    "DT_RELAENT is zero; cannot size DT_RELA relocations\n");
+      else
+      {
+         relanum = relasz/relaent;
+         read_rela_table(&rela_table,
+                         DLIMP_get_first_dyntag(DT_RELA, dyn_nugget),
+                         relanum, relaent, fd, dyn_module->wrong_endian);
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Read the DT_REL GOT relocation table from the file.                    */
+   /*------------------------------------------------------------------------*/
+   if (relsz != INT_MAX && relsz != 0)
+   {
+      if (relent == 0)
+         DLIF_error(DLET_RELOC,
+                    "DT_RELENT is zero; cannot size DT_REL relocations\n");
+      else
+      {
+         relnum = relsz/relent;
+         read_rel_table(&rel_table,
+                        DLIMP_get_first_dyntag(DT_REL, dyn_nugget),
+                        relnum, relent, fd, dyn_module->wrong_endian);
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Process the PLTGOT relocations.  Only the table matching pltreltyp     */
+   /* can have been read above, so at most one of these calls is made.       */
+   /*------------------------------------------------------------------------*/
+   if (rela_plt_table)
+      process_pltgot_relocs(handle, rela_plt_table, pltreltyp, pltnum,
+                            dyn_module);
+
+   if (rel_plt_table)
+      process_pltgot_relocs(handle, rel_plt_table, pltreltyp, pltnum,
+                            dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Process the GOT relocations.                                           */
+   /*------------------------------------------------------------------------*/
+   if (rel_table || rela_table)
+      process_got_relocs(handle, rel_table, relnum, rela_table, relanum,
+                         dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Free memory used for ELF relocation table copies.                      */
+   /*------------------------------------------------------------------------*/
+   if (rela_table)     DLIF_free(rela_table);
+   if (rel_table)      DLIF_free(rel_table);
+   if (rela_plt_table) DLIF_free(rela_plt_table);
+   if (rel_plt_table)  DLIF_free(rel_plt_table);
+}
+
+/*****************************************************************************/
+/* UNIT TESTING INTERFACE - wrappers compiled only when UNIT_TEST is defined */
+/*****************************************************************************/
+#ifdef UNIT_TEST
+void unit_c60_reloc_do(C60_RELOC_TYPE r_type,
+ uint8_t *address_space,
+ uint32_t addend, uint32_t symval, uint32_t pc,
+ uint32_t static_base, int wrong_endian, /* NOTE(review): wrong_endian is never read below */
+ int32_t dsbt_index)
+{
+ reloc_do(r_type, (uint32_t)address_space, address_space, /* NOTE(review): pointer cast to uint32_t assumes 32-bit host addresses */
+ addend, symval, pc, FALSE, static_base, dsbt_index); /* a literal FALSE is passed; presumably the endianness flag - confirm */
+}
+
+#if 0 /* Disabled: RELA type relocations have the addend in the relocation entry */
+void unit_c60_rel_unpack_addend(C60_RELOC_TYPE r_type,
+ uint8_t* address,
+ uint32_t* addend)
+{
+ rel_unpack_addend(r_type, address, addend); /* direct pass-through of all three arguments */
+}
+#endif
+
+BOOL unit_c60_rel_overflow(C60_RELOC_TYPE r_type, int32_t reloc_value)
+{
+ return rel_overflow(r_type, reloc_value); /* exposes the file-local rel_overflow() result to the unit tests */
+}
+#endif
+
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.h b/src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.h
new file mode 100644
index 0000000..8ccd60e
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_REL/c60_reloc.h
@@ -0,0 +1,30 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+void DLREL_c60_relocate(DLOAD_HANDLE handle, LOADER_FILE_DESC *fd,
+ DLIMP_Dynamic_Module *dyn_module); /* C6x entry point: performs RELA and REL relocations for the module being loaded */
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.cpp b/src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.cpp
new file mode 100644
index 0000000..acde023
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.cpp
@@ -0,0 +1,825 @@
+/*
+* test_c60_reloc.cpp
+*
+* C6x Relocation Unit Tests.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include "test_c60_reloc.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/*****************************************************************************/
+/* C60_TestRelocDo */
+/* */
+/* Tests the C60 version of reloc_do. In cases where multiple relocation */
+/* types are implemented in the same way, only one type is tested. For */
+/* instance, R_C6000_xxx, R_C6000_yyy, and R_C6000_zzz are implemented in */
+/* the exact same way and, therefore, only R_C6000_xxx is tested. */
+/* */
+/* Each test follows the same flow: */
+/* 1. A valid instruction is constructed for the relocation type being */
+/* tested. */
+/* 2. Addend, symbol value, and pc are then created. */
+/* (NOTE: static base is not needed, and so 0 is passed. Also, same */
+/* endianness is assumed.) */
+/* 3. reloc_do() is called */
+/* 4. The result is checked. */
+/* 5. Repeat if variations should be considered. */
+/* */
+/*****************************************************************************/
+//void C60_TestRelocDo::test_R_C6000_NONE() { }
+
+void C60_TestRelocDo::test_R_C6000_ABS32()
+{
+ uint32_t address_space = 0x0; /* 32-bit relocation target, patched in place */
+ uint32_t addend = 0x4;
+ uint32_t symval = 0x2001000;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_ABS32,
+ (uint8_t*) &address_space,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(address_space, 0x2001004); /* symval + addend */
+}
+
+void C60_TestRelocDo::test_R_C6000_ABS16()
+{
+ uint16_t address_space = 0x0; /* 16-bit relocation target, patched in place */
+ uint32_t addend = 0x4;
+ uint32_t symval = 0xFFE;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_ABS16,
+ (uint8_t*) &address_space,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(address_space, 0x1002); /* symval + addend */
+}
+
+void C60_TestRelocDo::test_R_C6000_ABS8()
+{
+ uint8_t address_space = 0x0; /* 8-bit relocation target, patched in place */
+ uint32_t addend = 0x4;
+ uint32_t symval = 0xE;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_ABS8,
+ &address_space,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(address_space, 0x12); /* symval + addend */
+}
+
+/*---------------------------------------------------------------------------*/
+/* PC-Relative Relocation Tests */
+/* */
+/* Our relocation handler assumes that the address of 'opcode' is where the */
+/* relocation is. Therefore, when creating a PCR test case, we will compute */
+/* a value for symval and pc in terms of &opcode. */
+/* */
+/*---------------------------------------------------------------------------*/
+void C60_TestRelocDo::test_R_C6000_PCR_S21()
+{
+ uint32_t opcode = 0x00000010;
+ uint32_t addend = 0x4;
+ uint32_t symval = ((uint32_t)&opcode & 0xffffffe0) + 0x50000; /* NOTE(review): pointer cast to uint32_t assumes 32-bit host addresses */
+ uint32_t pc = 0x0;
+
+ /* Test #1 -- destination is forward from PC */
+ /* PCR21 offset = 0x14001, i.e. (0x50000 + 0x4) >> 2 */
+ unit_c60_reloc_do(R_C6000_PCR_S21,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x00a00090); /* 0x14001 << 7, OR-ed with the original 0x10 */
+
+ /* Test #2 -- symval definition implies offset is negative */
+ /* PCR21 offset = 0x1d4001 (signed - negative): (-0xb0000 + 0x4) >> 2 in 21 bits */
+ opcode = 0x00000010;
+ symval = ((uint32_t)&opcode & 0xffffffe0) - 0xb0000;
+ unit_c60_reloc_do(R_C6000_PCR_S21,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0ea00090); /* 0x1d4001 << 7, OR-ed with the original 0x10 */
+}
+
+void C60_TestRelocDo::test_R_C6000_PCR_S12()
+{
+ uint32_t opcode = 0x00002120; /* BNOP */
+ uint32_t addend = 0x4;
+ uint32_t symval = ((uint32_t)&opcode & 0xffffffe0) + 0x500;
+ uint32_t pc = 0x0;
+
+ /* Test #1 -- destination is forward from PC */
+ /* PCR12 offset = 0x141, i.e. (0x500 + 0x4) >> 2 */
+ unit_c60_reloc_do(R_C6000_PCR_S12,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x01412120); /* 0x141 << 16, OR-ed with the original opcode */
+
+ /* Test #2 -- symval definition implies offset is negative */
+ /* PCR12 offset = 0xd41 (signed - negative): (-0xb00 + 0x4) >> 2 in 12 bits */
+ opcode = 0x00002120;
+ symval = ((uint32_t)&opcode & 0xffffffe0) - 0xb00;
+ unit_c60_reloc_do(R_C6000_PCR_S12,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0d412120); /* 0xd41 << 16, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_PCR_S10()
+{
+ uint32_t opcode = 0x01001020; /* BDEC */
+ uint32_t addend = 0x4;
+ uint32_t symval = ((uint32_t)&opcode & 0xffffffe0) + 0x50;
+ uint32_t pc = 0x0;
+
+ /* Test #1 -- destination is forward from PC */
+ /* PCR10 offset = 0x15, i.e. (0x50 + 0x4) >> 2 */
+ unit_c60_reloc_do(R_C6000_PCR_S10,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0102b020); /* 0x15 << 13, OR-ed with the original opcode */
+
+ /* Test #2 -- symval definition implies offset is negative */
+ /* PCR10 offset = 0x3d5 (signed - negative): (-0xb0 + 0x4) >> 2 in 10 bits */
+ opcode = 0x01001020;
+ symval = ((uint32_t)&opcode & 0xffffffe0) - 0xb0;
+ unit_c60_reloc_do(R_C6000_PCR_S10,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x017ab020); /* 0x3d5 << 13, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_PCR_S7()
+{
+ uint32_t opcode = 0x03006160; /* ADDKPC */
+ uint32_t addend = 0x4;
+ uint32_t symval = ((uint32_t)&opcode & 0xffffffe0) + 0x50;
+ uint32_t pc = 0x0;
+
+ /* Test #1 -- destination is forward from PC */
+ /* PCR7 offset = 0x15, i.e. (0x50 + 0x4) >> 2 */
+ unit_c60_reloc_do(R_C6000_PCR_S7,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x03156160); /* 0x15 << 16, OR-ed with the original opcode */
+
+ /* Test #2 -- symval definition implies offset is negative */
+ /* PCR7 offset = 0x75 (signed - negative): (-0x30 + 0x4) >> 2 in 7 bits */
+ opcode = 0x03006160;
+ symval = ((uint32_t)&opcode & 0xffffffe0) - 0x30;
+ unit_c60_reloc_do(R_C6000_PCR_S7,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x03756160); /* 0x75 << 16, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_ABS_S16()
+{
+ uint32_t opcode = 0x03000028; /* MVK */
+ uint32_t addend = 0x4;
+ uint32_t symval = 0xFFE;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_ABS_S16,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x03080128); /* (symval + addend) = 0x1002, shifted left by 7 */
+}
+
+void C60_TestRelocDo::test_R_C6000_ABS_L16()
+{
+ uint32_t opcode = 0x03000028; /* MVKL */
+ uint32_t addend = 0x4;
+ uint32_t symval = 0x04560FFE;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_ABS_L16,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x03080128); /* low 16 bits of 0x04561002 (0x1002), shifted left by 7 */
+}
+
+void C60_TestRelocDo::test_R_C6000_ABS_H16()
+{
+ uint32_t opcode = 0x03000068; /* MVKH */
+ uint32_t addend = 0x4;
+ uint32_t symval = 0x04560FFE;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_ABS_H16,
+ (uint8_t*) &opcode,
+ addend, symval, pc, 0, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x03022b68); /* high 16 bits of 0x04561002 (0x0456), shifted left by 7 */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_U15_B()
+{
+ uint32_t opcode = 0x0300002c; /* LDB */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x1357);
+ uint32_t pc = 0x0;
+
+ /* unsigned 15-bit SBR offset = 0x1357 (symval - static_base, unscaled) */
+ /* encoded in bits 22 - 8 */
+ unit_c60_reloc_do(R_C6000_SBR_U15_B,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0313572c); /* 0x1357 << 8, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_U15_H()
+{
+ uint32_t opcode = 0x0300004c; /* LDH */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x2246);
+ uint32_t pc = 0x0;
+
+ /* unsigned 16-bit SBR offset = 0x2246 */
+ /* scaled 15-bit SBR offset = 0x1123 (0x2246 >> 1) */
+ /* encoded in bits 22 - 8 */
+ unit_c60_reloc_do(R_C6000_SBR_U15_H,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0311234c); /* 0x1123 << 8, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_U15_W()
+{
+ uint32_t opcode = 0x0300006c; /* LDW */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x448c);
+ uint32_t pc = 0x0;
+
+ /* unsigned 17-bit SBR offset = 0x448c */
+ /* scaled 15-bit SBR offset = 0x1123 (0x448c >> 2) */
+ /* encoded in bits 22 - 8 */
+ unit_c60_reloc_do(R_C6000_SBR_U15_W,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0311236c); /* 0x1123 << 8, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_S16()
+{
+ uint32_t opcode = 0x03000028; /* MVK */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x1357);
+ uint32_t pc = 0x0;
+
+ /* Test #1 positive signed 16-bit offset */
+ /* 16-bit SBR offset = 0x1357 */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_S16,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0309aba8); /* 0x1357 << 7, OR-ed with the original opcode */
+
+ /* Test #2 negative signed 16-bit offset */
+ /* 16-bit SBR offset = 0xeca9 (-0x1357) */
+ /* encoded in bits 22-7 of opcode */
+ symval = (static_base - 0x1357); /* NOTE(review): opcode is not reset here (the PCR tests do reset it); it still holds Test #1's result */
+ unit_c60_reloc_do(R_C6000_SBR_S16,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x037654a8); /* 0xeca9 << 7 over a clean 0x03000028; holds only if the old field bits get overwritten */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_L16_B()
+{
+ uint32_t opcode = 0x03000028; /* MVKL */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x11123);
+ uint32_t pc = 0x0;
+
+ /* 16-bit SBR offset = 0x1123 (low 16 bits of the 0x11123 offset) */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_L16_B,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x030891a8); /* 0x1123 << 7, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_L16_H()
+{
+ uint32_t opcode = 0x03000028; /* MVKL */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x12246);
+ uint32_t pc = 0x0;
+
+ /* 17-bit SBR offset = 0x12246 */
+ /* scaled SBR offset = 0x9123 (0x12246 >> 1) */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_L16_H,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x034891a8); /* 0x9123 << 7, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_L16_W()
+{
+ uint32_t opcode = 0x03000028; /* MVKL */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x1448c);
+ uint32_t pc = 0x0;
+
+ /* 18-bit SBR offset = 0x1448c */
+ /* scaled SBR offset = 0x5123 (0x1448c >> 2) */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_L16_W,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x032891a8); /* 0x5123 << 7, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_H16_B()
+{
+ uint32_t opcode = 0x03000068; /* MVKH */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x357448c);
+ uint32_t pc = 0x0;
+
+ /* total SBR offset = 0x357448c */
+ /* upper 16-bits of SBR offset = 0x357 (0x357448c >> 16) */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_H16_B,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0301abe8); /* 0x357 << 7, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_H16_H()
+{
+ uint32_t opcode = 0x03000068; /* MVKH */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x357448c);
+ uint32_t pc = 0x0;
+
+ /* total SBR offset = 0x357448c */
+ /* scaled SBR offset = 0x1aba246 (0x357448c >> 1) */
+ /* upper 16-bits of scaled SBR offset = 0x1ab */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_H16_H,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x0300d5e8); /* 0x1ab << 7, OR-ed with the original opcode */
+}
+
+void C60_TestRelocDo::test_R_C6000_SBR_H16_W()
+{
+ uint32_t opcode = 0x03000068; /* MVKH */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = (static_base + 0x357448c);
+ uint32_t pc = 0x0;
+
+ /* total SBR offset = 0x357448c */
+ /* scaled SBR offset = 0x0d5d123 (0x357448c >> 2) */
+ /* upper 16-bits of scaled SBR offset = 0x0d5 */
+ /* encoded in bits 22-7 of opcode */
+ unit_c60_reloc_do(R_C6000_SBR_H16_W,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 0);
+
+ TS_ASSERT_EQUALS(opcode, 0x03006ae8); /* 0x0d5 << 7, OR-ed with the original opcode */
+}
+
+/* The DSBT table is accessed via DP-relative addressing with */
+/* an LDW instruction, but the DSBT_INDEX is really an index */
+/* into the DSBT table; the index is scaled to a 4-word offset. */
+void C60_TestRelocDo::test_R_C6000_DSBT_INDEX()
+{
+ uint32_t opcode = 0x0300006c; /* LDW */
+ uint32_t addend = 0x0;
+ uint32_t static_base = 0x04000000;
+ uint32_t symval = static_base;
+ uint32_t pc = 0x0;
+
+ unit_c60_reloc_do(R_C6000_DSBT_INDEX,
+ (uint8_t*) &opcode,
+ addend, symval, pc, static_base, 0, 3); /* last argument is the dsbt_index under test */
+
+ TS_ASSERT_EQUALS(opcode, 0x0300036c); /* dsbt_index 3 appears as 0x3 << 8 in the opcode */
+}
+
+/*****************************************************************************/
+/* C60_TestRelUnpackAddend */
+/* */
+/* Tests the C60 rel_unpack_addend function. */
+/* */
+/* In cases where the addends are unpacked in the same way, only one is */
+/* tested. */
+/* */
+/* All tests follow the same flow: */
+/* */
+/* 1. Create a valid instruction for the relocation type, where the addend */
+/* is packed in the instruction. */
+/* 2. Call rel_unpack_addend(). */
+/* 3. Check that the addend is correct. */
+/* */
+/* Relocations may be tested multiple times to handle variations, such as */
+/* positive/negative addends, extra bits depending on the encoding, etc. */
+/* */
+/* NOTE!! C60 ONLY SUPPORTS RELA TYPE RELOCATIONS, SO ADDEND FIELD IS STORED */
+/* IN RELOCATION ENTRY ITSELF. */
+/*****************************************************************************/
+#if 0
+void C60_TestRelUnpackAddend::test_R_C6000_ABS32()
+{
+ uint32_t address_space=0xFEDCBA9; /* field contents that should come back unchanged as the addend */
+ uint32_t addend;
+
+ unit_c60_rel_unpack_addend(R_C6000_ABS32,
+ (uint8_t*)&address_space,
+ &addend);
+
+ TS_ASSERT_EQUALS(addend, address_space); /* expected: addend equals the full 32-bit field */
+}
+
+void C60_TestRelUnpackAddend::test_R_C6000_ABS16()
+{
+ uint16_t address_space=0x7FFF; /* largest positive 16-bit value */
+ uint32_t addend;
+
+ unit_c60_rel_unpack_addend(R_C6000_ABS16,
+ (uint8_t*)&address_space,
+ &addend);
+
+ TS_ASSERT_EQUALS(addend, 0x7FFF); /* positive value expected unchanged */
+
+ address_space = 0x8000; /* sign bit of the 16-bit field set */
+
+ unit_c60_rel_unpack_addend(R_C6000_ABS16,
+ (uint8_t*)&address_space,
+ &addend);
+
+ TS_ASSERT_EQUALS(addend, 0xFFFF8000); /* 0x8000 sign-extended to 32 bits */
+}
+#endif
+
+
+/*****************************************************************************/
+/* C60_TestRelOverflow */
+/* */
+/* Test the C60 rel_overflow function. */
+/* */
+/* In each case, we test the upper and lower bounds of each relocation type. */
+/* Only relocation types where the overflow is checked in rel_overflow are */
+/* considered. In most cases four tests are performed to test the upper and */
+/* lower bounds (1 pass and 1 fail for each). */
+/* */
+/* NOTE!! HAVEN'T REFACTORED OVERFLOW CHECK OUT OF RELOCATION HANDLERS FOR */
+/* C60, SO OVERFLOW SHOULD BE TESTED AS PART OF THE RELOC DO(???) */
+/* */
+/*****************************************************************************/
+void C60_TestRelOverflow::test_R_C6000_ABS16()
+{
+ int32_t reloc_val = 0xFFFF; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x10000; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x8000; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x8001; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_ABS8()
+{
+ int32_t reloc_val = 0xFF; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS8, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x100; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS8, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x80; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS8, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x81; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS8, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_PCR_S21()
+{
+ int32_t reloc_val = 0x3FFFFC; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S21, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x400000; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S21, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x400000; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S21, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x400001; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S21, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_PCR_S12()
+{
+ int32_t reloc_val = 0x1FFC; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S12, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x2000; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S12, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x2000; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S12, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x2001; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S12, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_PCR_S10()
+{
+ int32_t reloc_val = 0x7FC; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S10, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x800; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S10, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x800; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S10, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x801; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S10, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_PCR_S7()
+{
+ int32_t reloc_val = 0xFC; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S7, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x100; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S7, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x100; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S7, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x101; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_PCR_S7, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_SBR_S16()
+{
+ int32_t reloc_val = 0x7FFF; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x8000; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x8000; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x8001; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_ABS_S16()
+{
+ int32_t reloc_val = 0x7FFF; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x8000; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+
+ reloc_val = -0x8000; /* lower bound probe: expect no overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = -0x8001; /* below the lower bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_ABS_S16, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_SBR_U15_B()
+{
+ uint32_t reloc_val = 0x7FFF; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_U15_B, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x8000; /* above the upper bound: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_U15_B, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_SBR_U15_H()
+{
+ uint32_t reloc_val = 0xFFFE; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_U15_H, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0xFFFF; /* one more than the accepted probe: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_U15_H, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_SBR_U15_W()
+{
+ uint32_t reloc_val = 0x1FFFC; /* upper bound probe: expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_U15_W, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x1FFFD; /* one more than the accepted probe: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_SBR_U15_W, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
+void C60_TestRelOverflow::test_R_C6000_DSBT_INDEX()
+{
+ uint32_t reloc_val = 0x1FFFC; /* upper bound probe (same values as the SBR_U15_W test): expect no overflow */
+ int rval;
+
+ rval = unit_c60_rel_overflow(R_C6000_DSBT_INDEX, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 0);
+
+ reloc_val = 0x1FFFD; /* one more than the accepted probe: expect overflow */
+
+ rval = unit_c60_rel_overflow(R_C6000_DSBT_INDEX, reloc_val);
+
+ TS_ASSERT_EQUALS(rval, 1);
+}
+
diff --git a/src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.h b/src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.h
new file mode 100644
index 0000000..67a437d
--- /dev/null
+++ b/src/core/dsp/ocl_load/C60_DLOAD_REL/test_c60_reloc.h
@@ -0,0 +1,101 @@
+/*
+* test_c60_reloc.h
+*
+* Specification of C6x-specific relocation handler unit tests.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef _TEST_C60_RELOC_H_
+#define _TEST_C60_RELOC_H_
+#include "c60_elf32.h"
+#include <cxxtest/TestSuite.h>
+
+extern "C"
+{ /* Unit-test entry points declared with C linkage so the C++ test suites can call them. */
+extern void unit_c60_reloc_do(C60_RELOC_TYPE r_type, uint8_t* address,
+ uint32_t addend, uint32_t symval, uint32_t pc,
+ uint32_t base_pointer, int wrong_endian, int32_t dsbt_index);
+ /* NOTE(review): presumably reads the addend for r_type at address into *addend - confirm in c60_reloc.c. */
+extern void unit_c60_rel_unpack_addend(C60_RELOC_TYPE r_type,
+ uint8_t* address,
+ uint32_t* addend);
+ /* The overflow tests in this change compare the result against 0 (value accepted) and 1 (overflow). */
+extern int unit_c60_rel_overflow(C60_RELOC_TYPE r_type, int32_t reloc_value);
+
+}
+
+class C60_TestRelocDo : public CxxTest::TestSuite
+{ /* CxxTest suite with one test method per C6000 relocation type. NOTE(review): bodies are not in this header; presumably they drive unit_c60_reloc_do() - confirm. */
+ public:
+ void test_R_C6000_ABS32();
+ void test_R_C6000_ABS16();
+ void test_R_C6000_ABS8();
+ void test_R_C6000_PCR_S21();
+ void test_R_C6000_PCR_S12();
+ void test_R_C6000_PCR_S10();
+ void test_R_C6000_PCR_S7();
+ void test_R_C6000_ABS_S16();
+ void test_R_C6000_ABS_L16();
+ void test_R_C6000_ABS_H16();
+ void test_R_C6000_SBR_U15_B();
+ void test_R_C6000_SBR_U15_H();
+ void test_R_C6000_SBR_U15_W();
+ void test_R_C6000_SBR_S16();
+ void test_R_C6000_SBR_L16_B();
+ void test_R_C6000_SBR_L16_H();
+ void test_R_C6000_SBR_L16_W();
+ void test_R_C6000_SBR_H16_B();
+ void test_R_C6000_SBR_H16_H();
+ void test_R_C6000_SBR_H16_W();
+ void test_R_C6000_DSBT_INDEX();
+};
+
+class C60_TestRelOverflow : public CxxTest::TestSuite
+{ /* CxxTest suite for overflow detection; the definitions visible in this change pass boundary values to unit_c60_rel_overflow() and expect 0 or 1. */
+ public:
+ void test_R_C6000_ABS16();
+ void test_R_C6000_ABS8();
+ void test_R_C6000_PCR_S21();
+ void test_R_C6000_PCR_S12();
+ void test_R_C6000_PCR_S10();
+ void test_R_C6000_PCR_S7();
+ void test_R_C6000_SBR_S16();
+ void test_R_C6000_ABS_S16();
+ void test_R_C6000_SBR_U15_B();
+ void test_R_C6000_SBR_U15_H();
+ void test_R_C6000_SBR_U15_W();
+ void test_R_C6000_DSBT_INDEX();
+};
+
+#endif /* _TEST_C60_RELOC_H_ */
diff --git a/src/core/dsp/ocl_load/CMakeLists.txt b/src/core/dsp/ocl_load/CMakeLists.txt
new file mode 100644
index 0000000..a459542
--- /dev/null
+++ b/src/core/dsp/ocl_load/CMakeLists.txt
@@ -0,0 +1,26 @@
+include_directories (.
+ C60_DLOAD_REL
+ C60_DLOAD_DYN
+ DLOAD_SYM
+ DLOAD
+ DLOAD_API
+ DLWRAPPER
+ ) # Header search path: this directory plus each loader sub-directory listed above.
+ # Append to the C flags: -fPIC, the C60_TARGET and LOADER_DEBUG defines, -g, and two pointer/int cast warning suppressions.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -DC60_TARGET -DLOADER_DEBUG -g -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast")
+ # C sources that make up the loader library.
+set(OCL_LOAD_SRC_FILES
+ ocl_load.c
+ C60_DLOAD_REL/c60_reloc.c
+ C60_DLOAD_DYN/c60_dynamic.c
+ DLOAD_SYM/symtab.c
+ DLOAD/ArrayList.c
+ DLOAD/dload.c
+ DLOAD/elf32.c
+ DLOAD/dload_endian.c
+)
+ # Build the sources above as the static library "oclload".
+add_library(oclload STATIC ${OCL_LOAD_SRC_FILES})
+ # Request that libraries be written to the lib directory under the top-level build directory.
+SET(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib)
+
diff --git a/src/core/dsp/ocl_load/DLOAD/ArrayList.c b/src/core/dsp/ocl_load/DLOAD/ArrayList.c
new file mode 100644
index 0000000..4452bfc
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/ArrayList.c
@@ -0,0 +1,122 @@
+/*
+* ArrayList.c
+*
+* Array_List is a C implementation of a C++ vector class.
+*
+* This class emulates a resizable array along the lines of a C++
+* vector or Java ArrayList class in C, and uses the convention
+* of passing a pointer to the current "object" as the first
+* argument.
+*
+* Usage is defined as follows:
+*
+* Array_List obj;
+* AL_initialize(&obj, sizeof(type_name), initial_num_elem);
+*
+* ...
+*
+* type_name *ptr = (type_name*)(obj.buf);
+* for(i = 0; i < AL_size(&obj); i++)
+* do_something_to(ptr[i]);
+* type_name to_append = ...;
+* AL_append(&obj, &to_append);
+*
+* ...
+*
+* AL_destroy(&obj);
+*
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <inttypes.h>
+#include <string.h>
+#include "ArrayList.h"
+#include "dload_api.h"
+
+/*****************************************************************************/
+/* AL_INITIALIZE() - Initialize a newly created Array_List object.           */
+/*                                                                           */
+/*    obj       - list to set up; whatever obj->buf held before is           */
+/*                overwritten, not freed.                                    */
+/*    type_size - size of one element, in bytes.                             */
+/*    num_elem  - initial capacity in elements; any value <= 0 is treated    */
+/*                as 1.                                                      */
+/*                                                                           */
+/*    The result of DLIF_malloc() is stored in obj->buf without being        */
+/*    checked here.                                                          */
+/*****************************************************************************/
+void AL_initialize(Array_List* obj, int32_t type_size, int32_t num_elem)
+{
+   /*------------------------------------------------------------------------*/
+   /* A zero capacity could never grow (AL_append() doubles buffer_size),    */
+   /* and a negative one would produce a bogus allocation size, so clamp     */
+   /* both to a single element.                                              */
+   /*------------------------------------------------------------------------*/
+   if (num_elem <= 0) num_elem = 1;
+
+   /*------------------------------------------------------------------------*/
+   /* Compute the byte count in size_t to avoid signed 32-bit overflow.      */
+   /*------------------------------------------------------------------------*/
+   obj->buf         = DLIF_malloc((size_t)type_size * (size_t)num_elem);
+   obj->type_size   = type_size;
+   obj->size        = 0;
+   obj->buffer_size = num_elem;
+}
+
+/*****************************************************************************/
+/* AL_APPEND() - Append an element to the end of an Array_List.              */
+/*                                                                           */
+/*    obj       - list to append to.                                         */
+/*    to_append - address of the element; obj->type_size bytes are copied    */
+/*                from it.                                                   */
+/*                                                                           */
+/*    When the buffer is full its capacity is doubled.  If DLIF_malloc()     */
+/*    returns NULL for the larger buffer, the list is left exactly as it     */
+/*    was and the element is NOT appended; callers can detect this by        */
+/*    comparing AL_size() before and after the call.                         */
+/*****************************************************************************/
+void AL_append(Array_List* obj, void* to_append)
+{
+   /*------------------------------------------------------------------------*/
+   /* Grow the buffer if we need more space to add the new data to it.       */
+   /* The replacement buffer is allocated and checked before any field of    */
+   /* obj is changed, so a failed allocation cannot lose the old contents.   */
+   /*------------------------------------------------------------------------*/
+   if (obj->size >= obj->buffer_size)
+   {
+      int32_t new_capacity = obj->buffer_size * 2;
+      void*   new_buffer   = DLIF_malloc((size_t)new_capacity *
+                                         (size_t)obj->type_size);
+
+      if (!new_buffer) return;
+
+      memcpy(new_buffer, obj->buf, obj->size * obj->type_size);
+      DLIF_free(obj->buf);
+
+      obj->buf         = new_buffer;
+      obj->buffer_size = new_capacity;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* There is now room for one more element: copy it in after the last one. */
+   /*------------------------------------------------------------------------*/
+   memcpy(((uint8_t*)obj->buf) + obj->type_size * obj->size, to_append,
+          obj->type_size);
+   obj->size++;
+}
+
+/*****************************************************************************/
+/* AL_SIZE() - Report how many elements are currently stored in an           */
+/*             Array_List (the value of its size field).                     */
+/*****************************************************************************/
+int32_t AL_size(Array_List* obj)
+{
+   const int32_t element_count = obj->size;
+
+   return element_count;
+}
+
+/*****************************************************************************/
+/* AL_DESTROY() - Free up memory associated with an Array_List that is no    */
+/*                longer in use.                                             */
+/*                                                                           */
+/*    The buffer pointer is cleared after it is released, so calling         */
+/*    AL_destroy() again on the same object does not free the buffer twice.  */
+/*    The object must be passed to AL_initialize() again before reuse.       */
+/*****************************************************************************/
+void AL_destroy(Array_List* obj)
+{
+   /*------------------------------------------------------------------------*/
+   /* DLIF_free() is supplied by the loader client; skip the call for a      */
+   /* NULL buffer rather than assume it tolerates one.                       */
+   /*------------------------------------------------------------------------*/
+   if (obj->buf)
+   {
+      DLIF_free(obj->buf);
+      obj->buf = NULL;
+   }
+}
diff --git a/src/core/dsp/ocl_load/DLOAD/ArrayList.h b/src/core/dsp/ocl_load/DLOAD/ArrayList.h
new file mode 100644
index 0000000..2c03788
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/ArrayList.h
@@ -0,0 +1,92 @@
+/*
+* ArrayList.h
+*
+* This implementation of ArrayList is a replacement for the C++
+* vector class in C.
+*
+* This class emulates a resizable array along the lines of a C++
+* vector or Java ArrayList class in C, and uses the convention
+* of passing a pointer to the current "object" as the first
+* argument.
+*
+* Usage is defined as follows:
+*
+* Array_List obj;
+* AL_initialize(&obj, sizeof(type_name), initial_num_elem);
+*
+* ...
+*
+* type_name *ptr = (type_name*)(obj.buf);
+* for(i = 0; i < AL_size(&obj); i++)
+* do_something_to(ptr[i]);
+* type_name to_append = ...;
+* AL_append(&obj, &to_append);
+*
+* ...
+*
+* AL_destroy(&obj);
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef ARRAYLIST_H
+#define ARRAYLIST_H
+
+#include <inttypes.h>
+
+/**********************************************************************/
+/* Array_List - structure type specification. */
+/**********************************************************************/
+typedef struct
+{
+ void *buf; /* element storage, obtained from DLIF_malloc() */
+ int32_t type_size; /* size of one element, in bytes */
+ int32_t size; /* number of elements currently stored */
+ int32_t buffer_size; /* number of elements buf has room for */
+} Array_List;
+
+/*--------------------------------------------------------------------*/
+/* Array_List Member Functions: */
+/* */
+/* AL_initialize() - Initialize a newly created Array_List object. */
+/* AL_append() - Append an element to the end of an Array_List. */
+/* AL_size() - Get number of elements in an Array_List. */
+/* AL_destroy() - Free memory associated with an Array_List that is */
+/* no longer in use. */
+/*--------------------------------------------------------------------*/
+void AL_initialize(Array_List* obj, int32_t type_size, int32_t num_elem); /* type_size: bytes per element; num_elem: initial capacity (0 is treated as 1) */
+void AL_append(Array_List* obj, void* to_append); /* copies type_size bytes from to_append; doubles the buffer when it is full */
+int32_t AL_size(Array_List* obj); /* returns obj->size */
+void AL_destroy(Array_List* obj); /* releases obj->buf through DLIF_free() */
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/Queue.h b/src/core/dsp/ocl_load/DLOAD/Queue.h
new file mode 100644
index 0000000..3f85c16
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/Queue.h
@@ -0,0 +1,194 @@
+/*
+* Queue.h
+*
+* Interface to Linked List
+* ------------------------
+*
+* This is an implementation of a type-independent linked list class for C.
+* It's basically a template class, but uses macros instead so that it can
+* be compiled with a C-only compiler.
+*
+* To define a linked list class:
+* #include "Queue.h"
+* TYPE_QUEUE_DEFINITION(object_type,Class_Identifier)
+*
+* In a separate C file:
+* #include "Queue.h"
+* TYPE_QUEUE_DEFINITION(object_type,Class_Identifier)
+* TYPE_QUEUE_IMPLEMENTATION(object_type,Class_Identifier)
+*
+* Now, to create a list:
+* Class_Identifier_Queue name;
+* Get it initialized to zero everywhere somehow, maybe like this:
+* Class_Identifier_initialize_queue(&name);
+*
+* To add to the list:
+* Class_Identifier_enqueue(&name, object);
+*
+* To iterate over the list:
+* Class_Identifier_Queue_Node *it = name.front_ptr;
+* while(it) { do_something_to_(it->value); it = it->next_ptr; }
+*
+* To delete from the list:
+* If it's the first node:
+* Class_Identifier_dequeue(&name);
+* If it's not:
+* predecessor_node->next_ptr = deleted_node->next_ptr;
+* name.size--;
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef QUEUE_H
+#define QUEUE_H
+
+#include <inttypes.h>
+#include "dload_api.h"
+
+/*****************************************************************************/
+/* TYPE_QUEUE_DEFINITION() - Define structure specifications for a linked */
+/* list of t_name objects. */
+/*****************************************************************************/
+#define TYPE_QUEUE_DEFINITION(t, t_name) \
+struct t_name##_Queue_Node_ \
+{ \
+ t value; /* element stored in this node */ \
+ struct t_name##_Queue_Node_* next_ptr; /* next node toward the back; NULL at the back */ \
+}; \
+typedef struct t_name##_Queue_Node_ t_name##_Queue_Node; \
+ \
+typedef struct \
+{ \
+ t_name##_Queue_Node* front_ptr; /* node returned by the next dequeue; NULL when empty */ \
+ t_name##_Queue_Node* back_ptr; /* most recently enqueued node; NULL when empty */ \
+ int32_t size; /* number of nodes in the queue */ \
+} t_name##_Queue; \
+ \
+extern void t_name##_initialize_queue(t_name##_Queue* queue); \
+extern void t_name##_enqueue(t_name##_Queue* queue, t to_enqueue); \
+extern t t_name##_dequeue(t_name##_Queue* queue); \
+extern void t_name##_remove(t_name##_Queue* queue, t to_remove);
+
+/*****************************************************************************/
+/* TYPE_QUEUE_INITIALIZER - Initializer for an empty Queue {front, back, size}. */
+/*****************************************************************************/
+#define TYPE_QUEUE_INITIALIZER {NULL, NULL, 0}
+
+
+/*****************************************************************************/
+/* TYPE_QUEUE_IMPLEMENTATION() - Define member functions of new linked list */
+/* "class" of t_name objects. */
+/* */
+/* <type>_initialize_queue() - clears the queue */
+/* <type>_enqueue() - adds a <t> type object to the end of the queue */
+/* <type>_dequeue() - remove a <t> type object from the front of the queue */
+/* and provide access to it to the caller */
+/* <type>_remove() - find and remove a <t> type object from the queue */
+/*****************************************************************************/
+#define TYPE_QUEUE_IMPLEMENTATION(t, t_name) \
+/* Put <queue> into the empty state. Nodes already linked are not freed. */ \
+void t_name##_initialize_queue (t_name##_Queue* queue) \
+{ \
+   queue->front_ptr = queue->back_ptr = NULL; \
+   queue->size = 0; \
+} \
+ \
+/* Append <to_enqueue> at the back of <queue>. The node is allocated and */ \
+/* filled in before the queue is modified, so if DLIF_malloc() returns */ \
+/* NULL the queue is left unchanged and the element is not added. */ \
+void t_name##_enqueue(t_name##_Queue* queue, t to_enqueue) \
+{ \
+   t_name##_Queue_Node* new_node = \
+      (t_name##_Queue_Node*)(DLIF_malloc(sizeof(t_name##_Queue_Node))); \
+ \
+   if (!new_node) return; \
+ \
+   new_node->value = to_enqueue; \
+   new_node->next_ptr = NULL; \
+ \
+   /* An empty queue has no back node: the new node is also the front. */ \
+   if (!queue->back_ptr) \
+      queue->front_ptr = new_node; \
+   else \
+      queue->back_ptr->next_ptr = new_node; \
+ \
+   queue->back_ptr = new_node; \
+   queue->size++; \
+} \
+ \
+/* Remove the front node, free it and return its value. An empty queue */ \
+/* yields (t) NULL. */ \
+t t_name##_dequeue(t_name##_Queue* queue) \
+{ \
+   t to_ret; \
+   t_name##_Queue_Node* next_ptr = NULL; \
+ \
+   if (!queue->size) return (t) NULL; \
+ \
+   next_ptr = queue->front_ptr->next_ptr; \
+   queue->size--; \
+   to_ret = queue->front_ptr->value; \
+   DLIF_free((void*)(queue->front_ptr)); \
+ \
+   if(!queue->size) \
+      queue->front_ptr = queue->back_ptr = NULL; \
+   else \
+      queue->front_ptr = next_ptr; \
+ \
+   return to_ret; \
+} \
+ \
+/* Unlink and free the first node whose value compares equal (==) to */ \
+/* <to_remove>. The queue is left as it was when no node matches. */ \
+void t_name##_remove(t_name##_Queue* queue, t to_remove) \
+{ \
+   t_name##_Queue_Node* prev_ptr = NULL; \
+   t_name##_Queue_Node* curr_ptr = queue->front_ptr; \
+   t_name##_Queue_Node* next_ptr = NULL; \
+ \
+   /* On exit curr_ptr is the match (or NULL), with prev_ptr and next_ptr */ \
+   /* set to its neighbours. */ \
+   for (; curr_ptr; curr_ptr = next_ptr) \
+   { \
+      next_ptr = curr_ptr->next_ptr; \
+      if (curr_ptr->value == to_remove) break; \
+      prev_ptr = curr_ptr; \
+   } \
+ \
+   if (curr_ptr) \
+   { \
+      if (prev_ptr) prev_ptr->next_ptr = next_ptr; \
+      queue->size--; \
+      DLIF_free((void*)(curr_ptr)); \
+   } \
+ \
+   /* Repair the end pointers if the removed node was at either end. */ \
+   if (!queue->size) \
+      queue->front_ptr = queue->back_ptr = NULL; \
+   else \
+   { \
+      if (!prev_ptr) queue->front_ptr = next_ptr; \
+      if (!next_ptr) queue->back_ptr = prev_ptr; \
+   } \
+}
+
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/Stack.h b/src/core/dsp/ocl_load/DLOAD/Stack.h
new file mode 100644
index 0000000..d36f5e0
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/Stack.h
@@ -0,0 +1,155 @@
+/*
+* Stack.h
+*
+* Interface to Stack
+* ------------------
+*
+* This is an implementation of a type-independent stack implemented as
+* a singly linked list class for C. It's basically a template class, but
+* uses macros instead, so that it can be compiled with a C-only compiler.
+*
+* To define a Stack class:
+* #include "Stack.h"
+* TYPE_STACK_DEFINITION(object_type,Class_Identifier)
+*
+* In a separate C file:
+* #include "Stack.h"
+* TYPE_STACK_DEFINITION(object_type,Class_Identifier)
+* TYPE_STACK_IMPLEMENTATION(object_type,Class_Identifier)
+*
+* Now, to create a stack:
+* Class_Identifier_Stack name;
+* Get it initialized to zero everywhere somehow, maybe like this:
+* Class_Identifier_initialize_stack(&name);
+*
+* To add to the stack:
+* Class_Identifier_push(&name, object);
+*
+* To access the top of the stack:
+* Class_Identifier_Stack_Node *tos = name.top_ptr;
+* do_something_to_(tos->value);
+*
+* To delete from the stack:
+* if (name.size > 0) Class_Identifier_pop(&name);
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef STACK_H
+#define STACK_H
+
+#include <inttypes.h>
+#include "dload_api.h"
+
+/*****************************************************************************/
+/* TYPE_STACK_DEFINITION() - Define structure specifications for a last-in, */
+/* first-out linked list of t_name objects. */
+/*****************************************************************************/
+#define TYPE_STACK_DEFINITION(t, t_name) \
+struct t_name##_Stack_Node_ \
+{ \
+ t value; /* element stored in this node */ \
+ struct t_name##_Stack_Node_* next_ptr; /* node pushed before this one; NULL at the bottom */ \
+}; \
+typedef struct t_name##_Stack_Node_ t_name##_Stack_Node; \
+ \
+typedef struct \
+{ \
+ t_name##_Stack_Node* top_ptr; /* most recently pushed node; NULL when empty */ \
+ t_name##_Stack_Node* bottom_ptr; /* first node pushed; NULL when empty */ \
+ int size; /* number of nodes on the stack */ \
+} t_name##_Stack; \
+ \
+extern void t_name##_initialize_stack(t_name##_Stack* stack); \
+extern void t_name##_push(t_name##_Stack* stack, t to_push); \
+extern t t_name##_pop(t_name##_Stack* stack);
+
+/*****************************************************************************/
+/* TYPE_STACK_INITIALIZER - Initializer for an empty Stack {top, bottom, size}. */
+/*****************************************************************************/
+#define TYPE_STACK_INITIALIZER {NULL, NULL, 0 }
+
+/*****************************************************************************/
+/* TYPE_STACK_IMPLEMENTATION() - Define member functions of new LIFO linked */
+/* list "class" of t_name objects. */
+/* */
+/* <type>_initialize_stack() - clears the stack */
+/* <type>_push() - pushes a <t> type object to the top of the stack */
+/* <type>_pop() - pop a <t> type object from the top of the stack */
+/* and provide access to it to the caller */
+/*****************************************************************************/
+#define TYPE_STACK_IMPLEMENTATION(t, t_name) \
+/* Put <stack> into the empty state. Nodes already linked are not freed. */ \
+void t_name##_initialize_stack (t_name##_Stack* stack) \
+{ \
+   stack->top_ptr = stack->bottom_ptr = NULL; \
+   stack->size = 0; \
+} \
+ \
+/* Push <to_push> on top of <stack>. The node is allocated and filled in */ \
+/* before the stack is modified, so if DLIF_malloc() returns NULL the */ \
+/* stack is left unchanged and the element is not pushed. */ \
+void t_name##_push(t_name##_Stack* stack, t to_push) \
+{ \
+   t_name##_Stack_Node* new_node = \
+      (t_name##_Stack_Node*)(DLIF_malloc(sizeof(t_name##_Stack_Node))); \
+ \
+   if (!new_node) return; \
+ \
+   new_node->value = to_push; \
+   new_node->next_ptr = stack->top_ptr; \
+ \
+   /* The first node pushed onto an empty stack is also its bottom. */ \
+   if (!stack->top_ptr) stack->bottom_ptr = new_node; \
+ \
+   stack->top_ptr = new_node; \
+   stack->size++; \
+} \
+ \
+/* Remove the top node, free it and return its value. The stack must not */ \
+/* be empty: top_ptr is dereferenced without a check. */ \
+t t_name##_pop(t_name##_Stack* stack) \
+{ \
+   t to_ret; \
+   t_name##_Stack_Node* next_ptr = stack->top_ptr->next_ptr; \
+ \
+   stack->size--; \
+   to_ret = stack->top_ptr->value; \
+   DLIF_free((void*)(stack->top_ptr)); \
+ \
+   if(!stack->size) \
+      stack->top_ptr = stack->bottom_ptr = NULL; \
+   else \
+      stack->top_ptr = next_ptr; \
+ \
+   return to_ret; \
+}
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/dload.c b/src/core/dsp/ocl_load/DLOAD/dload.c
new file mode 100644
index 0000000..e5924d8
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/dload.c
@@ -0,0 +1,3534 @@
+/*
+* dload.c
+*
+* Core Dynamic Loader Reference Implementation
+*
+* This implementation of the core dynamic loader is platform independent,
+* but it is object file format dependent. In particular, this
+* implementation supports ELF object file format.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <limits.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "ArrayList.h"
+#include "Queue.h"
+#include "Stack.h"
+
+#include "symtab.h"
+#include "dload_endian.h"
+#include "elf32.h"
+#include "dload.h"
+#include "relocate.h"
+#include "dload_api.h"
+
+#ifdef ARM_TARGET
+#include "arm_dynamic.h"
+#endif
+
+#ifdef C60_TARGET
+#include "c60_dynamic.h"
+#endif
+
+#include "virtual_targets.h"
+
+/*---------------------------------------------------------------------------*/
+/* These globals are used only to test the reference client implementation.  */
+/*---------------------------------------------------------------------------*/
+int global_argc;
+char **global_argv;
+
+/*---------------------------------------------------------------------------*/
+/* Contains filenames (type const char*) the system is in the process of     */
+/* loading.  Used to detect cycles in incorrectly compiled ELF binaries.     */
+/*---------------------------------------------------------------------------*/
+Array_List DLIMP_module_dependency_list;
+
+/*---------------------------------------------------------------------------*/
+/* Queue of pointers (type DLIMP_Loaded_Module*) to the modules that the     */
+/* system has loaded into target memory.                                     */
+/*---------------------------------------------------------------------------*/
+TYPE_QUEUE_IMPLEMENTATION(DLIMP_Loaded_Module*, loaded_module_ptr)
+loaded_module_ptr_Queue DLIMP_loaded_objects = TYPE_QUEUE_INITIALIZER;
+
+/*---------------------------------------------------------------------------*/
+/* Dependency Stack - LIFO stack of dynamic modules that are loaded          */
+/* when client asks to load a dynamic executable or library.  Note that      */
+/* dependents that have already been loaded with another module will not     */
+/* appear on this stack.                                                     */
+/*---------------------------------------------------------------------------*/
+TYPE_STACK_IMPLEMENTATION(DLIMP_Dynamic_Module*, dynamic_module_ptr)
+dynamic_module_ptr_Stack DLIMP_dependency_stack = TYPE_STACK_INITIALIZER;
+
+/*---------------------------------------------------------------------------*/
+/* Current virtual target, set after reading the file headers.  This is used */
+/* to access target specific functions.  It is NULL until a target is set.   */
+/*---------------------------------------------------------------------------*/
+VIRTUAL_TARGET *cur_target = NULL;
+
+/*---------------------------------------------------------------------------*/
+/* Support for profiling performance of dynamic loader core.  The helpers    */
+/* are compiled in for both LOADER_DEBUG and LOADER_PROFILE builds, because  */
+/* initialize_loaded_module() uses them under that same condition.           */
+/*---------------------------------------------------------------------------*/
+#if LOADER_DEBUG || LOADER_PROFILE
+static clock_t cycle0 = 0;
+static clock_t cycle_end = 0;
+#define profile_start_clock() (cycle0 = clock())
+#define profile_stop_clock() (cycle_end = clock())
+#define profile_cycle_count() (cycle_end - cycle0)
+#endif
+
+/*---------------------------------------------------------------------------*/
+/* The dynamic loader keeps a queue, TI_init_table, of IF_single_record      */
+/* pointers to store pre-init and init data.  This is done because pre-init  */
+/* and init functions could reference as-yet unrelocated symbols from other  */
+/* modules.  As such it is safer to store relevant function addresses and    */
+/* execute them only after all modules are relocated.                        */
+/*---------------------------------------------------------------------------*/
+TYPE_QUEUE_IMPLEMENTATION(IF_single_record*, IF_table)
+IF_table_Queue TI_init_table = TYPE_QUEUE_INITIALIZER;
+
+static VIRTUAL_TARGET *get_vt_obj(int given_id);
+static void read_args_from_section(DLIMP_Loaded_Module* ep_module);
+static BOOL seg_has_space_for_write(DLIMP_Loaded_Module* lmodule, int sz);
+static BOOL write_arguments_to_args_section(DLOAD_HANDLE handle,
+ int argc, char** argv,
+ DLIMP_Loaded_Module *ep_module);
+
+/*****************************************************************************/
+/* DLOAD_create()                                                            */
+/*                                                                           */
+/*    Create an instance of the dynamic loader core.                         */
+/*                                                                           */
+/*    client_handle:  Private client token to be returned during select DLIF */
+/*                   function calls.                                         */
+/*                                                                           */
+/*    returns: an opaque DLOAD core loader handle, identifying this          */
+/*             instance, or NULL (after reporting an error) if the loader    */
+/*             object could not be allocated.                                */
+/*                                                                           */
+/*****************************************************************************/
+DLOAD_HANDLE DLOAD_create(void *client_handle)
+{
+   LOADER_OBJECT *pLoaderObject = DLIF_malloc(sizeof(LOADER_OBJECT));
+
+   /*-----------------------------------------------------------------------*/
+   /* Without a loader object there is nothing to initialize; report the    */
+   /* failure instead of writing through a NULL pointer.                    */
+   /*-----------------------------------------------------------------------*/
+   if (pLoaderObject == NULL)
+   {
+      DLIF_error(DLET_MISC, "Could not allocate DLOAD loader instance.\n");
+      return((DLOAD_HANDLE)NULL);
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* Fill out the Loader Object:                                           */
+   /*-----------------------------------------------------------------------*/
+   /* Set up initial objects_loading queue.                                 */
+   /*-----------------------------------------------------------------------*/
+   AL_initialize(&(pLoaderObject->DLIMP_module_dependency_list),
+                 sizeof (const char*), 1);
+
+   /*-----------------------------------------------------------------------*/
+   /* Initialize Loaded Module Ptr Queue                                    */
+   /*-----------------------------------------------------------------------*/
+   loaded_module_ptr_initialize_queue(&pLoaderObject->DLIMP_loaded_objects);
+
+   /*-----------------------------------------------------------------------*/
+   /* Initialize Dynamic Module Ptr Stack                                   */
+   /*-----------------------------------------------------------------------*/
+   dynamic_module_ptr_initialize_stack(&pLoaderObject->DLIMP_dependency_stack);
+
+   /*-----------------------------------------------------------------------*/
+   /* File handles are handed out starting at 1.                            */
+   /*-----------------------------------------------------------------------*/
+   pLoaderObject->file_handle = 1;
+
+   /*-----------------------------------------------------------------------*/
+   /* Store client token, so it can be handed back during DLIF calls        */
+   /*-----------------------------------------------------------------------*/
+   pLoaderObject->client_handle = client_handle;
+
+   return((DLOAD_HANDLE)pLoaderObject);
+}
+
+/*****************************************************************************/
+/* DLOAD_destroy()                                                           */
+/*                                                                           */
+/*    Remove an instance of the dynamic loader core, and free all resources  */
+/*    allocated during DLOAD_create().                                       */
+/*                                                                           */
+/*    handle:  Loader instance handle returned by DLOAD_create().  A NULL    */
+/*             handle is ignored.                                            */
+/*                                                                           */
+/*    Preconditions: 1) handle must be valid (or NULL).                      */
+/*                   2) Loader instance must be in "UNLOADED" state.         */
+/*                                                                           */
+/*****************************************************************************/
+void DLOAD_destroy(DLOAD_HANDLE handle)
+{
+   LOADER_OBJECT *pLoaderObject = (LOADER_OBJECT *)handle;
+
+   /*-----------------------------------------------------------------------*/
+   /* Nothing to tear down for a NULL handle.                               */
+   /*-----------------------------------------------------------------------*/
+   if (pLoaderObject == NULL) return;
+
+   AL_destroy(&(pLoaderObject->DLIMP_module_dependency_list));
+
+   /*--------------------------*/
+   /* Free the instance object */
+   /*--------------------------*/
+   DLIF_free (pLoaderObject);
+}
+
+/*****************************************************************************/
+/* DLIMP_get_first_dyntag()                                                  */
+/*                                                                           */
+/*    Scan the given dynamic table, which is terminated by a DT_NULL entry,  */
+/*    and return the value (d_un.d_val) of the first entry whose tag type    */
+/*    equals the given key.  INT_MAX is returned when no entry matches.      */
+/*                                                                           */
+/*****************************************************************************/
+uint32_t DLIMP_get_first_dyntag(int tag, struct Elf32_Dyn* dyn_table)
+{
+   struct Elf32_Dyn *entry;
+
+   /*------------------------------------------------------------------------*/
+   /* Walk the table up to (not including) the DT_NULL terminator and stop   */
+   /* at the first entry carrying the requested tag.                         */
+   /*------------------------------------------------------------------------*/
+   for (entry = dyn_table; entry->d_tag != DT_NULL; entry++)
+   {
+      if (entry->d_tag == tag)
+         return entry->d_un.d_val;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* No entry matched; hand back the known bogus value for the tag.         */
+   /*------------------------------------------------------------------------*/
+   return INT_MAX;
+}
+
+/*****************************************************************************/
+/* dload_and_allocate_dependencies()                                         */
+/*                                                                           */
+/*    If not already loaded, load each dependent file identified in the      */
+/*    dynamic segment with a DT_NEEDED tag.  Dependent files are listed in   */
+/*    order and should be loaded in the same order that they appear in the   */
+/*    dynamic segment.                                                       */
+/*                                                                           */
+/*    handle:     loader instance (provides the loaded-objects queue and     */
+/*                the client token passed to DLIF_load_dependent()).         */
+/*    dyn_module: module whose dynamic table is scanned; the file handle of  */
+/*                every dependency is appended to its loaded_module's        */
+/*                dependencies list.                                         */
+/*                                                                           */
+/*    returns: TRUE when every DT_NEEDED entry was satisfied, FALSE as soon  */
+/*             as the client fails to load a dependent (handle 0).           */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL dload_and_allocate_dependencies( DLOAD_HANDLE handle,
+                                             DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* Spin through each dynamic tag entry in the dynamic segment.            */
+   /*------------------------------------------------------------------------*/
+   struct Elf32_Dyn* dyn_nugget = dyn_module->dyntab;
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("Starting dload_and_allocate_dependencies() for %s ...\n",
+                 dyn_module->name);
+#endif
+
+   while(dyn_nugget->d_tag != DT_NULL)
+   {
+      /*---------------------------------------------------------------------*/
+      /* For each DT_NEEDED dynamic tag that we find in the dynamic segment, */
+      /* load the dependent file identified by the so_name value attached    */
+      /* to the DT_NEEDED dynamic tag.                                       */
+      /*---------------------------------------------------------------------*/
+      if (dyn_nugget->d_tag == DT_NEEDED)
+      {
+         loaded_module_ptr_Queue_Node* ptr;
+
+         /*------------------------------------------------------------------*/
+         /* The tag value is an offset into the module's string table.       */
+         /*------------------------------------------------------------------*/
+         const char *so_name = dyn_module->strtab + dyn_nugget->d_un.d_val;
+
+#if LOADER_DEBUG
+         if (debugging_on)
+            DLIF_trace("Found DT_NEEDED: %s\n", so_name);
+#endif
+
+         /*------------------------------------------------------------------*/
+         /* Find out if the file named by the DT_NEEDED tag has already      */
+         /* been loaded.  If it has, then we only have to bump the use count */
+         /* of the named dependent file.                                     */
+         /*------------------------------------------------------------------*/
+         for (ptr = pHandle->DLIMP_loaded_objects.front_ptr; ptr != NULL;
+              ptr = ptr->next_ptr)
+         {
+            if (!strcmp(ptr->value->name, so_name))
+            {
+               ptr->value->use_count++;
+               AL_append(&(dyn_module->loaded_module->dependencies),
+                         &(ptr->value->file_handle));
+               break;
+            }
+         }
+
+         /*------------------------------------------------------------------*/
+         /* If the named dependent file has not been loaded, then we ask the */
+         /* client to invoke a load of the dependent file on our behalf.     */
+         /*------------------------------------------------------------------*/
+         if (ptr == NULL)
+         {
+            int32_t dependent_handle =
+                       DLIF_load_dependent(pHandle->client_handle, so_name);
+
+            /*---------------------------------------------------------------*/
+            /* A handle of 0 means the load failed.  Bail out before the     */
+            /* bogus handle is recorded as a dependency of this module.      */
+            /*---------------------------------------------------------------*/
+            if (dependent_handle == 0) return FALSE;
+
+            AL_append(&(dyn_module->loaded_module->dependencies),
+                      &dependent_handle);
+         }
+      }
+
+      dyn_nugget++;
+   }
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("Finished dload_and_allocate_dependencies() for %s\n",
+                 dyn_module->name);
+#endif
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* load_object()                                                             */
+/*                                                                           */
+/*    Finish the process of loading an object file.  Neither argument is     */
+/*    used; the function always returns 1.                                   */
+/*                                                                           */
+/*****************************************************************************/
+static int load_object(LOADER_FILE_DESC *fd, DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* The dynamic loader is already running on the target and relocates      */
+   /* directly into target memory, so no further work is required here (at   */
+   /* least in the bare-metal dynamic linking ABI model).                    */
+   /*------------------------------------------------------------------------*/
+   (void)fd;
+   (void)dyn_module;
+
+   return 1;
+}
+
+/*****************************************************************************/
+/* write_arguments_to_args_section()                                         */
+/*                                                                           */
+/*    Write argc and argv to the .args section of the entry point module.    */
+/*                                                                           */
+/*    handle:    loader instance; its client token is passed to DLIF_memcpy. */
+/*    argc/argv: host side argument count and argument strings.              */
+/*    ep_module: loaded module whose c_args field holds the address of the   */
+/*               .args section.                                              */
+/*                                                                           */
+/*    returns: TRUE if argc is 0 or all data was written; FALSE, after a     */
+/*             DLIF_warning(), if c_args is invalid or misaligned, the       */
+/*             segment is too small, or the temporary pointer table cannot   */
+/*             be allocated.                                                 */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL write_arguments_to_args_section(DLOAD_HANDLE handle,
+                                            int argc, char** argv,
+                                            DLIMP_Loaded_Module *ep_module)
+{
+   int mem_inc = MEM_INC;
+   int ptr_sz = PTR_SZ;
+   int p_size = ptr_sz / mem_inc;
+   int i_size = T_INTSZ / mem_inc;
+   int c_size = T_CHARSZ /mem_inc;
+   int argv_offset = 0;
+   int str_offset = 0;
+   int size = 0;
+   int arg;
+   int *targ_argv_pointers = NULL;
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   uint8_t *c_args = NULL;
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("Write_arguments_to_args_section:\n");
+#endif
+
+   /*-----------------------------------------------------------------------*/
+   /* IF NO ARGUMENTS, ABORT QUIETLY, WITH a SUCCESSFUL CODE.               */
+   /*-----------------------------------------------------------------------*/
+   if (argc == 0) return TRUE;
+
+   /*-----------------------------------------------------------------------*/
+   /* __c_args__ points to the beginning of the .args section, if there     */
+   /* is one.  This is stored in the Loaded Module, and must have a         */
+   /* legitimate address.  If not, abort with Warning.                      */
+   /*-----------------------------------------------------------------------*/
+   c_args = ep_module->c_args;
+   if (!c_args || c_args == (uint8_t *)0xFFFFFFFF)
+   {
+      DLIF_warning(DLWT_MISC, "__c_args__ does not have valid value.\n");
+      return FALSE;
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* WE OUGHT TO WORRY ABOUT ALIGNMENT: IF SECTION ISN'T PROPERLY ALIGNED, */
+   /* ABORT THE PROCESSING OF ARGUMENTS WITH A NICE ERROR MESSAGE.          */
+   /* (c_args IS KNOWN TO BE NON-NULL AT THIS POINT.)                       */
+   /*-----------------------------------------------------------------------*/
+   if ((Elf32_Addr)c_args & (MAX(p_size, i_size) - 1))
+   {
+      DLIF_warning(DLWT_MISC, ".args section not properly aligned\n");
+      return FALSE;
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* CALCULATE OFFSET IN TABLE WHERE ARGV AND THE STRINGS WILL BE STORED.  */
+   /* NOTE THAT argv MAY NEED MORE ALIGNMENT THAN AN INTEGER, SO ITS OFFSET */
+   /* IS REALLY THE MAXIMUM OF A POINTER SIZE AND INTEGER SIZE.  ALSO NOTE  */
+   /* WE NEED TO ALLOCATE AN EXTRA POINTER FOR argv[argc].                  */
+   /*-----------------------------------------------------------------------*/
+   argv_offset = MAX(p_size, i_size);
+   str_offset  = argv_offset + (argc * p_size) + p_size ;
+
+   /*-----------------------------------------------------------------------*/
+   /* CALCULATE SPACE REQUIRED FOR WRITING OUT .args SECTION.  CHECK IF THE */
+   /* SEGMENT HAS ENOUGH SPACE AVAILABLE.  IF NOT, RETURN WITH ERROR CODE.  */
+   /*-----------------------------------------------------------------------*/
+   size = str_offset;
+
+   for (arg = 0; arg < argc; arg++)
+      size += (c_size * (strlen(argv[arg]) + 1));
+
+   if (!seg_has_space_for_write(ep_module, size))
+   {
+      DLIF_warning(DLWT_MISC,
+                   "Segment has insufficient space for .args contents\n");
+      return FALSE;
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* OVERALL, WE NEED TO CREATE A TARGET IMAGE THAT CORRESPONDS TO:        */
+   /*     int argc;                                                         */
+   /*     char *argv[argc];                                                 */
+   /*     <strings pointed to by argv>                                      */
+   /* So say, for C6x, for "-v -d", we would need 22 bytes:                 */
+   /*     4 bytes // argc                                                   */
+   /*     4 bytes // argv[0] pointer value                                  */
+   /*     4 bytes // argv[1] pointer value                                  */
+   /*     4 bytes // argv[argc] end of pointer value array, normally 0      */
+   /*     3 bytes // "-v"                                                   */
+   /*     3 bytes // "-d"                                                   */
+   /*-----------------------------------------------------------------------*/
+
+   /*-----------------------------------------------------------------------*/
+   /* CREATE AN INTERNAL ARRAY OF ARGV POINTER VALUES.  ALLOCATE IT BEFORE  */
+   /* ANYTHING IS WRITTEN SO THAT AN ALLOCATION FAILURE LEAVES THE .args    */
+   /* SECTION UNTOUCHED.                                                    */
+   /*-----------------------------------------------------------------------*/
+   targ_argv_pointers = (int *)DLIF_malloc((argc + 1) * sizeof(int));
+   if (!targ_argv_pointers)
+   {
+      DLIF_warning(DLWT_MISC,
+                   "Could not allocate memory for argv pointer table\n");
+      return FALSE;
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* FIRST WRITE OUT ARGC.                                                 */
+   /*-----------------------------------------------------------------------*/
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace ("-- Copy %d bytes from 0x%x to 0x%x\n",
+                  i_size, (uint32_t) &argc, (uint32_t) c_args);
+#endif
+
+   DLIF_memcpy(pHandle->client_handle, c_args, &argc, i_size);
+
+   /*-----------------------------------------------------------------------*/
+   /* COMPUTE THE TARGET ADDRESS OF EACH STRING; THE STRINGS ARE PACKED     */
+   /* BACK TO BACK STARTING AT str_offset.                                  */
+   /*-----------------------------------------------------------------------*/
+   for (arg = 0; arg < argc ; arg++)
+   {
+      targ_argv_pointers[arg] = (int)(str_offset + c_args);
+      str_offset += (strlen(argv[arg]) + 1) * c_size;
+
+#if LOADER_DEBUG
+      if (debugging_on)
+         DLIF_trace ("\t\ttarg_argv_pointers[%d] : 0x%x\n",
+                     arg, targ_argv_pointers[arg]);
+#endif
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* argv[argc] IS THE TERMINATING NULL POINTER.                           */
+   /*-----------------------------------------------------------------------*/
+   targ_argv_pointers[argc] = 0;
+
+   /*-----------------------------------------------------------------------*/
+   /* WRITE OUT THIS INTERNAL ARRAY OF ARGV POINTER VALUES                  */
+   /*-----------------------------------------------------------------------*/
+   for (arg = 0; arg <= argc; arg++)
+   {
+#if LOADER_DEBUG
+      if (debugging_on)
+         DLIF_trace ("-- Copy %d bytes from 0x%x to 0x%x\n",
+                     p_size, (uint32_t) &targ_argv_pointers[arg],
+                     (uint32_t) (c_args + argv_offset));
+#endif
+      DLIF_memcpy(pHandle->client_handle,
+                  (void *)(c_args + argv_offset),
+                  &targ_argv_pointers[arg],
+                  p_size);
+      argv_offset += p_size;
+   }
+
+#if LOADER_DEBUG
+   if (debugging_on)
+   {
+      DLIF_trace ("\t\targv being copied : 0x%x\n",(uint32_t)argv);
+      for (arg = 0; arg < argc; arg++)
+      {
+         DLIF_trace ("\t\t---\n\t\t&argv[%d] being copied : 0x%x\n", arg,
+                     (uint32_t)&argv[arg]);
+         DLIF_trace ("\t\targv[%d] being copied : 0x%x\n",arg,
+                     (uint32_t)argv[arg]);
+         DLIF_trace ("\t\targv[%d] being copied : %s\n",arg, (char *)argv[arg]);
+      }
+   }
+#endif
+
+   /*-----------------------------------------------------------------------*/
+   /* LASTLY WRITE OUT ALL THE STRINGS.                                     */
+   /*-----------------------------------------------------------------------*/
+   for (arg = 0; arg < argc; arg++)
+   {
+#if LOADER_DEBUG
+      /*--------------------------------------------------------------------*/
+      /* The source of the copy is the string itself (argv[arg]), not the   */
+      /* slot in the argv array that points to it.                          */
+      /*--------------------------------------------------------------------*/
+      if (debugging_on)
+         DLIF_trace ("-- Copy %d bytes from 0x%x to 0x%x\n",
+                     (uint32_t)strlen(argv[arg]) + 1,
+                     (uint32_t)argv[arg],
+                     (uint32_t)(targ_argv_pointers[arg]));
+#endif
+      DLIF_memcpy(pHandle->client_handle,
+                  (void *)(targ_argv_pointers[arg]),
+                  argv[arg],
+                  strlen(argv[arg]) + 1);
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* THE POINTER TABLE WAS ONLY A HOST SIDE SCRATCH BUFFER; RELEASE IT.    */
+   /*-----------------------------------------------------------------------*/
+   DLIF_free(targ_argv_pointers);
+
+   return TRUE;
+}
+
+
+/*****************************************************************************/
+/* initialize_loaded_module()                                                */
+/*                                                                           */
+/*    Initialize DLIMP_Loaded_Module internal data object associated with a  */
+/*    dynamic module.  This function will also set up a list of              */
+/*    DLIMP_Loaded_Segment(s) associated with the loaded module.             */
+/*    This function is called as we are getting ready to actually load the   */
+/*    object file contents into target memory.  Each segment will get a      */
+/*    target memory request that it can use to ask the client for target     */
+/*    memory space.  This function will also assign a file handle to the     */
+/*    loaded module.                                                         */
+/*                                                                           */
+/*    handle:     loader instance; supplies the next file handle.            */
+/*    dyn_module: dynamic module being loaded; its loaded_module field is    */
+/*                set to the newly allocated object.                         */
+/*                                                                           */
+/*    NOTE(review): the DLIF_malloc() results in this function are not       */
+/*    checked, and the function has no way to report failure to its caller.  */
+/*                                                                           */
+/*---------------------------------------------------------------------------*/
+/*                                                                           */
+/* In applications that use the DSBT model, this function will also need to  */
+/* negotiate the module's DSBT index with the client.                        */
+/*                                                                           */
+/*****************************************************************************/
+static void initialize_loaded_module(DLOAD_HANDLE handle,
+                                     DLIMP_Dynamic_Module *dyn_module)
+{
+   int i;
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   /*------------------------------------------------------------------------*/
+   /* Name to record for the module; a placeholder is used for modules       */
+   /* without a name.                                                        */
+   /*------------------------------------------------------------------------*/
+   const char *module_name = dyn_module->name ? dyn_module->name
+                                              : "<unknown>";
+
+   /*------------------------------------------------------------------------*/
+   /* Allocate a DLIMP_Loaded_Module data structure for the specified ELF    */
+   /* file and assign a file handle for it (bumping the file handle counter  */
+   /* as we go).                                                             */
+   /*------------------------------------------------------------------------*/
+   DLIMP_Loaded_Module *loaded_module =
+          dyn_module->loaded_module = DLIF_malloc(sizeof(DLIMP_Loaded_Module));
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   /*------------------------------------------------------------------------*/
+   /* Start clock on initialization of loaded module object.                 */
+   /*------------------------------------------------------------------------*/
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("Starting initialize_loaded_module() ...\n");
+      if (profiling_on) profile_start_clock();
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Always store a heap copy of the name, including the placeholder, so    */
+   /* that the name field is uniformly heap-owned and a string literal is    */
+   /* never stored in it.                                                    */
+   /* NOTE(review): presumably the unload path releases name with            */
+   /* DLIF_free() - confirm.                                                 */
+   /*------------------------------------------------------------------------*/
+   loaded_module->name = DLIF_malloc(strlen(module_name) + 1);
+   strcpy(loaded_module->name, module_name);
+
+   loaded_module->file_handle = pHandle->file_handle++;
+   loaded_module->direct_dependent_only = dyn_module->direct_dependent_only;
+   loaded_module->use_count = 1;
+
+   /*------------------------------------------------------------------------*/
+   /* In case we wrapped around the file handle, return error.               */
+   /*------------------------------------------------------------------------*/
+   if (pHandle->file_handle == 0)
+      DLIF_error(DLET_MISC, "DLOAD File handle overflowed.\n");
+
+   /*------------------------------------------------------------------------*/
+   /* Initially the loaded module does not have access to its global         */
+   /* symbols.  These need to be copied from the dynamic module (see call    */
+   /* to DLSYM_copy_globals() below).                                        */
+   /*                                                                        */
+   /* THESE INITIALIZATIONS SHOULD BE MOVED TO AN INIT ROUTINE FOR THE       */
+   /* LOADED MODULE                                                          */
+   /*------------------------------------------------------------------------*/
+   loaded_module->gsymtab = NULL;
+   loaded_module->gstrtab = NULL;
+   loaded_module->gsymnum = loaded_module->gstrsz = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Initialize the Array_List of dependencies.                             */
+   /*------------------------------------------------------------------------*/
+   AL_initialize(&(loaded_module->dependencies), sizeof(int), 1);
+
+   if (dyn_module->symtab)
+      DLSYM_copy_globals(dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Initialize the module loaded segments Array_List.                      */
+   /*------------------------------------------------------------------------*/
+   AL_initialize(&(loaded_module->loaded_segments),
+                 sizeof(DLIMP_Loaded_Segment), dyn_module->phnum);
+
+   /*------------------------------------------------------------------------*/
+   /* Spin thru segment headers and process each load segment encountered.   */
+   /*------------------------------------------------------------------------*/
+   for (i = 0; i < dyn_module->phnum; i++)
+      if (dyn_module->phdr[i].p_type == PT_LOAD)
+      {
+         /*------------------------------------------------------------------*/
+         /* Note that this is parallel to and does not supplant the ELF      */
+         /* phdr tables.                                                     */
+         /*------------------------------------------------------------------*/
+         DLIMP_Loaded_Segment seg;
+
+         /*------------------------------------------------------------------*/
+         /* Zero the whole record first so that the fields not assigned      */
+         /* below are not copied into the list with indeterminate values.    */
+         /*------------------------------------------------------------------*/
+         memset(&seg, 0, sizeof(seg));
+
+         seg.obj_desc = DLIF_malloc(sizeof(struct DLOAD_MEMORY_SEGMENT));
+         seg.phdr.p_vaddr = dyn_module->phdr[i].p_vaddr;
+         seg.phdr.p_offset = dyn_module->phdr[i].p_offset;
+         seg.obj_desc->target_page = 0; /*not used*/
+         seg.modified = 0;
+         seg.phdr.p_filesz = seg.obj_desc->objsz_in_bytes
+                           = dyn_module->phdr[i].p_filesz;
+         seg.phdr.p_memsz = seg.obj_desc->memsz_in_bytes
+                          = dyn_module->phdr[i].p_memsz;
+         seg.phdr.p_align = dyn_module->phdr[i].p_align;
+         seg.phdr.p_flags = dyn_module->phdr[i].p_flags;
+         AL_append(&(loaded_module->loaded_segments), &seg);
+      }
+
+   /*------------------------------------------------------------------------*/
+   /* Initialize the DSO termination information for this module.            */
+   /* It will be copied over from the enclosing dyn_module object when       */
+   /* placement is completed and dyn_module's local copy of the dynamic      */
+   /* table is updated.                                                      */
+   /*------------------------------------------------------------------------*/
+   loaded_module->fini_array = (Elf32_Addr) NULL;
+   loaded_module->fini_arraysz = 0;
+   loaded_module->fini = (Elf32_Addr) NULL;
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("Finished initialize_loaded_module()\n");
+      if (profiling_on)
+      {
+         profile_stop_clock();
+         DLIF_trace("Took %lu cycles.\n",
+                    (unsigned long)profile_cycle_count());
+      }
+   }
+#endif
+
+}
+
+/*****************************************************************************/
+/* load_static_segment()                                                     */
+/*                                                                           */
+/*    The core dynamic loader requires that a statically linked executable   */
+/*    be placed in target memory at the location that was determined during  */
+/*    the static link that created the executable.  Every loaded segment is  */
+/*    therefore requested at its own p_vaddr, with the relocatable flag      */
+/*    cleared.                                                               */
+/*                                                                           */
+/*    returns: FALSE as soon as DLIF_allocate() refuses a segment, TRUE      */
+/*             once all segments have been processed.                        */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL load_static_segment(DLOAD_HANDLE handle, LOADER_FILE_DESC *fd,
+                                DLIMP_Dynamic_Module *dyn_module)
+{
+   LOADER_OBJECT        *loader   = (LOADER_OBJECT *)handle;
+   DLIMP_Loaded_Segment *segments = (DLIMP_Loaded_Segment*)
+                       (dyn_module->loaded_module->loaded_segments.buf);
+   int idx;
+
+   /*------------------------------------------------------------------------*/
+   /* Walk the loaded segments.  For each one, describe the target memory    */
+   /* we need, obtain it from the client, and then have the client write     */
+   /* the segment contents out to that target address.                       */
+   /*------------------------------------------------------------------------*/
+   for (idx = 0; idx < dyn_module->loaded_module->loaded_segments.size; idx++)
+   {
+      DLIMP_Loaded_Segment *cur = &segments[idx];
+      struct DLOAD_MEMORY_REQUEST targ_req;
+
+      cur->obj_desc->target_page = 0;
+
+      /*---------------------------------------------------------------------*/
+      /* Translate the ELF segment permissions into request flags.  As this  */
+      /* is a static executable the request is never relocatable, so         */
+      /* DLIF_allocate should give us the address we ask for or fail.        */
+      /*---------------------------------------------------------------------*/
+      targ_req.flags = 0;
+      if (cur->phdr.p_flags & PF_X) targ_req.flags |= DLOAD_SF_executable;
+      if (cur->phdr.p_flags & PF_W) targ_req.flags |= DLOAD_SF_writable;
+      targ_req.flags &= ~DLOAD_SF_relocatable;
+
+      /*---------------------------------------------------------------------*/
+      /* The segment must land at its statically linked virtual address.     */
+      /*---------------------------------------------------------------------*/
+      cur->obj_desc->target_address = (TARGET_ADDRESS)cur->phdr.p_vaddr;
+
+      targ_req.align       = cur->phdr.p_align;
+      targ_req.fp          = fd;
+      targ_req.segment     = cur->obj_desc;
+      targ_req.offset      = cur->phdr.p_offset;
+      targ_req.flip_endian = dyn_module->wrong_endian;
+
+      /*---------------------------------------------------------------------*/
+      /* Have the client side of the loader allocate target memory for the   */
+      /* segment; give up on the whole load if it cannot.                    */
+      /*---------------------------------------------------------------------*/
+      if (!DLIF_allocate(loader->client_handle, &targ_req))
+         return FALSE;
+
+      /*---------------------------------------------------------------------*/
+      /* Segments without initialized data need no copy.  Otherwise the      */
+      /* data is first placed in a host writable buffer (DLIF_copy()) and    */
+      /* then flushed to target memory (DLIF_write()).                       */
+      /*---------------------------------------------------------------------*/
+      if (!cur->phdr.p_filesz)
+         continue;
+
+      DLIF_copy(loader->client_handle, &targ_req);
+      DLIF_write(loader->client_handle, &targ_req);
+   }
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* relocate_target_dynamic_tag_info()                                        */
+/*                                                                           */
+/*    Update a target specific dynamic tag value that happens to be a        */
+/*    virtual address of a section, by forwarding the request to the         */
+/*    current virtual target.  Returns TRUE if the tag was updated or is     */
+/*    not a virtual address and FALSE if it was not successfully updated     */
+/*    or was not recognized.                                                 */
+/*****************************************************************************/
+static BOOL relocate_target_dynamic_tag_info(DLIMP_Dynamic_Module *dyn_module,
+                                             int i)
+{
+   BOOL tag_handled = cur_target->relocate_dynamic_tag_info(dyn_module, i);
+
+   return tag_handled;
+}
+
+/*****************************************************************************/
+/* DLIMP_update_dyntag_section_address()                                     */
+/*                                                                           */
+/*    Given the index of a dynamic tag which we happen to know points to a   */
+/*    section address, find the loaded segment whose input address range     */
+/*    contains that address and shift the tag value by the distance the      */
+/*    segment moved (p_vaddr - input_vaddr).                                 */
+/*                                                                           */
+/*    returns: TRUE if the tag value is 0 or was updated; FALSE if no        */
+/*             loaded segment contains the address.                          */
+/*                                                                           */
+/*****************************************************************************/
+BOOL DLIMP_update_dyntag_section_address(DLIMP_Dynamic_Module *dyn_module,
+                                         int32_t i)
+{
+   struct Elf32_Dyn     *tag      = &dyn_module->dyntab[i];
+   DLIMP_Loaded_Segment *segments = (DLIMP_Loaded_Segment *)
+                       (dyn_module->loaded_module->loaded_segments.buf);
+   int seg_idx;
+
+   /*------------------------------------------------------------------------*/
+   /* A zero address means the tag does not refer to an existing section,    */
+   /* so there is nothing to update.                                         */
+   /*------------------------------------------------------------------------*/
+   if (tag->d_un.d_ptr == (Elf32_Addr)0)
+      return TRUE;
+
+   for (seg_idx = 0;
+        seg_idx < dyn_module->loaded_module->loaded_segments.size;
+        seg_idx++)
+   {
+      DLIMP_Loaded_Segment *cur = &segments[seg_idx];
+
+      /*---------------------------------------------------------------------*/
+      /* Skip segments whose [input_vaddr, input_vaddr + p_memsz) range      */
+      /* does not contain the tag's address.                                 */
+      /*---------------------------------------------------------------------*/
+      if (tag->d_un.d_ptr < cur->input_vaddr)
+         continue;
+
+      if (tag->d_un.d_ptr >= (cur->input_vaddr + cur->phdr.p_memsz))
+         continue;
+
+      tag->d_un.d_ptr += (cur->phdr.p_vaddr - cur->input_vaddr);
+      return TRUE;
+   }
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* relocate_dynamic_tag_info() */
+/* */
+/* Once segment allocation has been completed, we'll need to go through */
+/* the dynamic table and update any tag values that happen to be virtual */
+/* addresses of segments (DT_C6000_DSBT_BASE, for example). */
+/* */
+/*****************************************************************************/
+static BOOL relocate_dynamic_tag_info(LOADER_FILE_DESC *fd,
+ DLIMP_Dynamic_Module *dyn_module)
+{
+ /*------------------------------------------------------------------------*/
+ /* Spin through dynamic table looking for tags that have a value which is */
+ /* the virtual address of a section. After the sections are allocated, */
+ /* we'll need to update these values with the new address of the section. */
+ /*------------------------------------------------------------------------*/
+ int i;
+ for (i = 0; dyn_module->dyntab[i].d_tag != DT_NULL; i++)
+ {
+ switch (dyn_module->dyntab[i].d_tag)
+ {
+ /*------------------------------------------------------------------*/
+ /* Only tag values that are virtual addresses will be affected. */
+ /*------------------------------------------------------------------*/
+ case DT_NEEDED:
+ case DT_PLTRELSZ:
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_SYMTAB:
+ case DT_RELA:
+ case DT_RELASZ:
+ case DT_RELAENT:
+ case DT_STRSZ:
+ case DT_SYMENT:
+ case DT_SONAME:
+ case DT_RPATH:
+ case DT_SYMBOLIC:
+ case DT_REL:
+ case DT_RELSZ:
+ case DT_RELENT:
+ case DT_PLTREL:
+ case DT_DEBUG:
+ case DT_TEXTREL:
+ case DT_BIND_NOW:
+ case DT_INIT_ARRAYSZ:
+ case DT_RUNPATH:
+ case DT_FLAGS:
+ case DT_PREINIT_ARRAYSZ:
+ continue;
+
+ /*------------------------------------------------------------------*/
+ /* NOTE!!! */
+ /* case DT_ENCODING: -- tag type has same "id" as DT_PREINIT_ARRAY */
+ /*------------------------------------------------------------------*/
+
+ /*------------------------------------------------------------------*/
+ /* This is a generic dynamic tag whose value is a virtual address */
+ /* of a section. It needs to be relocated to the section's actual */
+ /* address in target memory. */
+ /*------------------------------------------------------------------*/
+ case DT_PREINIT_ARRAY:
+ case DT_INIT:
+ case DT_INIT_ARRAY:
+ if (!DLIMP_update_dyntag_section_address(dyn_module, i))
+ return FALSE;
+
+ continue;
+
+ /*------------------------------------------------------------------*/
+ /* Once we have resolved the actual address of termination function */
+ /* sections, we need to copy their addresses over to the loaded */
+ /* module object (dyn_module will be deleted before we get to */
+ /* unloading the module). */
+ /*------------------------------------------------------------------*/
+ case DT_FINI_ARRAY:
+ case DT_FINI:
+ if (!DLIMP_update_dyntag_section_address(dyn_module, i))
+ return FALSE;
+
+ if (dyn_module->dyntab[i].d_tag == DT_FINI)
+ dyn_module->loaded_module->fini =
+ dyn_module->dyntab[i].d_un.d_ptr;
+ else
+ dyn_module->loaded_module->fini_array =
+ dyn_module->dyntab[i].d_un.d_ptr;
+
+ continue;
+
+ case DT_FINI_ARRAYSZ:
+ dyn_module->loaded_module->fini_arraysz =
+ dyn_module->dyntab[i].d_un.d_val;
+ continue;
+
+ /*------------------------------------------------------------------*/
+ /* Is this a virtual address??? */
+ /*------------------------------------------------------------------*/
+ case DT_JMPREL: /* is this a virtual address??? */
+ continue;
+
+ /*------------------------------------------------------------------*/
+ /* The remaining dynamic tag types should be target specific. If */
+ /* something generic slips through to here, then the handler for */
+ /* relocating target specific dynamic tags should fail. */
+ /*------------------------------------------------------------------*/
+ default:
+ if (!relocate_target_dynamic_tag_info(dyn_module, i))
+ return FALSE;
+ }
+ }
+
+ /*------------------------------------------------------------------------*/
+ /* We've gotten through all of the dynamic table without incident. */
+ /* All dynamic tag values that were virtual section addresses should have */
+ /* been updated with the final address of the section that they point to. */
+ /*------------------------------------------------------------------------*/
+ return TRUE;
+}
+
+/*****************************************************************************/
+/* allocate_dynamic_segments_and_relocate_symbols() */
+/* */
+/* Allocate target memory for each segment in this module, getting a */
+/* host-accessible space to copy the content of each segment into. Then */
+/* update the symbol table and program header table to reflect the new */
+/* target address for each segment. Processing of the dynamic relocation */
+/* entries will wait until all dependent files have been loaded and */
+/* allocated into target memory. */
+/* */
+/*---------------------------------------------------------------------------*/
+/* */
+/* The relocation entries in the ELF file do not handle the necessary */
+/* adjustments to the memory addresses in the program header or symbol */
+/* tables. These must be done manually. */
+/* */
+/* This is harder for us than for most dynamic loaders, because we have to */
+/* work in environments without virtual memory and thus where the offsets */
+/* between segments in memory may be different than they were in the file. */
+/* So, even though a dynamic loader usually only has to adjust all the */
+/* segments by a single fixed offset, we need to offset the symbols and */
+/* program header addresses segment by segment. This job is done by the */
+/* function below. */
+/* */
+/*****************************************************************************/
+static BOOL allocate_dynamic_segments_and_relocate_symbols
+                                             (DLOAD_HANDLE handle,
+                                              LOADER_FILE_DESC *fd,
+                                              DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* handle     - loader instance; its client_handle is handed to the       */
+   /*              client allocation API.                                    */
+   /* fd         - object file, passed along in each allocation request.     */
+   /* dyn_module - module whose loaded segments, program headers, symbols    */
+   /*              and dynamic tags are updated in place.                    */
+   /*                                                                        */
+   /* Returns FALSE if a segment cannot be allocated or the dynamic table    */
+   /* update fails, TRUE otherwise.                                          */
+   /*------------------------------------------------------------------------*/
+   int i,j;
+   DLIMP_Loaded_Segment* seg = (DLIMP_Loaded_Segment*)
+                              (dyn_module->loaded_module->loaded_segments.buf);
+   struct Elf32_Ehdr *fhdr = &(dyn_module->fhdr);
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("Dynamic executable found.\n"
+                 "Starting allocate_dynamic_segments_and_relocate_symbols()"
+                 "...\n");
+      if (profiling_on) profile_start_clock();
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Spin through the list of loaded segments from the current module.      */
+   /*------------------------------------------------------------------------*/
+   for (i = 0; i < dyn_module->loaded_module->loaded_segments.size; i++)
+   {
+      /*--------------------------------------------------------------------*/
+      /* Allocate target memory for segment via client-provided target      */
+      /* memory API.  The request is marked relocatable and starts out      */
+      /* with the segment's current virtual address as its target address.  */
+      /*--------------------------------------------------------------------*/
+      int32_t addr_offset;
+      struct DLOAD_MEMORY_REQUEST targ_req;
+      seg[i].obj_desc->target_page = 0;
+      targ_req.flags = 0;
+      if (seg[i].phdr.p_flags & PF_X) targ_req.flags |= DLOAD_SF_executable;
+      if (seg[i].phdr.p_flags & PF_W) targ_req.flags |= DLOAD_SF_writable;
+
+      targ_req.align = 0x20;
+      seg[i].obj_desc->target_address = (TARGET_ADDRESS)seg[i].phdr.p_vaddr;
+      targ_req.flags |= DLOAD_SF_relocatable;
+      targ_req.fp = fd;
+      targ_req.segment = seg[i].obj_desc;
+      targ_req.offset = seg[i].phdr.p_offset;
+      targ_req.flip_endian = dyn_module->wrong_endian;
+
+      if (!DLIF_allocate(pHandle->client_handle, &targ_req))
+      {
+         DLIF_error(DLET_MEMORY, "DLIF allocation failure.\n");
+         return FALSE;
+      }
+
+      /*--------------------------------------------------------------------*/
+      /* Calculate the offset we need to adjust segment header and symbol   */
+      /* table addresses.                                                   */
+      /*--------------------------------------------------------------------*/
+      addr_offset = (int32_t)(seg[i].obj_desc->target_address) -
+                    (int32_t)(seg[i].phdr.p_vaddr);
+
+#if LOADER_DEBUG
+      if (debugging_on)
+      {
+         DLIF_trace("Segment %d (at 0x%x, 0x%x bytes) relocated to 0x%x\n", i,
+                    (int32_t)(seg[i].phdr.p_vaddr),
+                    (int32_t)(seg[i].phdr.p_memsz),
+                    (int32_t)(seg[i].obj_desc->target_address));
+         DLIF_trace("Addr Offset is 0x%x\n", addr_offset);
+      }
+#endif
+
+      /*--------------------------------------------------------------------*/
+      /* Update program entry point if needed.  Need to replace to deal     */
+      /* with full ELF initialization routine.  The upper bound is computed */
+      /* with plain address arithmetic, the same way the dynamic tag update */
+      /* computes a segment's end, rather than through a pointer cast.      */
+      /*--------------------------------------------------------------------*/
+      if (dyn_module->relocate_entry_point &&
+          fhdr->e_entry >= (Elf32_Addr)(seg[i].phdr.p_vaddr) &&
+          fhdr->e_entry <  (Elf32_Addr)(seg[i].phdr.p_vaddr +
+                                        seg[i].phdr.p_memsz))
+      {
+#if LOADER_DEBUG
+         if (debugging_on)
+         {
+            DLIF_trace("Entry point 0x%x relocated to 0x%x\n",
+                       fhdr->e_entry, fhdr->e_entry + addr_offset);
+         }
+#endif
+         fhdr->e_entry += addr_offset;
+
+         /*------------------------------------------------------------------*/
+         /* Mark the entry point as being relocated so we will not do it     */
+         /* again.                                                           */
+         /*------------------------------------------------------------------*/
+         dyn_module->relocate_entry_point = FALSE;
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* Fix program header entries in segment and Elf32_Phdr structs.       */
+      /* Both address fields of the matching program header (index j) are    */
+      /* adjusted; i is a segment index and must not be used to index the    */
+      /* program header table.                                               */
+      /*---------------------------------------------------------------------*/
+      for (j = 0; j < fhdr->e_phnum; j++)
+         if (dyn_module->phdr[j].p_vaddr == (Elf32_Addr)seg[i].phdr.p_vaddr)
+         {
+            dyn_module->phdr[j].p_vaddr += addr_offset;
+            dyn_module->phdr[j].p_paddr += addr_offset;
+            break;
+         }
+
+      /*---------------------------------------------------------------------*/
+      /* Remember the address the segment had in the input file before       */
+      /* overwriting it with the relocated one.                              */
+      /*---------------------------------------------------------------------*/
+      seg[i].input_vaddr = (Elf32_Addr)(seg[i].phdr.p_vaddr);
+      seg[i].phdr.p_vaddr += addr_offset;
+
+      /*---------------------------------------------------------------------*/
+      /* Great, now the hard part: fix offsets in symbols.  It would be nice */
+      /* if there were an easier way to deal with this.                      */
+      /*---------------------------------------------------------------------*/
+      {
+         struct Elf32_Sym *gsymtab =
+                    ((struct Elf32_Sym*)(dyn_module->loaded_module->gsymtab));
+         Elf32_Addr segment_start = (Elf32_Addr)seg[i].phdr.p_vaddr;
+         Elf32_Addr segment_end   = (Elf32_Addr)seg[i].phdr.p_vaddr +
+                                    seg[i].phdr.p_memsz;
+         Elf32_Word global_index  = dyn_module->symnum -
+                                    dyn_module->loaded_module->gsymnum;
+
+         for (j = 0; j < dyn_module->symnum; j++)
+         {
+            /*---------------------------------------------------------------*/
+            /* Get the relocated symbol value.                               */
+            /*---------------------------------------------------------------*/
+            Elf32_Addr symval_adj = dyn_module->symtab[j].st_value +
+                                    addr_offset;
+
+            /*---------------------------------------------------------------*/
+            /* If the symbol is defined in this segment, update the symbol   */
+            /* value and mark the symbol so that we don't relocate it again. */
+            /* The comparison is against the segment's relocated range.      */
+            /*---------------------------------------------------------------*/
+            if (symval_adj >= segment_start && symval_adj < segment_end &&
+                dyn_module->symtab[j].st_shndx != INT16_MAX)
+            {
+               dyn_module->symtab[j].st_value = symval_adj;
+
+               /*------------------------------------------------------------*/
+               /* The module symbol table only has the global symbols.       */
+               /*------------------------------------------------------------*/
+               if (j >= global_index)
+                  gsymtab[j-global_index].st_value = symval_adj;
+
+               /*------------------------------------------------------------*/
+               /* Mark the symbol as relocated.                              */
+               /*------------------------------------------------------------*/
+               dyn_module->symtab[j].st_shndx = INT16_MAX;
+            }
+         }
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Update dynamic tag information.  Some dynamic tags have values which   */
+   /* are virtual addresses of sections.  These values need to be updated    */
+   /* once segment allocation is completed and the new segment addresses are */
+   /* known.                                                                 */
+   /*------------------------------------------------------------------------*/
+   /* We should only traverse through the dynamic table once because we want */
+   /* to avoid the possibility of updating the same tag multiple times (an   */
+   /* error, if it happens).                                                 */
+   /*------------------------------------------------------------------------*/
+   if (!relocate_dynamic_tag_info(fd, dyn_module))
+   {
+      DLIF_error(DLET_MISC, "Failed dynamic table update.\n");
+      return FALSE;
+   }
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("Finished allocate_dynamic_segments_and_relocate_symbols()\n");
+      if (profiling_on)
+      {
+         profile_stop_clock();
+         DLIF_trace("Took %lu cycles.\n", (unsigned long) profile_cycle_count());
+      }
+   }
+#endif
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* delete_DLIMP_Loaded_Module() */
+/* */
+/* Free host memory associated with a DLIMP_Loaded_Module data structure */
+/* and all of the DLIMP_Loaded_Segment objects that are associated with */
+/* it. */
+/* */
+/*****************************************************************************/
+static void delete_DLIMP_Loaded_Module(DLOAD_HANDLE handle,
+                                       DLIMP_Loaded_Module **pplm)
+{
+   /*-----------------------------------------------------------------------*/
+   /* handle - loader instance; its client_handle is passed to DLIF_release.*/
+   /* pplm   - address of the loaded module pointer; the module it refers   */
+   /*          to is destroyed and the pointer is set to NULL on return.    */
+   /*-----------------------------------------------------------------------*/
+   DLIMP_Loaded_Module  *doomed   = *pplm;
+   LOADER_OBJECT        *loader   = (LOADER_OBJECT *)handle;
+   DLIMP_Loaded_Segment *seg_list = (DLIMP_Loaded_Segment*)
+                                    (doomed->loaded_segments.buf);
+   int seg_idx;
+
+   /*-----------------------------------------------------------------------*/
+   /* Hand each segment's target memory back to the client, reporting (but  */
+   /* not stopping on) a failed release, then free the host-side segment    */
+   /* descriptor.                                                           */
+   /*-----------------------------------------------------------------------*/
+   for (seg_idx = 0; seg_idx < doomed->loaded_segments.size; seg_idx++)
+   {
+      DLIMP_Loaded_Segment *cur = &seg_list[seg_idx];
+
+      if (!DLIF_release(loader->client_handle, cur->obj_desc))
+      {
+         DLIF_error(DLET_MISC, "Failed call to DLIF_release!\n");
+      }
+
+      DLIF_free(cur->obj_desc);
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* If this module is the one recorded as the application (base image),   */
+   /* clear the global handle to signal that it is no longer available.     */
+   /*-----------------------------------------------------------------------*/
+   if (DLIMP_application_handle == doomed->file_handle)
+   {
+      DLIMP_application_handle = 0;
+   }
+
+   /*-----------------------------------------------------------------------*/
+   /* Release the host heap memory owned by the module's members.           */
+   /*-----------------------------------------------------------------------*/
+   if (doomed->name != NULL)
+   {
+      DLIF_free(doomed->name);
+   }
+
+   if (doomed->gsymtab != NULL)
+   {
+      DLIF_free(doomed->gsymtab);
+   }
+   doomed->gsymnum = 0;
+
+   if (doomed->gstrtab != NULL)
+   {
+      DLIF_free(doomed->gstrtab);
+   }
+   doomed->gstrsz = 0;
+
+   AL_destroy(&(doomed->loaded_segments));
+   AL_destroy(&(doomed->dependencies));
+
+   /*-----------------------------------------------------------------------*/
+   /* Last of all, free the module object itself and NULL the caller's      */
+   /* pointer to it.                                                        */
+   /*-----------------------------------------------------------------------*/
+   DLIF_free(doomed);
+   *pplm = NULL;
+}
+
+/*****************************************************************************/
+/* new_DLIMP_Dynamic_Module() */
+/* */
+/* Allocate a dynamic module data structure from host memory and */
+/* initialize its members to their default values. */
+/* */
+/*****************************************************************************/
+static DLIMP_Dynamic_Module *new_DLIMP_Dynamic_Module(LOADER_FILE_DESC *fd)
+{
+   /*-----------------------------------------------------------------------*/
+   /* Allocate space for dynamic module data structure from host memory.    */
+   /*-----------------------------------------------------------------------*/
+   DLIMP_Dynamic_Module *dyn_module =
+        (DLIMP_Dynamic_Module *)DLIF_malloc(sizeof(DLIMP_Dynamic_Module));
+
+   /*-----------------------------------------------------------------------*/
+   /* Without host memory there is nothing to initialize; return NULL so    */
+   /* the failure is reported to the caller instead of writing through a    */
+   /* NULL pointer.                                                         */
+   /*-----------------------------------------------------------------------*/
+   if (dyn_module == NULL)
+      return NULL;
+
+   /*-----------------------------------------------------------------------*/
+   /* Initialize data members of the new dynamic module data structure.     */
+   /* fd is recorded as the module's file descriptor; the file header       */
+   /* (fhdr) is not initialized here.                                       */
+   /*-----------------------------------------------------------------------*/
+   dyn_module->name = NULL;
+   dyn_module->fd = fd;
+   dyn_module->phdr = NULL;
+   dyn_module->phnum = 0;
+   dyn_module->strtab = NULL;
+   dyn_module->strsz = 0;
+   dyn_module->dyntab = NULL;
+   dyn_module->symtab = NULL;
+   dyn_module->symnum = 0;
+   dyn_module->gsymtab_offset = 0;
+   dyn_module->gstrtab_offset = 0;
+   dyn_module->c_args = NULL;
+   dyn_module->argc = 0;
+   dyn_module->argv = NULL;
+   dyn_module->loaded_module = NULL;
+   dyn_module->wrong_endian = 0;
+   dyn_module->direct_dependent_only = TRUE;
+   dyn_module->relocatable = FALSE;
+   dyn_module->relocate_entry_point = TRUE;
+
+   /*-----------------------------------------------------------------------*/
+   /* DSBT bookkeeping starts out with no table and no assigned index.      */
+   /*-----------------------------------------------------------------------*/
+   dyn_module->dsbt_size = 0;
+   dyn_module->dsbt_index = DSBT_INDEX_INVALID;
+   dyn_module->dsbt_base_tagidx = -1;
+
+   /*-----------------------------------------------------------------------*/
+   /* Initialization-section tag indices start at -1 and sizes at 0.        */
+   /*-----------------------------------------------------------------------*/
+   dyn_module->preinit_array_idx = -1;
+   dyn_module->preinit_arraysz = 0;
+   dyn_module->init_idx = -1;
+   dyn_module->init_array_idx = -1;
+   dyn_module->init_arraysz = 0;
+
+   return dyn_module;
+}
+
+/*****************************************************************************/
+/* detach_loaded_module() */
+/* */
+/* Detach loaded module data structure from given dynamic module. When */
+/* an object file has been successfully loaded, the loader core will */
+/* detach the loaded module data structure from the dynamic module data */
+/* structure because the loaded module must continue to persist until it */
+/* is actually unloaded from target memory. If there is a problem with */
+/* the load, then the host memory associated with the loaded module will */
+/* be released as part of the destruction of the dynamic module. */
+/* */
+/*****************************************************************************/
+static
+DLIMP_Loaded_Module *detach_loaded_module(DLIMP_Dynamic_Module *dyn_module)
+{
+   DLIMP_Loaded_Module *detached;
+
+   /*------------------------------------------------------------------------*/
+   /* Nothing to hand over when there is no dynamic module or it carries no  */
+   /* loaded module.                                                         */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module == NULL || dyn_module->loaded_module == NULL)
+      return NULL;
+
+   /*------------------------------------------------------------------------*/
+   /* Take the loaded module out of the dynamic module and give it to the    */
+   /* caller; the dynamic module no longer refers to it afterwards.          */
+   /*------------------------------------------------------------------------*/
+   detached = dyn_module->loaded_module;
+   dyn_module->loaded_module = NULL;
+
+   return detached;
+}
+/*****************************************************************************/
+/* delete_DLIMP_Dynamic_Module() */
+/* */
+/* Remove local copies of the string table, symbol table, program header */
+/* table, and dynamic table. */
+/* */
+/*****************************************************************************/
+static void delete_DLIMP_Dynamic_Module(DLOAD_HANDLE handle,
+                                        DLIMP_Dynamic_Module **ppdm)
+{
+   DLIMP_Dynamic_Module *victim;
+
+   /*------------------------------------------------------------------------*/
+   /* A missing pointer, or a pointer to no module, is an internal error:    */
+   /* report it and ask the client to exit.                                  */
+   /*------------------------------------------------------------------------*/
+   if (ppdm == NULL || *ppdm == NULL)
+   {
+      DLIF_error(DLET_MISC,
+                 "Internal Error: invalid argument to dynamic module "
+                 "destructor function; aborting loader\n");
+      DLIF_exit(1);
+   }
+
+   victim = *ppdm;
+
+   /*------------------------------------------------------------------------*/
+   /* Free the local copies of the name, string table, symbol table,         */
+   /* program header table and dynamic table, where present.                 */
+   /*------------------------------------------------------------------------*/
+   if (victim->name   != NULL) { DLIF_free(victim->name);   }
+   if (victim->strtab != NULL) { DLIF_free(victim->strtab); }
+   if (victim->symtab != NULL) { DLIF_free(victim->symtab); }
+   if (victim->phdr   != NULL) { DLIF_free(victim->phdr);   }
+   if (victim->dyntab != NULL) { DLIF_free(victim->dyntab); }
+
+   /*------------------------------------------------------------------------*/
+   /* A loaded module that is still attached was never detached by a         */
+   /* successful load, so it is destroyed together with its segments.        */
+   /*------------------------------------------------------------------------*/
+   if (victim->loaded_module)
+   {
+      delete_DLIMP_Loaded_Module(handle, &(victim->loaded_module));
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Release the dynamic module object and NULL the caller's pointer.       */
+   /*------------------------------------------------------------------------*/
+   DLIF_free(victim);
+   *ppdm = NULL;
+}
+
+/*****************************************************************************/
+/* file_header_magic_number_is_valid() */
+/* */
+/* Given an object file header, check the magic number to ensure that it */
+/* is an object file format that we recognize. This implementation of */
+/* the dynamic loader core will handle ELF object file format. */
+/* */
+/*****************************************************************************/
+static BOOL file_header_magic_number_is_valid(struct Elf32_Ehdr* header)
+{
+   /*------------------------------------------------------------------------*/
+   /* Check for correct ELF magic numbers in file header.  Each of the four  */
+   /* identification bytes must equal its expected value.  The comparison    */
+   /* is written with != because "!byte == MAGIC" negates the byte first     */
+   /* and then compares 0 or 1 against the magic value, which never matches  */
+   /* and would let every file through.                                      */
+   /*                                                                        */
+   /* Returns TRUE when all four bytes match; otherwise reports the problem  */
+   /* through DLIF_error and returns FALSE.                                  */
+   /*------------------------------------------------------------------------*/
+   if (header->e_ident[EI_MAG0] != ELFMAG0 ||
+       header->e_ident[EI_MAG1] != ELFMAG1 ||
+       header->e_ident[EI_MAG2] != ELFMAG2 ||
+       header->e_ident[EI_MAG3] != ELFMAG3)
+   {
+      DLIF_error(DLET_FILE, "Invalid ELF magic number.\n");
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* file_header_machine_is_valid() */
+/* */
+/* Check if the machine specified in the file header is supported by the */
+/* loader. If the loader was compiled with support for all targets, */
+/* the machine will be initially set to EM_NONE. Once a module has been */
+/* loaded, all remaining modules must have the same machine value. */
+/*****************************************************************************/
+static int file_header_machine_is_valid(Elf32_Half e_machine)
+{
+   /*------------------------------------------------------------------------*/
+   /* A machine value is accepted only when support for it was compiled in:  */
+   /* EM_ARM under ARM_TARGET, EM_TI_C6000 under C60_TARGET.  Every other    */
+   /* value is rejected.                                                     */
+   /*------------------------------------------------------------------------*/
+#ifdef ARM_TARGET
+   if (e_machine == EM_ARM)
+      return TRUE;
+#endif
+
+#ifdef C60_TARGET
+   if (e_machine == EM_TI_C6000)
+      return TRUE;
+#endif
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* is_valid_elf_object_file() */
+/* */
+/* Check file size against anticipated end location of string table, */
+/* symbol table, program header tables, etc. If we find anything untoward, */
+/* then we declare that the ELF file is corrupt and the load is aborted. */
+/* */
+/*****************************************************************************/
+static BOOL is_valid_elf_object_file(LOADER_FILE_DESC *fd,
+                                     DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* fd         - object file; its position is moved to the end of file.    */
+   /* dyn_module - module whose table sizes, name and program headers are    */
+   /*              checked.                                                  */
+   /*                                                                        */
+   /* Returns TRUE when every check passes, FALSE at the first failure.      */
+   /*------------------------------------------------------------------------*/
+   uint32_t fsz;
+   int i;
+
+   /*------------------------------------------------------------------------*/
+   /* Get file size.                                                         */
+   /*------------------------------------------------------------------------*/
+   DLIF_fseek(fd, 0, LOADER_SEEK_END);
+   fsz = DLIF_ftell(fd);
+
+   /*------------------------------------------------------------------------*/
+   /* Check for invalid table sizes (string table, symbol table, and         */
+   /* program header tables).                                                */
+   /*------------------------------------------------------------------------*/
+   if (!((dyn_module->strsz < fsz) &&
+         (dyn_module->symnum < fsz) &&
+         (dyn_module->phnum * sizeof(struct Elf32_Phdr)) < fsz))
+   {
+      DLIF_error(DLET_FILE, "Invalid ELF table bounds.\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Check for null so_name string in file with dynamic information.  A     */
+   /* name that was never set (NULL) is treated the same as an empty one     */
+   /* rather than being handed to strcmp().                                  */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->dyntab &&
+       (dyn_module->name == NULL || !strcmp(dyn_module->name, "")))
+   {
+      DLIF_error(DLET_MISC, "Dynamic file lacks SO_NAME identifier.\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Check for invalid program header information.                          */
+   /*------------------------------------------------------------------------*/
+   for (i = 0; i < dyn_module->phnum; i++)
+   {
+      struct Elf32_Phdr* phdr = dyn_module->phdr + i;
+
+      /*---------------------------------------------------------------------*/
+      /* Sanity check for relative sizes of filesz and memsz.                */
+      /*---------------------------------------------------------------------*/
+      if (!(phdr->p_type != PT_LOAD || phdr->p_filesz <= phdr->p_memsz))
+      {
+         DLIF_error(DLET_MISC,
+                    "Invalid file or memory size for segment %d.\n", i);
+         return FALSE;
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* Check that segment file offset doesn't go off the end of the file.  */
+      /* The test is "p_offset + p_filesz < fsz", written without the        */
+      /* addition so that a huge offset or size cannot wrap around and slip  */
+      /* past the check.                                                     */
+      /*---------------------------------------------------------------------*/
+      if (phdr->p_offset >= fsz || phdr->p_filesz >= (fsz - phdr->p_offset))
+      {
+         DLIF_error(DLET_FILE,
+                    "File location of segment %d is past the end of file.\n", i);
+         return FALSE;
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Check that a ET_DYN-type file is relocatable, i.e. that it came with   */
+   /* a symbol table.  This failure is not reported through DLIF_error.      */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->fhdr.e_type == ET_DYN && !dyn_module->symtab) return FALSE;
+
+   /*------------------------------------------------------------------------*/
+   /* All checks passed.                                                     */
+   /*------------------------------------------------------------------------*/
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* process_eiosabi() */
+/* */
+/* Check the EI_OSABI field to validate it and set any parameters based on */
+/* it. */
+/*****************************************************************************/
+static BOOL process_eiosabi(DLIMP_Dynamic_Module* dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* Delegate to the handler supplied by the currently selected target and  */
+   /* pass its verdict straight back to the caller.                          */
+   /*------------------------------------------------------------------------*/
+   BOOL verdict = cur_target->process_eiosabi(dyn_module);
+
+   return verdict;
+}
+
+/*****************************************************************************/
+/* dload_file_header() */
+/* */
+/* Read ELF file header. Store critical information in the provided */
+/* DLIMP_Dynamic_Module record. Check file header for validity. */
+/* */
+/*****************************************************************************/
+static BOOL dload_file_header(LOADER_FILE_DESC *fd,
+                              DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* fd         - object file to read the header from.                      */
+   /* dyn_module - receives the file header (fhdr) and the wrong_endian      */
+   /*              flag.                                                     */
+   /*                                                                        */
+   /* Returns TRUE when the header was read and passed every check, FALSE    */
+   /* (after reporting through DLIF_error) otherwise.                        */
+   /*------------------------------------------------------------------------*/
+
+   /*------------------------------------------------------------------------*/
+   /* Read ELF file header from given input file.  A short or failed read    */
+   /* would leave the header partly uninitialized, so it is rejected here    */
+   /* instead of being validated as if it were file content.                 */
+   /* NOTE(review): assumes DLIF_fread() returns the number of items read,   */
+   /* as fread() does -- confirm against the client API.                     */
+   /*------------------------------------------------------------------------*/
+   if (DLIF_fread(&(dyn_module->fhdr), sizeof(struct Elf32_Ehdr), 1, fd) != 1)
+   {
+      DLIF_error(DLET_FILE, "Failed to read ELF file header.\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Determine target vs. host endian-ness.  Does header data need to be    */
+   /* byte swapped?                                                          */
+   /*------------------------------------------------------------------------*/
+   dyn_module->wrong_endian =
+                     (dyn_module->fhdr.e_ident[EI_DATA] != DLIMP_get_endian());
+
+   /*------------------------------------------------------------------------*/
+   /* Swap file header structures, if needed.                                */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->wrong_endian)
+      DLIMP_change_ehdr_endian(&(dyn_module->fhdr));
+
+   /*------------------------------------------------------------------------*/
+   /* Write out magic ELF information for debug purposes.                    */
+   /*------------------------------------------------------------------------*/
+#if LOADER_DEBUG
+   if (debugging_on)
+   {
+      DLIF_trace("ELF: %c%c%c\n", dyn_module->fhdr.e_ident[1],
+                                  dyn_module->fhdr.e_ident[2],
+                                  dyn_module->fhdr.e_ident[3]);
+      DLIF_trace("ELF file header entry point: %x\n",
+                 dyn_module->fhdr.e_entry);
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Verify magic numbers in ELF file header.                               */
+   /*------------------------------------------------------------------------*/
+   if (!file_header_magic_number_is_valid(&(dyn_module->fhdr)))
+   {
+      DLIF_error(DLET_FILE, "Invalid ELF file header magic number.\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Verify that the file targets a machine this loader was built for.      */
+   /*------------------------------------------------------------------------*/
+   if (!file_header_machine_is_valid(dyn_module->fhdr.e_machine))
+   {
+      DLIF_error(DLET_FILE, "Invalid ELF file target machine.\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Verify file is an executable or dynamic shared object or library.      */
+   /*------------------------------------------------------------------------*/
+   if ((dyn_module->fhdr.e_type != ET_EXEC) &&
+       (dyn_module->fhdr.e_type != ET_DYN))
+   {
+      DLIF_error(DLET_FILE, "Invalid ELF file type.\n");
+      return FALSE;
+   }
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   /*------------------------------------------------------------------------*/
+   /* Stop profiling clock when file header information has finished         */
+   /* loading.  Re-start clock on initialization of symbol table, and        */
+   /* dynamic table pointers.                                                */
+   /*------------------------------------------------------------------------*/
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("done.\n");
+      if (profiling_on)
+      {
+         profile_stop_clock();
+         DLIF_trace("Took %lu cycles.\n",
+                    (unsigned long)profile_cycle_count());
+         profile_start_clock();
+      }
+   }
+#endif
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* dload_program_header_table() */
+/* */
+/* Make a local copy of the ELF object file's program header table in the */
+/* dynamic module data structure. */
+/* */
+/*****************************************************************************/
+static void dload_program_header_table(LOADER_FILE_DESC *fd,
+                                       DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* Read the program header tables from the object file.  The table size   */
+   /* is computed in 32-bit unsigned arithmetic: both factors are 16-bit     */
+   /* file fields and their product can exceed what a signed int holds.      */
+   /* NOTE(review): entries are later indexed as struct Elf32_Phdr, which    */
+   /* presumes e_phentsize == sizeof(struct Elf32_Phdr) -- confirm that this */
+   /* is validated elsewhere.                                                */
+   /*------------------------------------------------------------------------*/
+   struct Elf32_Ehdr *fhdr = &(dyn_module->fhdr);
+   uint32_t table_size = (uint32_t)fhdr->e_phnum * fhdr->e_phentsize;
+
+   dyn_module->phdr = (struct Elf32_Phdr*)(DLIF_malloc(table_size));
+
+   /*------------------------------------------------------------------------*/
+   /* If host memory for a non-empty table could not be obtained, report it  */
+   /* and leave the module with an empty program header table rather than    */
+   /* reading file content through a NULL pointer.                           */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->phdr == NULL && table_size != 0)
+   {
+      DLIF_error(DLET_MEMORY, "Could not allocate program header table.\n");
+      dyn_module->phnum = 0;
+      return;
+   }
+
+   DLIF_fseek(fd, fhdr->e_phoff, LOADER_SEEK_SET);
+   DLIF_fread(dyn_module->phdr, fhdr->e_phentsize, fhdr->e_phnum,fd);
+   dyn_module->phnum = fhdr->e_phnum;
+
+   /*------------------------------------------------------------------------*/
+   /* Byte swap the program header tables if the target endian-ness is not   */
+   /* the same as the host endian-ness.                                      */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->wrong_endian)
+   {
+      int i;
+      for (i = 0; i < dyn_module->phnum; i++)
+         DLIMP_change_phdr_endian(dyn_module->phdr + i);
+   }
+}
+
+/*****************************************************************************/
+/* dload_headers() */
+/* */
+/* Read ELF object file header and program header table information into */
+/* the given dynamic module data structure. If the object file contains */
+/* dynamic information, read in the dynamic tags, dynamic symbol table, */
+/* and global string table. Check to make sure that we are not already */
+/* in the process of loading the module (circular dependencies), then */
+/* perform some level of sanity checking on the content of the file to */
+/* provide some assurance that the file is not corrupted. */
+/* */
+/*****************************************************************************/
+static BOOL dload_headers(LOADER_FILE_DESC *fd,
+                          DLIMP_Dynamic_Module *dyn_module)
+{
+#if LOADER_DEBUG || LOADER_PROFILE
+   /*------------------------------------------------------------------------*/
+   /* Announce progress and, when profiling, start the clock.                */
+   /*------------------------------------------------------------------------*/
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("\nReading file headers ...\n");
+      if (profiling_on)
+      {
+         profile_start_clock();
+      }
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* The program header table and the virtual target are only set up once   */
+   /* the file header has been read and accepted.                            */
+   /*------------------------------------------------------------------------*/
+   if (dload_file_header(fd, dyn_module))
+   {
+      /*---------------------------------------------------------------------*/
+      /* Copy the program header table into the dynamic module object.       */
+      /*---------------------------------------------------------------------*/
+      dload_program_header_table(fd, dyn_module);
+
+      /*---------------------------------------------------------------------*/
+      /* Select the virtual target from e_machine; it gives access to the    */
+      /* target specific functions.                                          */
+      /*---------------------------------------------------------------------*/
+      cur_target = get_vt_obj(dyn_module->fhdr.e_machine);
+      if (cur_target)
+         return TRUE;
+
+      DLIF_error(DLET_FILE, "Attempt to load invalid ELF file, '%s'.\n",
+                 dyn_module->name);
+   }
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* find_dynamic_segment() */
+/* */
+/* Find the dynamic segment in the given ELF object file, if there is */
+/* one. If the segment is found, then the segment ID output parameter */
+/* is set to the index of the dynamic segment in the program header */
+/* table. If the dynamic segment is not found, the dynamic module's */
+/* relocatable flag is set to FALSE, and return FALSE. */
+/* */
+/*****************************************************************************/
+static BOOL find_dynamic_segment(DLIMP_Dynamic_Module *dyn_module,
+                                 Elf32_Word *dyn_seg_idx)
+{
+   int seg;
+
+   /*------------------------------------------------------------------------*/
+   /* Both the dynamic module and the place to store the segment index are   */
+   /* required.  A missing argument is an internal error: report it and ask  */
+   /* the client to exit.                                                    */
+   /*------------------------------------------------------------------------*/
+   if (!dyn_module || !dyn_seg_idx)
+   {
+      DLIF_error(DLET_MISC, "Internal error: find_dynamic_segment() needs "
+                            "non-NULL arguments.\n");
+      DLIF_exit(1);
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Assume the module is relocatable while scanning the program headers    */
+   /* for the first PT_DYNAMIC entry; its index is the result.               */
+   /*------------------------------------------------------------------------*/
+   dyn_module->relocatable = TRUE;
+
+   seg = 0;
+   while (seg < dyn_module->phnum)
+   {
+      if (dyn_module->phdr[seg].p_type == PT_DYNAMIC)
+      {
+         *dyn_seg_idx = seg;
+         return TRUE;
+      }
+
+      seg++;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* No dynamic segment exists, so the module is marked not relocatable.    */
+   /*------------------------------------------------------------------------*/
+   dyn_module->relocatable = FALSE;
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* copy_dynamic_table()                                                      */
+/*                                                                           */
+/*    Allocate a host buffer the size of the dynamic segment (p_filesz),    */
+/*    read the segment's Elf32_Dyn entries from the object file into it,    */
+/*    and store the buffer in dyn_module->dyntab.  Entries are byte         */
+/*    swapped when the module is flagged as wrong_endian.                   */
+/*                                                                           */
+/*****************************************************************************/
+static void copy_dynamic_table(LOADER_FILE_DESC *fd,
+                               DLIMP_Dynamic_Module *dyn_module,
+                               Elf32_Word dyn_seg_idx)
+{
+   Elf32_Word num_elem;
+   int        ent;
+
+   /*------------------------------------------------------------------------*/
+   /* The buffer is sized in bytes; the read is sized in whole entries.      */
+   /*------------------------------------------------------------------------*/
+   dyn_module->dyntab = DLIF_malloc(dyn_module->phdr[dyn_seg_idx].p_filesz);
+   num_elem =
+      dyn_module->phdr[dyn_seg_idx].p_filesz / sizeof(struct Elf32_Dyn);
+
+   DLIF_fseek(fd, dyn_module->phdr[dyn_seg_idx].p_offset, LOADER_SEEK_SET);
+   DLIF_fread(dyn_module->dyntab, sizeof(struct Elf32_Dyn), num_elem, fd);
+
+   /*------------------------------------------------------------------------*/
+   /* Nothing more to do unless the entries need an endianness fix-up.       */
+   /*------------------------------------------------------------------------*/
+   if (!dyn_module->wrong_endian)
+      return;
+
+   for (ent = 0; ent < num_elem; ent++)
+      DLIMP_change_dynent_endian(dyn_module->dyntab + ent);
+}
+
+/*****************************************************************************/
+/* process_target_dynamic_tag()                                              */
+/*                                                                           */
+/*    Forward dynamic table entry 'i' of the module to the current          */
+/*    target's process_dynamic_tag() hook and return the hook's result.     */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL process_target_dynamic_tag(DLIMP_Dynamic_Module* dyn_module, int i)
+{
+   BOOL handled = cur_target->process_dynamic_tag(dyn_module, i);
+
+   return handled;
+}
+
+/*****************************************************************************/
+/* process_dynamic_table()                                                   */
+/*                                                                           */
+/*    Walk the local copy of the dynamic table (dyn_module->dyntab) up to   */
+/*    its DT_NULL entry and record what each recognized tag provides.       */
+/*    Then read the dynamic string table and the dynamic symbol table from  */
+/*    the object file and set the module name from DT_SONAME.               */
+/*                                                                           */
+/*    Returns TRUE on success.  Returns FALSE when DT_STRTAB/DT_STRSZ,      */
+/*    DT_HASH or DT_SYMTAB is missing, when a host allocation fails, or     */
+/*    when DT_SONAME does not name a NUL-terminated string that lies        */
+/*    inside the string table.                                              */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL process_dynamic_table(LOADER_FILE_DESC *fd,
+                                  DLIMP_Dynamic_Module *dyn_module)
+{
+   int        i;
+   BOOL       soname_found  = FALSE;
+   Elf32_Addr soname_offset = 0;
+   Elf32_Addr strtab_offset = 0;
+   Elf32_Addr hash_offset   = 0;
+   Elf32_Addr symtab_offset = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Iterate over the dynamic table in order to process dynamic tags.       */
+   /* Tags not handled here are offered to the target specific handler.      */
+   /*------------------------------------------------------------------------*/
+   for (i = 0; dyn_module->dyntab[i].d_tag != DT_NULL; i++)
+   {
+      switch(dyn_module->dyntab[i].d_tag)
+      {
+         /*------------------------------------------------------------------*/
+         /* DT_SONAME: offset of the module name within the string table.    */
+         /* Only the offset is recorded here, so the order in which          */
+         /* DT_SONAME and DT_STRTAB appear does not matter.                  */
+         /*------------------------------------------------------------------*/
+         case DT_SONAME:
+#if LOADER_DEBUG
+            if (debugging_on) DLIF_trace("Found SO_NAME.\n");
+#endif
+            soname_found  = TRUE;
+            soname_offset = dyn_module->dyntab[i].d_un.d_ptr;
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* DT_STRSZ: size of the string table.                              */
+         /*------------------------------------------------------------------*/
+         case DT_STRSZ:
+            dyn_module->strsz = dyn_module->dyntab[i].d_un.d_val;
+
+#if LOADER_DEBUG
+            if (debugging_on)
+               DLIF_trace("Found string table Size: 0x%x\n", dyn_module->strsz);
+#endif
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* DT_STRTAB: file offset of the string table.                      */
+         /*------------------------------------------------------------------*/
+         case DT_STRTAB:
+            strtab_offset = dyn_module->dyntab[i].d_un.d_ptr;
+#if LOADER_DEBUG
+            if (debugging_on)
+               DLIF_trace("Found string table: 0x%x\n", strtab_offset);
+#endif
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* DT_HASH: file offset of the symbol hash table.                   */
+         /*------------------------------------------------------------------*/
+         case DT_HASH:
+            hash_offset = dyn_module->dyntab[i].d_un.d_ptr;
+#if LOADER_DEBUG
+            if (debugging_on)
+               DLIF_trace("Found symbol hash table: 0x%x\n", hash_offset);
+#endif
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* DT_SYMTAB: file offset of the symbol table.                      */
+         /*------------------------------------------------------------------*/
+         case DT_SYMTAB:
+            symtab_offset = dyn_module->dyntab[i].d_un.d_ptr;
+#if LOADER_DEBUG
+            if (debugging_on)
+               DLIF_trace("Found symbol table: 0x%x\n", symtab_offset);
+#endif
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* Initialization tags: array tags record their index into the      */
+         /* local dynamic table, size tags record their value.               */
+         /*------------------------------------------------------------------*/
+         case DT_PREINIT_ARRAY:
+            dyn_module->preinit_array_idx = i;
+            break;
+
+         case DT_PREINIT_ARRAYSZ:
+            dyn_module->preinit_arraysz = dyn_module->dyntab[i].d_un.d_val;
+            break;
+
+         case DT_INIT:
+            dyn_module->init_idx = i;
+            break;
+
+         case DT_INIT_ARRAY:
+            dyn_module->init_array_idx = i;
+            break;
+
+         case DT_INIT_ARRAYSZ:
+            dyn_module->init_arraysz = dyn_module->dyntab[i].d_un.d_val;
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* Termination tags are recognized but nothing is recorded for      */
+         /* them in this function.                                           */
+         /*------------------------------------------------------------------*/
+         case DT_FINI_ARRAY:
+         case DT_FINI_ARRAYSZ:
+         case DT_FINI:
+            break;
+
+         /*------------------------------------------------------------------*/
+         /* Anything else is offered to the target; a tag that the target    */
+         /* rejects is only traced, not treated as an error.                 */
+         /*------------------------------------------------------------------*/
+         default:
+         {
+            if (!process_target_dynamic_tag(dyn_module, i))
+            {
+#if LOADER_DEBUG
+               if (debugging_on)
+                  DLIF_trace("Unrecognized dynamic tag: 0x%X\n",
+                             dyn_module->dyntab[i].d_tag);
+#endif
+            }
+
+            break;
+         }
+
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* If string table offset and size were found, read string table in from  */
+   /* the ELF object file.                                                   */
+   /*------------------------------------------------------------------------*/
+   if (strtab_offset && dyn_module->strsz)
+   {
+      DLIF_fseek(fd, strtab_offset, LOADER_SEEK_SET);
+      dyn_module->strtab = DLIF_malloc(dyn_module->strsz);
+      if (dyn_module->strtab == NULL)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate host memory for dynamic string "
+                    "table.\n");
+         return FALSE;
+      }
+      DLIF_fread(dyn_module->strtab, sizeof(uint8_t), dyn_module->strsz, fd);
+   }
+   else
+   {
+      DLIF_warning(DLWT_MISC,
+                   "Mandatory dynamic tag DT_STRTAB/DT_STRSZ not found!\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* If symbol hash table is found, read its first two words: nbucket and   */
+   /* nchain.  nchain is used as the number of dynamic symbols.              */
+   /*------------------------------------------------------------------------*/
+   if (hash_offset)
+   {
+      Elf32_Word hash_nbucket;
+      Elf32_Word hash_nchain;
+
+      DLIF_fseek(fd, hash_offset, LOADER_SEEK_SET);
+      DLIF_fread(&(hash_nbucket), sizeof(Elf32_Word), 1, fd);
+      DLIF_fread(&(hash_nchain), sizeof(Elf32_Word), 1, fd);
+      if (dyn_module->wrong_endian)
+      {
+         DLIMP_change_endian32((int32_t*)(&(hash_nbucket)));
+         DLIMP_change_endian32((int32_t*)(&(hash_nchain)));
+      }
+
+      dyn_module->symnum = hash_nchain;
+#if LOADER_DEBUG
+      if (debugging_on) DLIF_trace("symnum=%d\n", hash_nchain);
+#endif
+   }
+   else
+   {
+      DLIF_warning(DLWT_MISC, "Mandatory dynamic tag DT_HASH is not found!\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Read dynamic symbol table.                                             */
+   /*------------------------------------------------------------------------*/
+   if (symtab_offset)
+   {
+      int j = 0;
+      DLIF_fseek(fd, symtab_offset, LOADER_SEEK_SET);
+      dyn_module->symtab =
+                  DLIF_malloc(dyn_module->symnum * sizeof(struct Elf32_Sym));
+
+      /*---------------------------------------------------------------------*/
+      /* A NULL result is only a failure when symbols were requested; an     */
+      /* empty table may legitimately yield NULL from a zero-size request.   */
+      /*---------------------------------------------------------------------*/
+      if (dyn_module->symtab == NULL && dyn_module->symnum != 0)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate host memory for dynamic symbol "
+                    "table.\n");
+         return FALSE;
+      }
+
+      DLIF_fread(dyn_module->symtab, sizeof(struct Elf32_Sym),
+                 dyn_module->symnum, fd);
+      if (dyn_module->wrong_endian)
+      {
+         for (j = 0; j < dyn_module->symnum; j++)
+            DLIMP_change_sym_endian(dyn_module->symtab + j);
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* The st_name field of an Elf32_Sym entity is an offset into the      */
+      /* string table.  Convert it into a pointer to the string.             */
+      /* NOTE(review): the cast below narrows the pointer to Elf32_Word,     */
+      /* which looks lossy on hosts with wider pointers -- confirm.          */
+      /*---------------------------------------------------------------------*/
+      if (strtab_offset)
+         for (j = 0; j < dyn_module->symnum; j++)
+            dyn_module->symtab[j].st_name += (Elf32_Word) dyn_module->strtab;
+   }
+   else
+   {
+      DLIF_warning(DLWT_MISC,
+                   "Mandatory dynamic tag DT_SYMTAB is not found!\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Read the SONAME.  A module without one gets an empty name.             */
+   /*------------------------------------------------------------------------*/
+   if (!soname_found)
+   {
+      DLIF_warning(DLWT_MISC, "Dynamic tag DT_SONAME is not found!\n");
+      dyn_module->name = DLIF_malloc(sizeof(char));
+      if (dyn_module->name == NULL)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate host memory for module name.\n");
+         return FALSE;
+      }
+      *dyn_module->name = '\0';
+   }
+   else
+   {
+      /*---------------------------------------------------------------------*/
+      /* The offset comes straight from the file.  Make sure it lies inside  */
+      /* the string table and that a terminator exists before the end of     */
+      /* the table, so strlen()/strcpy() below cannot run past the buffer.   */
+      /*---------------------------------------------------------------------*/
+      if (soname_offset >= dyn_module->strsz ||
+          memchr(dyn_module->strtab + soname_offset, '\0',
+                 dyn_module->strsz - soname_offset) == NULL)
+      {
+         DLIF_error(DLET_FILE,
+                    "DT_SONAME does not refer to a valid string in the "
+                    "dynamic string table.\n");
+         return FALSE;
+      }
+
+      dyn_module->name =
+                    DLIF_malloc(strlen(dyn_module->strtab + soname_offset) + 1);
+      if (dyn_module->name == NULL)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate host memory for module name.\n");
+         return FALSE;
+      }
+      strcpy(dyn_module->name, dyn_module->strtab + soname_offset);
+
+#if LOADER_DEBUG
+      if (debugging_on)
+         DLIF_trace("Name of dynamic object: %s\n", dyn_module->name);
+#endif
+   }
+
+   return TRUE;
+}
+
+
+/*****************************************************************************/
+/* dload_dynamic_information()                                               */
+/*                                                                           */
+/*    Copy the dynamic table found at program header index dyn_seg_idx      */
+/*    into the dynamic module object, then process its tag entries.  The    */
+/*    result of the tag processing step is returned to the caller.          */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL dload_dynamic_information(LOADER_FILE_DESC *fd,
+                                      DLIMP_Dynamic_Module *dyn_module,
+                                      Elf32_Word dyn_seg_idx)
+{
+   BOOL tags_ok;
+
+   /*------------------------------------------------------------------------*/
+   /* Step 1: make the local copy of the dynamic table.                      */
+   /*------------------------------------------------------------------------*/
+   copy_dynamic_table(fd, dyn_module, dyn_seg_idx);
+
+   /*------------------------------------------------------------------------*/
+   /* Step 2: interpret the copied entries.                                  */
+   /*------------------------------------------------------------------------*/
+   tags_ok = process_dynamic_table(fd, dyn_module);
+
+   return tags_ok;
+}
+
+/*****************************************************************************/
+/* check_circular_dependency()                                               */
+/*                                                                           */
+/*    Compare dyn_mod_name against every name held in the loader's          */
+/*    DLIMP_module_dependency_list.  A match is reported as a circular      */
+/*    dependency error and FALSE is returned; otherwise TRUE is returned.   */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL check_circular_dependency(DLOAD_HANDLE handle,
+                                      const char *dyn_mod_name)
+{
+   LOADER_OBJECT *loader = (LOADER_OBJECT *)handle;
+   int            idx    = 0;
+
+   while (idx < loader->DLIMP_module_dependency_list.size)
+   {
+      /*---------------------------------------------------------------------*/
+      /* The list buffer is read as an array of C strings.                   */
+      /*---------------------------------------------------------------------*/
+      const char *pending =
+         ((char**)(loader->DLIMP_module_dependency_list.buf))[idx];
+
+      if (strcmp(dyn_mod_name, pending) == 0)
+      {
+         DLIF_error(DLET_MISC,
+                    "Circular dependency detected, '%s' is already in the "
+                    "process of loading.\n", dyn_mod_name);
+         return FALSE;
+      }
+
+      idx++;
+   }
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* dload_dynamic_segment()                                                   */
+/*                                                                           */
+/*    Locate the dynamic segment of the module.  Without one, the module    */
+/*    has been marked "not relocatable" by the search and TRUE is returned  */
+/*    right away.  With one, the EI_OSABI value is processed, the dynamic   */
+/*    table is copied and interpreted, and the module name is checked       */
+/*    against the modules currently being loaded.  Any failing step makes   */
+/*    this function return FALSE.                                           */
+/*                                                                           */
+/*****************************************************************************/
+static BOOL dload_dynamic_segment(DLOAD_HANDLE handle,
+                                  LOADER_FILE_DESC *fd,
+                                  DLIMP_Dynamic_Module *dyn_module)
+{
+   Elf32_Word dynamic_idx = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* No dynamic segment is not an error; the caller continues with the      */
+   /* module flagged as not relocatable.                                     */
+   /*------------------------------------------------------------------------*/
+   if (!find_dynamic_segment(dyn_module, &dynamic_idx))
+   {
+      return TRUE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* The OSABI is processed only once a dynamic segment is known to exist.  */
+   /*------------------------------------------------------------------------*/
+   if (!process_eiosabi(dyn_module))
+   {
+      DLIF_error(DLET_FILE, "Unsupported EI_OSABI value.\n");
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Copy and interpret the dynamic table.                                  */
+   /*------------------------------------------------------------------------*/
+   if (!dload_dynamic_information(fd, dyn_module, dynamic_idx))
+   {
+      return FALSE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Finally, refuse a module that is already in the process of loading.    */
+   /*------------------------------------------------------------------------*/
+   return check_circular_dependency(handle, dyn_module->name) ? TRUE : FALSE;
+}
+
+/*****************************************************************************/
+/* COPY_SEGMENTS() -                                                         */
+/*                                                                           */
+/*    For every loaded segment of the module, build a memory request and    */
+/*    hand it to DLIF_copy(), then remember the host address that the       */
+/*    request carries afterwards.                                           */
+/*****************************************************************************/
+static void copy_segments(DLOAD_HANDLE handle, LOADER_FILE_DESC* fp,
+                          DLIMP_Dynamic_Module* dyn_module)
+{
+   LOADER_OBJECT        *loader = (LOADER_OBJECT *)handle;
+   DLIMP_Loaded_Segment *cur =
+      (DLIMP_Loaded_Segment*)(dyn_module->loaded_module->loaded_segments.buf);
+   int remaining = dyn_module->loaded_module->loaded_segments.size;
+
+   for (; remaining > 0; remaining--, cur++)
+   {
+      struct DLOAD_MEMORY_REQUEST request;
+
+      /*---------------------------------------------------------------------*/
+      /* Describe where the segment lives in the file and how it is used.    */
+      /*---------------------------------------------------------------------*/
+      request.fp      = fp;
+      request.segment = cur->obj_desc;
+      request.offset  = cur->phdr.p_offset;
+      request.align   = cur->phdr.p_align;
+
+      request.flags   = DLOAD_SF_relocatable;
+      if (cur->phdr.p_flags & PF_W) request.flags |= DLOAD_SF_writable;
+      if (cur->phdr.p_flags & PF_X) request.flags |= DLOAD_SF_executable;
+
+      /*---------------------------------------------------------------------*/
+      /* Let the client copy the segment, then keep the host address taken   */
+      /* from the request.                                                   */
+      /*---------------------------------------------------------------------*/
+      DLIF_copy(loader->client_handle, &request);
+      cur->host_address = request.host_address;
+   }
+}
+
+/*****************************************************************************/
+/* WRITE_SEGMENTS() -                                                        */
+/*                                                                           */
+/*    For every loaded segment of the module, build a memory request that   */
+/*    carries the segment's host address and hand it to DLIF_write().       */
+/*****************************************************************************/
+static void write_segments(DLOAD_HANDLE handle,
+                           LOADER_FILE_DESC* fp,
+                           DLIMP_Dynamic_Module* dyn_module)
+{
+   LOADER_OBJECT        *loader = (LOADER_OBJECT *)handle;
+   DLIMP_Loaded_Segment *segments =
+      (DLIMP_Loaded_Segment*)(dyn_module->loaded_module->loaded_segments.buf);
+   int count = dyn_module->loaded_module->loaded_segments.size;
+   int idx   = 0;
+
+   while (idx < count)
+   {
+      DLIMP_Loaded_Segment        *cur = &segments[idx];
+      struct DLOAD_MEMORY_REQUEST  request;
+
+      /*---------------------------------------------------------------------*/
+      /* Access flags are derived from the program header permission bits.   */
+      /*---------------------------------------------------------------------*/
+      request.flags = DLOAD_SF_relocatable;
+      if (cur->phdr.p_flags & PF_X) request.flags |= DLOAD_SF_executable;
+      if (cur->phdr.p_flags & PF_W) request.flags |= DLOAD_SF_writable;
+
+      request.fp           = fp;
+      request.segment      = cur->obj_desc;
+      request.offset       = cur->phdr.p_offset;
+      request.align        = cur->phdr.p_align;
+      request.host_address = cur->host_address;
+
+      /*---------------------------------------------------------------------*/
+      /* Pass the request, including the host address, to the client.        */
+      /*---------------------------------------------------------------------*/
+      DLIF_write(loader->client_handle, &request);
+
+      idx++;
+   }
+}
+
+/*****************************************************************************/
+/* SEG_HAS_SPACE_FOR_WRITE() -                                               */
+/*                                                                           */
+/*    Check whether 'sz' bytes can be written at the module's c_args        */
+/*    address without running past the end of the loaded segment that       */
+/*    contains that address.                                                */
+/*                                                                           */
+/*    Returns TRUE when the write fits.  Returns FALSE when it does not     */
+/*    fit, when 'sz' is negative, or when the address does not fall inside  */
+/*    any loaded segment of the module.                                     */
+/*****************************************************************************/
+static BOOL seg_has_space_for_write(DLIMP_Loaded_Module* lmodule, int sz)
+{
+   DLIMP_Loaded_Segment* seg =
+                       (DLIMP_Loaded_Segment*)(lmodule->loaded_segments.buf);
+   int s, seg_size = lmodule->loaded_segments.size;
+
+   Elf32_Addr write_address = (Elf32_Addr)lmodule->c_args;
+
+   /*------------------------------------------------------------------------*/
+   /* A negative size can never be satisfied.                                */
+   /*------------------------------------------------------------------------*/
+   if (sz < 0) return FALSE;
+
+   for (s=0; s<seg_size; s++)
+   {
+      Elf32_Addr seg_boundary =
+                     seg[s].phdr.p_vaddr + seg[s].obj_desc->memsz_in_bytes;
+
+      /*---------------------------------------------------------------------*/
+      /* If address to write to is at or above the segment address and      */
+      /* below the segment end, it must lie in the current segment.          */
+      /*---------------------------------------------------------------------*/
+      if ((write_address >= seg[s].phdr.p_vaddr) &&
+          (write_address < seg_boundary))
+      {
+         /*------------------------------------------------------------------*/
+         /* Compare the size against the room that is left instead of        */
+         /* adding it to the address: the sum could wrap around and make an  */
+         /* oversized write look like it fits.  The subtraction cannot wrap  */
+         /* because write_address < seg_boundary was just established.       */
+         /*------------------------------------------------------------------*/
+         if ((Elf32_Addr)sz > (seg_boundary - write_address))
+         {
+#if LOADER_DEBUG
+            if (debugging_on)
+            {
+               DLIF_trace("Write requires 0x%x bytes\n",write_address + sz);
+               DLIF_trace("Seg boundary at : 0x%x\n",seg_boundary);
+               DLIF_trace("WARNING - Not enough space in segment\n");
+            }
+#endif
+            return FALSE;
+         }
+
+         return TRUE;
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Given address doesn't belong to any known segment.                     */
+   /*------------------------------------------------------------------------*/
+   return FALSE;
+}
+
+
+/*****************************************************************************/
+/* DLOAD_initialize()                                                        */
+/*                                                                           */
+/*    Deprecated entry point, replaced by DLOAD_create().  It is kept as    */
+/*    an empty function: the handle is accepted and nothing is done.        */
+/*                                                                           */
+/*****************************************************************************/
+void DLOAD_initialize(DLOAD_HANDLE handle)
+{
+   /*------------------------------------------------------------------------*/
+   /* Intentionally empty; the handle is not used.                           */
+   /*------------------------------------------------------------------------*/
+   (void)handle;
+}
+
+/*****************************************************************************/
+/* DLOAD_finalize()                                                          */
+/*                                                                           */
+/*    Deprecated entry point, replaced by DLOAD_destroy().  It is kept as   */
+/*    an empty function: the handle is accepted and nothing is done.        */
+/*                                                                           */
+/*****************************************************************************/
+void DLOAD_finalize(DLOAD_HANDLE handle)
+{
+   /*------------------------------------------------------------------------*/
+   /* Intentionally empty; the handle is not used.                           */
+   /*------------------------------------------------------------------------*/
+   (void)handle;
+}
+
+/*****************************************************************************/
+/* dload_static_executable()                                                 */
+/*                                                                           */
+/*    Load a module that needs no relocation.  The entry point is taken     */
+/*    from the ELF file header, the static segments are loaded and the      */
+/*    object is written.  On success the loaded module is detached from     */
+/*    the dynamic module and its file handle is returned; on failure an     */
+/*    error is reported and 0 is returned.  The dynamic module object is    */
+/*    deleted in both cases.                                                */
+/*                                                                           */
+/*****************************************************************************/
+static int32_t dload_static_executable(DLOAD_HANDLE handle,
+                                       LOADER_FILE_DESC *fd,
+                                       DLIMP_Dynamic_Module *dyn_module)
+{
+   int32_t result = 0;
+
+#if LOADER_DEBUG
+   if (debugging_on) DLIF_trace("Starting dload_static_executable() ...\n");
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* The entry point of a static executable comes from the file header.     */
+   /*------------------------------------------------------------------------*/
+   dyn_module->loaded_module->entry_point = dyn_module->fhdr.e_entry;
+
+   if (!load_static_segment(handle, fd, dyn_module) ||
+       !load_object(fd, dyn_module))
+   {
+      /*---------------------------------------------------------------------*/
+      /* Either step failing is reported as a target memory failure.         */
+      /*---------------------------------------------------------------------*/
+      DLIF_error(DLET_MEMORY,
+                 "Failed to allocate target memory for static executable.\n");
+   }
+   else
+   {
+      /*---------------------------------------------------------------------*/
+      /* Detach the loaded module before the dynamic module is deleted       */
+      /* below, and keep its file handle as the return value.                */
+      /*---------------------------------------------------------------------*/
+      DLIMP_Loaded_Module *detached = detach_loaded_module(dyn_module);
+      result = detached->file_handle;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* The dynamic module object is no longer needed on either path.          */
+   /*------------------------------------------------------------------------*/
+   delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+
+#if LOADER_DEBUG
+   if (debugging_on) DLIF_trace("Finished dload_static_executable()\n");
+#endif
+
+   return result;
+}
+
+#if LOADER_DEBUG || LOADER_PROFILE
+int DLREL_relocations;          /* traced as "Number of relocations" and reset to 0 by process_dynamic_module_relocations() */
+time_t DLREL_total_reloc_time;  /* traced as "Total reloc time" and reset to 0 by process_dynamic_module_relocations() */
+#endif
+
+/*****************************************************************************/
+/* process_dynamic_module_relocations()                                      */
+/*                                                                           */
+/*    Copy the module's segments into host memory, run DLREL_relocate()     */
+/*    on the module, then write the segments out again.  In debug or        */
+/*    profile builds, progress and timing information is traced and the     */
+/*    relocation counters are reset afterwards.                             */
+/*                                                                           */
+/*****************************************************************************/
+static void process_dynamic_module_relocations(DLOAD_HANDLE handle,
+                                               LOADER_FILE_DESC *fd,
+                                               DLIMP_Dynamic_Module *dyn_module)
+{
+#if LOADER_DEBUG || LOADER_PROFILE
+   if(debugging_on || profiling_on)
+   {
+      DLIF_trace("Running relocate()...\n");
+      if (profiling_on) profile_start_clock();
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Copy segments from file to host memory                                 */
+   /*------------------------------------------------------------------------*/
+   copy_segments(handle, fd, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Process dynamic relocations.                                           */
+   /*------------------------------------------------------------------------*/
+   DLREL_relocate(handle, fd, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Write segments from host memory to target memory                       */
+   /*------------------------------------------------------------------------*/
+   write_segments(handle, fd, dyn_module);
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   /*------------------------------------------------------------------------*/
+   /* Report timing and progress information for relocation step.            */
+   /*------------------------------------------------------------------------*/
+   if (debugging_on || profiling_on)
+   {
+      if (profiling_on)
+      {
+         profile_stop_clock();
+         DLIF_trace("Took %lu cycles.\n",
+                    (unsigned long) profile_cycle_count());
+         DLIF_trace("Total reloc time: %lu\n",
+                    (unsigned long) DLREL_total_reloc_time);
+
+         /*------------------------------------------------------------------*/
+         /* The average has the type of DLREL_total_reloc_time (time_t),     */
+         /* so it is cast to match the %ld conversion.  A relocation count   */
+         /* of zero yields 0 instead of dividing.                            */
+         /*------------------------------------------------------------------*/
+         DLIF_trace("Time per relocation: %ld\n",
+                    (long) (DLREL_relocations ?
+                            DLREL_total_reloc_time / DLREL_relocations : 0));
+      }
+
+      DLIF_trace("Number of relocations: %d\n", DLREL_relocations);
+      DLIF_trace("\nAbout to run load_object()...");
+      DLREL_total_reloc_time = DLREL_relocations = 0;
+      if (profiling_on) profile_start_clock();
+   }
+#endif
+
+}
+
+/*****************************************************************************/
+/* store_preinit_data()                                                      */
+/*                                                                           */
+/*    When the module has a non-empty pre-initialization array, allocate    */
+/*    a record holding the array size and the address taken from the        */
+/*    module's DT_PREINIT_ARRAY dynamic table entry, and enqueue it on      */
+/*    TI_init_table.  Arrays of size 0 are ignored (CQ36935).               */
+/*                                                                           */
+/*    If the record cannot be allocated, an error is reported and nothing   */
+/*    is enqueued.                                                          */
+/*                                                                           */
+/*****************************************************************************/
+static void store_preinit_data(DLIMP_Dynamic_Module *dyn_module)
+{
+   IF_single_record *preinit_rec = NULL;
+
+   /*------------------------------------------------------------------------*/
+   /* dyn_module holds the DT_PREINIT_ARRAYSZ value and the index of the     */
+   /* DT_PREINIT_ARRAY entry within the local copy of the dynamic table.     */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->preinit_arraysz > 0)
+   {
+      preinit_rec = (IF_single_record *)DLIF_malloc(sizeof(IF_single_record));
+
+      /*---------------------------------------------------------------------*/
+      /* Do not write through a failed allocation.                           */
+      /*---------------------------------------------------------------------*/
+      if (preinit_rec == NULL)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate pre-initialization record.\n");
+         return;
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* Record the size and the address stored in the DT_PREINIT_ARRAY      */
+      /* entry.                                                              */
+      /*---------------------------------------------------------------------*/
+      preinit_rec->size = dyn_module->preinit_arraysz;
+      preinit_rec->sect_addr = (TARGET_ADDRESS)
+                (dyn_module->dyntab[dyn_module->preinit_array_idx].d_un.d_ptr);
+   }
+
+   if (preinit_rec) IF_table_enqueue(&TI_init_table, preinit_rec);
+}
+
+/*****************************************************************************/
+/* store_init_data()                                                         */
+/*                                                                           */
+/*    Enqueue the module's initialization information on TI_init_table:     */
+/*    one record (size 0) for the DT_INIT entry when init_idx is set, and   */
+/*    one record for the DT_INIT_ARRAY entry when init_arraysz is greater   */
+/*    than 0 (CQ36935).  Addresses are taken from the local copy of the     */
+/*    dynamic table.                                                        */
+/*                                                                           */
+/*    If a record cannot be allocated, an error is reported and that        */
+/*    record is not enqueued.                                               */
+/*                                                                           */
+/*****************************************************************************/
+static void store_init_data(DLIMP_Dynamic_Module *dyn_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* DT_INIT: init_idx is the index of the tag in the local dynamic table;  */
+   /* -1 means the tag is not present.                                       */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->init_idx != -1)
+   {
+      IF_single_record *init_rec =
+                 (IF_single_record *)DLIF_malloc(sizeof(IF_single_record));
+
+      /*---------------------------------------------------------------------*/
+      /* Do not write through a failed allocation.                           */
+      /*---------------------------------------------------------------------*/
+      if (init_rec == NULL)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate initialization record.\n");
+      }
+      else
+      {
+         init_rec->size = 0;
+         init_rec->sect_addr = (TARGET_ADDRESS)
+                        (dyn_module->dyntab[dyn_module->init_idx].d_un.d_ptr);
+
+         IF_table_enqueue(&TI_init_table, init_rec);
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* DT_INIT_ARRAY / DT_INIT_ARRAYSZ: only arrays with a size greater than  */
+   /* 0 are recorded.                                                        */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->init_arraysz > 0)
+   {
+      IF_single_record *arr_rec =
+                 (IF_single_record *)DLIF_malloc(sizeof(IF_single_record));
+
+      /*---------------------------------------------------------------------*/
+      /* Do not write through a failed allocation.                           */
+      /*---------------------------------------------------------------------*/
+      if (arr_rec == NULL)
+      {
+         DLIF_error(DLET_MEMORY,
+                    "Failed to allocate initialization array record.\n");
+         return;
+      }
+
+      arr_rec->size = dyn_module->init_arraysz;
+      arr_rec->sect_addr = (TARGET_ADDRESS)
+                  (dyn_module->dyntab[dyn_module->init_array_idx].d_un.d_ptr);
+
+      IF_table_enqueue(&TI_init_table, arr_rec);
+   }
+}
+
+/*****************************************************************************/
+/* execute_module_initialization() */
+/* */
+/* Execute the pre-initialization and initialization function(s) that */
+/* were saved off in TI_init_table by store_preinit_data() and */
+/* store_init_data(). The user may also provide a custom initialization */
+/* function that needs to be executed before the compiler generated */
+/* static initialization functions are executed. */
+/* */
+/* handle - loader instance; its client_handle is passed to the client */
+/* read and execute callbacks. */
+/* */
+/* Records are processed in queue order. A record with size 0 is a */
+/* single DT_INIT function; any other size is an array section whose */
+/* entries are called in the order they appear. NULL function addresses */
+/* are skipped with a warning. If the host copy of an array cannot be */
+/* allocated, an error is reported and that record is skipped. */
+/* */
+/*****************************************************************************/
+static void execute_module_initialization(DLOAD_HANDLE handle)
+{
+   IF_single_record    *val      = NULL;
+   IF_table_Queue_Node *curr_ptr = TI_init_table.front_ptr;
+   LOADER_OBJECT       *pHandle  = (LOADER_OBJECT *)handle;
+
+   for (; curr_ptr; curr_ptr = curr_ptr->next_ptr)
+   {
+      val = curr_ptr->value;
+
+      /*---------------------------------------------------------------------*/
+      /* A size of 0 indicates DT_INIT, otherwise this is an ARRAY. */
+      /*---------------------------------------------------------------------*/
+      if (val->size != 0)
+      {
+         /*------------------------------------------------------------------*/
+         /* Now make a loader-accessible copy of the .init_array section. */
+         /*------------------------------------------------------------------*/
+         int32_t i;
+         int32_t num_init_fcns = val->size/sizeof(TARGET_ADDRESS);
+         TARGET_ADDRESS *init_array_buf = (TARGET_ADDRESS *)
+                                                   DLIF_malloc(val->size);
+
+         /*------------------------------------------------------------------*/
+         /* Without a host buffer there is nothing safe to read into; */
+         /* report the problem and move on to the next record. */
+         /*------------------------------------------------------------------*/
+         if (!init_array_buf)
+         {
+            DLIF_error(DLET_MISC,
+                       "Could not allocate copy of init array section.\n");
+            continue;
+         }
+
+         DLIF_read(pHandle->client_handle,
+                   init_array_buf, 1, val->size,
+                   (TARGET_ADDRESS)val->sect_addr);
+
+         /*------------------------------------------------------------------*/
+         /* Call each function whose address occupies an entry in the */
+         /* array, in the order that they appear in the array. Make sure */
+         /* that function addresses are valid before execution. */
+         /*------------------------------------------------------------------*/
+         for (i = 0; i < num_init_fcns; i++)
+         {
+            if (init_array_buf[i])
+            {
+               DLIF_execute(pHandle->client_handle,
+                            (TARGET_ADDRESS)(init_array_buf[i]));
+            }
+            else
+            {
+               DLIF_warning(DLWT_MISC,
+                  "DT_INIT_ARRAY/DT_PREINIT_ARRAY function address is NULL!");
+            }
+         }
+
+         DLIF_free(init_array_buf);
+      }
+      else
+      {
+         if (val->sect_addr)
+         {
+            DLIF_execute(pHandle->client_handle,
+                         (TARGET_ADDRESS)(val->sect_addr));
+         }
+         else
+         {
+            DLIF_warning(DLWT_MISC, "DT_INIT function address is NULL!");
+         }
+      }
+   }
+}
+
+/*****************************************************************************/
+/* adjust_module_init_fini() */
+/* */
+/* When the application itself takes care of C/C++ initialization and */
+/* termination, zero out the module's init/preinit/fini information so */
+/* that the dynamic loader does not process those sections as well. */
+/* */
+/* dm - dynamic module to adjust; its loaded_module is updated too. */
+/* */
+/*****************************************************************************/
+static void adjust_module_init_fini(DLIMP_Dynamic_Module *dm)
+{
+   /*------------------------------------------------------------------------*/
+   /* Only dynamic executables are candidates for this adjustment. */
+   /*------------------------------------------------------------------------*/
+   if (dm->fhdr.e_type != ET_EXEC)
+      return;
+
+   /*------------------------------------------------------------------------*/
+   /* The C6x RTS boot routine _c_int00 processes .init_array itself and */
+   /* handles termination through the at_exit functionality. If the */
+   /* executable includes it, the loader must not repeat that work. */
+   /* NOTE: Use of __TI_STACK_SIZE is a hack. The _c_int00 symbol is not */
+   /* in the dynamic symbol table. The right fix is for the linker */
+   /* not to generate the init array tags if the build includes the */
+   /* RTS boot routine. */
+   /*------------------------------------------------------------------------*/
+   if (!DLSYM_lookup_local_symtab("__TI_STACK_SIZE", dm->symtab, dm->symnum,
+                                  NULL))
+      return;
+
+   /*------------------------------------------------------------------------*/
+   /* Disable loader-driven pre-initialization and initialization. */
+   /*------------------------------------------------------------------------*/
+   dm->preinit_array_idx = -1;
+   dm->preinit_arraysz   = 0;
+   dm->init_array_idx    = -1;
+   dm->init_arraysz      = 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Disable loader-driven termination. */
+   /*------------------------------------------------------------------------*/
+   dm->loaded_module->fini         = (Elf32_Addr) NULL;
+   dm->loaded_module->fini_array   = (Elf32_Addr) NULL;
+   dm->loaded_module->fini_arraysz = 0;
+}
+
+/*****************************************************************************/
+/* relocate_dependency_graph_modules() */
+/* */
+/* For each dynamic module on the dependency stack, process dynamic */
+/* relocation entries and record the initialization data for all global */
+/* and static objects that are defined in the given module. The stack */
+/* is emptied from the top (LIFO). Each dynamic module object is popped */
+/* off the top of the stack, the module gets relocated, its init data is */
+/* stored, and then, after detaching the loaded module object from its */
+/* dynamic module, the dynamic module object is destructed. */
+/* */
+/* handle - loader instance owning the dependency stack. */
+/* fd - file descriptor of the top-level file; it is the only */
+/* descriptor this function leaves open. */
+/* dyn_module - module on whose behalf the call is made. */
+/* */
+/* Returns the file handle of dyn_module's loaded module. */
+/* */
+/*****************************************************************************/
+static
+int32_t relocate_dependency_graph_modules(DLOAD_HANDLE handle,
+                                          LOADER_FILE_DESC *fd,
+                                          DLIMP_Dynamic_Module *dyn_module)
+{
+   LOADER_OBJECT *pHandle         = (LOADER_OBJECT *)handle;
+   int32_t        top_file_handle = dyn_module->loaded_module->file_handle;
+   dynamic_module_ptr_Stack_Node *stack_bottom =
+                                 pHandle->DLIMP_dependency_stack.bottom_ptr;
+
+   /*------------------------------------------------------------------------*/
+   /* Relocation processing is only triggered for the top-level object */
+   /* module, i.e. the one sitting at the bottom of the dependency stack. */
+   /*------------------------------------------------------------------------*/
+   if (stack_bottom != NULL && stack_bottom->value != dyn_module)
+      return top_file_handle;
+
+   /*------------------------------------------------------------------------*/
+   /* For the DSBT model: assign DSBT indices, then update the content of */
+   /* all DSBTs for any module that uses the model. */
+   /*------------------------------------------------------------------------*/
+   if (is_dsbt_module(dyn_module))
+   {
+      DLIF_assign_dsbt_indices();
+      DLIF_update_all_dsbts();
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Consume dynamic module objects from the dependency graph stack, from */
+   /* the dependents to the root of the dependency graph. */
+   /*------------------------------------------------------------------------*/
+   while (pHandle->DLIMP_dependency_stack.size > 0)
+   {
+      DLIMP_Dynamic_Module *mod =
+                   dynamic_module_ptr_pop(&pHandle->DLIMP_dependency_stack);
+
+      /*---------------------------------------------------------------------*/
+      /* Process dynamic relocations associated with this module. */
+      /*---------------------------------------------------------------------*/
+      process_dynamic_module_relocations(handle, mod->fd, mod);
+
+      /*---------------------------------------------------------------------*/
+      /* __c_args__ points to the beginning of the .args section, if there */
+      /* is one. Record it in the dynamic module and mirror it into the */
+      /* loaded module, which needs it to write argc/argv to .args at */
+      /* execution time. */
+      /*---------------------------------------------------------------------*/
+      DLSYM_lookup_local_symtab("__c_args__", mod->symtab, mod->symnum,
+                                (Elf32_Addr *)&mod->c_args);
+      mod->loaded_module->c_args = mod->c_args;
+
+      /*---------------------------------------------------------------------*/
+      /* Pick up the entry point address from the ELF file header. Only a */
+      /* single entry point per ELF file is currently supported. */
+      /*---------------------------------------------------------------------*/
+      mod->loaded_module->entry_point = mod->fhdr.e_entry;
+
+      /*---------------------------------------------------------------------*/
+      /* Copying command-line arguments into the args section and copying */
+      /* the DSBT to its run location (load_object) is intentionally not */
+      /* done here; the call was disabled because it doesn't do much as of */
+      /* now. */
+      /*---------------------------------------------------------------------*/
+
+      /*---------------------------------------------------------------------*/
+      /* Save off initialization data, if any, for this module. */
+      /*---------------------------------------------------------------------*/
+      store_init_data(mod);
+
+      /*---------------------------------------------------------------------*/
+      /* Close every dependent file; the caller's own fd is left open. */
+      /*---------------------------------------------------------------------*/
+      if (mod->fd != fd)
+      {
+         DLIF_fclose(mod->fd);
+         mod->fd = NULL;
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* Detach the loaded module object from the dynamic module object */
+      /* that created it, then throw away the dynamic module object. */
+      /*---------------------------------------------------------------------*/
+      detach_loaded_module(mod);
+      delete_DLIMP_Dynamic_Module(handle, &mod);
+   }
+
+   return top_file_handle;
+}
+
+/*****************************************************************************/
+/* DLOAD_load() */
+/* */
+/* Dynamically load the specified file and return a file handle for the */
+/* loaded file. If the load fails, this function will return a value of */
+/* zero (0) for the file handle. */
+/* */
+/* handle - loader instance the file is loaded into. */
+/* fd - file to load. The core loader must have read access to it. */
+/* */
+/* Failures detected before the module is registered with the loader */
+/* (bad headers, bad dynamic segment, invalid ELF file) release the */
+/* dynamic module object before returning. */
+/* */
+/* NOTE(review): failures after the module has been appended to the */
+/* loaded object list or pushed on the dependency stack still return 0 */
+/* without unwinding that state; confirm whether callers are expected to */
+/* clean up in that case. */
+/* */
+/*****************************************************************************/
+int32_t DLOAD_load(DLOAD_HANDLE handle, LOADER_FILE_DESC *fd)
+{
+   int32_t fl_handle;
+
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+   DLIMP_Dynamic_Module *dyn_module = new_DLIMP_Dynamic_Module(fd);
+
+   if (!dyn_module)
+      return 0;
+
+#if LOADER_DEBUG
+   /*------------------------------------------------------------------------*/
+   /* Spit out some loader progress information when we begin loading an */
+   /* object. */
+   /*------------------------------------------------------------------------*/
+   if (debugging_on) DLIF_trace("Loading file...\n");
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* If no access to a program was provided, there is nothing to do. */
+   /*------------------------------------------------------------------------*/
+   if (!fd)
+   {
+      DLIF_error(DLET_FILE, "Missing file specification.\n");
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Read file headers and dynamic information into dynamic module. */
+   /*------------------------------------------------------------------------*/
+   if (!dload_headers(fd, dyn_module))
+   {
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Find the dynamic segment, if there is one, and read dynamic */
+   /* information from the ELF object file into the dynamic module data */
+   /* structure associated with this file. The module is not registered */
+   /* anywhere yet, so release it on failure like the header case above. */
+   /*------------------------------------------------------------------------*/
+   if (!dload_dynamic_segment(handle, fd, dyn_module))
+   {
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Perform sanity checking on the read-in ELF file. The name is used */
+   /* in the error message, so the module is released only afterwards. */
+   /*------------------------------------------------------------------------*/
+   if (!is_valid_elf_object_file(fd, dyn_module))
+   {
+      DLIF_error(DLET_FILE, "Attempt to load invalid ELF file, '%s'.\n",
+                 dyn_module->name);
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+#if LOADER_DEBUG || LOADER_PROFILE
+   /*------------------------------------------------------------------------*/
+   /* Stop clock on initialization of ELF file information. Start clock on */
+   /* initialization of ELF module. */
+   /*------------------------------------------------------------------------*/
+   if (debugging_on || profiling_on)
+   {
+      DLIF_trace("Finished dload_dynamic_segment.\n");
+      if (profiling_on)
+      {
+         profile_stop_clock();
+         DLIF_trace("Took %lu cycles.\n",
+                    (unsigned long) profile_cycle_count());
+      }
+   }
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Initialize internal ELF module and segment structures. Sets */
+   /* loaded_module in *dyn_module. This also deals with assigning a file */
+   /* handle and bumping file handle counter. */
+   /*------------------------------------------------------------------------*/
+   initialize_loaded_module(handle, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Append Module structure to loaded object list. */
+   /*------------------------------------------------------------------------*/
+   loaded_module_ptr_enqueue(&pHandle->DLIMP_loaded_objects,
+                             dyn_module->loaded_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Support static loading as special case. */
+   /*------------------------------------------------------------------------*/
+   if (!dyn_module->relocatable)
+      return dload_static_executable(handle, fd, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Get space & address for segments, and offset symbols and program */
+   /* header table to reflect the relocated address. Also offset the */
+   /* addresses in the internal Segment structures used by the Module */
+   /* structure. Note that this step needs to be performed prior and in */
+   /* addition to the relocation entry processing. */
+   /*------------------------------------------------------------------------*/
+   if (!allocate_dynamic_segments_and_relocate_symbols(handle, fd, dyn_module))
+      return 0;
+
+   /*------------------------------------------------------------------------*/
+   /* __c_args__ points to the beginning of the .args section, if there is */
+   /* one. __TI_STATIC_BASE points to the beginning of the DP-relative data */
+   /* segment (value to initialize DP). Record these addresses in the ELF */
+   /* file internal data object. */
+   /*------------------------------------------------------------------------*/
+   DLSYM_lookup_local_symtab("__c_args__", dyn_module->symtab,
+                             dyn_module->symnum,
+                             (Elf32_Addr *)&dyn_module->c_args);
+
+   DLSYM_lookup_local_symtab("__TI_STATIC_BASE", dyn_module->symtab,
+                             dyn_module->symnum,
+                             (Elf32_Addr *)&dyn_module->static_base);
+   dyn_module->loaded_module->static_base = dyn_module->static_base;
+
+   /*------------------------------------------------------------------------*/
+   /* If the user application performs initialization and termination, */
+   /* the dynamic loader shouldn't process the init/fini sections. */
+   /* Check and adjust the init/fini information accordingly. */
+   /*------------------------------------------------------------------------*/
+   adjust_module_init_fini(dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Save off any user defined pre-initialization functions that may be */
+   /* associated with a dynamic executable module. */
+   /*------------------------------------------------------------------------*/
+   if (dyn_module->fhdr.e_type == ET_EXEC)
+      store_preinit_data(dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Append current ELF file to list of objects currently loading. */
+   /* This is used to detect circular dependencies while we are processing */
+   /* the dependents of this file. */
+   /*------------------------------------------------------------------------*/
+   AL_append(&pHandle->DLIMP_module_dependency_list, &dyn_module->name);
+
+   /*------------------------------------------------------------------------*/
+   /* Push this dynamic module object onto the dependency stack. */
+   /* All of the modules on the stack will get relocated after all of the */
+   /* dependent files have been loaded and allocated. */
+   /*------------------------------------------------------------------------*/
+   dynamic_module_ptr_push(&pHandle->DLIMP_dependency_stack, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* If this object file uses the DSBT model, then register a DSBT index */
+   /* request with the client's DSBT support management. */
+   /*------------------------------------------------------------------------*/
+   if (is_dsbt_module(dyn_module) &&
+       !DLIF_register_dsbt_index_request(handle,
+                                         dyn_module->name,
+                                         dyn_module->loaded_module->file_handle,
+                                         dyn_module->dsbt_index))
+      return 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Load this ELF file's dependees (all files on its DT_NEEDED list). */
+   /* Dependees must be loaded and relocated before processing this module's */
+   /* relocations. */
+   /*------------------------------------------------------------------------*/
+   if (!dload_and_allocate_dependencies(handle, dyn_module))
+      return 0;
+
+   /*------------------------------------------------------------------------*/
+   /* Remove the current ELF file from the list of files that are in the */
+   /* process of loading. */
+   /*------------------------------------------------------------------------*/
+   pHandle->DLIMP_module_dependency_list.size--;
+
+   /*------------------------------------------------------------------------*/
+   /* Process relocation entries. */
+   /*------------------------------------------------------------------------*/
+   fl_handle = relocate_dependency_graph_modules(handle, fd, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* With all relocations having been resolved, do module initialization. */
+   /*------------------------------------------------------------------------*/
+   execute_module_initialization(handle);
+
+   return fl_handle;
+}
+
+/*****************************************************************************/
+/* DLOAD_get_entry_names() */
+/* */
+/* Build a list of entry point names for a loaded object. Currently, */
+/* any global symbol in the module is considered a valid entry point */
+/* regardless of whether it is defined in code or associated with a */
+/* data object. We would need to process the content of the symbol */
+/* table entry or its debug information to determine whether it is a */
+/* valid entry point or not. */
+/* */
+/* handle - loader instance to search. */
+/* file_handle - handle of the loaded file whose names are wanted. */
+/* entry_pt_cnt - out: number of names returned. */
+/* entry_pt_names - out: array of *entry_pt_cnt name strings. The array */
+/* and each string are obtained from DLIF_malloc(). */
+/* */
+/* Returns TRUE when the file was found and the list was built. Returns */
+/* FALSE when the file is not loaded, or when an allocation failed; in */
+/* the latter case everything allocated here is released again and the */
+/* outputs are set to 0 / NULL. */
+/* */
+/*****************************************************************************/
+BOOL DLOAD_get_entry_names(DLOAD_HANDLE handle,
+                           uint32_t file_handle,
+                           int32_t *entry_pt_cnt,
+                           char ***entry_pt_names)
+{
+   /*------------------------------------------------------------------------*/
+   /* Spin through list of loaded files until we find the file handle we */
+   /* are looking for. Then build a list of entry points from that file's */
+   /* symbol table. */
+   /*------------------------------------------------------------------------*/
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   loaded_module_ptr_Queue_Node* ptr;
+   for (ptr = pHandle->DLIMP_loaded_objects.front_ptr; ptr != NULL;
+        ptr = ptr->next_ptr)
+   {
+      if (ptr->value->file_handle == file_handle)
+      {
+         DLIMP_Loaded_Module *module = ptr->value;
+         struct Elf32_Sym *symtab;
+         char **names;
+         int i;
+
+         /*------------------------------------------------------------------*/
+         /* Any symbol in our file's symbol table is considered a valid */
+         /* entry point. */
+         /*------------------------------------------------------------------*/
+         symtab = (struct Elf32_Sym*)module->gsymtab;
+         names  = DLIF_malloc(module->gsymnum * sizeof(char*));
+
+         /*------------------------------------------------------------------*/
+         /* An empty symbol table is not an error, so a NULL array only */
+         /* counts as a failure when there are names to store. */
+         /*------------------------------------------------------------------*/
+         if (!names && module->gsymnum > 0)
+         {
+            DLIF_error(DLET_MISC, "Could not allocate entry point list.\n");
+            *entry_pt_cnt   = 0;
+            *entry_pt_names = NULL;
+            return FALSE;
+         }
+
+         for (i = 0; i < module->gsymnum; i++)
+         {
+            /*---------------------------------------------------------------*/
+            /* Each name gets its own slot in the array. */
+            /*---------------------------------------------------------------*/
+            const char *sym_name = (const char *)symtab[i].st_name;
+            names[i] = DLIF_malloc(strlen(sym_name) + 1);
+
+            if (!names[i])
+            {
+               /*------------------------------------------------------------*/
+               /* Undo the partial list so nothing is leaked. */
+               /*------------------------------------------------------------*/
+               DLIF_error(DLET_MISC,
+                          "Could not allocate entry point name.\n");
+               while (--i >= 0) DLIF_free(names[i]);
+               DLIF_free(names);
+               *entry_pt_cnt   = 0;
+               *entry_pt_names = NULL;
+               return FALSE;
+            }
+
+            strcpy(names[i], sym_name);
+         }
+
+         *entry_pt_cnt   = module->gsymnum;
+         *entry_pt_names = names;
+         return TRUE;
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* We didn't find the file we were looking for, return false. */
+   /*------------------------------------------------------------------------*/
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLOAD_prepare_for_execution() */
+/* */
+/* Given a file handle, prepare the matching module for execution: */
+/* - Return the module's entry point in the *sym_val output parameter. */
+/* - Write out the given arguments to the .args section contained in */
+/* the same module. */
+/* - As a test (for the Reference implementation) read the arguments */
+/* back from the .args section. */
+/* */
+/* Returns TRUE on success; FALSE when the file handle is unknown or the */
+/* arguments could not be written. */
+/* */
+/*****************************************************************************/
+BOOL DLOAD_prepare_for_execution(DLOAD_HANDLE handle, uint32_t file_handle,
+                                 TARGET_ADDRESS *sym_val,
+                                 int argc, char** argv)
+{
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+   loaded_module_ptr_Queue_Node *node;
+
+   /*------------------------------------------------------------------------*/
+   /* Walk the list of loaded files looking for the requested handle. */
+   /*------------------------------------------------------------------------*/
+   for (node = pHandle->DLIMP_loaded_objects.front_ptr; node != NULL;
+        node = node->next_ptr)
+   {
+      DLIMP_Loaded_Module *target = node->value;
+
+      if (target->file_handle != file_handle)
+         continue;
+
+      /*---------------------------------------------------------------------*/
+      /* Hand the entry point address back to the caller. */
+      /*---------------------------------------------------------------------*/
+      *sym_val = (TARGET_ADDRESS)(target->entry_point);
+
+      /*---------------------------------------------------------------------*/
+      /* Write argc, argv to the .args section in this module. */
+      /*---------------------------------------------------------------------*/
+      if (!write_arguments_to_args_section(handle, argc, argv, target))
+      {
+         DLIF_error(DLET_MISC, "Couldn't write to .args section\n");
+         return FALSE;
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* For the Reference Implementation we simulate a "boot" (the rts */
+      /* boot routine reads argc, argv from .args) by reading the values */
+      /* we just wrote back out of the .args section. */
+      /*---------------------------------------------------------------------*/
+      read_args_from_section(target);
+      return TRUE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* We didn't find the file we were looking for, return false. */
+   /*------------------------------------------------------------------------*/
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLOAD_load_arguments() */
+/* */
+/* Given a file handle, write out the given arguments to the .args */
+/* section contained in the matching module. */
+/* */
+/* Returns TRUE when the module was found and the arguments were */
+/* written; FALSE when the file handle is unknown or the write failed. */
+/* */
+/*****************************************************************************/
+BOOL DLOAD_load_arguments(DLOAD_HANDLE handle, uint32_t file_handle,
+                          int argc, char** argv)
+{
+   /*------------------------------------------------------------------------*/
+   /* Spin through list of loaded files until we find the file handle we */
+   /* are looking for. Then write the arguments into that module. */
+   /*------------------------------------------------------------------------*/
+   DLIMP_Loaded_Module *ep_loaded_module;
+   loaded_module_ptr_Queue_Node* ptr;
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   for (ptr = pHandle->DLIMP_loaded_objects.front_ptr; ptr != NULL;
+        ptr = ptr->next_ptr)
+   {
+      if (ptr->value->file_handle == file_handle)
+      {
+         ep_loaded_module = ptr->value;
+
+         /*------------------------------------------------------------------*/
+         /* Write argc, argv to the .args section in this module. */
+         /*------------------------------------------------------------------*/
+         if (!write_arguments_to_args_section(handle, argc, argv,
+                                              ep_loaded_module))
+         {
+            DLIF_error(DLET_MISC, "Couldn't write to .args section\n");
+            return FALSE;
+         }
+
+         /*------------------------------------------------------------------*/
+         /* The arguments are in place; report success to the caller. */
+         /*------------------------------------------------------------------*/
+         return TRUE;
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* We didn't find the file we were looking for, return false. */
+   /*------------------------------------------------------------------------*/
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLOAD_get_entry_point() */
+/* */
+/* Look up a loaded module by file handle and hand back its entry point */
+/* address through the *sym_val output parameter. */
+/* */
+/* Returns TRUE when the module was found, FALSE otherwise (in which */
+/* case *sym_val is left untouched). */
+/* */
+/*****************************************************************************/
+BOOL DLOAD_get_entry_point(DLOAD_HANDLE handle, uint32_t file_handle,
+                           TARGET_ADDRESS *sym_val)
+{
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+   loaded_module_ptr_Queue_Node *node =
+                                   pHandle->DLIMP_loaded_objects.front_ptr;
+
+   /*------------------------------------------------------------------------*/
+   /* Walk the list of loaded files; the first module carrying the */
+   /* requested file handle supplies the entry point. */
+   /*------------------------------------------------------------------------*/
+   while (node != NULL)
+   {
+      if (node->value->file_handle == file_handle)
+      {
+         *sym_val = (TARGET_ADDRESS)(node->value->entry_point);
+         return TRUE;
+      }
+
+      node = node->next_ptr;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* No loaded file carries this handle. */
+   /*------------------------------------------------------------------------*/
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLOAD_query_symbol() */
+/* */
+/* Query the value of a global symbol from a specific file. On a match */
+/* the symbol's value is written to *sym_val and TRUE is returned; when */
+/* the file or the symbol cannot be found, FALSE is returned and */
+/* *sym_val is left untouched. */
+/* */
+/*****************************************************************************/
+BOOL DLOAD_query_symbol(DLOAD_HANDLE handle,
+                        uint32_t file_handle,
+                        const char *sym_name,
+                        TARGET_ADDRESS *sym_val)
+{
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+   loaded_module_ptr_Queue_Node *node;
+
+   /*------------------------------------------------------------------------*/
+   /* Walk the list of loaded files, skipping every module that does not */
+   /* carry the requested file handle. */
+   /*------------------------------------------------------------------------*/
+   for (node = pHandle->DLIMP_loaded_objects.front_ptr; node != NULL;
+        node = node->next_ptr)
+   {
+      DLIMP_Loaded_Module *module;
+      struct Elf32_Sym *syms;
+      int idx;
+
+      if (node->value->file_handle != file_handle)
+         continue;
+
+      /*---------------------------------------------------------------------*/
+      /* Search through this module's global symbol table by name. */
+      /*---------------------------------------------------------------------*/
+      module = node->value;
+      syms   = (struct Elf32_Sym*)module->gsymtab;
+
+      for (idx = 0; idx < module->gsymnum; idx++)
+      {
+         if (strcmp(sym_name, (const char *)syms[idx].st_name) != 0)
+            continue;
+
+         *sym_val = (TARGET_ADDRESS) syms[idx].st_value;
+         return TRUE;
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* We didn't find the symbol we were looking for, return false. */
+   /*------------------------------------------------------------------------*/
+   return FALSE;
+}
+
+
+
+/*****************************************************************************/
+/* unlink_loaded_module() */
+/* */
+/* Take the loaded module held by lm_node off the loader's list of */
+/* loaded objects and return a pointer to it so that the caller can */
+/* deconstruct it. */
+/* */
+/* back_ptr is accepted for interface compatibility only; removal is */
+/* done by value through loaded_module_ptr_remove(). */
+/* */
+/*****************************************************************************/
+static DLIMP_Loaded_Module *unlink_loaded_module(DLOAD_HANDLE handle,
+                                  loaded_module_ptr_Queue_Node *back_ptr,
+                                  loaded_module_ptr_Queue_Node *lm_node)
+{
+   LOADER_OBJECT       *pHandle  = (LOADER_OBJECT *)handle;
+   DLIMP_Loaded_Module *detached = lm_node->value;
+
+   loaded_module_ptr_remove(&pHandle->DLIMP_loaded_objects, detached);
+
+   return detached;
+}
+
+/*****************************************************************************/
+/* execute_module_termination() */
+/* */
+/* Execute termination functions associated with this loaded module. */
+/* Termination functions are called in the reverse order as their */
+/* corresponding initialization functions: the DT_FINI_ARRAY entries */
+/* from last to first, followed by the DT_FINI function. */
+/* */
+/* handle - loader instance; its client_handle is passed to the */
+/* client read and execute callbacks. */
+/* loaded_module - module whose termination functions are run. */
+/* */
+/* NULL entries in the array are skipped with a warning, mirroring the */
+/* initialization path. If the host copy of the array cannot be */
+/* allocated, an error is reported and the array is skipped. */
+/* */
+/*****************************************************************************/
+static void execute_module_termination(DLOAD_HANDLE handle,
+                                       DLIMP_Loaded_Module *loaded_module)
+{
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   /*------------------------------------------------------------------------*/
+   /* If a DT_FINI_ARRAY dynamic tag was encountered for this module, spin */
+   /* through the array in reverse order, calling each function address */
+   /* stored in the array. */
+   /*------------------------------------------------------------------------*/
+   if (loaded_module->fini_arraysz != 0)
+   {
+      /*---------------------------------------------------------------------*/
+      /* Now make a loader-accessible copy of the .fini_array section. */
+      /*---------------------------------------------------------------------*/
+      int32_t i;
+      int32_t num_fini_fcns =
+                      loaded_module->fini_arraysz/sizeof(TARGET_ADDRESS);
+      TARGET_ADDRESS *fini_array_buf = (TARGET_ADDRESS *)
+                                DLIF_malloc(loaded_module->fini_arraysz);
+
+      if (fini_array_buf)
+      {
+         DLIF_read(pHandle->client_handle,
+                   fini_array_buf, 1, loaded_module->fini_arraysz,
+                   (TARGET_ADDRESS)loaded_module->fini_array);
+
+         /*------------------------------------------------------------------*/
+         /* Now spin through the array in reverse order, executing each */
+         /* termination function whose address occupies an entry in the */
+         /* array. Make sure function addresses are valid before execution. */
+         /*------------------------------------------------------------------*/
+         for (i = num_fini_fcns - 1; i >= 0; i--)
+         {
+            if (fini_array_buf[i])
+            {
+               DLIF_execute(pHandle->client_handle,
+                            (TARGET_ADDRESS)(fini_array_buf[i]));
+            }
+            else
+            {
+               DLIF_warning(DLWT_MISC,
+                            "DT_FINI_ARRAY function address is NULL!");
+            }
+         }
+
+         DLIF_free(fini_array_buf);
+      }
+      else
+      {
+         DLIF_error(DLET_MISC,
+                    "Could not allocate copy of fini array section.\n");
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* If a DT_FINI dynamic tag was encountered for this module, call the */
+   /* function indicated by the tag's value to complete the termination */
+   /* process for this module. */
+   /*------------------------------------------------------------------------*/
+   if (loaded_module->fini != (Elf32_Addr) NULL)
+      DLIF_execute(pHandle->client_handle,
+                   (TARGET_ADDRESS)loaded_module->fini);
+}
+
+/*****************************************************************************/
+/* remove_loaded_module()                                                    */
+/*                                                                           */
+/*    Find and unlink a loaded module data object from the list of loaded    */
+/*    objects, then call its destructor to free the host memory associated   */
+/*    with the loaded module and all of its loaded segments.                 */
+/*                                                                           */
+/*    handle  - loader instance owning the DLIMP_loaded_objects queue.       */
+/*    lm_node - queue node whose module is to be removed.                    */
+/*                                                                           */
+/*    Nothing is done when lm_node is NULL or is not found on the queue:     */
+/*    such a node cannot be safely unlinked or destroyed.                    */
+/*                                                                           */
+/*****************************************************************************/
+static void remove_loaded_module(DLOAD_HANDLE handle,
+                                 loaded_module_ptr_Queue_Node *lm_node)
+{
+   DLIMP_Loaded_Module *lm_object = NULL;
+   loaded_module_ptr_Queue_Node *back_ptr = NULL;
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   if (lm_node == NULL) return;
+
+   /*------------------------------------------------------------------------*/
+   /* Locate the predecessor of lm_node.  The walk stops at the end of the   */
+   /* list so that a node which is no longer queued (or an empty queue)      */
+   /* cannot cause a NULL dereference.                                       */
+   /*------------------------------------------------------------------------*/
+   if (lm_node != pHandle->DLIMP_loaded_objects.front_ptr)
+   {
+      for (back_ptr = pHandle->DLIMP_loaded_objects.front_ptr;
+           back_ptr != NULL && back_ptr->next_ptr != lm_node;
+           back_ptr = back_ptr->next_ptr);
+
+      if (back_ptr == NULL) return;
+   }
+
+   lm_object = unlink_loaded_module(handle, back_ptr, lm_node);
+
+   delete_DLIMP_Loaded_Module(handle, &lm_object);
+}
+
+/*****************************************************************************/
+/* DLOAD_unload()                                                            */
+/*                                                                           */
+/*    Drop one use of the module identified by file_handle.  When the use    */
+/*    count reaches zero the module is unloaded: its termination functions   */
+/*    are run, its dependents are unloaded through the client, and the       */
+/*    host-side module and segment data structures are destroyed.            */
+/*                                                                           */
+/*    Returns TRUE only if the program entry is actually destroyed.  This    */
+/*    tells the client when it needs to remove debug information for the     */
+/*    module, so that the client does not have to maintain a use count of    */
+/*    its own.  Returns FALSE if the module is still in use or if no loaded  */
+/*    module carries the given file handle.                                  */
+/*                                                                           */
+/*****************************************************************************/
+BOOL DLOAD_unload(DLOAD_HANDLE handle, uint32_t file_handle)
+{
+   LOADER_OBJECT *loader = (LOADER_OBJECT *)handle;
+   loaded_module_ptr_Queue_Node *node;
+
+   for (node = loader->DLIMP_loaded_objects.front_ptr; node != NULL;
+        node = node->next_ptr)
+   {
+      DLIMP_Loaded_Module *module = node->value;
+      int *dep_handles;
+      int  dep;
+
+      /*---------------------------------------------------------------------*/
+      /* Skip modules with a different handle, and modules that still have   */
+      /* outstanding uses once this one has been released.                   */
+      /*---------------------------------------------------------------------*/
+      if (module->file_handle != file_handle) continue;
+      if (--module->use_count != 0) continue;
+
+      /*---------------------------------------------------------------------*/
+      /* Termination functions run in the reverse order of initialization,   */
+      /* so this module's user/global/static termination functions must be   */
+      /* executed before its dependents are unloaded.                        */
+      /*---------------------------------------------------------------------*/
+      execute_module_termination(handle, module);
+
+      /*---------------------------------------------------------------------*/
+      /* Dependents are unloaded via the client so that it can keep its      */
+      /* debug information up to date.                                       */
+      /*---------------------------------------------------------------------*/
+      dep_handles = (int *)(module->dependencies.buf);
+      for (dep = 0; dep < module->dependencies.size; dep++)
+         DLIF_unload_dependent(loader->client_handle, dep_handles[dep]);
+
+      /*---------------------------------------------------------------------*/
+      /* The predecessor of this node is looked up only now, inside          */
+      /* remove_loaded_module(): DLIF_unload_dependent() might free nodes,   */
+      /* so a back pointer saved earlier could be stale.  Turn the Queue     */
+      /* template into a doubly linked list if this overhead becomes a       */
+      /* problem.                                                            */
+      /*---------------------------------------------------------------------*/
+      remove_loaded_module(handle, node);
+
+      /*---------------------------------------------------------------------*/
+      /* Unloading is complete; forget the current virtual target.           */
+      /*---------------------------------------------------------------------*/
+      cur_target = NULL;
+
+      return TRUE;
+   }
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLOAD_load_symbols()                                                      */
+/*                                                                           */
+/*    Load the symbols from the given file and make symbols available for    */
+/*    global symbol linkage.                                                 */
+/*                                                                           */
+/*    handle - loader instance that will own the resulting loaded module.    */
+/*    fd     - access to the ELF object file; must not be NULL.              */
+/*                                                                           */
+/*    Returns the file handle assigned to the loaded module, or 0 if the     */
+/*    file is missing, cannot be read, or is not a valid ELF object file.    */
+/*                                                                           */
+/*****************************************************************************/
+int32_t DLOAD_load_symbols(DLOAD_HANDLE handle, LOADER_FILE_DESC *fd)
+{
+   DLIMP_Dynamic_Module *dyn_module = NULL;
+   DLIMP_Loaded_Module *loaded_module = NULL;
+   LOADER_OBJECT *pHandle = (LOADER_OBJECT *)handle;
+
+   /*------------------------------------------------------------------------*/
+   /* If no access to a program was provided, there is nothing to do.  This  */
+   /* is checked before the dynamic module object is constructed so that no  */
+   /* host memory is left allocated on this early exit.                      */
+   /*------------------------------------------------------------------------*/
+   if (!fd)
+   {
+      DLIF_error(DLET_FILE, "Missing file specification.\n");
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Ensure we have a valid dynamic module object from the constructor.     */
+   /*------------------------------------------------------------------------*/
+   dyn_module = new_DLIMP_Dynamic_Module(fd);
+   if (!dyn_module)
+      return 0;
+
+   /*------------------------------------------------------------------------*/
+   /* No command-line arguments are associated with a symbols-only load.     */
+   /*------------------------------------------------------------------------*/
+   dyn_module->argc = 0;
+   dyn_module->argv = NULL;
+
+   /*------------------------------------------------------------------------*/
+   /* Read file headers and dynamic information into dynamic module.         */
+   /*------------------------------------------------------------------------*/
+   if (!dload_headers(fd, dyn_module))
+   {
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Find the dynamic segment, if there is one, and read dynamic            */
+   /* information from the ELF object file into the dynamic module data      */
+   /* structure associated with this file.                                   */
+   /*------------------------------------------------------------------------*/
+   if (!dload_dynamic_segment(handle, fd, dyn_module))
+   {
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Perform sanity checking on the read-in ELF file.                       */
+   /*------------------------------------------------------------------------*/
+   if (!is_valid_elf_object_file(fd, dyn_module))
+   {
+      DLIF_error(DLET_FILE, "Attempt to load invalid ELF file, '%s'.\n",
+                 dyn_module->name);
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Initialize internal ELF module and segment structures.  Sets           */
+   /* loaded_module in *dyn_module.  This also deals with assigning a file   */
+   /* handle and bumping file handle counter.                                */
+   /*------------------------------------------------------------------------*/
+   initialize_loaded_module(handle, dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Add this module to the loaded module queue.                            */
+   /* Detach the loaded module object from the dynamic module that created   */
+   /* it.  Ownership of the host memory allocated for the loaded module      */
+   /* object now belongs to the DLIMP_loaded_objects list.                   */
+   /*------------------------------------------------------------------------*/
+   loaded_module_ptr_enqueue(&pHandle->DLIMP_loaded_objects,
+                             dyn_module->loaded_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Register a DSBT index request for this module and update its own copy  */
+   /* of the DSBT with the contents of the client's master DSBT.  The module */
+   /* is pushed on the dependency stack only for the duration of these       */
+   /* calls and is popped again right after.                                 */
+   /*------------------------------------------------------------------------*/
+   if (is_dsbt_module(dyn_module))
+   {
+      dynamic_module_ptr_push(&pHandle->DLIMP_dependency_stack, dyn_module);
+      DLIF_register_dsbt_index_request(handle,
+                                       dyn_module->name,
+                                       dyn_module->loaded_module->file_handle,
+                                       dyn_module->dsbt_index);
+      DLIF_assign_dsbt_indices();
+      DLIF_update_all_dsbts();
+      dynamic_module_ptr_pop(&pHandle->DLIMP_dependency_stack);
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Ownership of the host memory allocated for the loaded module object is */
+   /* transferred to the DLIMP_loaded_objects list.  Free up the host memory */
+   /* for the dynamic module that created the loaded module object.  Just    */
+   /* call the destructor function for DLIMP_Dynamic_Module.                 */
+   /*------------------------------------------------------------------------*/
+   loaded_module = detach_loaded_module(dyn_module);
+   if(loaded_module == NULL)
+   {
+      delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+      return 0;
+   }
+   delete_DLIMP_Dynamic_Module(handle, &dyn_module);
+
+   /*------------------------------------------------------------------------*/
+   /* Return a file handle so that the client can match this file to an ID.  */
+   /*------------------------------------------------------------------------*/
+   return loaded_module->file_handle;
+}
+
+/*****************************************************************************/
+/* DSBT Support Functions */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* DLOAD_get_dsbt_size()                                                     */
+/*                                                                           */
+/*    Report the amount of space allocated for the specified module's DSBT.  */
+/*    The module is looked up by file handle on the dependency stack; 0 is   */
+/*    returned when no entry matches.  The DSBT must be big enough to hold   */
+/*    a copy of the master DSBT or the client will flag an error.  Modules   */
+/*    whose DSBT size is zero are assumed not to use the DSBT model.         */
+/*                                                                           */
+/*****************************************************************************/
+uint32_t DLOAD_get_dsbt_size(DLOAD_HANDLE handle, int32_t file_handle)
+{
+   LOADER_OBJECT *loader = (LOADER_OBJECT *)handle;
+   dynamic_module_ptr_Stack_Node *node =
+                                   loader->DLIMP_dependency_stack.top_ptr;
+
+   while (node != NULL)
+   {
+      DLIMP_Dynamic_Module *candidate = node->value;
+
+      if (candidate->loaded_module->file_handle == file_handle)
+         return candidate->dsbt_size;
+
+      node = node->next_ptr;
+   }
+
+   return 0;
+}
+
+/*****************************************************************************/
+/* DLOAD_get_static_base()                                                   */
+/*                                                                           */
+/*    Look up the static base address recorded for the loaded module with    */
+/*    the given file handle.  On a match the address is stored through       */
+/*    static_base and TRUE is returned; otherwise *static_base is left       */
+/*    untouched and FALSE is returned.                                       */
+/*                                                                           */
+/*****************************************************************************/
+BOOL DLOAD_get_static_base(DLOAD_HANDLE handle, int32_t file_handle,
+                           TARGET_ADDRESS *static_base)
+{
+   LOADER_OBJECT *loader = (LOADER_OBJECT *)handle;
+   loaded_module_ptr_Queue_Node *node =
+                                   loader->DLIMP_loaded_objects.front_ptr;
+
+   while (node != NULL)
+   {
+      DLIMP_Loaded_Module *module = node->value;
+
+      if (module->file_handle == file_handle)
+      {
+         *static_base = (TARGET_ADDRESS)module->static_base;
+         return TRUE;
+      }
+
+      node = node->next_ptr;
+   }
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLOAD_get_dsbt_base()                                                     */
+/*                                                                           */
+/*    Look up the address of the DSBT for the module with the given file     */
+/*    handle.  The module is searched for on the dependency stack; on a      */
+/*    match the pointer value of the dynamic tag at dsbt_base_tagidx is      */
+/*    stored through dsbt_base and TRUE is returned.  FALSE is returned,     */
+/*    with *dsbt_base untouched, when no entry matches.                      */
+/*                                                                           */
+/*****************************************************************************/
+BOOL DLOAD_get_dsbt_base(DLOAD_HANDLE handle, int32_t file_handle, TARGET_ADDRESS *dsbt_base)
+{
+   LOADER_OBJECT *loader = (LOADER_OBJECT *)handle;
+   dynamic_module_ptr_Stack_Node *node =
+                                   loader->DLIMP_dependency_stack.top_ptr;
+
+   while (node != NULL)
+   {
+      DLIMP_Dynamic_Module *candidate = node->value;
+
+      if (candidate->loaded_module->file_handle == file_handle)
+      {
+         int32_t tag = candidate->dsbt_base_tagidx;
+
+         *dsbt_base = (TARGET_ADDRESS)candidate->dyntab[tag].d_un.d_ptr;
+         return TRUE;
+      }
+
+      node = node->next_ptr;
+   }
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLREL_relocate() - Perform RELA and REL type relocations for the ELF      */
+/*    object file being loaded, by delegating to cur_target->relocate().     */
+/*****************************************************************************/
+void DLREL_relocate(DLOAD_HANDLE handle, LOADER_FILE_DESC* elf_file,
+ DLIMP_Dynamic_Module* dyn_module)
+
+{
+ cur_target->relocate(handle, elf_file, dyn_module); /* NOTE(review): cur_target is not NULL-checked here */
+}
+
+/*****************************************************************************/
+/* GET_VT_OBJ() - Once file headers have been read, use the e_machine id to  */
+/*    figure out the virtual target, so we can access target specific funcs. */
+/*    Scans vt_arr up to its EM_NONE terminator; returns NULL if no entry    */
+/*    carries the given machine id.                                          */
+/*****************************************************************************/
+static VIRTUAL_TARGET *get_vt_obj(int given_id)
+{
+   VIRTUAL_TARGET *entry = vt_arr;
+
+   while (entry->machine_id != EM_NONE)
+   {
+      if (entry->machine_id == given_id)
+         return entry;
+      entry++;
+   }
+
+   return NULL;
+}
+
+#if 0 && LOADER_DEBUG // enable to make available in debugger
+/*****************************************************************************/
+/* DEBUG_QUEUE() - Debug function.  When debugging_on is set, traces the     */
+/*    given position label, the loader object pointer and the name of every  */
+/*    module on the loaded objects queue.                                    */
+/*    NOTE(review): the pointer is traced through a uint32_t cast, which     */
+/*    would truncate it on a host with wider pointers.                       */
+/*****************************************************************************/
+static void debug_queue(LOADER_OBJECT *pHandle, char* position)
+{
+   loaded_module_ptr_Queue_Node *node;
+
+   if (debugging_on)
+   {
+      DLIF_trace ("\nDEBUG QUEUE : %s, pHandle : 0x%x\n\n", position,
+                  (uint32_t)pHandle);
+
+      node = pHandle->DLIMP_loaded_objects.front_ptr;
+      while (node != NULL)
+      {
+         DLIF_trace ("ptr->value->name : %s\n",node->value->name);
+         node = node->next_ptr;
+      }
+
+      DLIF_trace ("\n");
+   }
+}
+#endif
+
+/*****************************************************************************/
+/* READ_ARGS_FROM_SECTION() - This function reads the argc, argv from the    */
+/*    .args section, and is used to test Reference implementation.  The      */
+/*    values are published through global_argc and global_argv.              */
+/*****************************************************************************/
+static void read_args_from_section(DLIMP_Loaded_Module* ep_module)
+{
+   /*------------------------------------------------------------------------*/
+   /* Before this function is called, the loader has gotten argv/argc from   */
+   /* the module and written it out to the .args section.  c_args points to  */
+   /* the .args section.                                                     */
+   /*------------------------------------------------------------------------*/
+   ARGS_CONTAINER *args = (ARGS_CONTAINER *)(ep_module->c_args);
+
+   /*------------------------------------------------------------------------*/
+   /* A usable container is neither NULL nor the all-ones address.           */
+   /*------------------------------------------------------------------------*/
+   if (args && args != (ARGS_CONTAINER *)0xFFFFFFFF)
+   {
+      global_argc = args->argc;
+      global_argv = args->argv;
+      return;
+   }
+
+   global_argc = 0;
+   global_argv = NULL;
+}
diff --git a/src/core/dsp/ocl_load/DLOAD/dload.h b/src/core/dsp/ocl_load/DLOAD/dload.h
new file mode 100644
index 0000000..bb7d427
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/dload.h
@@ -0,0 +1,334 @@
+/*
+* dload.h
+*
+* Define internal data structures used by core dynamic loader.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef DLOAD_H
+#define DLOAD_H
+
+#include "ArrayList.h"
+#include "Queue.h"
+#include "Stack.h"
+#include "elf32.h"
+#include "dload_api.h"
+#include "util.h"
+
+/*---------------------------------------------------------------------------*/
+/* Contains strings with names of files the loader is in process of loading. */
+/* This list is used to keep track of what objects are in the process of */
+/* loading while their dependents are being loaded so that we can detect */
+/* circular dependencies. */
+/*---------------------------------------------------------------------------*/
+extern Array_List DLIMP_module_dependency_list;
+
+/*---------------------------------------------------------------------------*/
+/* DLIMP_Loaded_Segment                                                      */
+/*                                                                           */
+/*    This structure represents a segment loaded into target memory.         */
+/*                                                                           */
+/*    This data structure should be created using host memory when a module  */
+/*    is being loaded into target memory.  The data structure should persist */
+/*    as long as the module stays resident in target memory.  It should be   */
+/*    removed when the last use of the module is unloaded from the target.   */
+/*---------------------------------------------------------------------------*/
+typedef struct
+{
+ struct Elf32_Phdr phdr; /* ELF program header of this segment */
+ Elf32_Addr input_vaddr; /* original segment load addr */
+ BOOL modified; /* NOTE(review): what sets this is not visible here */
+ struct DLOAD_MEMORY_SEGMENT *obj_desc; /* presumably client memory descriptor - confirm */
+ void * host_address; /* presumably host-side copy of segment - confirm */
+} DLIMP_Loaded_Segment;
+
+/*---------------------------------------------------------------------------*/
+/* DLIMP_Loaded_Module                                                       */
+/*                                                                           */
+/*    This structure contains all the information the dynamic loader needs   */
+/*    to retain after loading an object file's segments into target memory.  */
+/*    The data structure is created while the object file is being loaded,   */
+/*    and should persist until the last use of the module is unloaded from   */
+/*    target memory.                                                         */
+/*                                                                           */
+/*    The information contained here is used by the dynamic loader to        */
+/*    perform dynamic symbol resolution, to track the use count, and to      */
+/*    finally deallocate the module's segments when the module is unloaded.  */
+/*---------------------------------------------------------------------------*/
+typedef struct
+{
+ char *name; /* Local copy of so_name */
+ int32_t file_handle; /* ID the client uses to refer to this module */
+ int32_t use_count; /* Decremented by DLOAD_unload(); 0 => unload */
+ Elf32_Addr entry_point; /* Entry point address into module */
+ struct Elf32_Sym *gsymtab; /* Module's global symbol table */
+ Elf32_Word gsymnum; /* # global symbols */
+ char *gstrtab; /* Module's global symbol names */
+ Elf32_Word gstrsz; /* Size of global string table */
+ Array_List loaded_segments; /* List of DLIMP_Loaded_Segment(s) */
+ Array_List dependencies; /* List of dependent file handles */
+ BOOL direct_dependent_only; /* NOTE(review): semantics not visible here */
+
+ Elf32_Addr fini; /* .fini function/section address */
+ Elf32_Addr fini_array; /* .fini_array term fcn ary addr */
+ int32_t fini_arraysz; /* size of .fini_array in bytes */
+ uint8_t *c_args; /* address of module's .args sect */
+ uint8_t *static_base; /* address of module's STATIC_BASE */
+
+} DLIMP_Loaded_Module;
+
+/*---------------------------------------------------------------------------*/
+/* DLIMP_loaded_objects */
+/* */
+/* A list of loaded module objects (DLIMP_Loaded_Module *) that the */
+/* loader has placed into target memory. */
+/*---------------------------------------------------------------------------*/
+TYPE_QUEUE_DEFINITION(DLIMP_Loaded_Module*, loaded_module_ptr)
+extern loaded_module_ptr_Queue DLIMP_loaded_objects;
+
+/*---------------------------------------------------------------------------*/
+/* DLIMP_Dynamic_Module                                                      */
+/*                                                                           */
+/*    This structure represents a dynamic module to be loaded by the dynamic */
+/*    loader.  It contains all the information necessary to load and         */
+/*    relocate the module: most of the headers, the dynamic info, the        */
+/*    dynamic symbol table, the string table, etc.                           */
+/*                                                                           */
+/*    This structure is allocated in host memory while an ELF object file is */
+/*    being loaded and will be destructed after the file has been            */
+/*    successfully loaded.  To simplify loading and relocation of the object */
+/*    file's segments, this data structure maintains a link to the loaded    */
+/*    module.  This link is severed when the load is successfully completed. */
+/*    The loaded module data structure will persist until the module is      */
+/*    actually unloaded from target memory, but this data structure will be  */
+/*    freed.                                                                 */
+/*                                                                           */
+/*    If the load of the object file is not successful for any reason, then  */
+/*    the loaded module will not be detached from the dynamic module.  In    */
+/*    such a case, the destructor for the dynamic module will assume         */
+/*    responsibility for freeing any host memory associated with the loaded  */
+/*    module and its segments.                                               */
+/*---------------------------------------------------------------------------*/
+typedef struct
+{
+ char *name; /* Local copy of so_name */
+ LOADER_FILE_DESC *fd; /* Access to ELF object file */
+ struct Elf32_Ehdr fhdr; /* ELF Object File Header */
+ struct Elf32_Phdr *phdr; /* ELF Program Header Table */
+ Elf32_Word phnum; /* # entries in program header table */
+ char* strtab; /* String Table */
+ Elf32_Word strsz; /* String Table size in bytes */
+ struct Elf32_Dyn *dyntab; /* Elf Dynamic Table (.dynamic scn) */
+ /* This contains a list of dynamic */
+ /* tags which is terminated by a NULL */
+ /* record. */
+ struct Elf32_Sym *symtab; /* Elf Dynamic Symbol Table */
+ Elf32_Word symnum; /* # symbols in dynamic symbol table */
+ Elf32_Word gsymtab_offset;/* Offset into symbol table where */
+ /* global symbols start. */
+ Elf32_Word gstrtab_offset;/* Offset into string table where */
+ /* global symbol names start. */
+
+ uint8_t *c_args; /* presumably address of .args sect - confirm */
+ uint8_t *static_base; /* address of module's STATIC_BASE */
+ int32_t argc; /* argument count recorded for module */
+ char **argv; /* argument vector recorded for module */
+ DLIMP_Loaded_Module *loaded_module; /* link to loaded module until detached */
+ int32_t wrong_endian; /* presumably set when file and host endianness differ - confirm */
+ BOOL direct_dependent_only; /* NOTE(review): semantics not visible here */
+ BOOL relocatable; /* TRUE if module can be relocated */
+ /* at load-time. FALSE if module is */
+ /* a static executable. */
+ BOOL relocate_entry_point; /* TRUE if the entry point has */
+ /* not been relocated */
+
+ int32_t dsbt_index; /* DSBT index requested/assigned */
+ uint32_t dsbt_size; /* DSBT size for this module */
+ int32_t dsbt_base_tagidx;/* Location of DSBT base dyn tag */
+
+ int32_t preinit_array_idx; /* DT_PREINIT_ARRAY dyn tag loc */
+ int32_t preinit_arraysz; /* sizeof pre-init array */
+ int32_t init_idx; /* DT_INIT dynamic tag location */
+ int32_t init_array_idx; /* DT_INIT_ARRAY dyn tag location */
+ int32_t init_arraysz; /* sizeof init array */
+
+} DLIMP_Dynamic_Module;
+
+/*---------------------------------------------------------------------------*/
+/* DLIMP_dependency_stack */
+/* */
+/* A LIFO stack of dynamic module objects (DLIMP_Dynamic_Module *) that */
+/* is retained while dependent files are being loaded and allocated. It */
+/* is used to guide which dynamic modules need to be relocated after all */
+/* items in the dependency graph have been allocated. The stack is only */
+/* used when the client asks the core loader to load a dynamic executable */
+/* or library. When relocation is completed, this stack should be empty. */
+/*---------------------------------------------------------------------------*/
+TYPE_STACK_DEFINITION(DLIMP_Dynamic_Module*, dynamic_module_ptr)
+extern dynamic_module_ptr_Stack DLIMP_dependency_stack;
+
+/*---------------------------------------------------------------------------*/
+/* Private Loader Object instance.                                           */
+/*---------------------------------------------------------------------------*/
+typedef struct
+{
+ /*-----------------------------------------------------------------------*/
+ /* Contains filenames (type const char*) the system is in the process of */
+ /* loading. Used to detect cycles in incorrectly compiled ELF binaries.  */
+ /*-----------------------------------------------------------------------*/
+ Array_List DLIMP_module_dependency_list;
+
+ /*-----------------------------------------------------------------------*/
+ /* Contains objects (type DLIMP_Loaded_Module*) that the system has      */
+ /* loaded into target memory.                                            */
+ /*-----------------------------------------------------------------------*/
+ loaded_module_ptr_Queue DLIMP_loaded_objects;
+
+ /*-----------------------------------------------------------------------*/
+ /* Dependency stack - LIFO stack of dynamic modules that are loaded when */
+ /* client asks to load a dynamic executable or library. Note that        */
+ /* dependents that have already been loaded with another module will not */
+ /* appear on this stack.                                                 */
+ /*-----------------------------------------------------------------------*/
+ dynamic_module_ptr_Stack DLIMP_dependency_stack;
+
+ /*-----------------------------------------------------------------------*/
+ /* Counter for generating unique IDs for file handles.                   */
+ /* NOTE: File handle is assigned sequentially but is never reclaimed     */
+ /*       when the modules are unloaded. It is conceivable that a loader  */
+ /*       running for a long time and loading and unloading modules       */
+ /*       could wrap-around. The loader generates error in this case.     */
+ /* Presumably each loader instance has a list of file handles, one for   */
+ /* each file that it loads, and the file handle serves as an index into  */
+ /* the list. Therefore even if the same file is loaded by two loader     */
+ /* instances, both loader instances have a different file handle for the */
+ /* file - the file is mapped uniquely to its appropriate file handle per */
+ /* loader instance.                                                      */
+ /*-----------------------------------------------------------------------*/
+ int32_t file_handle;
+
+ /*-----------------------------------------------------------------------*/
+ /* Client token, passed in via DLOAD_create()                            */
+ /*-----------------------------------------------------------------------*/
+ void * client_handle;
+} LOADER_OBJECT;
+
+
+/*****************************************************************************/
+/* IF data : Below are the data structures used to store init-fini data.     */
+/*****************************************************************************/
+typedef struct
+{
+ TARGET_ADDRESS sect_addr; /* presumably target address of the section - confirm */
+ int32_t size; /* presumably section size in bytes - confirm */
+}
+IF_single_record;
+
+TYPE_QUEUE_DEFINITION(IF_single_record*, IF_table)
+extern IF_table_Queue TI_init_table;
+
+
+/*****************************************************************************/
+/* Container used to read in argc, argv from the .args section.              */
+/*****************************************************************************/
+typedef struct { int argc; char *argv[1]; } ARGS_CONTAINER; /* NOTE(review): argv is declared with one element; confirm whether more entries follow in the section */
+
+
+/*****************************************************************************/
+/* is_dsbt_module()                                                          */
+/*                                                                           */
+/*    Return true if the module uses the DSBT model, i.e. if the DSBT size   */
+/*    recorded for the dynamic module is non-zero.                           */
+/*****************************************************************************/
+static inline BOOL is_dsbt_module(DLIMP_Dynamic_Module *dyn_module)
+{
+   uint32_t table_size = dyn_module->dsbt_size;
+
+   return table_size != 0;
+}
+
+/*****************************************************************************/
+/* is_arm_module()                                                           */
+/*                                                                           */
+/*    Return true if the ELF file header identifies an ARM module, that is   */
+/*    if its e_machine field equals EM_ARM.                                  */
+/*****************************************************************************/
+static inline BOOL is_arm_module(struct Elf32_Ehdr* fhdr)
+{
+   return (EM_ARM == fhdr->e_machine);
+}
+
+/*****************************************************************************/
+/* is_c60_module()                                                           */
+/*                                                                           */
+/*    Return true if the ELF file header identifies a C60 module, that is    */
+/*    if its e_machine field equals EM_TI_C6000.                             */
+/*****************************************************************************/
+static inline BOOL is_c60_module(struct Elf32_Ehdr* fhdr)
+{
+   return (EM_TI_C6000 == fhdr->e_machine);
+}
+
+/*---------------------------------------------------------------------------*/
+/* DLIMP_update_dyntag_section_address()                                     */
+/*                                                                           */
+/*    Given the index of a dynamic tag which we happen to know points to a   */
+/*    section address, find the program header table entry associated with   */
+/*    the specified address and update the tag value with the real address   */
+/*    of the section.                                                        */
+/*                                                                           */
+/*---------------------------------------------------------------------------*/
+extern BOOL DLIMP_update_dyntag_section_address(DLIMP_Dynamic_Module *dyn_module,
+ int32_t i);
+/* DLIMP_get_first_dyntag(): presumably looks up the first dyn_table entry with the given tag - TODO confirm return meaning */
+extern uint32_t DLIMP_get_first_dyntag(int tag, struct Elf32_Dyn* dyn_table);
+
+/*---------------------------------------------------------------------------*/
+/* Global flags to help manage internal debug and profiling efforts.         */
+/*---------------------------------------------------------------------------*/
+#ifndef __TI_COMPILER_VERSION__
+#define LOADER_DEBUG 1
+#else
+#define LOADER_DEBUG 0
+#endif
+/* The compiler-dependent default chosen above is discarded by the #undef.   */
+#undef LOADER_DEBUG
+/* Both flags are then forced on unconditionally.                            */
+#define LOADER_DEBUG 1
+#define LOADER_PROFILE 1
+
+#if LOADER_DEBUG
+extern BOOL debugging_on;
+#endif
+
+#if LOADER_DEBUG || LOADER_PROFILE
+extern BOOL profiling_on;
+#endif
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/dload_endian.c b/src/core/dsp/ocl_load/DLOAD/dload_endian.c
new file mode 100644
index 0000000..ac6413b
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/dload_endian.c
@@ -0,0 +1,151 @@
+/*
+* dload_endian.c
+*
+* Simple helper functions to assist core loader with endian-ness issues
+* when the host endian-ness may be opposite the endian-ness of the target.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include "dload_endian.h"
+
+/*****************************************************************************/
+/* DLIMP_GET_ENDIAN() - Determine endianness of the host. Uses ELF */
+/* endianness constants. */
+/* */
+/* Returns ELFDATA2LSB when the lowest-addressed byte of a 32-bit integer */
+/* holding the value 1 is non-zero, and ELFDATA2MSB otherwise. */
+/*****************************************************************************/
+int DLIMP_get_endian(void)
+{
+   const int32_t probe = 0x1;
+
+   /* Inspect the first byte through an unsigned char pointer: character */
+   /* types may alias any object, whereas reading an int32_t through an */
+   /* int16_t lvalue breaks the C effective-type (strict aliasing) rules. */
+   if (*(const unsigned char*)(&probe)) return ELFDATA2LSB;
+
+   return ELFDATA2MSB;
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_ENDIAN32() - Swap endianness of a 32-bit integer. */
+/* */
+/* to_change points at the value, which is byte-reversed in place. */
+/*****************************************************************************/
+void DLIMP_change_endian32(int32_t* to_change)
+{
+   /* Work on an unsigned copy: moving a byte >= 0x80 into bits 24..31 of a */
+   /* signed int is undefined behaviour, while unsigned shifts are always */
+   /* well defined. */
+   const uint32_t original = (uint32_t)*to_change;
+   const uint32_t reversed = ((original & 0x000000FFu) << 24)
+                           | ((original & 0x0000FF00u) << 8)
+                           | ((original & 0x00FF0000u) >> 8)
+                           | ((original & 0xFF000000u) >> 24);
+
+   *to_change = (int32_t)reversed;
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_ENDIAN16() - Exchange the two bytes of a 16-bit integer. */
+/* */
+/* to_change points at the value, which is rewritten in place. */
+/*****************************************************************************/
+void DLIMP_change_endian16(int16_t* to_change)
+{
+   const uint16_t original    = (uint16_t)*to_change;
+   const uint16_t low_to_high = (uint16_t)((original & 0x00FFu) << 8);
+   const uint16_t high_to_low = (uint16_t)((original & 0xFF00u) >> 8);
+
+   *to_change = (int16_t)(low_to_high | high_to_low);
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_EHDR_ENDIAN() - Byte-swap an ELF file header in place. */
+/* */
+/* Swapped as 16-bit values: e_type, e_machine, e_ehsize, e_phentsize, */
+/* e_phnum, e_shentsize, e_shnum, e_shstrndx. */
+/* Swapped as 32-bit values: e_version, e_entry, e_phoff, e_shoff, e_flags. */
+/*****************************************************************************/
+void DLIMP_change_ehdr_endian(struct Elf32_Ehdr* ehdr)
+{
+   /* 16-bit members. */
+   DLIMP_change_endian16((int16_t*)&ehdr->e_type);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_machine);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_ehsize);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_phentsize);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_phnum);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_shentsize);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_shnum);
+   DLIMP_change_endian16((int16_t*)&ehdr->e_shstrndx);
+
+   /* 32-bit members. */
+   DLIMP_change_endian32((int32_t*)&ehdr->e_version);
+   DLIMP_change_endian32((int32_t*)&ehdr->e_entry);
+   DLIMP_change_endian32((int32_t*)&ehdr->e_phoff);
+   DLIMP_change_endian32((int32_t*)&ehdr->e_shoff);
+   DLIMP_change_endian32((int32_t*)&ehdr->e_flags);
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_PHDR_ENDIAN() - Byte-swap an ELF program header in place. */
+/* */
+/* All eight members handled here are swapped as 32-bit values: p_type, */
+/* p_flags, p_align, p_offset, p_filesz, p_vaddr, p_paddr and p_memsz. */
+/*****************************************************************************/
+void DLIMP_change_phdr_endian(struct Elf32_Phdr* phdr)
+{
+   /* p_type, p_flags, p_align */
+   DLIMP_change_endian32((int32_t*)&phdr->p_type);
+   DLIMP_change_endian32((int32_t*)&phdr->p_flags);
+   DLIMP_change_endian32((int32_t*)&phdr->p_align);
+
+   /* p_offset, p_filesz */
+   DLIMP_change_endian32((int32_t*)&phdr->p_offset);
+   DLIMP_change_endian32((int32_t*)&phdr->p_filesz);
+
+   /* p_vaddr, p_paddr, p_memsz */
+   DLIMP_change_endian32((int32_t*)&phdr->p_vaddr);
+   DLIMP_change_endian32((int32_t*)&phdr->p_paddr);
+   DLIMP_change_endian32((int32_t*)&phdr->p_memsz);
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_DYNENT_ENDIAN() - Byte-swap a dynamic table entry in place. */
+/* */
+/* Both d_tag and d_un (accessed through its d_val member) are swapped as */
+/* 32-bit values. */
+/*****************************************************************************/
+void DLIMP_change_dynent_endian(struct Elf32_Dyn* dyn)
+{
+   int32_t* const tag_word   = (int32_t*)&dyn->d_tag;
+   int32_t* const value_word = (int32_t*)&dyn->d_un.d_val;
+
+   DLIMP_change_endian32(tag_word);
+   DLIMP_change_endian32(value_word);
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_SYM_ENDIAN() - Byte-swap an ELF symbol table entry in place. */
+/* */
+/* st_name, st_value and st_size are swapped as 32-bit values and st_shndx */
+/* as a 16-bit value; no other member of the entry is touched. */
+/*****************************************************************************/
+void DLIMP_change_sym_endian(struct Elf32_Sym* sym)
+{
+   int32_t* const name_word  = (int32_t*)&sym->st_name;
+   int32_t* const value_word = (int32_t*)&sym->st_value;
+   int32_t* const size_word  = (int32_t*)&sym->st_size;
+   int16_t* const shndx_half = (int16_t*)&sym->st_shndx;
+
+   DLIMP_change_endian32(name_word);
+   DLIMP_change_endian32(value_word);
+   DLIMP_change_endian32(size_word);
+   DLIMP_change_endian16(shndx_half);
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_RELA_ENDIAN() - Byte-swap a RELA-type relocation in place. */
+/* */
+/* r_offset, r_info and r_addend are each swapped as 32-bit values. */
+/*****************************************************************************/
+void DLIMP_change_rela_endian(struct Elf32_Rela* ra)
+{
+   int32_t* const offset_word = (int32_t*)&ra->r_offset;
+   int32_t* const info_word   = (int32_t*)&ra->r_info;
+   int32_t* const addend_word = (int32_t*)&ra->r_addend;
+
+   DLIMP_change_endian32(offset_word);
+   DLIMP_change_endian32(info_word);
+   DLIMP_change_endian32(addend_word);
+}
+
+/*****************************************************************************/
+/* DLIMP_CHANGE_REL_ENDIAN() - Byte-swap a REL-type relocation in place. */
+/* */
+/* r_offset and r_info are each swapped as 32-bit values. */
+/*****************************************************************************/
+void DLIMP_change_rel_endian(struct Elf32_Rel* r)
+{
+   int32_t* const offset_word = (int32_t*)&r->r_offset;
+   int32_t* const info_word   = (int32_t*)&r->r_info;
+
+   DLIMP_change_endian32(offset_word);
+   DLIMP_change_endian32(info_word);
+}
diff --git a/src/core/dsp/ocl_load/DLOAD/dload_endian.h b/src/core/dsp/ocl_load/DLOAD/dload_endian.h
new file mode 100644
index 0000000..ee74e11
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/dload_endian.h
@@ -0,0 +1,58 @@
+/*
+* dload_endian.h
+*
+* Specification of functions used to assist loader with endian-ness issues.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef DLOAD_ENDIAN_H
+#define DLOAD_ENDIAN_H
+
+#include "elf32.h"
+
+/*---------------------------------------------------------------------------*/
+/* Prototypes for ELF file object reader endianness swap routines. */
+/*---------------------------------------------------------------------------*/
+/* Every DLIMP_change_* routine rewrites the object it is handed in place. */
+int DLIMP_get_endian(void); /* host endianness: ELFDATA2LSB or ELFDATA2MSB */
+void DLIMP_change_endian32(int32_t* to_change); /* reverse the 4 bytes */
+void DLIMP_change_endian16(int16_t* to_change); /* exchange the 2 bytes */
+void DLIMP_change_ehdr_endian(struct Elf32_Ehdr* to_change); /* ELF file header */
+void DLIMP_change_phdr_endian(struct Elf32_Phdr* to_change); /* program header */
+void DLIMP_change_dynent_endian(struct Elf32_Dyn* to_change); /* dynamic table entry */
+void DLIMP_change_sym_endian(struct Elf32_Sym* to_change); /* symbol table entry */
+void DLIMP_change_rela_endian(struct Elf32_Rela* to_change); /* RELA relocation */
+void DLIMP_change_rel_endian(struct Elf32_Rel* to_change); /* REL relocation */
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/elf32.c b/src/core/dsp/ocl_load/DLOAD/elf32.c
new file mode 100644
index 0000000..082ba01
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/elf32.c
@@ -0,0 +1,652 @@
+/*
+* elf32.c
+*
+* Basic Data Structures for 32-Bit ELF Object Format Files
+*
+* The data structures in this file come primarily from this specification:
+*
+* Tool Interface Standard (TIS)
+* Executable and Linking Format (ELF) Specification
+* Version 1.2
+*
+* TIS Committee
+* May 1995
+*
+* Additions and enhancements from this specification are also included:
+*
+* System V Application Binary Interface
+* DRAFT 17
+* December 2003
+*
+* http://sco.com/developers/gabi/2003-12-17/contents.html
+*
+* This is a C implementation of the data base objects that are commonly
+* used in the source for TI development tools that support ELF.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include "elf32.h"
+
+/*---------------------------------------------------------------------------*/
+/* Dynamic Tag Database */
+/*---------------------------------------------------------------------------*/
+/* Entry fields, in order: d_tag_name, d_tag_value, d_untype, d_exec_req, d_shared_req. */
+const struct EDYN_TAG EDYN_TAG_DB[] =
+{
+ /* EDYN_TAG_NULL */
+ {
+ /* d_tag_name */ "DT_NULL",
+ /* d_tag_value */ DT_NULL,
+ /* d_untype */ EDYN_UNTYPE_IGNORED,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_MANDATORY
+ },
+
+ /* EDYN_TAG_NEEDED */
+ {
+ /* d_tag_name */ "DT_NEEDED",
+ /* d_tag_value */ DT_NEEDED,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_PLTRELSZ */
+ {
+ /* d_tag_name */ "DT_PLTRELSZ",
+ /* d_tag_value */ DT_PLTRELSZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_PLTGOT */
+ {
+ /* d_tag_name */ "DT_PLTGOT",
+ /* d_tag_value */ DT_PLTGOT,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_HASH */
+ {
+ /* d_tag_name */ "DT_HASH",
+ /* d_tag_value */ DT_HASH,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_MANDATORY
+ },
+
+ /* EDYN_TAG_STRTAB */
+ {
+ /* d_tag_name */ "DT_STRTAB",
+ /* d_tag_value */ DT_STRTAB,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_MANDATORY
+ },
+
+ /* EDYN_TAG_SYMTAB */
+ {
+ /* d_tag_name */ "DT_SYMTAB",
+ /* d_tag_value */ DT_SYMTAB,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_MANDATORY
+ },
+
+ /* EDYN_TAG_RELA */
+ {
+ /* d_tag_name */ "DT_RELA",
+ /* d_tag_value */ DT_RELA,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_RELASZ */
+ {
+ /* d_tag_name */ "DT_RELASZ",
+ /* d_tag_value */ DT_RELASZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_RELAENT */
+ {
+ /* d_tag_name */ "DT_RELAENT",
+ /* d_tag_value */ DT_RELAENT,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_STRSZ */
+ {
+ /* d_tag_name */ "DT_STRSZ",
+ /* d_tag_value */ DT_STRSZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_MANDATORY
+ },
+
+ /* EDYN_TAG_SYMENT */
+ {
+ /* d_tag_name */ "DT_SYMENT",
+ /* d_tag_value */ DT_SYMENT,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_MANDATORY
+ },
+
+ /* EDYN_TAG_INIT */
+ {
+ /* d_tag_name */ "DT_INIT",
+ /* d_tag_value */ DT_INIT,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_FINI */
+ {
+ /* d_tag_name */ "DT_FINI",
+ /* d_tag_value */ DT_FINI,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_SONAME */
+ {
+ /* d_tag_name */ "DT_SONAME",
+ /* d_tag_value */ DT_SONAME,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_IGNORED,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_RPATH */
+ {
+ /* d_tag_name */ "DT_RPATH",
+ /* d_tag_value */ DT_RPATH,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_IGNORED
+ },
+
+ /* EDYN_TAG_SYMBOLIC */
+ {
+ /* d_tag_name */ "DT_SYMBOLIC",
+ /* d_tag_value */ DT_SYMBOLIC,
+ /* d_untype */ EDYN_UNTYPE_IGNORED,
+ /* d_exec_req */ EDYN_TAGREQ_IGNORED,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_REL */
+ {
+ /* d_tag_name */ "DT_REL",
+ /* d_tag_value */ DT_REL,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_RELSZ */
+ {
+ /* d_tag_name */ "DT_RELSZ",
+ /* d_tag_value */ DT_RELSZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_RELENT */
+ {
+ /* d_tag_name */ "DT_RELENT",
+ /* d_tag_value */ DT_RELENT,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_MANDATORY,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_PLTREL */
+ {
+ /* d_tag_name */ "DT_PLTREL",
+ /* d_tag_value */ DT_PLTREL,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_DEBUG */
+ {
+ /* d_tag_name */ "DT_DEBUG",
+ /* d_tag_value */ DT_DEBUG,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_IGNORED
+ },
+
+ /* EDYN_TAG_TEXTREL */
+ {
+ /* d_tag_name */ "DT_TEXTREL",
+ /* d_tag_value */ DT_TEXTREL,
+ /* d_untype */ EDYN_UNTYPE_IGNORED,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_JMPREL */
+ {
+ /* d_tag_name */ "DT_JMPREL",
+ /* d_tag_value */ DT_JMPREL,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_BIND_NOW */
+ {
+ /* d_tag_name */ "DT_BIND_NOW",
+ /* d_tag_value */ DT_BIND_NOW,
+ /* d_untype */ EDYN_UNTYPE_IGNORED,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_INIT_ARRAY */
+ {
+ /* d_tag_name */ "DT_INIT_ARRAY",
+ /* d_tag_value */ DT_INIT_ARRAY,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_FINI_ARRAY */
+ {
+ /* d_tag_name */ "DT_FINI_ARRAY",
+ /* d_tag_value */ DT_FINI_ARRAY,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_INIT_ARRAYSZ */
+ {
+ /* d_tag_name */ "DT_INIT_ARRAYSZ",
+ /* d_tag_value */ DT_INIT_ARRAYSZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_FINI_ARRAYSZ */
+ {
+ /* d_tag_name */ "DT_FINI_ARRAYSZ",
+ /* d_tag_value */ DT_FINI_ARRAYSZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_RUNPATH */
+ {
+ /* d_tag_name */ "DT_RUNPATH",
+ /* d_tag_value */ DT_RUNPATH,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_FLAGS */
+ {
+ /* d_tag_name */ "DT_FLAGS",
+ /* d_tag_value */ DT_FLAGS,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_OPTIONAL
+ },
+
+ /* EDYN_TAG_ENCODING */
+ {
+ /* d_tag_name */ "DT_ENCODING",
+ /* d_tag_value */ DT_ENCODING,
+ /* d_untype */ EDYN_UNTYPE_UNSPECIFIED,
+ /* d_exec_req */ EDYN_TAGREQ_UNSPECIFIED,
+ /* d_shared_req */ EDYN_TAGREQ_UNSPECIFIED
+ },
+
+ /* EDYN_TAG_PREINIT_ARRAY */
+ {
+ /* d_tag_name */ "DT_PREINIT_ARRAY",
+ /* d_tag_value */ DT_PREINIT_ARRAY,
+ /* d_untype */ EDYN_UNTYPE_PTR,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_IGNORED
+ },
+
+ /* EDYN_TAG_PREINIT_ARRAYSZ */
+ {
+ /* d_tag_name */ "DT_PREINIT_ARRAYSZ",
+ /* d_tag_value */ DT_PREINIT_ARRAYSZ,
+ /* d_untype */ EDYN_UNTYPE_VAL,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_IGNORED
+ },
+
+ /* Sentinel entry: an empty name and a d_tag_value of -1 terminate the array */
+ {
+ /* d_tag_name */ "",
+ /* d_tag_value */ -1,
+ /* d_untype */ EDYN_UNTYPE_UNSPECIFIED,
+ /* d_exec_req */ EDYN_TAGREQ_OPTIONAL,
+ /* d_shared_req */ EDYN_TAGREQ_IGNORED
+ }
+};
+
+/*---------------------------------------------------------------------------*/
+/* Special Section Database - entry fields: name, sh_type, sh_entsize, sh_flags */
+/*---------------------------------------------------------------------------*/
+const struct ESCN ESCN_DB[] =
+{
+ /* .bss */
+ {
+ /* name */ ESCN_BSS_name,
+ /* sh_type */ SHT_NOBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE
+ },
+
+ /* .comment */
+ {
+ /* name */ ESCN_COMMENT_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .data */
+ {
+ /* name */ ESCN_DATA_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE
+ },
+
+ /* .data1 */
+ {
+ /* name */ ESCN_DATA1_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE
+ },
+
+ /* .debug */
+ {
+ /* name */ ESCN_DEBUG_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .dynamic */
+ {
+ /* name */ ESCN_DYNAMIC_name,
+ /* sh_type */ SHT_DYNAMIC,
+ /* sh_entsize */ sizeof(struct Elf32_Dyn),
+ /* sh_flags */ SHF_ALLOC
+ },
+
+ /* .dynstr */
+ {
+ /* name */ ESCN_DYNSTR_name,
+ /* sh_type */ SHT_STRTAB,
+ /* sh_entsize */ sizeof(char),
+ /* sh_flags */ SHF_ALLOC + SHF_STRINGS
+ },
+
+ /* .dynsym */
+ {
+ /* name */ ESCN_DYNSYM_name,
+ /* sh_type */ SHT_DYNSYM,
+ /* sh_entsize */ sizeof(struct Elf32_Sym),
+ /* sh_flags */ SHF_ALLOC
+ },
+
+ /* .fini */
+ {
+ /* name */ ESCN_FINI_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_EXECINSTR
+ },
+
+ /* .fini_array */
+ {
+ /* name */ ESCN_FINI_ARRAY_name,
+ /* sh_type */ SHT_FINI_ARRAY,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE
+ },
+
+ /* .got */
+ {
+ /* name */ ESCN_GOT_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .hash */
+ {
+ /* name */ ESCN_HASH_name,
+ /* sh_type */ SHT_HASH,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC
+ },
+
+ /* .init */
+ {
+ /* name */ ESCN_INIT_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_EXECINSTR
+ },
+
+ /* .init_array */
+ {
+ /* name */ ESCN_INIT_ARRAY_name,
+ /* sh_type */ SHT_INIT_ARRAY,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE
+ },
+
+ /* .interp */
+ {
+ /* name */ ESCN_INTERP_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .line */
+ {
+ /* name */ ESCN_LINE_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .note */
+ {
+ /* name */ ESCN_NOTE_name,
+ /* sh_type */ SHT_NOTE,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .plt */
+ {
+ /* name */ ESCN_PLT_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+
+ /* .preinit_array */
+ {
+ /* name */ ESCN_PREINIT_ARRAY_name,
+ /* sh_type */ SHT_PREINIT_ARRAY,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE
+ },
+
+ /* .rel */
+ {
+ /* name */ ESCN_REL_name,
+ /* sh_type */ SHT_REL,
+ /* sh_entsize */ sizeof(struct Elf32_Rel),
+ /* sh_flags */ 0
+ },
+
+ /* .rela */
+ {
+ /* name */ ESCN_RELA_name,
+ /* sh_type */ SHT_RELA,
+ /* sh_entsize */ sizeof(struct Elf32_Rela),
+ /* sh_flags */ 0
+ },
+
+ /* .rodata */
+ {
+ /* name */ ESCN_RODATA_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC
+ },
+
+ /* .rodata1 */
+ {
+ /* name */ ESCN_RODATA1_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC
+ },
+
+ /* .shstrtab */
+ {
+ /* name */ ESCN_SHSTRTAB_name,
+ /* sh_type */ SHT_STRTAB,
+ /* sh_entsize */ sizeof(char),
+ /* sh_flags */ SHF_STRINGS
+ },
+
+ /* .strtab */
+ {
+ /* name */ ESCN_STRTAB_name,
+ /* sh_type */ SHT_STRTAB,
+ /* sh_entsize */ sizeof(char),
+ /* sh_flags */ SHF_STRINGS
+ },
+
+ /* .symtab */
+ {
+ /* name */ ESCN_SYMTAB_name,
+ /* sh_type */ SHT_SYMTAB,
+ /* sh_entsize */ sizeof(struct Elf32_Sym),
+ /* sh_flags */ 0
+ },
+
+ /* .symtab_shndx */
+ {
+ /* name */ ESCN_SYMTAB_SHNDX_name,
+ /* sh_type */ SHT_SYMTAB_SHNDX,
+ /* sh_entsize */ sizeof(Elf32_Word),
+ /* sh_flags */ 0
+ },
+
+ /* .tbss */
+ {
+ /* name */ ESCN_TBSS_name,
+ /* sh_type */ SHT_NOBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE + SHF_TLS
+ },
+
+ /* .tdata */
+ {
+ /* name */ ESCN_TDATA_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE + SHF_TLS
+ },
+
+ /* .tdata1 */
+ {
+ /* name */ ESCN_TDATA1_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_WRITE + SHF_TLS
+ },
+
+ /* .text */
+ {
+ /* name */ ESCN_TEXT_name,
+ /* sh_type */ SHT_PROGBITS,
+ /* sh_entsize */ 0,
+ /* sh_flags */ SHF_ALLOC + SHF_EXECINSTR
+ },
+#if 0
+ /* .build.attributes - this entry is compiled out by the surrounding #if 0 */
+ {
+ /* name */ ESCN_ATTRIBUTES_name,
+ /* sh_type */ SHT_ATTRIBUTES,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ },
+#endif
+ /* Sentinel entry: a null name pointer terminates the array */
+ {
+ /* name */ (const char*)0,
+ /* sh_type */ 0,
+ /* sh_entsize */ 0,
+ /* sh_flags */ 0
+ }
+};
+
diff --git a/src/core/dsp/ocl_load/DLOAD/elf32.h b/src/core/dsp/ocl_load/DLOAD/elf32.h
new file mode 100644
index 0000000..67358d6
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/elf32.h
@@ -0,0 +1,756 @@
+/*
+* elf32.h
+*
+* Basic Data Structures for 32-bit ELF Object Format Files
+*
+* The data structures in this file come primarily from this specification:
+*
+* Tool Interface Standard (TIS)
+* Executable and Linking Format (ELF) Specification
+* Version 1.2
+*
+* TIS Committee
+* May 1995
+*
+* Additions and enhancements from this specification are also included:
+*
+* System V Application Binary Interface
+* DRAFT 17
+* December 2003
+*
+* http://sco.com/developers/gabi/2003-12-17/contents.html
+*
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef ELF32_H
+#define ELF32_H
+
+#include <inttypes.h>
+
+/*---------------------------------------------------------------------------*/
+/* 32-Bit Data Types (Figure 1-2, page 1-2) */
+/*---------------------------------------------------------------------------*/
+typedef uint32_t Elf32_Addr; /* Unsigned program address (32 bits) */
+typedef uint16_t Elf32_Half; /* Unsigned medium integer (16 bits) */
+typedef uint32_t Elf32_Off; /* Unsigned file offset (32 bits) */
+typedef int32_t Elf32_Sword; /* Signed large integer (32 bits) */
+typedef uint32_t Elf32_Word; /* Unsigned large integer (32 bits) */
+
+
+/*****************************************************************************/
+/* ELF Header */
+/* PP. 1-4 */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* ELF Identification Indexes (indexes into Elf32_Ehdr.e_ident[] below) */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ EI_MAG0 = 0, /* File identification, byte 0 (ELFMAG0) */
+ EI_MAG1 = 1, /* File identification, byte 1 (ELFMAG1) */
+ EI_MAG2 = 2, /* File identification, byte 2 (ELFMAG2) */
+ EI_MAG3 = 3, /* File identification, byte 3 (ELFMAG3) */
+ EI_CLASS = 4, /* File class (ELFCLASS* values) */
+ EI_DATA = 5, /* Data encoding (ELFDATA* values) */
+ EI_VERSION = 6, /* File version */
+ EI_OSABI = 7, /* Operating system / ABI (ELFOSABI_* values) */
+ EI_ABIVERSION = 8, /* ABI version */
+ EI_PAD = 9, /* Start of padding bytes */
+ EI_NIDENT = 16 /* Size of Elf32_Ehdr.e_ident[] */
+};
+
+
+/*---------------------------------------------------------------------------*/
+/* ELF Header Data Structure */
+/*---------------------------------------------------------------------------*/
+struct Elf32_Ehdr
+{
+ uint8_t e_ident[EI_NIDENT]; /* ELF identification bytes (indexed by EI_*) */
+ Elf32_Half e_type; /* Object File Type (ET_*) */
+ Elf32_Half e_machine; /* Target Processor (EM_*) */
+ Elf32_Word e_version; /* Object File Version (EV_*) */
+ Elf32_Addr e_entry; /* Entry Point */
+ Elf32_Off e_phoff; /* Program Header Table Offset */
+ Elf32_Off e_shoff; /* Section Header Table Offset */
+ Elf32_Word e_flags; /* Processor-Specific Flags */
+ Elf32_Half e_ehsize; /* Size of ELF header */
+ Elf32_Half e_phentsize; /* Size of a Program Header */
+ Elf32_Half e_phnum; /* # Entries in Program Header Table */
+ Elf32_Half e_shentsize; /* Size of a Section Header */
+ Elf32_Half e_shnum; /* # Entries in Section Header Table */
+ Elf32_Half e_shstrndx; /* Index of Section Name String Table Section */
+};
+
+
+/*---------------------------------------------------------------------------*/
+/* Object File Types (value of "e_type") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ ET_NONE = 0, /* No file type */
+ ET_REL = 1, /* Relocatable file */
+ ET_EXEC = 2, /* Executable file */
+ ET_DYN = 3, /* Shared object file */
+ ET_CORE = 4, /* Core file */
+ ET_LOOS = 0xfe00, /* First OS-specific value */
+ ET_HIOS = 0xfeff, ET_HIPS = ET_HIOS, /* Last OS-specific value (ET_HIPS: legacy misspelling, kept as alias) */
+ ET_LOPROC = 0xff00, /* First processor-specific value */
+ ET_HIPROC = 0xffff /* Last processor-specific value */
+};
+
+
+/*---------------------------------------------------------------------------*/
+/* Target Processors (value of "e_machine") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ EM_NONE = 0, /* No machine */
+ EM_M32 = 1, /* AT&T WE 32100 */
+ EM_SPARC = 2, /* SPARC */
+ EM_386 = 3, /* Intel 80386 */
+ EM_68K = 4, /* Motorola 68000 */
+ EM_88K = 5, /* Motorola 88000 */
+ EM_860 = 7, /* Intel 80860 */
+ EM_MIPS = 8, /* MIPS I Architecture */
+ EM_S370 = 9, /* IBM System/370 Processor */
+ EM_MIPS_RS3_LE = 10, /* MIPS RS3000 Little-endian */
+ EM_PARISC = 15, /* Hewlett-Packard PA-RISC */
+ EM_VPP500 = 17, /* Fujitsu VPP500 */
+ EM_SPARC32PLUS = 18, /* Enhanced instruction set SPARC */
+ EM_960 = 19, /* Intel 80960 */
+ EM_PPC = 20, /* PowerPC */
+ EM_PPC64 = 21, /* 64-bit PowerPC */
+ EM_S390 = 22, /* IBM System/390 Processor */
+ EM_V800 = 36, /* NEC V800 */
+ EM_FR20 = 37, /* Fujitsu FR20 */
+ EM_RH32 = 38, /* TRW RH-32 */
+ EM_RCE = 39, /* Motorola RCE */
+ EM_ARM = 40, /* Advanced RISC Machines ARM */
+ EM_ALPHA = 41, /* Digital Alpha */
+ EM_SH = 42, /* Hitachi SH */
+ EM_SPARCV9 = 43, /* SPARC Version 9 */
+ EM_TRICORE = 44, /* Siemens TriCore embedded processor */
+ EM_ARC = 45, /* Argonaut RISC Core, Argonaut Technologies Inc. */
+ EM_H8_300 = 46, /* Hitachi H8/300 */
+ EM_H8_300H = 47, /* Hitachi H8/300H */
+ EM_H8S = 48, /* Hitachi H8S */
+ EM_H8_500 = 49, /* Hitachi H8/500 */
+ EM_IA_64 = 50, /* Intel IA-64 processor architecture */
+ EM_MIPS_X = 51, /* Stanford MIPS-X */
+ EM_COLDFIRE = 52, /* Motorola ColdFire */
+ EM_68HC12 = 53, /* Motorola M68HC12 */
+ EM_MMA = 54, /* Fujitsu MMA Multimedia Accelerator */
+ EM_PCP = 55, /* Siemens PCP */
+ EM_NCPU = 56, /* Sony nCPU embedded RISC processor */
+ EM_NDR1 = 57, /* Denso NDR1 microprocessor */
+ EM_STARCORE = 58, /* Motorola Star*Core processor */
+ EM_ME16 = 59, /* Toyota ME16 processor */
+ EM_ST100 = 60, /* STMicroelectronics ST100 processor */
+ EM_TINYJ = 61, /* Advanced Logic Corp. TinyJ embedded processor family */
+ EM_X86_64 = 62, /* AMD x86-64 architecture */
+ EM_PDSP = 63, /* Sony DSP Processor */
+ EM_PDP10 = 64, /* Digital Equipment Corp. PDP-10 */
+ EM_PDP11 = 65, /* Digital Equipment Corp. PDP-11 */
+ EM_FX66 = 66, /* Siemens FX66 microcontroller */
+ EM_ST9PLUS = 67, /* STMicroelectronics ST9+ 8/16 bit microcontroller */
+ EM_ST7 = 68, /* STMicroelectronics ST7 8-bit microcontroller */
+ EM_68HC16 = 69, /* Motorola MC68HC16 Microcontroller */
+ EM_68HC11 = 70, /* Motorola MC68HC11 Microcontroller */
+ EM_68HC08 = 71, /* Motorola MC68HC08 Microcontroller */
+ EM_68HC05 = 72, /* Motorola MC68HC05 Microcontroller */
+ EM_SVX = 73, /* Silicon Graphics SVx */
+ EM_ST19 = 74, /* STMicroelectronics ST19 8-bit microcontroller */
+ EM_VAX = 75, /* Digital VAX */
+ EM_CRIS = 76, /* Axis Communications 32-bit embedded processor */
+ EM_JAVELIN = 77, /* Infineon Technologies 32-bit embedded processor */
+ EM_FIREPATH = 78, /* Element 14 64-bit DSP Processor */
+ EM_ZSP = 79, /* LSI Logic 16-bit DSP Processor */
+ EM_MMIX = 80, /* Donald Knuth's educational 64-bit processor */
+ EM_HUANY = 81, /* Harvard University machine-independent object files */
+ EM_PRISM = 82, /* SiTera Prism */
+ EM_AVR = 83, /* Atmel AVR 8-bit microcontroller */
+ EM_FR30 = 84, /* Fujitsu FR30 */
+ EM_D10V = 85, /* Mitsubishi D10V */
+ EM_D30V = 86, /* Mitsubishi D30V */
+ EM_V850 = 87, /* NEC v850 */
+ EM_M32R = 88, /* Mitsubishi M32R */
+ EM_MN10300 = 89, /* Matsushita MN10300 */
+ EM_MN10200 = 90, /* Matsushita MN10200 */
+ EM_PJ = 91, /* picoJava */
+ EM_OPENRISC = 92, /* OpenRISC 32-bit embedded processor */
+ EM_ARC_A5 = 93, /* ARC Cores Tangent-A5 */
+ EM_XTENSA = 94, /* Tensilica Xtensa Architecture */
+ EM_VIDEOCORE = 95, /* Alphamosaic VideoCore processor */
+ EM_TMM_GPP = 96, /* Thompson Multimedia General Purpose Processor */
+ EM_NS32K = 97, /* National Semiconductor 32000 series */
+ EM_TPC = 98, /* Tenor Network TPC processor */
+ EM_SNP1K = 99, /* Trebia SNP 1000 processor */
+ EM_ST200 = 100, /* STMicroelectronics (www.st.com) ST200 microcontroller */
+ EM_IP2K = 101, /* Ubicom IP2xxx microcontroller family */
+ EM_MAX = 102, /* MAX Processor */
+ EM_CR = 103, /* National Semiconductor CompactRISC microprocessor */
+ EM_F2MC16 = 104, /* Fujitsu F2MC16 */
+ EM_MSP430 = 105, /* Texas Instruments embedded microcontroller msp430 */
+ EM_BLACKFIN = 106, /* Analog Devices Blackfin (DSP) processor */
+ EM_SE_C33 = 107, /* S1C33 Family of Seiko Epson processors */
+ EM_SEP = 108, /* Sharp embedded microprocessor */
+ EM_ARCA = 109, /* Arca RISC Microprocessor */
+ EM_UNICORE = 110, /* Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University */
+
+ /*------------------------------------------------------------------------*/
+ /* ELF Magic Numbers Reserved For Texas Instruments */
+ /* */
+ /* The magic numbers 140-159 were reserved through SCO to be included */
+ /* in the official ELF specification. Please see Don Darling */
+ /* regarding any changes or allocation of the numbers below. */
+ /* */
+ /* When we allocate a number for use, SCO needs to be notified so they */
+ /* can update the ELF specification accordingly. */
+ /*------------------------------------------------------------------------*/
+ EM_TI_C6000 = 140, /* Texas Instruments TMS320C6000 DSP family (allocated, in use) */
+ EM_TI_UNUSED02 = 141, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED03 = 142, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED04 = 143, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED05 = 144, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED06 = 145, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED07 = 146, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED08 = 147, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED09 = 148, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED10 = 149, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED11 = 150, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED12 = 151, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED13 = 152, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED14 = 153, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED15 = 154, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED16 = 155, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED17 = 156, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED18 = 157, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED19 = 158, /* Reserved for Texas Instruments; unused */
+ EM_TI_UNUSED20 = 159 /* Reserved for Texas Instruments; unused */
+};
+
+
+/*---------------------------------------------------------------------------*/
+/* Object File Version (value of "e_version") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ EV_NONE = 0, /* Invalid version */
+ EV_CURRENT = 1 /* Current version (the only valid value defined here) */
+};
+
+
+/*****************************************************************************/
+/* ELF Identification */
+/* PP. 1-6 */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Identification Values for ELF Files */
+/*---------------------------------------------------------------------------*/
+
+/* EI_MAG0 to EI_MAG3 */
+enum
+{
+ ELFMAG0 = 0x7f, /* e_ident[EI_MAG0]: leading 0x7f byte of the magic number */
+ ELFMAG1 = 'E', /* e_ident[EI_MAG1] */
+ ELFMAG2 = 'L', /* e_ident[EI_MAG2] */
+ ELFMAG3 = 'F' /* e_ident[EI_MAG3] */
+};
+
+/* EI_CLASS */
+enum
+{
+ ELFCLASSNONE = 0, /* Invalid class */
+ ELFCLASS32 = 1, /* 32-bit objects (the class the Elf32_* structures here describe) */
+ ELFCLASS64 = 2 /* 64-bit objects */
+};
+
+/* EI_DATA */
+enum
+{
+ ELFDATANONE = 0, /* Invalid data encoding */
+ ELFDATA2LSB = 1, /* Little-endian data (least significant byte first) */
+ ELFDATA2MSB = 2 /* Big-endian data (most significant byte first) */
+};
+
+/* EI_OSABI */
+enum
+{
+ ELFOSABI_NONE = 0, /* No extensions or unspecified */
+ ELFOSABI_HPUX = 1, /* Hewlett-Packard HP-UX */
+ ELFOSABI_NETBSD = 2, /* NetBSD */
+ ELFOSABI_LINUX = 3, /* Linux */
+ ELFOSABI_SOLARIS = 6, /* Sun Solaris */
+ ELFOSABI_AIX = 7, /* AIX */
+ ELFOSABI_IRIX = 8, /* IRIX */
+ ELFOSABI_FREEBSD = 9, /* FreeBSD */
+ ELFOSABI_TRU64 = 10, /* Compaq TRU64 UNIX */
+ ELFOSABI_MODESTO = 11, /* Novell Modesto */
+ ELFOSABI_OPENBSD = 12, /* OpenBSD */
+ ELFOSABI_OPENVMS = 13, /* OpenVMS */
+ ELFOSABI_NSK = 14, /* Hewlett-Packard Non-Stop Kernel */
+ ELFOSABI_AROS = 15 /* Amiga Research OS */
+};
+
+/*****************************************************************************/
+/* Program Header */
+/* PP. 2-2 */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Program Header Data Structure */
+/*---------------------------------------------------------------------------*/
+struct Elf32_Phdr
+{
+ Elf32_Word p_type; /* Segment type (PT_*) */
+ Elf32_Off p_offset; /* Segment file offset */
+ Elf32_Addr p_vaddr; /* Segment virtual address */
+ Elf32_Addr p_paddr; /* Segment physical address */
+ Elf32_Word p_filesz; /* Segment file image size */
+ Elf32_Word p_memsz; /* Segment memory image size */
+ Elf32_Word p_flags; /* Segment flags (PF_* bits) */
+ Elf32_Word p_align; /* Segment alignment */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Segment Types (value of "p_type") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ PT_NULL = 0, /* Unused table entry */
+ PT_LOAD = 1, /* Loadable segment */
+ PT_DYNAMIC = 2, /* Dynamic linking information */
+ PT_INTERP = 3, /* Interpreter path string location */
+ PT_NOTE = 4, /* Location and size of auxiliary information */
+ PT_SHLIB = 5, /* Reserved by the ELF specification; semantics unspecified */
+ PT_PHDR = 6, /* Location and size of program header table */
+ PT_TLS = 7, /* Specifies the Thread-Local Storage template */
+ PT_LOOS = 0x60000000, /* First OS-specific value */
+ PT_HIOS = 0x6fffffff, /* Last OS-specific value */
+ PT_LOPROC = 0x70000000, /* First processor-specific value */
+ PT_HIPROC = 0x7fffffff /* Last processor-specific value */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Segment Permissions (value of "p_flags") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ PF_X = 0x1, /* Execute */
+ PF_W = 0x2, /* Write */
+ PF_R = 0x4, /* Read */
+ PF_MASKOS = 0x0ff00000, /* OS-specific mask */
+ PF_MASKPROC = 0xf0000000 /* Processor-specific mask; NOTE(review): value exceeds INT_MAX for 32-bit int, so this relies on the compiler accepting wide enumerators */
+};
+
+/*****************************************************************************/
+/* Sections */
+/* PP. 1-9 */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Section Header Data Structure */
+/*---------------------------------------------------------------------------*/
+struct Elf32_Shdr
+{
+ Elf32_Word sh_name; /* Section name (offset into string section) */
+ Elf32_Word sh_type; /* Section type (SHT_*) */
+ Elf32_Word sh_flags; /* Section flags (SHF_* bits) */
+ Elf32_Addr sh_addr; /* Address in memory image */
+ Elf32_Off sh_offset; /* File offset of section data */
+ Elf32_Word sh_size; /* Size of the section in bytes */
+ Elf32_Word sh_link; /* Section header table index link (meaning depends on sh_type) */
+ Elf32_Word sh_info; /* Extra information depending on section type */
+ Elf32_Word sh_addralign; /* Address alignment constraints */
+ Elf32_Word sh_entsize; /* Size of fixed-size entries in section (0 if none) */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Special Section Indexes */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ SHN_UNDEF = 0, /* Referenced by undefined values */
+ SHN_LORESERVE = 0xff00, /* First reserved index */
+ SHN_LOPROC = 0xff00, /* First processor-specific index (same value as SHN_LORESERVE) */
+ SHN_HIPROC = 0xff1f, /* Last processor-specific index */
+ SHN_LOOS = 0xff20, /* First OS-specific index */
+ SHN_HIOS = 0xff3f, /* Last OS-specific index */
+ SHN_ABS = 0xfff1, /* Referenced by absolute values */
+ SHN_COMMON = 0xfff2, /* Referenced by common values */
+ SHN_XINDEX = 0xffff, /* Indirect index reference (escape value; see SHT_SYMTAB_SHNDX) */
+ SHN_HIRESERVE = 0xffff /* Last reserved index (same value as SHN_XINDEX) */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Section Types (value of "sh_type") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ SHT_NULL = 0, /* Inactive section */
+ SHT_PROGBITS = 1, /* Application-specific information */
+ SHT_SYMTAB = 2, /* Symbol table */
+ SHT_STRTAB = 3, /* String table */
+ SHT_RELA = 4, /* Relocation entries (explicit addends) */
+ SHT_HASH = 5, /* Symbol hash table */
+ SHT_DYNAMIC = 6, /* Dynamic linking information */
+ SHT_NOTE = 7, /* Miscellaneous information */
+ SHT_NOBITS = 8, /* Contains no data in file */
+ SHT_REL = 9, /* Relocation entries (no expl. addends) */
+ SHT_SHLIB = 10, /* Shared library */
+ SHT_DYNSYM = 11, /* Dynamic symbol table */
+ SHT_INIT_ARRAY = 14, /* Pointers to initialization functions */
+ SHT_FINI_ARRAY = 15, /* Pointers to termination functions */
+ SHT_PREINIT_ARRAY = 16, /* Pointers to pre-init functions */
+ SHT_GROUP = 17, /* Section group */
+ SHT_SYMTAB_SHNDX = 18, /* Section indexes for SHN_XINDEX refs. */
+ SHT_LOOS = 0x60000000, /* First OS-specific type */
+ SHT_HIOS = 0x6fffffff, /* Last OS-specific type */
+ SHT_LOPROC = 0x70000000, /* First processor-specific type */
+ SHT_HIPROC = 0x7fffffff, /* Last processor-specific type */
+ SHT_LOUSER = 0x80000000, /* First application-specific type; NOTE(review): exceeds INT_MAX for 32-bit int */
+ SHT_HIUSER = 0xffffffff /* Last application-specific type; NOTE(review): exceeds INT_MAX, relies on the compiler accepting wide enumerators */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Section Attribute Flags (value of "sh_flags") */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ SHF_WRITE = 0x1, /* Writable during process execution */
+ SHF_ALLOC = 0x2, /* Loaded into processor memory */
+ SHF_EXECINSTR = 0x4, /* Contains executable instructions */
+ SHF_MERGE = 0x10, /* Can be merged */
+ SHF_STRINGS = 0x20, /* Contains null-terminated strings */
+ SHF_INFO_LINK = 0x40, /* sh_info contains a section index */
+ SHF_LINK_ORDER = 0x80, /* Maintain section ordering */
+ SHF_OS_NONCONFORMING = 0x100, /* OS-specific processing required */
+ SHF_GROUP = 0x200, /* Member of a section group */
+ SHF_TLS = 0x400, /* Contains Thread-Local Storage */
+ SHF_MASKOS = 0x0ff00000, /* Mask of OS-specific flags */
+ SHF_MASKPROC = 0xf0000000 /* Mask for processor-specific flags; NOTE(review): value exceeds INT_MAX for 32-bit int */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Section Group Flags */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ GRP_COMDAT = 0x1, /* Common data; only one is kept by linker */
+ GRP_MASKOS = 0x0ff00000, /* Mask for OS-specific group flags */
+ GRP_MASKPROC = 0xf0000000 /* Mask for processor-specific group flags; NOTE(review): value exceeds INT_MAX for 32-bit int */
+};
+
+
+/*****************************************************************************/
+/* Symbol Table */
+/* PP. 1-18 */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Symbol Table Entry Data Structure */
+/*---------------------------------------------------------------------------*/
+struct Elf32_Sym
+{
+ Elf32_Word st_name; /* String table offset for symbol name */
+ Elf32_Addr st_value; /* Symbol value */
+ Elf32_Word st_size; /* Symbol size */
+ uint8_t st_info; /* Symbol type and binding (see ELF32_ST_TYPE / ELF32_ST_BIND) */
+ uint8_t st_other; /* Symbol visibility (see ELF32_ST_VISIBILITY) */
+ Elf32_Half st_shndx; /* Index of the defining section, or a special SHN_* value */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Undefined Symbol Index */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ STN_UNDEF = 0 /* Symbol index 0: first symbol table entry is always undefined */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Symbol Binding and Type Utility Functions. */
+/*---------------------------------------------------------------------------*/
+static inline uint8_t ELF32_ST_BIND(uint8_t i) { return (uint8_t)(i >> 4); } /* binding: upper nibble of st_info */
+static inline uint8_t ELF32_ST_TYPE(uint8_t i) { return (uint8_t)(i & 0x0F); } /* type: lower nibble of st_info */
+static inline uint8_t ELF32_ST_INFO(uint8_t b, uint8_t t) /* pack binding b and type t into one st_info byte */
+ { return (uint8_t)((b << 4) | (t & 0x0F)); }
+static inline uint8_t ELF32_ST_VISIBILITY(uint8_t o) { return (uint8_t)(o & 0x03); } /* visibility: low two bits of st_other */
+
+
+/*---------------------------------------------------------------------------*/
+/* Symbol Binding (value returned by ELF32_ST_BIND()) */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ STB_LOCAL = 0, /* Symbol does not have external linkage */
+ STB_GLOBAL = 1, /* Symbol has external linkage */
+ STB_WEAK = 2, /* Symbol has weak external linkage (lower precedence than STB_GLOBAL) */
+ STB_LOOS = 10, /* First OS-specific binding */
+ STB_HIOS = 12, /* Last OS-specific binding */
+ STB_LOPROC = 13, /* First processor-specific binding */
+ STB_HIPROC = 15 /* Last processor-specific binding (largest value a 4-bit binding can hold) */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Symbol Types (value returned by ELF32_ST_TYPE()) */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ STT_NOTYPE = 0, /* Unspecified type */
+ STT_OBJECT = 1, /* Associated with a data object */
+ STT_FUNC = 2, /* Associated with executable code */
+ STT_SECTION = 3, /* Associated with a section */
+ STT_FILE = 4, /* Associated with a source file */
+ STT_COMMON = 5, /* Labels an uninitialized common block */
+ STT_TLS = 6, /* Specifies a thread-local storage entity */
+ STT_LOOS = 10, /* First OS-specific type */
+ STT_HIOS = 12, /* Last OS-specific type */
+ STT_LOPROC = 13, /* First processor-specific type */
+ STT_HIPROC = 15 /* Last processor-specific type (largest value a 4-bit type can hold) */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Symbol Visibility (value returned by ELF32_ST_VISIBILITY()) */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ STV_DEFAULT = 0, /* Visibility specified by binding type */
+ STV_INTERNAL = 1, /* Like STV_HIDDEN, with processor-specific semantics */
+ STV_HIDDEN = 2, /* Not visible to other components */
+ STV_PROTECTED = 3 /* Visible in other components but not preemptable (largest 2-bit value) */
+};
+
+/*****************************************************************************/
+/* Relocation */
+/* PP. 1-22 */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Relocation Entries Data Structures */
+/*---------------------------------------------------------------------------*/
+struct Elf32_Rel
+{
+ Elf32_Addr r_offset; /* Offset of the relocatable value in the section */
+ Elf32_Word r_info; /* Symbol table index and relocation type (see ELF32_R_SYM / ELF32_R_TYPE) */
+};
+
+struct Elf32_Rela
+{
+ Elf32_Addr r_offset; /* Offset of the relocatable value in the section */
+ Elf32_Word r_info; /* Symbol table index and relocation type (see ELF32_R_SYM / ELF32_R_TYPE) */
+ Elf32_Sword r_addend; /* Constant addend used to compute new value (signed) */
+};
+
+/*---------------------------------------------------------------------------*/
+/* Relocation Symbol and Type Utility Functions. */
+/*---------------------------------------------------------------------------*/
+static inline uint32_t ELF32_R_SYM(uint32_t i) { return i >> 8; } /* symbol index: upper 24 bits of r_info */
+static inline uint8_t ELF32_R_TYPE(uint32_t i) { return (uint8_t)(i & 0xFFu); } /* relocation type: low byte of r_info */
+static inline uint32_t ELF32_R_INFO(uint32_t s, uint8_t t) /* pack symbol index s and type t into r_info */
+ { return (s << 8) | (uint32_t)t; }
+
+
+/*****************************************************************************/
+/* Dynamic Section */
+/* PP. 2-8 */
+/*****************************************************************************/
+struct Elf32_Dyn
+{
+ Elf32_Sword d_tag; /* DT_* tag identifying this entry */
+ union
+ {
+ Elf32_Word d_val; /* integer value form */
+ Elf32_Addr d_ptr; /* address form */
+ } d_un; /* which member applies depends on d_tag (see the DT_* table) */
+};
+
+/* Name Value d_un Executable Shared Obj. */
+/* ---- ----- ---- ---------- ----------- */
+enum
+{
+ DT_NULL = 0, /* ignored mandatory mandatory */
+ DT_NEEDED = 1, /* d_val optional optional */
+ DT_PLTRELSZ = 2, /* d_val optional optional */
+ DT_PLTGOT = 3, /* d_ptr optional optional */
+ DT_HASH = 4, /* d_ptr mandatory mandatory */
+ DT_STRTAB = 5, /* d_ptr mandatory mandatory */
+ DT_SYMTAB = 6, /* d_ptr mandatory mandatory */
+ DT_RELA = 7, /* d_ptr mandatory optional */
+ DT_RELASZ = 8, /* d_val mandatory optional */
+ DT_RELAENT = 9, /* d_val mandatory optional */
+ DT_STRSZ = 10, /* d_val mandatory mandatory */
+ DT_SYMENT = 11, /* d_val mandatory mandatory */
+ DT_INIT = 12, /* d_ptr optional optional */
+ DT_FINI = 13, /* d_ptr optional optional */
+ DT_SONAME = 14, /* d_val ignored optional */
+ DT_RPATH = 15, /* d_val optional ignored */
+ DT_SYMBOLIC = 16, /* ignored ignored optional */
+ DT_REL = 17, /* d_ptr mandatory optional */
+ DT_RELSZ = 18, /* d_val mandatory optional */
+ DT_RELENT = 19, /* d_val mandatory optional */
+ DT_PLTREL = 20, /* d_val optional optional */
+ DT_DEBUG = 21, /* d_ptr optional ignored */
+ DT_TEXTREL = 22, /* ignored optional optional */
+ DT_JMPREL = 23, /* d_ptr optional optional */
+ DT_BIND_NOW = 24, /* ignored optional optional */
+ DT_INIT_ARRAY = 25, /* d_ptr optional optional */
+ DT_FINI_ARRAY = 26, /* d_ptr optional optional */
+ DT_INIT_ARRAYSZ = 27, /* d_val optional optional */
+ DT_FINI_ARRAYSZ = 28, /* d_val optional optional */
+ DT_RUNPATH = 29, /* d_val optional optional */
+ DT_FLAGS = 30, /* d_val optional optional */
+ DT_ENCODING = 32, /* unspecified unspecified unspecified (same value as DT_PREINIT_ARRAY) */
+ DT_PREINIT_ARRAY = 32, /* d_ptr optional ignored */
+ DT_PREINIT_ARRAYSZ = 33, /* d_val optional ignored */
+ DT_LOOS = 0x60000000, /* unspecified unspecified unspecified */
+ DT_HIOS = 0x6ffff000, /* unspecified unspecified unspecified */
+ DT_LOPROC = 0x70000000, /* unspecified unspecified unspecified */
+ DT_HIPROC = 0x7fffffff /* unspecified unspecified unspecified */
+};
+
+
+/*---------------------------------------------------------------------------*/
+/* DT_FLAGS values. */
+/*---------------------------------------------------------------------------*/
+enum
+{
+ DF_ORIGIN = 0x01, /* loaded object may reference $ORIGIN substitution string */
+ DF_SYMBOLIC = 0x02, /* changes dynamic linker symbol resolution */
+ DF_TEXTREL = 0x04, /* do not allow relocation of non-writable segments */
+ DF_BIND_NOW = 0x08, /* don't use lazy binding */
+ DF_STATIC_TLS = 0x10, /* do not load this file dynamically */
+ DF_DIRECT_DEPENDENT = 0x20, /* limit global symbol lookup to dependent list */
+ DF_WORLD = 0x40 /* Linux style global symbol lookup, breadth-first */
+};
+
+
+/*---------------------------------------------------------------------------*/
+/* Dynamic Tag Database. */
+/*---------------------------------------------------------------------------*/
+
+/* Specifiers for which d_un union member to use */
+
+enum
+{
+ EDYN_UNTYPE_IGNORED, /* 0: d_un is ignored for this tag */
+ EDYN_UNTYPE_VAL, /* 1: use d_un.d_val */
+ EDYN_UNTYPE_PTR, /* 2: use d_un.d_ptr */
+ EDYN_UNTYPE_UNSPECIFIED /* 3: d_un interpretation unspecified */
+};
+
+
+/* Specifiers for executable/shared object file requirements */
+
+enum
+{
+ EDYN_TAGREQ_IGNORED, /* 0: tag is ignored for this file type */
+ EDYN_TAGREQ_MANDATORY, /* 1: tag is mandatory for this file type */
+ EDYN_TAGREQ_OPTIONAL, /* 2: tag is optional for this file type */
+ EDYN_TAGREQ_UNSPECIFIED /* 3: requirement is unspecified */
+};
+
+
+/* Data structure for one dynamic tag database entry */
+
+struct EDYN_TAG
+{
+ const char* d_tag_name; /* tag name string */
+ Elf32_Sword d_tag_value; /* DT_* tag value */
+ Elf32_Word d_untype; /* which d_un union member to use (EDYN_UNTYPE_*) */
+ Elf32_Word d_exec_req; /* requirement for executable files (EDYN_TAGREQ_*) */
+ Elf32_Word d_shared_req; /* requirement for shared object files (EDYN_TAGREQ_*) */
+};
+
+extern const struct EDYN_TAG EDYN_TAG_DB[]; /* dynamic tag table; declared here, defined elsewhere */
+
+/*****************************************************************************/
+/* Special Section Database */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Special Section Names */
+/*---------------------------------------------------------------------------*/
+#define ESCN_BSS_name ".bss"
+#define ESCN_COMMENT_name ".comment"
+#define ESCN_DATA1_name ".data1"
+#define ESCN_DATA_name ".data"
+#define ESCN_DEBUG_name ".debug"
+#define ESCN_DYNAMIC_name ".dynamic"
+#define ESCN_DYNSTR_name ".dynstr"
+#define ESCN_DYNSYM_name ".dynsym"
+#define ESCN_FINI_ARRAY_name ".fini_array"
+#define ESCN_FINI_name ".fini"
+#define ESCN_GOT_name ".got"
+#define ESCN_HASH_name ".hash"
+#define ESCN_INIT_ARRAY_name ".init_array"
+#define ESCN_INIT_name ".init"
+#define ESCN_INTERP_name ".interp"
+#define ESCN_LINE_name ".line"
+#define ESCN_NOTE_name ".note"
+#define ESCN_PLT_name ".plt"
+#define ESCN_PREINIT_ARRAY_name ".preinit_array"
+#define ESCN_RELA_name ".rela"
+#define ESCN_REL_name ".rel"
+#define ESCN_RODATA1_name ".rodata1"
+#define ESCN_RODATA_name ".rodata"
+#define ESCN_SHSTRTAB_name ".shstrtab"
+#define ESCN_STRTAB_name ".strtab"
+#define ESCN_SYMTAB_SHNDX_name ".symtab_shndx"
+#define ESCN_SYMTAB_name ".symtab"
+#define ESCN_TBSS_name ".tbss"
+#define ESCN_TDATA1_name ".tdata1"
+#define ESCN_TDATA_name ".tdata"
+#define ESCN_TEXT_name ".text"
+#define ESCN_ATTRIBUTES_name "__TI_build_attributes" /* "__TI_"-prefixed name, unlike the dot-names above */
+#define ESCN_ICODE_name "__TI_ICODE" /* "__TI_"-prefixed name */
+#define ESCN_XREF_name "__TI_XREF" /* "__TI_"-prefixed name */
+
+/*---------------------------------------------------------------------------*/
+/* Special Section Information Data Structure. */
+/*---------------------------------------------------------------------------*/
+struct ESCN
+{
+ const char *name; /* section name (an ESCN_*_name string); NULL in the terminating entry */
+ Elf32_Word sh_type; /* expected section type (SHT_*) */
+ Elf32_Word sh_entsize; /* expected entry size, 0 when entries are not fixed-size */
+ Elf32_Word sh_flags; /* expected section flags (SHF_* bits) */
+};
+
+extern const struct ESCN ESCN_DB[]; /* special-section table; ends with an entry whose name is NULL */
+
+#endif /* ELF32_H */
diff --git a/src/core/dsp/ocl_load/DLOAD/relocate.h b/src/core/dsp/ocl_load/DLOAD/relocate.h
new file mode 100644
index 0000000..ee21aa9
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/relocate.h
@@ -0,0 +1,64 @@
+/*
+* relocate.h
+*
+* Declare names and IDs of all C6x-specific relocation types supported
+* in the dynamic loader.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef RELOCATE_H
+#define RELOCATE_H
+
+#include <inttypes.h>
+#include "elf32.h"
+#include "dload.h"
+#include "dload_api.h"
+
+/*---------------------------------------------------------------------------*/
+/* Declare some globals that are used for internal debugging and profiling. */
+/*---------------------------------------------------------------------------*/
+#if LOADER_DEBUG || LOADER_PROFILE
+#include <time.h>
+extern int DLREL_relocations;
+extern time_t DLREL_total_reloc_time;
+#endif
+
+
+/*---------------------------------------------------------------------------*/
+/* Landing point for core loader's relocation processor. */
+/*---------------------------------------------------------------------------*/
+void DLREL_relocate(DLOAD_HANDLE handle, LOADER_FILE_DESC *fd,
+ DLIMP_Dynamic_Module *dyn_module);
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/symtab.h b/src/core/dsp/ocl_load/DLOAD/symtab.h
new file mode 100644
index 0000000..1f06584
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/symtab.h
@@ -0,0 +1,72 @@
+/*
+* symtab.h
+*
+* Specification of functions used by the core loader to create, maintain,
+* and destroy internal symbol tables.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef SYMTAB_H
+#define SYMTAB_H
+
+#include "ArrayList.h"
+#include "dload.h"
+
+/*****************************************************************************/
+/* This is the top-level application file handle. It should only be needed */
+/* under the Linux and DSBT models. */
+/*****************************************************************************/
+extern int32_t DLIMP_application_handle;
+
+/*---------------------------------------------------------------------------*/
+/* Core Loader Symbol Table Management Functions */
+/*---------------------------------------------------------------------------*/
+BOOL DLSYM_canonical_lookup(DLOAD_HANDLE handle,
+ int32_t sym_index,
+ DLIMP_Dynamic_Module *dyn_module,
+ Elf32_Addr *sym_value);
+
+BOOL DLSYM_global_lookup(DLOAD_HANDLE handle,
+ const char *sym_name,
+ DLIMP_Loaded_Module *pentry,
+ Elf32_Addr *sym_value);
+
+BOOL DLSYM_lookup_local_symtab(const char *sym_name,
+ struct Elf32_Sym *symtab,
+ Elf32_Word symnum,
+ Elf32_Addr *sym_value);
+
+void DLSYM_copy_globals(DLIMP_Dynamic_Module *dyn_module);
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/util.h b/src/core/dsp/ocl_load/DLOAD/util.h
new file mode 100644
index 0000000..24c5b3f
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/util.h
@@ -0,0 +1,89 @@
+/*
+* util.h
+*
+* Definition of some useful string comparison routines (not
+* provided on all platforms) and a few generic macros.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <ctype.h>
+
+#if !defined(__linux)
+
+/*****************************************************************************/
+/* STRCASECMP() - Case-insensitive strcmp. */
+/* */
+/* Compares s1 and s2 byte by byte after folding each byte with tolower(). */
+/* Returns zero when the strings match, otherwise the difference between */
+/* the first pair of folded bytes that differ (negative if s1 sorts first). */
+/* */
+/* Bytes are read as unsigned char: tolower() is only defined for values */
+/* representable as unsigned char (or EOF), so a plain, possibly signed, */
+/* char must not be passed to it directly. */
+/*****************************************************************************/
+static int strcasecmp(const char* s1, const char* s2)
+{
+   unsigned char c1, c2;
+
+   do
+   {
+      c1 = (unsigned char)*s1++;
+      c2 = (unsigned char)*s2++;
+   }
+   while (c1 && c2 && (tolower(c1) == tolower(c2)));
+
+   return tolower(c1) - tolower(c2);
+}
+
+/*****************************************************************************/
+/* STRNCASECMP() - Case-insensitive strncmp. */
+/* */
+/* Compares at most n bytes of s1 and s2 after folding each byte with */
+/* tolower(). Returns zero when n is zero or the compared prefixes match, */
+/* otherwise the difference between the first pair of folded bytes that */
+/* differ (negative if s1 sorts first). */
+/* */
+/* Bytes are read as unsigned char: tolower() is only defined for values */
+/* representable as unsigned char (or EOF), so a plain, possibly signed, */
+/* char must not be passed to it directly. */
+/*****************************************************************************/
+static int strncasecmp(const char* s1, const char* s2, size_t n)
+{
+   unsigned char c1, c2;
+
+   /* Nothing to compare: zero-length prefixes are always equal. */
+   if (!n) return 0;
+
+   do
+   {
+      c1 = (unsigned char)*s1++;
+      c2 = (unsigned char)*s2++;
+   }
+   while (--n && c1 && c2 && (tolower(c1) == tolower(c2)));
+
+   return tolower(c1) - tolower(c2);
+}
+
+#endif
+
+/*****************************************************************************/
+/* Define MIN and MAX macros. */
+/*****************************************************************************/
+#define MIN(x,y) (((x) > (y)) ? (y) : (x)) /* the selected argument is evaluated a second time: pass no side effects */
+#define MAX(x,y) (((x) >= (y)) ? (x) : (y)) /* the selected argument is evaluated a second time: pass no side effects */
+
+/*****************************************************************************/
+/* C implementation of 'bool' type. */
+/*****************************************************************************/
+typedef int BOOL;
+#define TRUE 1
+#define FALSE 0
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/version.h b/src/core/dsp/ocl_load/DLOAD/version.h
new file mode 100644
index 0000000..e36d1a9
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/version.h
@@ -0,0 +1,63 @@
+/*
+* version.h
+*
+* Dynamic Loader source version identification.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef _VERSION_H_
+#define _VERSION_H_
+
+/*****************************************************************************/
+/* VERSION NUMBER COMPONENTS - ALWAYS INCREASING!! */
+/* Initial version ID is 1.0.0. Successive version ID's will be incremented */
+/* by automated processes during release port. */
+/*****************************************************************************/
+/* NOTE(review): DLOAD_API/api_version_change.log lists a 2.0.0 release; */
+/* confirm whether these components should still read 1.0.0. */
+#define VERSION_MAJOR 1
+#define VERSION_MINOR 0
+#define VERSION_PATCH 0
+
+/******************************************************************************/
+/* Macros used to convert version macros into strings. */
+/* MKMSTR expands its argument first, then MKCSTR stringizes the result. */
+/******************************************************************************/
+#define MKCSTR(_str) #_str
+#define MKMSTR(_str) MKCSTR(_str)
+
+/******************************************************************************/
+/* VERSION string construction macros. */
+/* Keep a space between a string literal and the macro that follows it: */
+/* C++11 and later lex "..."NAME as a literal with a user-defined suffix. */
+/******************************************************************************/
+#define VERSTR MKMSTR(VERSION_MAJOR) "." MKMSTR(VERSION_MINOR) "." MKMSTR(VERSION_PATCH)
+#define VERSION "Texas Instruments Dynamic Loader API/Core v" VERSTR
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD/virtual_targets.h b/src/core/dsp/ocl_load/DLOAD/virtual_targets.h
new file mode 100644
index 0000000..1d44b4d
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD/virtual_targets.h
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "dload.h"
+#include "elf32.h"
+
+#ifdef C60_TARGET
+#include "c60_dynamic.h"
+#include "c60_reloc.h"
+#endif
+
+#ifdef ARM_TARGET
+#include "arm_dynamic.h"
+#include "arm_reloc.h"
+#endif
+
+/*****************************************************************************/
+/* Define a virtual target class to give access to target specific functions */
+/*****************************************************************************/
+typedef struct vtarget
+{
+ int machine_id; /* target machine identifier; vt_arr stores EM_TI_C6000, EM_ARM or EM_NONE here */
+
+ BOOL (*relocate_dynamic_tag_info)(DLIMP_Dynamic_Module *dyn_module, int i); /* target hook; vt_arr binds DLDYN_<target>_relocate_dynamic_tag_info */
+ BOOL (*process_eiosabi)(DLIMP_Dynamic_Module* dyn_module); /* target hook; vt_arr binds DLDYN_<target>_process_eiosabi */
+ BOOL (*process_dynamic_tag)(DLIMP_Dynamic_Module *dyn_module, int i); /* target hook; vt_arr binds DLDYN_<target>_process_dynamic_tag */
+ void (*relocate)(DLOAD_HANDLE handle, LOADER_FILE_DESC *elf_file,
+ DLIMP_Dynamic_Module *dyn_module); /* target hook; vt_arr binds DLREL_<target>_relocate */
+
+} VIRTUAL_TARGET;
+
+
+
+/*****************************************************************************/
+/* Populate this for each target supported. */
+/*****************************************************************************/
+VIRTUAL_TARGET vt_arr[] = { /* NOTE(review): this header defines the array (no extern, no include guard visible); including it from more than one source file would define vt_arr twice - confirm there is a single includer */
+
+#ifdef C60_TARGET
+ {
+ EM_TI_C6000, /* machine_id */
+ DLDYN_c60_relocate_dynamic_tag_info, /* relocate_dynamic_tag_info */
+ DLDYN_c60_process_eiosabi, /* process_eiosabi */
+ DLDYN_c60_process_dynamic_tag, /* process_dynamic_tag */
+ DLREL_c60_relocate /* relocate */
+ },
+#endif
+#ifdef ARM_TARGET
+ {
+ EM_ARM, /* machine_id */
+ DLDYN_arm_relocate_dynamic_tag_info, /* relocate_dynamic_tag_info */
+ DLDYN_arm_process_eiosabi, /* process_eiosabi */
+ DLDYN_arm_process_dynamic_tag, /* process_dynamic_tag */
+ DLREL_arm_relocate /* relocate */
+ },
+#endif
+ {
+ EM_NONE, /* always-present last entry with every hook null; presumably the end-of-table marker - confirm in the code that scans vt_arr */
+ 0,
+ 0,
+ 0,
+ 0
+ }
+};
+
+
diff --git a/src/core/dsp/ocl_load/DLOAD_API/api_version_change.log b/src/core/dsp/ocl_load/DLOAD_API/api_version_change.log
new file mode 100644
index 0000000..689cfe6
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD_API/api_version_change.log
@@ -0,0 +1,33 @@
+
+ Dynamic Loader API and Loader Core - Version Number Change Log
+ ==============================================================
+
+ Version Number Description
+ --------------------------
+
+ The version number associated with the Dynamic Loader API and the Loader Core
+ sources has three components:
+
+ <major version>.<minor version>.<patch version>
+
+ major version - is incremented if there is a change to the API that creates a
+ compatibility discontinuity.
+
+ minor version - is incremented if functionality is added to the API without
+ causing a compatibility discontinuity.
+
+ patch version - is incremented if a defect has been repaired, a performance
+ enhancement has been added, or the source code has been
+ refactored in some way. There should not be a compatibility
+ discontinuity created by an increment to the patch version.
+
+ Version Number Change Log
+ -------------------------
+
+ 1.0.0 - 17 July 2009 - Initial release of dynamic loader API and loader
+ core sources.
+
+ 2.0.0 - 1 Feb 2013 - Add client handle to several DLIF functions.
+ - Add DLIF_exit() for loader abort.
+
+
diff --git a/src/core/dsp/ocl_load/DLOAD_API/dload_api.h b/src/core/dsp/ocl_load/DLOAD_API/dload_api.h
new file mode 100644
index 0000000..95de10f
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD_API/dload_api.h
@@ -0,0 +1,700 @@
+/*
+* dload_api.h
+*
+* Dynamic Loader API Specification
+* --------------------------------
+*
+* Client-side of API is assumed to be platform dependent, but object file
+* format independent.
+*
+* Core Loader side of API is assumed to be platform independent, but
+* object file format dependent and target dependent.
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef DLOAD_API_H
+#define DLOAD_API_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include "util.h"
+
+extern int debugging_on;
+
+/*****************************************************************************/
+/* Specification of Loader File Descriptor. If client side of the loader */
+/* supports virtual memory, this may need to be updated to facilitate the */
+/* use of mmap(). */
+/*****************************************************************************/
+typedef FILE LOADER_FILE_DESC;
+
+static const int LOADER_SEEK_SET = SEEK_SET;
+static const int LOADER_SEEK_CUR = SEEK_CUR;
+static const int LOADER_SEEK_END = SEEK_END;
+
+/*****************************************************************************/
+/* TARGET_ADDRESS - type suitable for storing target memory address values. */
+/*****************************************************************************/
+typedef uint32_t TARGET_ADDRESS;
+
+/*****************************************************************************/
+/* Define DLOAD Object Handle */
+/*****************************************************************************/
+typedef void * DLOAD_HANDLE;
+
+/*****************************************************************************/
+/* Core Loader Provided API Functions (Core Loader Entry Points) */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_version() */
+/* */
+/* Return a string constant representation for the version ID of the */
+/* dynamic loader's core loader source code. */
+/* */
+/*---------------------------------------------------------------------------*/
+#include "version.h"
+#define DLOAD_version() VERSION
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_create() */
+/* */
+/* Construct and initialize the dynamic loader core's handle. */
+/* */
+/*---------------------------------------------------------------------------*/
+DLOAD_HANDLE DLOAD_create(void * client_handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_destroy() */
+/* */
+/* Destroy and finalize the dynamic loader core's handle. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLOAD_destroy(DLOAD_HANDLE handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_initialize() */
+/* */
+/* Construct and initialize data structures internal to the dynamic */
+/* loader core. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLOAD_initialize(DLOAD_HANDLE handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_finalize() */
+/* */
+/* Destroy and finalize data structures internal to the dynamic */
+/* loader core. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLOAD_finalize(DLOAD_HANDLE handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_load_symbols() */
+/* */
+/* Load externally visible symbols from the specified file so that they */
+/* can be linked against when another object file is subsequently loaded. */
+/* External symbols will be made available for global symbol linkage. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_load_symbols(DLOAD_HANDLE handle, LOADER_FILE_DESC* fp);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_load() */
+/* */
+/* Dynamically load the specified file and return a file handle for the */
+/* loaded file. If the load fails, this function will return a value */
+/* zero (0). */
+/* */
+/* The core loader must have read access to the file pointed by fp. */
+/* */
+/*---------------------------------------------------------------------------*/
+int DLOAD_load(DLOAD_HANDLE handle, LOADER_FILE_DESC* fp);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_unload() */
+/* */
+/* Given a file handle ID, unload all object segments associated with */
+/* the identified file and any of its dependents that are not still in */
+/* use. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_unload(DLOAD_HANDLE handle, uint32_t pseudopid);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_get_entry_names_info() */
+/* */
+/* Given a file handle, return the number of entry points that are */
+/* available in the specified file as well as the max name length. This */
+/* can then be used by the client to allocate the appropriate amount of */
+/* memory needed to call DLOAD_get_entry_names() */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_get_entry_names_info(DLOAD_HANDLE handle, uint32_t file_handle,
+ int32_t *entry_pt_cnt,
+ int32_t *entry_pt_max_name_len);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_get_entry_names() */
+/* */
+/* Given a file handle, build a list of entry point names that are */
+/* available in the specified file. This can be used when querying */
+/* the list of global functions available in a shared library. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_get_entry_names(DLOAD_HANDLE handle, uint32_t file_handle,
+ int32_t* entry_pt_cnt, char*** entry_pt_names);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_query_symbol() */
+/* */
+/* Query the value of a symbol that is defined by an object file that */
+/* has previously been loaded. Boolean return value will be false if */
+/* the symbol is not found. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_query_symbol(DLOAD_HANDLE handle, uint32_t file_handle,
+ const char *sym_name, TARGET_ADDRESS *sym_val);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_get_entry_point() */
+/* */
+/* Given a file handle, return the entry point target address associated */
+/* with that object file. The entry point address value is written to */
+/* *sym_val. The return value of the function indicates whether the */
+/* file with the specified handle was found or not. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_get_entry_point(DLOAD_HANDLE handle, uint32_t file_handle,
+ TARGET_ADDRESS *sym_val);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_load_arguments() */
+/* */
+/* Given a file handle, find the object file associated with that handle */
+/* and copy the argc/argv information from the client into that object */
+/* file's .args section. The return value indicates whether the operation */
+/* was successful. If there are no loaded object files which match the */
+/* handle or if there is insufficient space in the .args section to hold */
+/* the specified argc/argv information, the function will return false. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_load_arguments(DLOAD_HANDLE handle, uint32_t file_handle,
+ int argc, char** argv);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_prepare_for_execution() */
+/* */
+/* Given a file handle, prepare for execution : */
+/* - Return entry point associated with that module in the *sym_val */
+/* output parameter. */
+/* - Write out the given arguments to the .args section contained in the */
+/* same module. */
+/* - As a test (for the Reference implementation) read the arguments */
+/* using the DLIF_read_arguments() function and set global argc,argv. */
+/* */
+/* The return value of the function indicates whether the file with the */
+/* specified handle was found or not. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_prepare_for_execution(DLOAD_HANDLE handle, uint32_t file_handle,
+ TARGET_ADDRESS *sym_val,
+ int argc, char** argv);
+
+
+/*****************************************************************************/
+/* Client Provided API Functions */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* File I/O */
+/* */
+/* The client side of the dynamic loader must provide basic file I/O */
+/* capabilities so that the core loader has random access into any */
+/* object file that it is asked to load. */
+/* */
+/* The client side of the dynamic loader must provide a definition of */
+/* the LOADER_FILE_DESC in dload_filedefs.h. This allows the core loader */
+/* to be independent of how the client accesses raw data in an object */
+/* file. */
+/* */
+/*---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_fseek() */
+/* */
+/* Seek to a position in a file (accessed via 'stream') based on the */
+/* values for offset and origin. */
+/* */
+/*---------------------------------------------------------------------------*/
+int DLIF_fseek(LOADER_FILE_DESC *stream, int32_t offset, int origin);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_ftell() */
+/* */
+/* Return the current file position in the file identified in the */
+/* LOADER_FILE_DESC pointed to by 'stream'. */
+/* */
+/*---------------------------------------------------------------------------*/
+int32_t DLIF_ftell(LOADER_FILE_DESC *stream);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_fread() */
+/* */
+/* Read 'size' * 'nmemb' bytes of data from the file identified in the */
+/* LOADER_FILE_DESC object pointed to by 'stream', and write that data */
+/* into the memory accessed via 'ptr'. */
+/* */
+/*---------------------------------------------------------------------------*/
+size_t DLIF_fread(void *ptr, size_t size, size_t nmemb,
+ LOADER_FILE_DESC *stream);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_fclose() */
+/* */
+/* Close a file that was opened on behalf of the core loader. Ownership */
+/* of the file pointer in question belongs to the core loader, but the */
+/* client has exclusive access to the file system. */
+/* */
+/*---------------------------------------------------------------------------*/
+int DLIF_fclose(LOADER_FILE_DESC *fd);
+
+/*---------------------------------------------------------------------------*/
+/* Host Memory Management */
+/* */
+/* Allocate and free host memory as needed for the dynamic loader's */
+/* internal data structures. If the dynamic loader resides on the */
+/* target architecture, then this memory is allocated from a target */
+/* memory heap that must be managed separately from memory that is */
+/* allocated for a dynamically loaded object file. */
+/* */
+/*---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_malloc() */
+/* */
+/* Allocate 'size' bytes of memory space that is usable as scratch space */
+/* (appropriate for the loader's internal data structures) by the dynamic */
+/* loader. */
+/* */
+/* If allocation fails, this function must not return. */
+/* */
+/*---------------------------------------------------------------------------*/
+void* DLIF_malloc(size_t size);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_free() */
+/* */
+/* Free memory space that was previously allocated by DLIF_malloc(). */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_free(void* ptr);
+
+/*---------------------------------------------------------------------------*/
+/* Target Memory Allocator Interface */
+/* */
+/* The client side of the dynamic loader must create and maintain an */
+/* infrastructure to manage target memory. The client must keep track */
+/* of what target memory is associated with each object segment, */
+/* allocating target memory for newly loaded objects and release target */
+/* memory that is associated with objects that are being unloaded from */
+/* the target architecture. */
+/* */
+/* The two client-supplied functions, DLIF_allocate() and DLIF_release(), */
+/* are used by the core loader to interface into the client side's */
+/* target memory allocator infrastructure. */
+/* */
+/*---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_SEGMENT_FLAGS - segment characteristics. */
+/*---------------------------------------------------------------------------*/
+typedef uint32_t DLOAD_SEGMENT_FLAGS;
+static const int DLOAD_SF_executable = 0x1; /* Memory must be executable */
+static const int DLOAD_SF_relocatable = 0x2; /* Segment must be relocatable */
+static const int DLOAD_SF_writable = 0x4; /* Memory must be writable */
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_MEMORY_SEGMENT - Define structure to represent placement and size */
+/* details of a segment to be loaded. */
+/*---------------------------------------------------------------------------*/
+struct DLOAD_MEMORY_SEGMENT
+{
+ uint32_t target_page; /* memory page: requested by the loader, filled in with the allocated page */
+ TARGET_ADDRESS target_address; /* target address: requested by the loader, filled in with the allocated address */
+ uint32_t objsz_in_bytes; /* size in bytes of the initialized part of the segment */
+ uint32_t memsz_in_bytes; /* size in bytes of the memory block for the segment */
+/* The former 'DLOAD_SEGMENT_FLAGS flags' member is disabled here; allocation request flags are a member of struct DLOAD_MEMORY_REQUEST. */
+};
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_MEMORY_REQUEST - Define structure to represent a target memory */
+/* request made by the core loader on behalf of a segment that the */
+/* loader needs to relocate and write into target memory. */
+/*---------------------------------------------------------------------------*/
+struct DLOAD_MEMORY_REQUEST
+{
+ LOADER_FILE_DESC *fp; /* file being loaded */
+ struct DLOAD_MEMORY_SEGMENT *segment; /* segment object carrying the requested and returned allocation details */
+ void *host_address; /* host pointer returned from DLIF_copy() */
+ BOOL is_loaded; /* returned as true if segment */
+ /* is already in target memory */
+ uint32_t offset; /* file offset of segment's */
+ /* raw data */
+ uint32_t flip_endian; /* target endianness is the opposite of the host's */
+ DLOAD_SEGMENT_FLAGS flags; /* allocation request flags (DLOAD_SF_* values) */
+ uint32_t align; /* alignment of the target memory block */
+};
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_initMem() */
+/* */
+/* Given an address and size, initialize the memory used to load the */
+/* dynamic segments. This should be called by the client before */
+/* beginning dynamic loading. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_initMem(void* client_handle, uint32_t dynMemAddr, uint32_t size);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_deinitMem() */
+/* */
+/* De-initialize the memory used to load the dynamic segments. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_deinitMem(void* client_handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_allocate() */
+/* */
+/* Given a DLOAD_MEMORY_REQUEST created by the core loader, allocate */
+/* target memory to fulfill the request using the target memory */
+/* management infrastructure on the client side of the dynamic loader. */
+/* The contents of the DLOAD_MEMORY_REQUEST will be updated per the */
+/* details of a successful allocation. The allocated page and address */
+/* can be found in the DLOAD_MEMORY_SEGMENT attached to the request. */
+/* The boolean return value reflects whether the allocation was */
+/* successful or not. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_allocate(void* client_handle, struct DLOAD_MEMORY_REQUEST *req);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_release() */
+/* */
+/* Given a DLOAD_MEMORY_SEGMENT description, free the target memory */
+/* associated with the segment using the target memory management */
+/* infrastructure on the client side of the dynamic loader. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_release(void* client_handle, struct DLOAD_MEMORY_SEGMENT* ptr);
+
+/*---------------------------------------------------------------------------*/
+/* Target Memory Access / Write Services */
+/* */
+/* The client side's target memory allocator infrastructure communicates */
+/* with the core loader through the DLOAD_MEMORY_REQUEST and */
+/* DLOAD_MEMORY_SEGMENT data structures defined above. To complete the */
+/* loading of an object segment, the segment may need to be relocated */
+/* before it is actually written to target memory in the space that was */
+/* allocated for it by DLIF_allocate(). */
+/* */
+/* The client side of the dynamic loader provides two functions to help */
+/* complete the process of loading an object segment, DLIF_copy() and */
+/* DLIF_write(). */
+/* */
+/* These functions help to make the core loader truly independent of */
+/* whether it is running on the host or target architecture and how the */
+/* client provides for reading/writing from/to target memory. */
+/* */
+/*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+/* DLIF_copy() */
+/* */
+/* Copy segment data from the object file described in the 'fp' and */
+/* 'offset' of the DLOAD_MEMORY_REQUEST into host accessible memory so */
+/* that it can be relocated or otherwise manipulated by the core loader. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_copy(void* client_handle, struct DLOAD_MEMORY_REQUEST* req);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_write() */
+/* */
+/* Once the segment data described in the DLOAD_MEMORY_REQUEST is ready */
+/* (relocated, if needed), write the segment contents to the target */
+/* memory identified in the DLOAD_MEMORY_SEGMENT attached to the request. */
+/* */
+/* After the segment contents have been written to target memory, the */
+/* core loader should discard the DLOAD_MEMORY_REQUEST object, but retain */
+/* the DLOAD_MEMORY_SEGMENT object so that the target memory associated */
+/* with the segment can be released when the segment is unloaded. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_write(void* client_handle, struct DLOAD_MEMORY_REQUEST* req);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_read() */
+/* */
+/* Given a host accessible buffer, read content of indicated target */
+/* memory address into the buffer. */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_read(void* client_handle,
+ void *ptr, size_t size, size_t nmemb, TARGET_ADDRESS src);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_memcpy() */
+/* */
+/* Given a host accessible buffer, copy content from specified buffer */
+/* into target memory. */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_memcpy(void* client_handle, void *to, void *from, size_t size);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_execute() */
+/* */
+/* Start execution on the target architecture from given 'exec_addr'. */
+/* If the dynamic loader is running on the target architecture, this can */
+/* be effected as a simple function call. */
+/* */
+/*---------------------------------------------------------------------------*/
+int32_t DLIF_execute(void* client_handle, TARGET_ADDRESS exec_addr);
+
+/*---------------------------------------------------------------------------*/
+/* Loading and Unloading of Dependent Files */
+/* */
+/* The dynamic loader core loader must coordinate loading and unloading */
+/* dependent object files with the client side of the dynamic loader. */
+/* This allows the client to keep its bookkeeping information up to date */
+/* with what is currently loaded on the target architecture. */
+/* */
+/* For instance, the client may need to interact with a file system or */
+/* registry. The client may also need to update debug information in */
+/* synch with the loading and unloading of shared objects. */
+/* */
+/*---------------------------------------------------------------------------*/
+/*---------------------------------------------------------------------------*/
+/* DLIF_load_dependent() */
+/* */
+/* Ask client to find and open a dependent file identified by the */
+/* 'so_name' parameter, then, if necessary, initiate a DLOAD_load() */
+/* call to actually load the shared object onto the target. A */
+/* successful load will return a file handle ID that the client can */
+/* associate with the newly loaded file. */
+/* */
+/*---------------------------------------------------------------------------*/
+int DLIF_load_dependent(void* client_handle, const char* so_name);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_unload_dependent() */
+/* */
+/* Ask client to unload a dependent file identified by the 'file_handle' */
+/* parameter. Initiate a call to DLOAD_unload() to actually free up */
+/* the target memory that was occupied by the object file. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_unload_dependent(void* client_handle, uint32_t file_handle);
+
+/*---------------------------------------------------------------------------*/
+/* Error/Warning Registration Functions */
+/* */
+/* The client will maintain an error/warning log. This will allow the */
+/* core loader to register errors and warnings in the load during a */
+/* given dynamic load. The client is required to check the log after */
+/* each load attempt to report any problems. */
+/* */
+/*---------------------------------------------------------------------------*/
+
+
+/*---------------------------------------------------------------------------*/
+/* Loader Warning Types */
+/*---------------------------------------------------------------------------*/
+typedef enum {
+ DLWT_MISC = 0, /* Miscellaneous warning */
+ DLWT_FILE /* Warning missing/invalid file information */
+} LOADER_WARNING_TYPE;
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_warning() */
+/* */
+/* Log a warning message with the client's error/warning handling */
+/* infrastructure. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_warning(LOADER_WARNING_TYPE wtype, const char *fmt, ...);
+
+/*---------------------------------------------------------------------------*/
+/* Loader Error Types */
+/*---------------------------------------------------------------------------*/
+typedef enum {
+ DLET_MISC = 0, /* Miscellaneous error */
+ DLET_FILE, /* Error reading/processing file */
+ DLET_SYMBOL, /* Symbol resolution error */
+ DLET_RELOC, /* Relocation error */
+ DLET_MEMORY, /* Host memory allocation/free error */
+ DLET_TRGMEM, /* Target memory allocation/free error */
+ DLET_DEBUG /* Shared object or DLL debug error */
+} LOADER_ERROR_TYPE;
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_error() */
+/* */
+/* Log an error message with the client's error/warning handling */
+/* infrastructure. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_error(LOADER_ERROR_TYPE etype, const char *fmt, ...);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_exit() */
+/* */
+/* Abort the loader following a fatal error. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_exit(int code);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_trace() */
+/* */
+/* Log a message with the client's trace handling infrastructure. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_trace(const char *fmt, ...);
+
+/*---------------------------------------------------------------------------*/
+/* Dynamic Static Base Table (DSBT) Support Functions */
+/*---------------------------------------------------------------------------*/
+#define DSBT_INDEX_INVALID (-1) /* invalid DSBT index; parenthesized so the negative literal expands safely in any expression */
+#define DSBT_DSBT_BASE_INVALID 0 /* value used for an invalid DSBT base */
+#define DSBT_STATIC_BASE_INVALID 0 /* value used for an invalid static base */
+
+/*****************************************************************************/
+/* Core Loader Side of DSBT Support */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_get_dsbt_size() */
+/* */
+/* Query the size of the DSBT associated with a specified file. The */
+/* client will check the size of a module's DSBT before it writes a copy */
+/* of the master DSBT to the module's DSBT. If the module's DSBT is not */
+/* big enough, an error will be emitted and the load will fail. */
+/* */
+/*---------------------------------------------------------------------------*/
+uint32_t DLOAD_get_dsbt_size(DLOAD_HANDLE handle, int32_t file_handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_get_dsbt_base() */
+/* */
+/* Find DSBT address for specified file. The client will query for this */
+/* address after allocation and symbol relocation has been completed. */
+/* The client will write a copy of the master DSBT to the returned DSBT */
+/* address if the module's DSBT size is big enough. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_get_dsbt_base(DLOAD_HANDLE handle, int32_t file_handle,
+ TARGET_ADDRESS *dsbt_base);
+
+/*---------------------------------------------------------------------------*/
+/* DLOAD_get_static_base() */
+/* */
+/* Find static base for a specified file. The client will query for this */
+/* address after allocation and symbol relocation has been completed. */
+/* The client will use the returned static base value to fill the slot */
+/* in the master DSBT that is associated with this module. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLOAD_get_static_base(DLOAD_HANDLE handle, int32_t file_handle,
+ TARGET_ADDRESS *static_base);
+
+
+/*****************************************************************************/
+/* Client Side of DSBT Support */
+/*****************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_register_dsbt_index_request() */
+/* */
+/* Register a request for a DSBT index with the client. A module can */
+/* make a specific DSBT index request or it can allow the client to */
+/* assign a DSBT index on its behalf (requested_dsbt_index == -1). The */
+/* client implementation of this function must check that a specific DSBT */
+/* index request does not conflict with a previous specific DSBT index */
+/* request. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_register_dsbt_index_request(DLOAD_HANDLE handle,
+ const char *requestor_name,
+ int32_t requestor_file_handle,
+ int32_t requested_dsbt_index);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_assign_dsbt_indices() */
+/* */
+/* Bind each module that registered a request for a DSBT index to a */
+/* specific slot in the DSBT. Specific requests for DSBT indices will be */
+/* honored first. Any general requests that remain will be assigned to */
+/* the first available slot in the DSBT. */
+/* */
+/*---------------------------------------------------------------------------*/
+void DLIF_assign_dsbt_indices(void);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_get_dsbt_index() */
+/* */
+/* Given a module that uses the DSBT model, return the identity of the */
+/* DSBT slot that was assigned to it by the client. This function can */
+/* only be called after the client has assigned DSBT indices to all */
+/* loaded object modules that use the DSBT model. The implementation of */
+/* this function will check that a proper DSBT index has been assigned to */
+/* the specified module and return an invalid index (-1) if there is a problem. */
+/* */
+/*---------------------------------------------------------------------------*/
+int32_t DLIF_get_dsbt_index(int32_t file_handle);
+
+/*---------------------------------------------------------------------------*/
+/* DLIF_update_all_dsbts() */
+/* */
+/* Populate the client's model of the master DSBT with the static base */
+/* for each assigned slot in the DSBT, then write a copy of the master */
+/* DSBT to each module's DSBT location. The implementation of this */
+/* function must check the size of each module's DSBT to make sure that */
+/* it is large enough to hold a copy of the master DSBT. The function */
+/* will return FALSE if there is a problem. */
+/* */
+/*---------------------------------------------------------------------------*/
+BOOL DLIF_update_all_dsbts(void);
+
+#endif
diff --git a/src/core/dsp/ocl_load/DLOAD_SYM/symtab.c b/src/core/dsp/ocl_load/DLOAD_SYM/symtab.c
new file mode 100644
index 0000000..fbcdbeb
--- /dev/null
+++ b/src/core/dsp/ocl_load/DLOAD_SYM/symtab.c
@@ -0,0 +1,417 @@
+/*
+* symtab.c
+*
+* Symbol table creation, maintenance, and management. This module also
+* contains implementations of local and global symbol table lookup
+* algorithms, as appropriate for the platform that we are running on
+* (assumed to be DSP Bridge or Linux model, indicated by
+* direct_dependent_only flag in a given Module).
+*
+* Copyright (C) 2009-2014 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include "elf32.h"
+#include "ArrayList.h"
+
+/*---------------------------------------------------------------------------*/
+/* Set up a Queue of Int32 type data objects. */
+/*---------------------------------------------------------------------------*/
+#include "Queue.h"
+TYPE_QUEUE_DEFINITION(int32_t, Int32)
+TYPE_QUEUE_IMPLEMENTATION(int32_t, Int32)
+
+#include "symtab.h"
+#include "dload_api.h"
+#include <string.h>
+
+/*---------------------------------------------------------------------------*/
+/* Holds the handle of the ET_EXEC-type module loaded, if any. */
+/*---------------------------------------------------------------------------*/
+int32_t DLIMP_application_handle = 0;
+
+/*---------------------------------------------------------------------------*/
+/* Function prototypes */
+/*---------------------------------------------------------------------------*/
+BOOL DLSYM_lookup_global_symtab(const char *sym_name, struct Elf32_Sym *symtab,
+ Elf32_Word symnum, Elf32_Addr *sym_value);
+
+/*****************************************************************************/
+/* DLSYM_COPY_GLOBALS() - Rebuild the loaded module's persistent global */
+/* symbol table (gsymtab/gsymnum) and global string table (gstrtab/gstrsz) */
+/* from the global portion of the dynamic module's symbol and string */
+/* tables. Any tables previously attached to the loaded module are freed */
+/* first. */
+/*****************************************************************************/
+void DLSYM_copy_globals(DLIMP_Dynamic_Module *dyn_module)
+{
+   DLIMP_Loaded_Module *module = dyn_module->loaded_module;
+   Elf32_Word first_global;
+   Elf32_Word num_globals;
+   Elf32_Word idx;
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("DLSYM_copy_globals:\n");
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* Locals precede globals in the dynamic symbol table. gsymtab_offset is */
+   /* the byte offset of the first global entry, so dividing by the entry */
+   /* size gives its index; everything from there to symnum is global. */
+   /*------------------------------------------------------------------------*/
+   first_global = dyn_module->gsymtab_offset / sizeof(struct Elf32_Sym);
+   num_globals = dyn_module->symnum - first_global;
+
+   /*------------------------------------------------------------------------*/
+   /* Drop any stale symbol table, then copy the global entries. */
+   /*------------------------------------------------------------------------*/
+   if (module->gsymtab)
+   {
+      DLIF_free(module->gsymtab);
+      module->gsymtab = NULL;
+   }
+
+   if (num_globals > 0)
+   {
+      module->gsymtab = DLIF_malloc(sizeof(struct Elf32_Sym) * num_globals);
+
+      memcpy(module->gsymtab,
+             &dyn_module->symtab[first_global],
+             sizeof(struct Elf32_Sym) * num_globals);
+   }
+   module->gsymnum = num_globals;
+
+   /*------------------------------------------------------------------------*/
+   /* Drop any stale string table, then copy the tail of the string table */
+   /* starting at gstrtab_offset. */
+   /*------------------------------------------------------------------------*/
+   if (module->gstrtab)
+   {
+      DLIF_free(module->gstrtab);
+      module->gstrtab = NULL;
+   }
+
+   module->gstrsz = dyn_module->strsz - dyn_module->gstrtab_offset;
+   if (module->gstrsz)
+   {
+      module->gstrtab = DLIF_malloc(module->gstrsz);
+
+      memcpy(module->gstrtab,
+             dyn_module->strtab + dyn_module->gstrtab_offset,
+             module->gstrsz);
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Re-point each copied entry's st_name at its name inside the new */
+   /* string table. st_name holds a full address rather than a table */
+   /* offset so that it can be read directly as a char *. */
+   /*------------------------------------------------------------------------*/
+   for (idx = 0; idx < num_globals; idx++)
+   {
+      struct Elf32_Sym *src = &dyn_module->symtab[first_global + idx];
+      struct Elf32_Sym *dst = &((struct Elf32_Sym*)(module->gsymtab))[idx];
+
+      /* Offset of the name within the original (full) string table. */
+      Elf32_Word full_offset = src->st_name - (Elf32_Addr) dyn_module->strtab;
+
+      /* Same name, measured from the start of the copied global part. */
+      Elf32_Word global_offset = full_offset - dyn_module->gstrtab_offset;
+
+      dst->st_name = global_offset + (Elf32_Addr)module->gstrtab;
+
+#if LOADER_DEBUG
+      if (debugging_on) DLIF_trace("Copying symbol: %s\n",
+                                   (char*)src->st_name);
+#endif
+   }
+}
+
+/*****************************************************************************/
+/* BREADTH_FIRST_LOOKUP() - Perform a breadth-first search of the Module */
+/* dependency graph to find specified symbol name (sym_name). */
+/* */
+/* phandle - loader instance; its DLIMP_loaded_objects list is searched. */
+/* sym_name - name of the symbol to resolve. */
+/* handle - file handle of the Module where the search starts. */
+/* sym_value - passed through to DLSYM_lookup_global_symtab(), which */
+/* stores the symbol's value there when it is non-NULL. */
+/* */
+/* Returns TRUE if one of the visited Modules defines the symbol, FALSE */
+/* otherwise. A file handle that is not on the loaded-object list is */
+/* skipped. */
+/* */
+/* NOTE(review): no visited set is kept, so a cyclic dependency graph */
+/* would keep re-queueing the same handles - confirm cycles cannot occur. */
+/*****************************************************************************/
+static BOOL breadth_first_lookup(DLOAD_HANDLE phandle,
+                                 const char* sym_name,
+                                 int handle,
+                                 Elf32_Addr *sym_value)
+{
+   LOADER_OBJECT *dHandle = (LOADER_OBJECT *)phandle;
+   BOOL found = FALSE;
+
+   /*------------------------------------------------------------------------*/
+   /* We start this function by putting the specified file handle on the */
+   /* file_handle_queue. */
+   /*------------------------------------------------------------------------*/
+   Int32_Queue file_handle_queue = TYPE_QUEUE_INITIALIZER;
+   Int32_enqueue(&file_handle_queue, handle);
+
+   /*------------------------------------------------------------------------*/
+   /* While the queue is not empty, keep looking for the symbol. */
+   /*------------------------------------------------------------------------*/
+   while (file_handle_queue.size)
+   {
+      int i;
+      int *dependencies;
+      loaded_module_ptr_Queue_Node *mod_node;
+
+      /*---------------------------------------------------------------------*/
+      /* Pluck off the file handle at the front of the file_handle_queue. */
+      /* We will search this file next. */
+      /*---------------------------------------------------------------------*/
+      handle = Int32_dequeue(&file_handle_queue);
+
+      /*---------------------------------------------------------------------*/
+      /* Locate the Module associated with the current file handle. The */
+      /* loaded objects form a linked list, so step through next_ptr */
+      /* (nodes are not contiguous) and stop at the end of the list. */
+      /*---------------------------------------------------------------------*/
+      mod_node = dHandle->DLIMP_loaded_objects.front_ptr;
+      while (mod_node != NULL && mod_node->value->file_handle != handle)
+         mod_node = mod_node->next_ptr;
+
+      /*---------------------------------------------------------------------*/
+      /* A handle that is not loaded has no symbol table to search. */
+      /*---------------------------------------------------------------------*/
+      if (mod_node == NULL) continue;
+
+      /*---------------------------------------------------------------------*/
+      /* Search the symbol table of the current file handle's Module. */
+      /* If the symbol was found, then we're finished. */
+      /*---------------------------------------------------------------------*/
+      if (DLSYM_lookup_global_symtab(sym_name,
+                                     mod_node->value->gsymtab,
+                                     mod_node->value->gsymnum,
+                                     sym_value))
+      {
+         found = TRUE;
+         break;
+      }
+
+      /*---------------------------------------------------------------------*/
+      /* If our symbol was not in the current Module, then add this Module's */
+      /* dependents (not those of the first loaded Module) to the end of */
+      /* the file_handle_queue. */
+      /*---------------------------------------------------------------------*/
+      dependencies = (int*)(mod_node->value->dependencies.buf);
+      for (i = 0; i < mod_node->value->dependencies.size; i++)
+         Int32_enqueue(&file_handle_queue, dependencies[i]);
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* Drain whatever is still queued so that an early exit does not leave */
+   /* queue nodes behind. */
+   /*------------------------------------------------------------------------*/
+   while (file_handle_queue.size)
+      (void)Int32_dequeue(&file_handle_queue);
+
+   return found;
+}
+
+/*****************************************************************************/
+/* DLSYM_global_lookup() - Search the global symbol table to find the */
+/* definition of the given symbol name. */
+/* */
+/* handle - loader instance whose loaded-object list is consulted. */
+/* sym_name - name of the symbol to resolve. */
+/* loaded_module - Module on whose behalf the lookup is made; its */
+/* direct_dependent_only flag selects the algorithm. */
+/* sym_value - passed through to the symbol table lookup, which stores */
+/* the symbol's value there when it is non-NULL. */
+/* */
+/* Returns TRUE if the symbol was found. Otherwise logs a DLET_SYMBOL */
+/* error and returns FALSE. */
+/*****************************************************************************/
+BOOL DLSYM_global_lookup(DLOAD_HANDLE handle,
+                         const char *sym_name,
+                         DLIMP_Loaded_Module *loaded_module,
+                         Elf32_Addr *sym_value)
+{
+   int i;
+   loaded_module_ptr_Queue_Node* node;
+   LOADER_OBJECT *dHandle = (LOADER_OBJECT *)handle;
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("DLSYM_global_lookup: %s\n", sym_name);
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* We will choose a different lookup algorithm based on what kind of */
+   /* platform we are supporting. When direct_dependent_only is set, only */
+   /* the explicit dependencies of the specified Module are searched. */
+   /*------------------------------------------------------------------------*/
+   if (loaded_module->direct_dependent_only)
+   {
+      int* child_handle = (int*)(loaded_module->dependencies.buf);
+
+      /*---------------------------------------------------------------------*/
+      /* Spin through list of this Module's dependencies (anything on its */
+      /* DT_NEEDED list), searching through each dependent's symbol table */
+      /* to find the symbol we are after. */
+      /*---------------------------------------------------------------------*/
+      for (i = 0; i < loaded_module->dependencies.size; i++)
+      {
+         /*------------------------------------------------------------------*/
+         /* Find the loaded Module for this dependency, stopping at the end */
+         /* of the list instead of running off it. */
+         /*------------------------------------------------------------------*/
+         node = dHandle->DLIMP_loaded_objects.front_ptr;
+         while (node != NULL && node->value->file_handle != child_handle[i])
+            node = node->next_ptr;
+
+         /*------------------------------------------------------------------*/
+         /* A dependency that is not on the loaded-object list cannot */
+         /* supply the symbol; move on to the next one. */
+         /*------------------------------------------------------------------*/
+         if (node == NULL) continue;
+
+         /*------------------------------------------------------------------*/
+         /* Return true if we find the symbol. */
+         /*------------------------------------------------------------------*/
+         if (DLSYM_lookup_global_symtab(sym_name,
+                                        node->value->gsymtab,
+                                        node->value->gsymnum,
+                                        sym_value))
+            return TRUE;
+      }
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* In the LINUX model, we will use a breadth-first global symbol lookup */
+   /* algorithm. First, the application's global symbol table is searched, */
+   /* followed by its children, followed by their children, and so on. */
+   /* It is up to the client of this module to set the application handle. */
+   /*------------------------------------------------------------------------*/
+   else
+   {
+      if (breadth_first_lookup(handle, sym_name, DLIMP_application_handle,
+                               sym_value))
+         return TRUE;
+   }
+
+   /*------------------------------------------------------------------------*/
+   /* If we got this far, then symbol was not found. */
+   /*------------------------------------------------------------------------*/
+   DLIF_error(DLET_SYMBOL, "Could not resolve symbol %s!\n", sym_name);
+
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLSYM_lookup_symtab() - Scan 'symnum' entries of 'symtab' for a defined */
+/* symbol named 'sym_name'. With require_local_binding set, only */
+/* STB_LOCAL symbols match; otherwise only non-local symbols match. */
+/* On a match, store st_value in *sym_value (if non-NULL) and return */
+/* TRUE. On failure, store 0 in *sym_value (if non-NULL) and return */
+/* FALSE. */
+/*****************************************************************************/
+static BOOL DLSYM_lookup_symtab(const char *sym_name, struct Elf32_Sym *symtab,
+                                Elf32_Word symnum, Elf32_Addr *sym_value,
+                                BOOL require_local_binding)
+{
+   Elf32_Addr pos;
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("DLSYM_lookup_symtab, sym to find : %s\n", sym_name);
+#endif
+
+   for (pos = 0; pos < symnum; pos++)
+   {
+      struct Elf32_Sym *candidate = &symtab[pos];
+      int is_local;
+
+#if LOADER_DEBUG
+      if (debugging_on)
+         DLIF_trace("\tPotential symbol match : %s\n",
+                    (char*)candidate->st_name);
+#endif
+
+      /* Undefined entries never satisfy a lookup. */
+      if (candidate->st_shndx == SHN_UNDEF) continue;
+
+      /* The binding class must agree with what the caller asked for. */
+      is_local = (ELF32_ST_BIND(candidate->st_info) == STB_LOCAL);
+      if (require_local_binding ? !is_local : is_local) continue;
+
+      /* st_name holds the address of the name string, not a table offset. */
+      if (strcmp(sym_name, (char*)(candidate->st_name)) != 0) continue;
+
+      if (sym_value) *sym_value = candidate->st_value;
+      return TRUE;
+   }
+
+   if (sym_value) *sym_value = 0;
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLSYM_lookup_global_symtab() - Find 'sym_name' among the non-local */
+/* symbols of the given symbol table. The */
+/* result is reported exactly as by */
+/* DLSYM_lookup_symtab(). */
+/*****************************************************************************/
+BOOL DLSYM_lookup_global_symtab(const char *sym_name, struct Elf32_Sym *symtab,
+                                Elf32_Word symnum, Elf32_Addr *sym_value)
+{
+   /* Global lookups accept any binding other than STB_LOCAL. */
+   BOOL matched = DLSYM_lookup_symtab(sym_name, symtab, symnum, sym_value,
+                                      FALSE);
+   return matched;
+}
+
+/*****************************************************************************/
+/* DLSYM_lookup_local_symtab() - Find 'sym_name' among the STB_LOCAL */
+/* symbols of the given symbol table. The */
+/* result is reported exactly as by */
+/* DLSYM_lookup_symtab(). */
+/*****************************************************************************/
+BOOL DLSYM_lookup_local_symtab(const char *sym_name, struct Elf32_Sym *symtab,
+                               Elf32_Word symnum, Elf32_Addr *sym_value)
+{
+   /* Local lookups accept only symbols whose binding is STB_LOCAL. */
+   BOOL matched = DLSYM_lookup_symtab(sym_name, symtab, symnum, sym_value,
+                                      TRUE);
+   return matched;
+}
+
+/*****************************************************************************/
+/* CANONICAL_SYMBOL_LOOKUP() - Resolve the symbol at index 'sym_index' of */
+/* the dynamic module's symbol table. */
+/* */
+/* A symbol with local binding or non-default visibility cannot be */
+/* pre-empted, so it must be defined in this module: its st_value is */
+/* stored in *sym_value (if non-NULL) and TRUE is returned, or a */
+/* DLET_SYMBOL error is logged and FALSE is returned when the entry is */
+/* not a definition. */
+/* */
+/* Any other symbol (a pre-emptable definition or an undefined */
+/* reference) is resolved through DLSYM_global_lookup(). */
+/*****************************************************************************/
+BOOL DLSYM_canonical_lookup(DLOAD_HANDLE handle, int sym_index,
+                            DLIMP_Dynamic_Module *dyn_module,
+                            Elf32_Addr *sym_value)
+{
+   /*------------------------------------------------------------------------*/
+   /* Fetch the symbol table entry and its characteristics. st_name holds */
+   /* the address of the name string, so it is read directly as a char *. */
+   /*------------------------------------------------------------------------*/
+   struct Elf32_Sym *entry = &dyn_module->symtab[sym_index];
+   const char *sym_name = (char *)entry->st_name;
+   int32_t binding = ELF32_ST_BIND(entry->st_info);
+   int32_t visibility = ELF32_ST_VISIBILITY(entry->st_other);
+   BOOL defined_here;
+
+#if LOADER_DEBUG
+   if (debugging_on)
+      DLIF_trace("DLSYM_canonical_lookup: %d, %s\n", sym_index, sym_name);
+#endif
+
+   /*------------------------------------------------------------------------*/
+   /* A non-local symbol with default visibility is either a pre-emptable */
+   /* definition or an undefined reference; both need a global lookup. */
+   /*------------------------------------------------------------------------*/
+   if (binding != STB_LOCAL && visibility == STV_DEFAULT)
+      return DLSYM_global_lookup(handle, sym_name, dyn_module->loaded_module,
+                                 sym_value);
+
+   /*------------------------------------------------------------------------*/
+   /* Everything else must be defined in this module. An entry counts as a */
+   /* definition when its section index is not SHN_UNDEF and is either */
+   /* below SHN_LORESERVE or one of SHN_ABS, SHN_COMMON or SHN_XINDEX. */
+   /*------------------------------------------------------------------------*/
+   defined_here = (entry->st_shndx != SHN_UNDEF &&
+                   (entry->st_shndx < SHN_LORESERVE ||
+                    entry->st_shndx == SHN_ABS ||
+                    entry->st_shndx == SHN_COMMON ||
+                    entry->st_shndx == SHN_XINDEX));
+
+   if (!defined_here)
+   {
+      DLIF_error(DLET_SYMBOL,
+                 "Local/non-imported symbol %s definition is not found "
+                 "in module %s!\n", sym_name, dyn_module->name);
+      return FALSE;
+   }
+
+   if (sym_value) *sym_value = entry->st_value;
+   return TRUE;
+}
+
diff --git a/src/core/dsp/ocl_load/README b/src/core/dsp/ocl_load/README
new file mode 100644
index 0000000..19165f6
--- /dev/null
+++ b/src/core/dsp/ocl_load/README
@@ -0,0 +1,8 @@
+
+This program is dependent on these Standard CVS modules
+
+C60_DLOAD_DYN:
+C60_DLOAD_REL:
+DLOAD:
+DLOAD_API:
+DLOAD_SYM:
diff --git a/src/core/dsp/ocl_load/Stack.h b/src/core/dsp/ocl_load/Stack.h
new file mode 100644
index 0000000..e958674
--- /dev/null
+++ b/src/core/dsp/ocl_load/Stack.h
@@ -0,0 +1,182 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/*
+* Stack.h
+*
+* Interface to Stack
+* ------------------
+*
+* This is an implementation of a type-independent stack implemented as
+* a singly linked list class for C. It's basically a template class, but
+* uses macros instead, so that it can be compiled with a C-only compiler.
+*
+* To define a Stack class:
+* #include "Stack.h"
+* TYPE_STACK_DEFINITION(object_type,Class_Identifier)
+*
+* In a separate C file:
+* #include "Stack.h"
+* TYPE_STACK_DEFINITION(object_type,Class_Identifier)
+* TYPE_STACK_IMPLEMENTATION(object_type,Class_Identifier)
+*
+* Now, to create a stack:
+* Class_Identifier_Stack name;
+* Get it initialized to zero everywhere somehow, maybe like this:
+* Class_Identifier_initialize_stack(&name);
+*
+* To add to the stack:
+* Class_Identifier_push(&name, object);
+*
+* To access the top of the stack:
+* Class_Identifier_Stack_Node *tos = name.top_ptr;
+* do_something_to_(tos->value);
+*
+* To delete from the stack:
+* if (name.size > 0) Class_Identifier_pop(&name);
+*
+* Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the
+* distribution.
+*
+* Neither the name of Texas Instruments Incorporated nor the names of
+* its contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#ifndef STACK_H
+#define STACK_H
+
+#include <inttypes.h>
+#include "dload_api.h"
+
+/*****************************************************************************/
+/* TYPE_STACK_DEFINITION() - Declare everything a client needs to use a */
+/* last-in, first-out linked list of objects of type t: */
+/* */
+/* <t_name>_Stack_Node - one list element (value + link to next element) */
+/* <t_name>_Stack - the stack itself (top, bottom, element count) */
+/* prototypes for <t_name>_initialize_stack(), _push() and _pop() */
+/*****************************************************************************/
+#define TYPE_STACK_DEFINITION(t, t_name) \
+typedef struct t_name##_Stack_Node_ \
+{ \
+   t value; \
+   struct t_name##_Stack_Node_ *next_ptr; \
+} t_name##_Stack_Node; \
+ \
+typedef struct \
+{ \
+   t_name##_Stack_Node *top_ptr; \
+   t_name##_Stack_Node *bottom_ptr; \
+   int size; \
+} t_name##_Stack; \
+ \
+extern void t_name##_initialize_stack(t_name##_Stack* stack); \
+extern void t_name##_push(t_name##_Stack* stack, t to_push); \
+extern t t_name##_pop(t_name##_Stack* stack);
+
+/*****************************************************************************/
+/* TYPE_STACK_INITIALIZER - Static initializer for a <t_name>_Stack object: */
+/* NULL top_ptr, NULL bottom_ptr and a size of 0, i.e. an empty stack. */
+/*****************************************************************************/
+#define TYPE_STACK_INITIALIZER {NULL, NULL, 0 }
+
+/*****************************************************************************/
+/* TYPE_STACK_IMPLEMENTATION() - Define member functions of new LIFO linked */
+/* list "class" of t_name objects. */
+/* */
+/* <t_name>_initialize_stack() - resets top/bottom to NULL and size to 0. */
+/* It does not free nodes that are still linked on the stack. */
+/* <t_name>_push() - allocates a node with DLIF_malloc() and makes it the */
+/* new top of the stack. If the allocation fails the stack is left */
+/* completely unchanged (size is not incremented), so a caller can */
+/* detect the failure by comparing size before and after the call. */
+/* <t_name>_pop() - removes the top node, releases it with DLIF_free() */
+/* and returns the value it held. The stack must not be empty; */
+/* check size > 0 before calling. */
+/*****************************************************************************/
+#define TYPE_STACK_IMPLEMENTATION(t, t_name) \
+void t_name##_initialize_stack (t_name##_Stack* stack) \
+{ \
+   stack->top_ptr = stack->bottom_ptr = NULL; \
+   stack->size = 0; \
+} \
+ \
+void t_name##_push(t_name##_Stack* stack, t to_push) \
+{ \
+   t_name##_Stack_Node* new_node = \
+      (t_name##_Stack_Node*)(DLIF_malloc(sizeof(t_name##_Stack_Node))); \
+ \
+   if (!new_node) return; \
+ \
+   new_node->value = to_push; \
+   new_node->next_ptr = stack->top_ptr; \
+ \
+   if (!stack->top_ptr) stack->bottom_ptr = new_node; \
+   stack->top_ptr = new_node; \
+   stack->size++; \
+} \
+ \
+t t_name##_pop(t_name##_Stack* stack) \
+{ \
+   t to_ret; \
+   t_name##_Stack_Node* next_ptr = stack->top_ptr->next_ptr; \
+ \
+   stack->size--; \
+   to_ret = stack->top_ptr->value; \
+   DLIF_free((void*)(stack->top_ptr)); \
+ \
+   if(!stack->size) \
+      stack->top_ptr = stack->bottom_ptr = NULL; \
+   else \
+      stack->top_ptr = next_ptr; \
+ \
+   return to_ret; \
+}
+
+#endif
diff --git a/src/core/dsp/ocl_load/ocl_load.c b/src/core/dsp/ocl_load/ocl_load.c
new file mode 100644
index 0000000..c53a137
--- /dev/null
+++ b/src/core/dsp/ocl_load/ocl_load.c
@@ -0,0 +1,139 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "dload_api.h"
+
+/* Define the Stack.h generator macros as empty so that any use of them */
+/* seen by this translation unit expands to nothing. */
+/* NOTE(review): presumably this keeps stack types from being declared via */
+/* dload_api.h in this file - confirm against that header. */
+#define TYPE_STACK_DEFINITION(t, t_name)
+#define TYPE_STACK_IMPLEMENTATION(t, t_name)
+
+/* Global switches, both off by default. This file never changes them. */
+int debugging_on = FALSE;
+int profiling_on = FALSE;
+
+/* Argument count/vector globals. This file only defines them; it never */
+/* assigns or reads them. NOTE(review): presumably referenced elsewhere in */
+/* the loader - confirm. */
+int global_argc;
+char **global_argv;
+
+/*****************************************************************************/
+/* DLIF_FSEEK() - Reposition the given file stream by forwarding to fseek() */
+/* and return fseek()'s result unchanged. */
+/*****************************************************************************/
+int DLIF_fseek(LOADER_FILE_DESC *stream, int32_t offset, int origin)
+{
+   const int status = fseek(stream, offset, origin);
+   return status;
+}
+
+
+/*****************************************************************************/
+/* DLIF_FREAD() - Read nmemb items of size bytes each from the stream into */
+/* ptr by forwarding to fread(); returns the item count fread() reports. */
+/*****************************************************************************/
+size_t DLIF_fread(void *ptr, size_t size, size_t nmemb,
+                  LOADER_FILE_DESC *stream)
+{
+   const size_t items_read = fread(ptr, size, nmemb, stream);
+   return items_read;
+}
+
+/* DLIF_FTELL() - Report the stream position obtained from ftell(), */
+/* converted to int32_t. */
+int32_t DLIF_ftell(LOADER_FILE_DESC *stream)
+{
+   return (int32_t)ftell(stream);
+}
+/* DLIF_FCLOSE() - Close the stream with fclose() and return its result. */
+int32_t DLIF_fclose(LOADER_FILE_DESC *fd)
+{
+   const int status = fclose(fd);
+   return status;
+}
+/* DLIF_MALLOC() - Allocate size bytes of host memory with malloc(). */
+/* Returns whatever malloc() returns, including NULL on failure. */
+void* DLIF_malloc(size_t size)
+{
+   void *block = malloc(size);
+   return block;
+}
+/* DLIF_FREE() - Release host memory by passing ptr straight to free(). */
+void DLIF_free(void* ptr)
+{
+   free(ptr);
+}
+
+/*****************************************************************************/
+/* DLIF_COPY() - Copy data from file to host-accessible memory. */
+/* Allocates a zero-filled buffer of segment->memsz_in_bytes bytes, seeks */
+/* targ_req->fp to targ_req->offset and reads segment->objsz_in_bytes */
+/* bytes into the start of the buffer. On success the buffer is returned */
+/* in the host_address field of the DLOAD_MEMORY_REQUEST object and the */
+/* function returns TRUE. The buffer is not freed here. */
+/* */
+/* Returns FALSE, with host_address set to NULL and no buffer left */
+/* allocated, when the file image is larger than the memory image, when */
+/* the buffer cannot be allocated, or when the seek/read of a non-empty */
+/* file image fails. */
+/*****************************************************************************/
+BOOL DLIF_copy(void* client_handle, struct DLOAD_MEMORY_REQUEST* targ_req)
+{
+   struct DLOAD_MEMORY_SEGMENT* obj_desc = targ_req->segment;
+   LOADER_FILE_DESC* f = targ_req->fp;
+   void *buf;
+   int seek_status;
+
+   (void)client_handle;
+
+   /* The bytes read from the file must fit inside the allocated image. */
+   if (obj_desc->objsz_in_bytes > obj_desc->memsz_in_bytes)
+   {
+      targ_req->host_address = NULL;
+      return FALSE;
+   }
+
+   /* calloc() may legitimately return NULL for a zero-sized request. */
+   buf = calloc(obj_desc->memsz_in_bytes, 1);
+   if (!buf && obj_desc->memsz_in_bytes)
+   {
+      targ_req->host_address = NULL;
+      return FALSE;
+   }
+
+   seek_status = fseek(f, targ_req->offset, SEEK_SET);
+
+   /* Only a segment with file contents depends on the seek and the read. */
+   if (obj_desc->objsz_in_bytes &&
+       (seek_status != 0 ||
+        fread(buf, obj_desc->objsz_in_bytes, 1, f) != 1))
+   {
+      free(buf);
+      targ_req->host_address = NULL;
+      return FALSE;
+   }
+
+   targ_req->host_address = buf;
+
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* DLIF_READ() - Not supported by this client: asserts when called. */
+/* Returns FALSE (nothing was read) when assertions are compiled out, so */
+/* the function never falls off its end without a return value. */
+/*****************************************************************************/
+BOOL DLIF_read(void* client_handle,
+               void *ptr, size_t size, size_t nmemb, TARGET_ADDRESS src)
+{
+   (void)client_handle;
+   (void)ptr;
+   (void)size;
+   (void)nmemb;
+   (void)src;
+
+   assert(0);
+   return FALSE;
+}
+
+/*****************************************************************************/
+/* DLIF_MEMCPY() - Copy size bytes from 'from' to 'to' in host memory. */
+/* Returns 1 on success and 0 when the destination is NULL, or when the */
+/* source is NULL and there are bytes to copy. The pointers are checked */
+/* before memcpy() is called: memcpy() returns its destination argument, */
+/* so testing its result afterwards cannot catch a bad pointer in time. */
+/*****************************************************************************/
+BOOL DLIF_memcpy(void* client_handle,
+                 void *to, void *from, size_t size)
+{
+   (void)client_handle;
+
+   if (!to) return 0;
+   if (size == 0) return 1;
+   if (!from) return 0;
+
+   memcpy(to, from, size);
+   return 1;
+}
+
+/* DLIF_EXECUTE() - Not supported by this client: asserts when called; */
+/* returns 1 if assertions are compiled out. */
+int32_t DLIF_execute(void* client_handle, TARGET_ADDRESS exec_addr)
+{
+   assert(0);
+   return 1;
+}
+
+
+
+
+/*****************************************************************************/
+/* DLIF_REGISTER_DSBT_INDEX_REQUEST() - Not supported by this client: */
+/* asserts when called. Returns FALSE (request not registered) when */
+/* assertions are compiled out, so the function never falls off its end */
+/* without a return value. */
+/*****************************************************************************/
+BOOL DLIF_register_dsbt_index_request(DLOAD_HANDLE handle,
+                                      const char *requestor_name,
+                                      int32_t requestor_file_handle,
+                                      int32_t requested_dsbt_index)
+{
+   (void)handle;
+   (void)requestor_name;
+   (void)requestor_file_handle;
+   (void)requested_dsbt_index;
+
+   assert(0);
+   return FALSE;
+}
+
+/* DLIF_ASSIGN_DSBT_INDICES() - Not supported by this client: asserts */
+/* when called and otherwise does nothing. */
+void DLIF_assign_dsbt_indices(void)
+{
+   assert(0);
+}
+
+/* DLIF_GET_DSBT_INDEX() - Not supported by this client: asserts when */
+/* called; returns DSBT_INDEX_INVALID if assertions are compiled out. */
+int32_t DLIF_get_dsbt_index(int32_t file_handle)
+{
+   assert(0);
+   return DSBT_INDEX_INVALID;
+}
+
+/* DLIF_UPDATE_ALL_DSBTS() - Not supported by this client: asserts when */
+/* called; returns TRUE if assertions are compiled out. */
+BOOL DLIF_update_all_dsbts()
+{
+   assert(0);
+   return TRUE;
+}
+
+/*****************************************************************************/
+/* DLIF_WARNING() - Print a loader warning on stdout: a fixed banner */
+/* followed by the printf-style message. wtype is not used. */
+/*****************************************************************************/
+void DLIF_warning(LOADER_WARNING_TYPE wtype, const char *fmt, ...)
+{
+   va_list args;
+
+   fputs("<< D L O A D >> WARNING: ", stdout);
+
+   va_start(args, fmt);
+   vfprintf(stdout, fmt, args);
+   va_end(args);
+}
+
+/*****************************************************************************/
+/* DLIF_ERROR() - Print a loader error on stdout: a fixed banner followed */
+/* by the printf-style message. etype is not used. */
+/*****************************************************************************/
+void DLIF_error(LOADER_ERROR_TYPE etype, const char *fmt, ...)
+{
+   va_list args;
+
+   fputs("<< D L O A D >> ERROR: ", stdout);
+
+   va_start(args, fmt);
+   vfprintf(stdout, fmt, args);
+   va_end(args);
+}
+
+/*****************************************************************************/
+/* DLIF_TRACE() - Print a printf-style trace message on stdout, with no */
+/* banner added. */
+/*****************************************************************************/
+void DLIF_trace(const char *fmt, ...)
+{
+   va_list args;
+
+   va_start(args, fmt);
+   vfprintf(stdout, fmt, args);
+   va_end(args);
+}
+
+/*****************************************************************************/
+/* DLIF_EXIT() - Terminate the process through exit() with status ecode. */
+/* The parameter is declared int explicitly; an untyped parameter relies */
+/* on implicit int, which has not been valid C since C99. */
+/*****************************************************************************/
+void DLIF_exit(int ecode)
+{
+   exit(ecode);
+}
+
diff --git a/src/core/dsp/program.cpp b/src/core/dsp/program.cpp
new file mode 100644
index 0000000..6495ec9
--- /dev/null
+++ b/src/core/dsp/program.cpp
@@ -0,0 +1,633 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "program.h"
+#include "device.h"
+#include "kernel.h"
+
+#include "../program.h"
+
+#include <llvm/PassManager.h>
+#include <llvm/Analysis/Passes.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Transforms/Scalar.h>
+#include <llvm/Transforms/IPO.h>
+#include <llvm/Transforms/Utils/UnifyFunctionExitNodes.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include "wga.h"
+
+#include <llvm/LinkAllPasses.h>
+#include <WorkitemHandlerChooser.h>
+#include <BreakConstantGEPs.h>
+#include <Flatten.h>
+#include <PHIsToAllocas.h>
+#include <IsolateRegions.h>
+#include <VariableUniformityAnalysis.h>
+#include <ImplicitLoopBarriers.h>
+#include <LoopBarriers.h>
+#include <BarrierTailReplication.h>
+#include <CanonicalizeBarriers.h>
+#include <WorkItemAliasAnalysis.h>
+#include <WorkitemReplication.h>
+#include <WorkitemLoops.h>
+#include <AllocasToEntry.h>
+#include <Workgroup.h>
+#include <TargetAddressSpaces.h>
+
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <elf.h>
+
+#include "genfile_cache.h"
+
+// Storage for genfile_cache's static pInstance pointer, initially null.
+// This file reaches the cache through genfile_cache::instance().
+genfile_cache * genfile_cache::pInstance = 0;
+
+/**
+ * Read the current time from CLOCK_MONOTONIC; if that clock cannot be
+ * read, read CLOCK_REALTIME instead.
+ */
+timespec getTime()
+{
+    struct timespec now;
+    const bool monotonic_ok = (clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+
+    if (!monotonic_ok)
+        clock_gettime(CLOCK_REALTIME, &now);
+
+    return now;
+}
+
+/**
+ * Convert a timespec to seconds as a double (tv_sec plus tv_nsec / 1e9).
+ */
+double ts_to_double(const timespec &t)
+{
+    const double fraction = ((double)t.tv_nsec) / 1000000000.0;
+    const double whole    = (double)t.tv_sec;
+    return fraction + whole;
+}
+
+/**
+ * Elapsed time in seconds from start to end (end minus start).
+ */
+double tsdiff (const timespec& start, const timespec& end)
+{
+    const double end_secs   = ts_to_double(end);
+    const double start_secs = ts_to_double(start);
+    return end_secs - start_secs;
+}
+
+
+using namespace Coal;
+
+/**
+ * Create a not-yet-loaded program for the given device (program handle -1).
+ *
+ * Two environment variables are consulted; only their presence matters:
+ *   TI_OCL_KEEP_FILES        - if set, p_keep_files is true
+ *   TI_OCL_CACHE_KERNELS_OFF - if set, p_cache_kernels is false
+ */
+DSPProgram::DSPProgram(DSPDevice *device, Program *program)
+    : DeviceProgram(),
+      p_device(device),
+      p_program(program),
+      p_program_handle(-1),
+      p_loaded(false),
+      p_keep_files(getenv("TI_OCL_KEEP_FILES") != NULL),
+      p_cache_kernels(getenv("TI_OCL_CACHE_KERNELS_OFF") == NULL)
+{
+}
+
+/**
+ * Unload the program from the device and delete its output file, unless
+ * files are being kept (p_keep_files) or kernels are cached
+ * (p_cache_kernels).
+ */
+DSPProgram::~DSPProgram()
+{
+    p_device->unload(p_program_handle);
+
+    const bool outfile_is_disposable = !(p_keep_files || p_cache_kernels);
+    if (outfile_is_disposable)
+        unlink(p_outfile);
+}
+
+// File-scope pointer that DSPProgram::load() points at the program's
+// p_segments_written list before calling p_device->load() and resets to
+// NULL after a successful load.
+// NOTE(review): presumably the loader's write callbacks append to the list
+// through this pointer - confirm against the device/loader code.
+DSPProgram::segment_list *segments;
+
+/**
+ * Load the program image named by p_outfile onto the device, then mail a
+ * CACHEINV message listing every segment recorded in p_segments_written.
+ *
+ * If TI_OCL_DEBUG_KERNEL is set, the address, size and access type of each
+ * loaded segment are printed.
+ *
+ * \return false if the device returns a zero program handle, true otherwise
+ */
+bool DSPProgram::load()
+{
+    /*-------------------------------------------------------------------------
+    * Expose this program's segment list through the file-scope pointer only
+    * while the device loader runs. It is cleared on the failure path too, so
+    * it never keeps pointing at a member of a program that failed to load.
+    *------------------------------------------------------------------------*/
+    segments = &p_segments_written;
+    p_program_handle = p_device->load(p_outfile);
+    segments = NULL;
+
+    if (!p_program_handle) return false;
+
+    p_loaded = true;
+
+    char *debug_kernel = getenv("TI_OCL_DEBUG_KERNEL");
+
+    /*-------------------------------------------------------------------------
+    * ensure that the newly populated areas are not stale in device caches
+    *------------------------------------------------------------------------*/
+    Msg_t msg;
+    int segNum = p_segments_written.size();
+
+    assert(segNum <= MAX_FLUSH_BUF_SIZE/2);
+
+    msg.command = CACHEINV;
+    msg.u.k.flush.numBuffers = segNum;
+    msg.u.k.flush.num_mpaxs = 0;
+    for (int i=0; i < segNum; ++i)
+    {
+        /* each segment takes two slots: its address, then its size */
+        msg.u.k.flush.buffers[2*i] = p_segments_written[i].ptr;
+        msg.u.k.flush.buffers[2*i+1] = p_segments_written[i].size;
+
+        if (!debug_kernel) continue;
+
+        uint32_t flags = p_segments_written[i].flags &
+                         (DLOAD_SF_executable | DLOAD_SF_writable);
+
+        const char *seg_desc;
+        switch (flags)
+        {
+            case 0: seg_desc = "Read Only"; break;
+            case DLOAD_SF_executable: seg_desc = "Executable"; break;
+            case DLOAD_SF_writable: seg_desc = "Writable"; break;
+            default: seg_desc = "Writable & Executable"; break;
+        }
+
+        printf("%s segment loaded to 0x%08x with size 0x%x\n",
+               seg_desc, p_segments_written[i].ptr, p_segments_written[i].size);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Send the command. We do not wait for the ready response here; the wait
+    * is handled by the standard wait loop in the worker thread.
+    *------------------------------------------------------------------------*/
+    p_device->mail_to(msg);
+
+    return true;
+}
+
+/**
+ * \return the p_loaded flag, which load() sets to true once the device
+ *         has returned a non-zero program handle
+ */
+bool DSPProgram::is_loaded() const
+{
+    return this->p_loaded;
+}
+
+/**
+ * \return always false for DSP programs
+ */
+bool DSPProgram::linkStdLib() const
+{
+    const bool link_std_lib = false;
+    return link_std_lib;
+}
+
+/**
+ * \return the stored output file name (p_outfile)
+ */
+const char* DSPProgram::outfile_name() const
+{
+    const char *name = p_outfile;
+    return name;
+}
+
+/**
+ * Return the program's static base address as reported by
+ * DLOAD_get_static_base(), loading the program first if that has not
+ * happened yet.
+ *
+ * \return the static base, or 0 if the program could not be loaded or the
+ *         loader did not store an address
+ */
+DSPDevicePtr DSPProgram::data_page_ptr()
+{
+    /* start from 0 so no indeterminate value is ever returned */
+    DSPDevicePtr p = 0;
+
+    if (!is_loaded() && !load()) return p;
+
+    DLOAD_get_static_base(p_device->dload_handle(), p_program_handle, &p);
+    return p;
+}
+
+/**
+ * Populate \p manager with the pass pipeline used for DSP kernels.
+ *
+ * Passes are added in this order, and the order matters:
+ *  1. if \p hasBarrier: the pocl work-item handling passes
+ *  2. if \p optimize: a set of LLVM optimization passes
+ *  3. always: function exit unification, then the TI work-group
+ *     aggregation pass (which is told whether barriers are present)
+ *  4. if not \p hasBarrier: pocl's AllocasToEntry, which step 1 has
+ *     already added in the barrier case
+ *
+ * \param manager pass manager that receives the passes
+ * \param optimize true to add the optimization passes
+ * \param hasBarrier true to add the barrier-handling passes
+ */
+void DSPProgram::createOptimizationPasses(llvm::PassManager *manager,
+ bool optimize, bool hasBarrier)
+{
+ // Barrier handling: pocl passes plus the LLVM passes they are mixed with.
+ if (hasBarrier)
+ {
+ manager->add(new llvm::DominatorTree());
+ manager->add(new pocl::WorkitemHandlerChooser());
+ manager->add(new BreakConstantGEPs()); // from pocl
+ // add(new GenerateHeader()); // no need
+ manager->add(new pocl::Flatten());
+ manager->add( llvm::createAlwaysInlinerPass());
+ manager->add( llvm::createGlobalDCEPass());
+ manager->add( llvm::createCFGSimplificationPass());
+ manager->add( llvm::createLoopSimplifyPass());
+ manager->add(new pocl::PHIsToAllocas());
+ manager->add( llvm::createRegionInfoPass());
+ manager->add(new pocl::IsolateRegions());
+ manager->add(new pocl::VariableUniformityAnalysis()); // TODO
+ manager->add(new pocl::ImplicitLoopBarriers());
+ manager->add(new pocl::LoopBarriers());
+ manager->add(new pocl::BarrierTailReplication());
+ manager->add(new pocl::CanonicalizeBarriers());
+ manager->add(new pocl::IsolateRegions());
+ manager->add(new pocl::WorkItemAliasAnalysis());
+ // add(new pocl::WorkitemReplication()); // no need
+ manager->add(new pocl::WorkitemLoops());
+ manager->add(new pocl::AllocasToEntry());
+ // add(new pocl::Workgroup()); // no need
+ manager->add(new pocl::TargetAddressSpaces());
+ }
+
+ // General optimization passes, only when requested.
+ if (optimize)
+ {
+ /*
+ * Inspired by code from "The LLVM Compiler Infrastructure"
+ */
+ manager->add(llvm::createDeadArgEliminationPass());
+ manager->add(llvm::createInstructionCombiningPass());
+ manager->add(llvm::createFunctionInliningPass());
+ manager->add(llvm::createPruneEHPass()); // Remove dead EH info.
+ manager->add(llvm::createGlobalOptimizerPass());
+ manager->add(llvm::createGlobalDCEPass()); // Remove dead functions.
+ manager->add(llvm::createArgumentPromotionPass());
+ manager->add(llvm::createInstructionCombiningPass());
+ manager->add(llvm::createJumpThreadingPass());
+
+ //ASW TODO maybe turn off re: pete. might gen bad xlator input
+ //manager->add(llvm::createScalarReplAggregatesPass());
+
+ manager->add(llvm::createFunctionAttrsPass()); // Add nocapture.
+ manager->add(llvm::createGlobalsModRefPass()); // IP alias analysis.
+ manager->add(llvm::createLICMPass()); // Hoist loop invariants.
+ manager->add(llvm::createGVNPass()); // Remove redundancies.
+ manager->add(llvm::createMemCpyOptPass()); // Remove dead memcpys.
+ manager->add(llvm::createDeadStoreEliminationPass());
+ manager->add(llvm::createInstructionCombiningPass());
+ manager->add(llvm::createJumpThreadingPass());
+ manager->add(llvm::createCFGSimplificationPass());
+ }
+
+ // Always added, regardless of optimize/hasBarrier.
+ manager->add(llvm::createUnifyFunctionExitNodesPass());
+ manager->add(llvm::createTIOpenclWorkGroupAggregationPass(hasBarrier));
+
+ /*-------------------------------------------------------------------------
+ * Borrow the pocl alloca hoister for the TI simplistic WGA pass as well
+ *------------------------------------------------------------------------*/
+ if (!hasBarrier)
+ manager->add(new pocl::AllocasToEntry());
+}
+
+
+/**
+ * Reduce a build option string to the tokens that look like object files,
+ * libraries or linker command files, so they can be appended to the link
+ * step.
+ *
+ * The string is split on whitespace. A token is kept when any of the
+ * markers below occurs ANYWHERE inside it (substring match, not suffix
+ * match). Kept tokens are returned in their original order, each followed
+ * by a single space.
+ */
+std::string process_cl6x_options(std::string options)
+{
+    static const char *const kept_markers[] =
+    {
+        ".obj", ".dll", ".ae66", ".a66", ".out", ".lib",
+        ".o", ".o66", ".oe66", ".a", ".cmd"
+    };
+    const size_t n_markers = sizeof(kept_markers) / sizeof(kept_markers[0]);
+
+    std::istringstream words(options);
+    std::string kept;
+
+    for (std::string word; words >> word; )
+    {
+        bool matched = false;
+        for (size_t m = 0; m < n_markers && !matched; ++m)
+            matched = (word.find(kept_markers[m]) != std::string::npos);
+
+        if (matched)
+        {
+            kept += word;
+            kept += " ";
+        }
+    }
+
+    return kept;
+}
+
+/******************************************************************************
+* get_cgt_install - Find the C6000 CGT installation.
+*
+* Returns the value of the TI_OCL_CGT_INSTALL environment variable. If the
+* variable is not set, an explanatory message is printed on stdout and the
+* process is aborted.
+******************************************************************************/
+char *get_cgt_install()
+{
+    char *const cgt_root = getenv("TI_OCL_CGT_INSTALL");
+    if (cgt_root) return cgt_root;
+
+    std::cout <<
+        "The environment variable TI_OCL_CGT_INSTALL must be set to a "
+        << std::endl <<
+        "directory path where the C6000 compiler tools are installed. "
+        << std::endl;
+
+    abort();
+}
+
+/******************************************************************************
+* get_ocl_install - Find the OpenCL installation.
+*
+* Returns the value of the TI_OCL_INSTALL environment variable. If the
+* variable is not set, an explanatory message is printed on stdout and the
+* process is aborted.
+******************************************************************************/
+char *get_ocl_install()
+{
+    char *const ocl_root = getenv("TI_OCL_INSTALL");
+    if (ocl_root) return ocl_root;
+
+    std::cout <<
+        "The environment variable TI_OCL_INSTALL must be set to a "
+        << std::endl <<
+        "directory path where the TI OpenCL product is installed. "
+        << std::endl;
+
+    abort();
+}
+
+/**
+ * Directory holding the DSP-side OpenCL files.
+ *
+ * "/usr/share/ti/opencl/dsp" is used when stat() succeeds on it; otherwise
+ * the result is "<TI_OCL_INSTALL>/dsp" (get_ocl_install() aborts if that
+ * variable is unset). The answer is computed on the first call and reused
+ * from a function-local static afterwards.
+ */
+std::string get_ocl_dsp()
+{
+    static std::string cached_dir;
+
+    if (!cached_dir.empty()) return cached_dir;
+
+    struct stat info;
+    const char *system_dir = "/usr/share/ti/opencl/dsp";
+
+    if (stat(system_dir, &info) == 0)
+        cached_dir = std::string(system_dir);
+    else
+        cached_dir = std::string(get_ocl_install()) + "/dsp";
+
+    return cached_dir;
+}
+
+/******************************************************************************
+* run_cl6x - Compile and link an LLVM bitcode file with the cl6x toolchain.
+*
+* filename     : path of the bitcode file; "<filename>.out" is the output
+* llvm_bitcode : if not NULL, its bytes are written to "<filename>_bc.asm"
+*                as a retained ".llvmir" section and added to the build
+* keep_files   : adds the "-mw -k --z" options and requests a
+*                "<filename>.map" link map
+* options      : build options; only the tokens selected by
+*                process_cl6x_options() are appended to the link step
+*
+* Environment: TI_OCL_CL6X_DEBUG selects "-g -o0" instead of "-o3" and skips
+* strip6x; TI_OCL_SOFTWARE_PIPELINE_OFF adds "-mu".
+*
+* Returns the status reported by system() for the cl6x command if it is
+* non-zero, otherwise the status of the strip6x command (0 if strip6x was
+* not run). strip6x is run whether or not cl6x reported success.
+*
+* NOTE(review): the command lines are run through system(), and filename and
+* the filtered options are concatenated into them unquoted. They must not
+* come from an untrusted source.
+******************************************************************************/
+static int run_cl6x(char *filename, std::string *llvm_bitcode,
+                    bool keep_files, std::string options)
+{
+    std::string command("cl6x --f -q --abi=eabi --use_g3 -mv6600 -mt -mo "
+                        "-ft=/tmp -fs=/tmp -fr=/tmp ");
+
+    if (keep_files) command += "-mw -k --z ";
+
+    /*-------------------------------------------------------------------------
+    * Turned off for now to workaround a timing bug. Plan to re-enable later
+    *------------------------------------------------------------------------*/
+    command += "--disable:sploop ";
+
+    char *cl6x_debug = getenv("TI_OCL_CL6X_DEBUG");
+
+    if (cl6x_debug) command += "-g -o0 ";
+    else command += "-o3 ";
+
+    char *no_sp = getenv("TI_OCL_SOFTWARE_PIPELINE_OFF");
+    if (no_sp) command += "-mu ";
+
+    char *cgt_install = get_cgt_install();
+
+    command += "-I"; command += cgt_install; command += "/include ";
+    command += "-I"; command += cgt_install; command += "/lib ";
+    command += "-I"; command += get_ocl_dsp().c_str(); command += " ";
+
+    command += "--bc_file="; command += filename; command += " ";
+
+    /*-------------------------------------------------------------------------
+    * Encode LLVM bitcode as bytes in the .llvmir section of the .asm file
+    *------------------------------------------------------------------------*/
+    if (llvm_bitcode != NULL)
+    {
+        /* std::string instead of a fixed char[32]: no length assumption */
+        const std::string bitasm_name = std::string(filename) + "_bc.asm";
+
+        std::ofstream outasmfile(bitasm_name.c_str(), std::ios::out);
+        outasmfile << "\t.sect \".llvmir\"\n" << "\t.retain";
+        int nbytes = llvm_bitcode->size();
+        for (int i = 0; i < nbytes; i++)
+            if (i % 10 == 0)
+                outasmfile << "\n\t.byte " << (int) llvm_bitcode->at(i);
+            else
+                outasmfile << ", " << (int) llvm_bitcode->at(i);
+        outasmfile.close();
+
+        command += bitasm_name; command += " ";
+    }
+
+    command += "-z -ldsp.syms -o ";
+    command += filename; command += ".out ";
+
+    if (keep_files)
+        { command += "-m "; command += filename; command += ".map "; }
+
+    /*-------------------------------------------------------------------------
+    * Any libraries or object files need to go last to resolve references
+    *------------------------------------------------------------------------*/
+    command += process_cl6x_options(options);
+
+    const int compile_status = system(command.c_str());
+
+    int strip_status = 0;
+    if (!cl6x_debug)
+    {
+        std::string strip_command("strip6x ");
+        strip_command += filename; strip_command += ".out";
+        strip_status = system(strip_command.c_str());
+    }
+
+    return (compile_status != 0) ? compile_status : strip_status;
+}
+
+/**
+ * Extract llvm bitcode and native binary from MixedBinary
+ *
+ * \param binary_str candidate binary; must start with the ELF magic
+ * \param bitcode if not NULL, receives the contents of the ".llvmir"
+ *        section
+ * \param native if not NULL, receives a copy of the whole of binary_str
+ * \return false if binary_str is NULL, empty or not an ELF image, or (when
+ *         bitcode is requested) if the section table is malformed or has
+ *         no ".llvmir" section; true otherwise
+ *
+ * Every offset, size and name index read from the image is checked against
+ * the image size before use, so a truncated or corrupt binary yields false
+ * instead of an exception or an out-of-bounds read.
+ */
+bool DSPProgram::ExtractMixedBinary(std::string *binary_str,
+                                    std::string *bitcode, std::string *native)
+{
+    if (binary_str == NULL) return false;
+
+    const char  *image = binary_str->c_str();
+    const size_t total = binary_str->size();
+
+    /* c_str() is NUL terminated, so this is safe for short/empty strings */
+    if (strncmp(image, ELFMAG, SELFMAG) != 0) return false;
+
+    /*-------------------------------------------------------------------------
+    * Parse ELF file format, extract ".llvmir" section into bitcode
+    * Valid Assumptions: 1. cl6x only creates 32-bit ELF files (for now)
+    *                    2. cl6x ELF file has the same endianness as the host
+    *------------------------------------------------------------------------*/
+    if (bitcode != NULL)
+    {
+        static const char ir_name[] = ".llvmir";
+        const size_t shsize = sizeof(Elf32_Shdr);
+
+        if (total < sizeof(Elf32_Ehdr)) return false;
+
+        Elf32_Ehdr ehdr; /* memcpy into here to guarantee proper alignment */
+        memcpy(&ehdr, image, sizeof(Elf32_Ehdr));
+        const size_t n_sects    = ehdr.e_shnum;
+        const size_t shoff      = ehdr.e_shoff;
+        const size_t shstr_sect = ehdr.e_shstrndx;
+
+        /* the whole section header table must lie inside the image */
+        if (shstr_sect >= n_sects) return false;
+        if (shoff > total || n_sects > (total - shoff) / shsize) return false;
+
+        Elf32_Shdr shdr; /* memcpy into here to guarantee proper alignment */
+        memcpy(&shdr, image + shoff + shstr_sect * shsize, shsize);
+        if (shdr.sh_offset > total || shdr.sh_size > total - shdr.sh_offset)
+            return false;
+
+        const char  *strtab      = image + shdr.sh_offset;
+        const size_t strtab_size = shdr.sh_size;
+
+        size_t i;
+        for (i = 0; i < n_sects; i++)
+        {
+            if (i == shstr_sect) continue;
+            memcpy(&shdr, image + shoff + i * shsize, shsize);
+
+            /* compare name and terminator, staying inside the string table */
+            if (shdr.sh_name < strtab_size &&
+                strtab_size - shdr.sh_name >= sizeof(ir_name) &&
+                memcmp(strtab + shdr.sh_name, ir_name, sizeof(ir_name)) == 0)
+                break;
+        }
+        if (i >= n_sects) return false;
+
+        if (shdr.sh_offset > total || shdr.sh_size > total - shdr.sh_offset)
+            return false;
+
+        bitcode->clear();
+        bitcode->append(image + shdr.sh_offset, shdr.sh_size);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Return the c6x ELF file in binary_str as native binary
+    *------------------------------------------------------------------------*/
+    if (native != NULL)
+    {
+        native->clear();
+        native->append(*binary_str);
+    }
+
+    return true;
+}
+
+
+/**
+ * Write native binary into file, create temporary filename in p_outfile
+ *
+ * mkstemp() reserves a unique "/tmp/openclXXXXXX" name and the binary is
+ * written to that name with ".out" appended. The empty file created by
+ * mkstemp() itself is left in place.
+ * NOTE(review): consider whether that placeholder should be removed; it
+ * currently also keeps the generated name from being handed out again.
+ *
+ * On failure "ERROR: Binary write out failure" is printed. If no temporary
+ * name could be created, p_outfile is set to the empty string.
+ */
+void DSPProgram::WriteNativeOut(std::string *native)
+{
+    char name_out[] = "/tmp/openclXXXXXX";
+    int fOutfile = mkstemp(name_out);
+
+    if (fOutfile == -1)
+    {
+        /* no usable name: do not write to, or remember, a bogus path */
+        p_outfile[0] = '\0';
+        std::cout << "ERROR: Binary write out failure" << std::endl;
+        return;
+    }
+
+    strcpy(p_outfile, name_out);
+    strcat(p_outfile, ".out");
+
+    /* streams do not throw by default, so check the stream state instead */
+    std::ofstream outfile(p_outfile, std::ios::out | std::ios::binary);
+    outfile.write(native->data(), native->size());
+    outfile.close();
+    close(fOutfile);
+
+    if (outfile.fail())
+        std::cout << "ERROR: Binary write out failure" << std::endl;
+}
+
+/**
+ * Native binary is stored in file, filename in p_outfile
+ * Input: binary_str contains only the bitcode
+ * Output: binary_str contains c6x ELF file with bitcode in ".llvmir" section
+ *
+ * binary_str is replaced only after the whole file has been read. If the
+ * file cannot be opened, sized or read completely, or memory runs out,
+ * "ERROR: Binary read in failure" is printed and binary_str is left
+ * unchanged. A NULL binary_str is ignored.
+ */
+void DSPProgram::ReadEmbeddedBinary(std::string *binary_str)
+{
+    if (binary_str == NULL) return;
+
+    try
+    {
+        std::ifstream is(p_outfile, std::ios::binary);
+        if (is)
+        {
+            is.seekg(0, std::ios::end);
+            const std::streamoff length = is.tellg();
+            is.seekg(0, std::ios::beg);
+
+            if (length >= 0)
+            {
+                /* std::string owns the buffer: nothing to leak on error */
+                std::string contents((size_t)length, '\0');
+
+                if (length == 0 ||
+                    (is.read(&contents[0], length) && is.gcount() == length))
+                {
+                    binary_str->swap(contents);
+                    return;
+                }
+            }
+        }
+    }
+    catch(...) { /* reported below */ }
+
+    std::cout << "ERROR: Binary read in failure" << std::endl;
+}
+
+/**
+ * Produce the c6x binary for this program and record its file name in
+ * p_outfile.
+ *
+ * \param module LLVM module to compile (stored in p_module)
+ * \param binary_str in: mixed binary or LLVM bitcode; out: the c6x binary
+ * \return true if a c6x binary is available in p_outfile, false if no
+ *         temporary file could be created or the compiler did not produce
+ *         an output file
+ */
+bool DSPProgram::build(llvm::Module *module, std::string *binary_str)
+{
+    p_module = module;
+
+    /*------------------------------------------------------------------------
+    * The input binary_str could be any of the following:
+    * 1. Mixed C6x binary embedded with LLVM bitcode, extract C6x native
+    *    binary and return. There is no need to rebuild from LLVM module.
+    * 2. LLVM bitcode, proceed to the regular build:
+    *    2.1 return a corresponding cached c6x binary, if found
+    *    2.2 invoke c6x compiler toolchain, embed LLVM bitcode, build
+    * In either case, put c6x binary in binary_str when return
+    *------------------------------------------------------------------------*/
+    std::string native;
+    if (ExtractMixedBinary(binary_str, NULL, &native))
+    {
+        WriteNativeOut(&native);
+        return true;
+    }
+
+    if (p_cache_kernels)
+    {
+        string cached_outfile = genfile_cache::instance()->lookup
+            (p_module, p_program->deviceDependentCompilerOptions(p_device));
+
+        if (!cached_outfile.empty())
+        {
+            strcpy(p_outfile, cached_outfile.c_str());
+            ReadEmbeddedBinary(binary_str);
+            return true;
+        }
+    }
+
+    char name_template[] = "/tmp/openclXXXXXX";
+    int pFile = mkstemp(name_template);
+
+    /* without a temporary file there is nothing to compile */
+    if (pFile == -1)
+    {
+        p_outfile[0] = '\0';
+        std::cout << "ERROR: Unable to create a temporary file in /tmp"
+                  << std::endl;
+        return false;
+    }
+
+    strcpy(p_outfile, name_template);
+    strcat(p_outfile, ".out");
+
+    if (p_keep_files)
+    {
+        //write out the source as well
+
+        std::string filename(name_template);
+        filename += ".cl";
+        std::ofstream out(filename.c_str());
+        out << p_program->source();
+        out.close();
+    }
+
+    {
+        /* the stream does not own pFile; it is closed explicitly below */
+        llvm::raw_fd_ostream ostream(pFile, false);
+        llvm::WriteBitcodeToFile(p_module, ostream);
+        ostream.flush();
+    }
+
+    run_cl6x(name_template, binary_str, p_keep_files,
+             p_program->deviceDependentCompilerOptions(p_device));
+
+    if (!p_keep_files)
+    {
+        /* remove the bitcode file and the compiler's intermediate files */
+        const std::string base(name_template);
+
+        unlink(name_template);
+        unlink((base + ".obj").c_str());
+
+        if (binary_str != NULL)
+        {
+            unlink((base + "_bc.asm").c_str());
+            unlink((base + "_bc.obj").c_str());
+        }
+    }
+
+    /*------------------------------------------------------------------------
+    * Only a build that really produced the output file is cached and read
+    * back; otherwise the failure is reported to the caller.
+    *------------------------------------------------------------------------*/
+    const bool built = (access(p_outfile, R_OK) == 0);
+
+    if (built)
+    {
+        if (p_cache_kernels)
+            genfile_cache::instance()->remember(p_outfile, p_module,
+                p_program->deviceDependentCompilerOptions(p_device));
+
+        ReadEmbeddedBinary(binary_str);
+    }
+    else
+    {
+        std::cout << "ERROR: Kernel build failed, " << p_outfile
+                  << " was not created" << std::endl;
+    }
+
+    close(pFile);
+
+    return built;
+}
+
+/**
+ * Look up a symbol of this program through DLOAD_query_symbol().
+ *
+ * \param symname name of the symbol to look up
+ * \return the address the loader reports, or 0 if the symbol is not found
+ */
+DSPDevicePtr DSPProgram::query_symbol(const char *symname)
+{
+    DSPDevicePtr symbol_addr;
+
+    if (DLOAD_query_symbol(p_device->dload_handle(), p_program_handle,
+                           symname, &symbol_addr))
+        return symbol_addr;
+
+    return 0;
+}
+
diff --git a/src/core/dsp/program.h b/src/core/dsp/program.h
new file mode 100644
index 0000000..63c1858
--- /dev/null
+++ b/src/core/dsp/program.h
@@ -0,0 +1,92 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __DSP_PROGRAM_H__
+#define __DSP_PROGRAM_H__
+
+#include "device.h"
+#include "../deviceinterface.h"
+#include <vector>
+
+namespace llvm
+{
+ class ExecutionEngine;
+ class Module;
+}
+
+namespace Coal
+{
+
+class DSPDevice;
+class Program;
+
+// DSP-device flavour of DeviceProgram: builds a module into an output file and
+// answers symbol queries against the loaded program.
+class DSPProgram : public DeviceProgram
+{
+ public:
+ // One segment record: device pointer, size and flags.
+ // Note: the constructor takes the size as int but stores it as unsigned.
+ struct seg_desc
+ {
+ seg_desc(DSPDevicePtr p, int s, uint32_t f) :
+ ptr(p), size(s), flags(f) {}
+ DSPDevicePtr ptr;
+ unsigned size;
+ uint32_t flags;
+ };
+
+ typedef std::vector<seg_desc> segment_list;
+
+ public:
+ DSPProgram(DSPDevice *device, Program *program);
+ ~DSPProgram();
+
+ bool linkStdLib() const;
+ const char* outfile_name() const;
+ void createOptimizationPasses(llvm::PassManager *manager,
+ bool optimize, bool hasBarrier=false);
+ bool build(llvm::Module *module, std::string *binary_str);
+ bool ExtractMixedBinary(std::string *binary_str,
+ std::string *bitcode, std::string *native);
+ void WriteNativeOut(std::string *native);
+ void ReadEmbeddedBinary(std::string *binary_str);
+
+ // Device address of symname, or 0 when the loader does not find it.
+ DSPDevicePtr query_symbol(const char *symname);
+ DSPDevicePtr data_page_ptr();
+ bool load();
+ bool is_loaded() const;
+
+ private:
+ DSPDevice *p_device; // supplies dload_handle() for symbol queries
+ Program *p_program; // supplies source() and compiler options to build()
+ llvm::Module *p_module;
+ int p_program_handle; // handle passed to DLOAD_query_symbol
+ // Output file name. build() fills this with strcpy/strcat (a mkstemp name
+ // plus ".out", or a cached file path), so names must stay under 32 bytes.
+ char p_outfile[32];
+ bool p_loaded;
+ segment_list p_segments_written;
+ bool p_keep_files; // build(): keep intermediates and also write the .cl source
+ bool p_cache_kernels; // build(): look up / remember outputs in genfile_cache
+};
+}
+#endif
diff --git a/src/core/dsp/shmem.cpp b/src/core/dsp/shmem.cpp
new file mode 100644
index 0000000..6aec2f8
--- /dev/null
+++ b/src/core/dsp/shmem.cpp
@@ -0,0 +1,539 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "shmem.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <ti/cmem.h>
+
+#define REPORT(x) printf(x "\n")
+#define ERR(status, msg) if (status) { printf("ERROR: %s\n", msg); exit(-1); }
+
+/******************************************************************************
+* shmem::shmem
+*   Starts with no region configured and no device open. The page size is
+*   queried once here; configure_base() checks it before use.
+******************************************************************************/
+shmem::shmem()
+    : p_dsp_addr(0)
+    , p_size(0)
+    , p_page_size(sysconf(_SC_PAGE_SIZE))
+    , p_mmap_fd(-1)
+    , p_mpm_transport_handle(NULL)
+{
+}
+
+/******************************************************************************
+* shmem::~shmem
+*   Releases what the base class owns: the legacy file descriptor (if one was
+*   ever opened) and the mpm_transport handle opened by configure_base().
+*   Derived destructors run first, so any mapping made through the handle has
+*   already been released by the time it is closed here.
+******************************************************************************/
+shmem::~shmem()
+{
+    if (p_mmap_fd != -1) close(p_mmap_fd);
+
+    // configure_base() opens this handle and no other code in this file closes
+    // it; without this the device stays open for the life of the process.
+    if (p_mpm_transport_handle != NULL)
+    {
+        mpm_transport_close(p_mpm_transport_handle);
+        p_mpm_transport_handle = NULL;
+    }
+}
+
+/******************************************************************************
+* shmem::configure_base
+*   Opens the mpm_transport device that serves dsp_addr and records the region
+*   start and size. On failure a message is printed and the function returns
+*   without recording the region; after a failed open p_mpm_transport_handle
+*   is NULL, which is what the derived classes test.
+******************************************************************************/
+void shmem::configure_base(DSPDevicePtr64 dsp_addr, uint64_t size)
+{
+    /*-------------------------------------------------------------------------
+    * If the sysconf for the page size failed. p_page_size is an unsigned
+    * 32-bit field, so sysconf's -1 arrives here as 0xFFFFFFFF and a plain
+    * "<= 0" test can never catch it.
+    *------------------------------------------------------------------------*/
+    if (p_page_size == 0 || p_page_size == (uint32_t)-1)
+    {
+        REPORT("Failed to get PAGE_SIZE");
+        return;
+    }
+
+    // p_mmap_fd = open("/dev/mem", (O_RDWR | O_SYNC));
+    // Now we use mpm_transport_{open, mmap, munmap, close}
+    /*-------------------------------------------------------------------------
+    * core1-core7's l2 go through /dev/dsp{1-7}
+    * everything else (core0's l2, msmc, global addr) go through /dev/dsp0
+    *------------------------------------------------------------------------*/
+    char devname[16] = "dsp0";
+    if (dsp_addr >= 0x11800000 && dsp_addr < 0x17900000)
+        devname[3] = ((dsp_addr >> 24) - 0x10) + '0';   // 0x11.. -> '1', 0x17.. -> '7'
+
+    mpm_transport_open_t mpm_transport_open_cfg;
+    mpm_transport_open_cfg.open_mode = (O_SYNC|O_RDWR);
+    p_mpm_transport_handle = mpm_transport_open(devname,
+                                                &mpm_transport_open_cfg);
+
+    /*-------------------------------------------------------------------------
+    * If the open failed
+    *------------------------------------------------------------------------*/
+    if (p_mpm_transport_handle == NULL)
+    {
+        printf("Failed to open /dev/%s\n", devname);
+        return;
+    }
+
+    p_dsp_addr = dsp_addr;
+    p_size     = size;
+}
+
+
+/******************************************************************************
+* shmem_persistent::shmem_persistent
+******************************************************************************/
+/* True when x is a whole multiple of y; only valid when y is a power of two. */
+#define MULTIPLE_OF_POW2(x, y) (!((x) & ((y) - 1)))
+
+// No host mapping exists until configure() succeeds.
+shmem_persistent::shmem_persistent()
+    : p_host_addr(0)
+    , p_xlate_dsp_to_host_offset(0)
+{
+}
+
+/******************************************************************************
+* shmem_persistent::configure
+*   Opens the device through configure_base() and maps the whole region once.
+*   Both dsp_addr and size must be multiples of the page size. On any failure
+*   a message is printed and p_host_addr stays 0, so map() returns 0.
+******************************************************************************/
+void shmem_persistent::configure(DSPDevicePtr64 dsp_addr, uint64_t size)
+{
+    configure_base(dsp_addr, size);
+
+    // configure_base() leaves the handle NULL when the device was not opened
+    if (p_mpm_transport_handle == NULL) return;
+
+    const bool addr_aligned = MULTIPLE_OF_POW2(dsp_addr, p_page_size);
+    if (!addr_aligned)
+    {
+        REPORT("Mapped region addr is not a multiple of page size");
+        return;
+    }
+
+    const bool size_aligned = MULTIPLE_OF_POW2(size, p_page_size);
+    if (!size_aligned)
+    {
+        REPORT("Mapped region size is not a multiple of page size");
+        return;
+    }
+
+    mpm_transport_mmap_t map_cfg;
+    map_cfg.mmap_prot  = (PROT_READ|PROT_WRITE);
+    map_cfg.mmap_flags = MAP_SHARED;
+
+    void *mapped = (void *)mpm_transport_mmap(p_mpm_transport_handle,
+                                              dsp_addr, size, &map_cfg);
+
+    // (void *) -1 is the failure value the original mmap() path compared
+    // against as MAP_FAILED
+    if (mapped == (void *) -1)
+    {
+        REPORT("Failed to mmap");
+        p_host_addr = 0;
+        return;
+    }
+
+    p_host_addr = mapped;
+
+    // Adding a DSP address to this offset yields the matching host address
+    p_xlate_dsp_to_host_offset = (void*)((int64_t)p_host_addr - dsp_addr);
+}
+
+/******************************************************************************
+* shmem_persistent::~shmem_persistent
+*   Releases the mapping made by configure(), if there is one.
+******************************************************************************/
+shmem_persistent::~shmem_persistent()
+{
+    if (p_host_addr == 0) return;
+
+    mpm_transport_munmap(p_mpm_transport_handle, p_host_addr, p_size);
+}
+
+/******************************************************************************
+* shmem_persistent::map
+*   Translates a DSP address inside the configured region to its host address
+*   using the offset computed in configure(). Returns 0 when nothing is mapped
+*   or when [dsp_addr, dsp_addr + size) is not inside the region. is_read is
+*   not used.
+******************************************************************************/
+void *shmem_persistent::map(DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read)
+{
+    if (!p_host_addr) return 0;
+
+    const bool inside_region =
+        dsp_addr >= p_dsp_addr && dsp_addr + size <= p_dsp_addr + p_size;
+
+    if (!inside_region)
+    {
+        REPORT("Attempting to map a region outside a defined area");
+        return 0;
+    }
+
+    return dsp_addr + (char*)p_xlate_dsp_to_host_offset;
+}
+
+/******************************************************************************
+* shmem_persistent::unmap
+*   No-op: all three parameters are ignored. The mapping made in configure()
+*   is released by the destructor, not here.
+******************************************************************************/
+void shmem_persistent::unmap(void* host_addr, uint32_t size, bool is_write)
+{
+ // if (host_addr) msync(host_addr, size, MS_SYNC);
+}
+
+
+
+/******************************************************************************
+* shmem_ondemand::shmem_ondemand
+*   Adds no state of its own; the shmem base constructor does the setup.
+******************************************************************************/
+shmem_ondemand::shmem_ondemand()
+{ }
+
+/******************************************************************************
+* shmem_ondemand::~shmem_ondemand
+*   Nothing to release at this level.
+******************************************************************************/
+shmem_ondemand::~shmem_ondemand()
+{
+}
+
+/******************************************************************************
+* shmem_ondemand::configure
+*   Only opens the device and records the region via configure_base();
+*   no mapping is made here, map() creates one per request.
+******************************************************************************/
+void shmem_ondemand::configure(DSPDevicePtr64 dsp_addr, uint64_t size)
+{
+ configure_base(dsp_addr, size);
+}
+
+
+/******************************************************************************
+* shmem_ondemand::map
+*   Creates a fresh host mapping for [dsp_addr, dsp_addr + size). Both values
+*   must be multiples of the page size and the range must lie inside the
+*   configured region. Returns the host address, or 0 after printing a message
+*   on any failure. is_read is not used.
+******************************************************************************/
+void *shmem_ondemand::map(DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read)
+{
+    // configure_base() leaves the handle NULL when the device could not be
+    // opened; refuse to map rather than hand a NULL handle to mpm_transport.
+    if (p_mpm_transport_handle == NULL)
+    {
+        REPORT("Attempting to map before the device was opened");
+        return 0;
+    }
+
+    if (!MULTIPLE_OF_POW2(dsp_addr, p_page_size))
+    {
+        REPORT("Mapped region addr is not a multiple of page size");
+        return 0;
+    }
+
+    // This branch checks the size, so the message says so (it used to repeat
+    // the "addr" text of the check above).
+    if (!MULTIPLE_OF_POW2(size, p_page_size))
+    {
+        REPORT("Mapped region size is not a multiple of page size");
+        return 0;
+    }
+
+    if (dsp_addr < p_dsp_addr || dsp_addr + size > p_dsp_addr + p_size)
+    {
+        REPORT("Attempting to map a region outside a defined area");
+        return 0;
+    }
+
+    mpm_transport_mmap_t mpm_transport_mmap_cfg;
+    mpm_transport_mmap_cfg.mmap_prot  = (PROT_READ|PROT_WRITE);
+    mpm_transport_mmap_cfg.mmap_flags = MAP_SHARED;
+
+    void * host_addr = mpm_transport_mmap(p_mpm_transport_handle,
+                                          dsp_addr, size,
+                                          &mpm_transport_mmap_cfg);
+
+    // (void *) -1 is the failure value the original mmap() path compared
+    // against as MAP_FAILED
+    if (host_addr == (void *) -1)
+    {
+        REPORT("Failed to mmap");
+        return 0;
+    }
+
+    return host_addr;
+}
+
+/******************************************************************************
+* shmem_ondemand::unmap
+*   Releases a mapping previously returned by map(). host_addr and size must
+*   be the values used for that map() call. is_write is not used.
+*
+*   The original munmap() was commented out when this file moved to
+*   mpm_transport and no replacement was added, so every map() call leaked a
+*   mapping. NOTE(review): confirm no caller keeps using host_addr after
+*   unmap().
+******************************************************************************/
+void shmem_ondemand::unmap(void* host_addr, uint32_t size, bool is_write)
+{
+    if (host_addr == 0 || p_mpm_transport_handle == NULL) return;
+
+    mpm_transport_munmap(p_mpm_transport_handle, host_addr, size);
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::shmem_cmem_persistent
+*   Remembers the CMEM block index; nothing is mapped until configure().
+******************************************************************************/
+shmem_cmem_persistent::shmem_cmem_persistent(int cmem_block)
+    : p_host_addr(0)
+    , p_xlate_dsp_to_host_offset(0)
+    , p_cmem_block(cmem_block)
+{
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::cmem_init
+*   Initializes CMEM, reads the attributes of CMEM blocks 0 and 1 (and 2 when
+*   present) and allocates each of them so that OpenCL manages the memory.
+*
+*   addr1/size1: block 0 (DDR),  addr2/size2: block 1 (MSMC),
+*   addr3/size3: block 2, or 0/0 when there are only two blocks.
+*
+*   Any failure prints a diagnostic and terminates the process.
+*
+* TODO: remove addr3, size3 once uboot is updated, so that we don't
+*       have fragmented CMEM blocks for DDR
+******************************************************************************/
+void shmem_cmem_persistent::cmem_init(DSPDevicePtr64 *addr1, uint64_t *size1,
+                                      DSPDevicePtr   *addr2, uint32_t *size2,
+                                      DSPDevicePtr64 *addr3, uint64_t *size3)
+{
+    /*-------------------------------------------------------------------------
+    * Assume this is the only use of CMEM, so we reset everything
+    *------------------------------------------------------------------------*/
+#if 0
+    const char *cmem_command = "modprobe -r cmemk; modprobe cmemk "
+        "phys_start=0xa2000000 phys_end=0x100000000 pools=1x1577058304 "
+        "phys_start_1=0x0c000000 phys_end_1=0x0c500000 pools_1=1x5242880 "
+        "allowOverlap=1";
+
+    int result = system(cmem_command);
+#endif
+
+    /*-------------------------------------------------------------------------
+    * Example commands, only ever printed as help text in the error paths
+    *------------------------------------------------------------------------*/
+    const char *cmem_command = "For available CMEM DDR block size: ~1.5GB:\n"
+        "modprobe cmemk "
+        "phys_start=0x823000000 phys_end=0x880000000 pools=1x1560281088 "
+        "phys_start_1=0x0C040000 phys_end_1=0x0C500000 "
+        "allowOverlap=1";
+    const char *cmem_command2 = "For available CMEM DDR block size: ~3.5GB:\n"
+        "modprobe cmemk "
+        "phys_start=0x823000000 phys_end=0x900000000 pools=1x3707764736 "
+        "phys_start_1=0x0C040000 phys_end_1=0x0C500000 "
+        "allowOverlap=1";
+    const char *cmem_command3 = "For available CMEM DDR block size: ~7.5GB:\n"
+        "modprobe cmemk "
+        "phys_start=0x823000000 phys_end=0xA00000000 pools=1x8002732032 "
+        "phys_start_1=0x0C040000 phys_end_1=0x0C500000 "
+        "allowOverlap=1";
+
+    /*-------------------------------------------------------------------------
+    * First initialize the CMEM module
+    *------------------------------------------------------------------------*/
+    if (CMEM_init() == -1)
+    {
+        printf("\nThe cmemk kernel module does not appear to be installed.\n\n"
+               "Commands such as the following run as root would "
+               "install cmemk\n"
+               "and allow OpenCL to proceed properly. The actual memory "
+               "address values for\n"
+               "your system may differ.\n\n");
+        printf("%s\n\n", cmem_command);
+        printf("%s\n\n", cmem_command2);
+        printf("%s\n\n", cmem_command3);
+        exit(-1);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Debug to see if cmem init was correct
+    *------------------------------------------------------------------------*/
+    int num_Blocks = 0;
+    CMEM_getNumBlocks(&num_Blocks);
+    if (num_Blocks < 2)
+    {
+        printf("\nOpenCL needs at least two CMEM blocks to operate properly.\n"
+               "One for DDR, the other for MSMC. Example commands:\n");
+        printf("%s\n\n", cmem_command);
+        printf("%s\n\n", cmem_command2);
+        printf("%s\n\n", cmem_command3);
+        exit(-1);
+    }
+
+    CMEM_BlockAttrs pattrs0 = {0, 0};
+    CMEM_BlockAttrs pattrs1 = {0, 0};
+    CMEM_BlockAttrs pattrs2 = {0, 0};
+
+    CMEM_getBlockAttrs(0, &pattrs0);
+    CMEM_getBlockAttrs(1, &pattrs1);
+    if (num_Blocks > 2)
+        CMEM_getBlockAttrs(2, &pattrs2);
+
+    /*-------------------------------------------------------------------------
+    * Return 36-bit addr, and up to 7.5G memory size
+    *------------------------------------------------------------------------*/
+    *addr1 = (DSPDevicePtr64) pattrs0.phys_base;
+    *size1 = (uint64_t) pattrs0.size;
+    // Persistent CMEM should start within 0x8:2200_0000 - 0x8:4000_0000
+    if (*addr1 >= MPAX_USER_MAPPED_DSP_ADDR)
+    {
+        // Every %llx argument below is cast explicitly: the width of the
+        // CMEM and uint64_t types is not guaranteed to be unsigned long long.
+        printf("Unable to allocate OCL persistent CMem from 0x%llx\n",
+               (unsigned long long)pattrs0.phys_base);
+        exit(EXIT_FAILURE);
+    }
+
+    *addr2 = pattrs1.phys_base;
+    *size2 = pattrs1.size;
+    if (*addr2 < MSMC_OCL_START_ADDR || *addr2 >= MSMC_OCL_END_ADDR)
+    {
+        printf("Unable to allocate OCL MSMC memory from 0x%llx\n",
+               (unsigned long long)pattrs1.phys_base);
+        exit(EXIT_FAILURE);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Grab all available CMEM physical address, to be managed by OCL.
+    * Each allocation must come back at the block's base address.
+    *------------------------------------------------------------------------*/
+    DSPDevicePtr64 alloc_dsp_addr = 0;
+    CMEM_AllocParams params = CMEM_DEFAULTPARAMS;
+    params.flags = CMEM_CACHED;
+    params.type  = CMEM_POOL;
+    alloc_dsp_addr = CMEM_allocPoolPhys2(0, 0, &params);
+    if (!alloc_dsp_addr || alloc_dsp_addr != *addr1)
+    {
+        printf("Failed to allocate 0x%llx from CMem 0, allocated=0x%llx\n",
+               (unsigned long long)*size1,
+               (unsigned long long)alloc_dsp_addr);
+        exit(EXIT_FAILURE);
+    }
+
+    params.type = CMEM_HEAP;
+    alloc_dsp_addr = CMEM_allocPhys2(1, *size2, &params);
+    if (!alloc_dsp_addr || alloc_dsp_addr != *addr2)
+    {
+        printf("Failed to allocate 0x%x from CMem 1, allocated=0x%llx\n",
+               *size2, (unsigned long long)alloc_dsp_addr);
+        exit(EXIT_FAILURE);
+    }
+
+    if (num_Blocks > 2)
+    {
+        *addr3 = pattrs2.phys_base;
+        *size3 = pattrs2.size;
+        params.type = CMEM_POOL;
+        alloc_dsp_addr = CMEM_allocPoolPhys2(2, 0, &params);
+        if (!alloc_dsp_addr || alloc_dsp_addr != *addr3)
+        {
+            printf("Failed to allocate 0x%llx from CMem 2, allocated=0x%llx\n",
+                   (unsigned long long)*size3,
+                   (unsigned long long)alloc_dsp_addr);
+            exit(EXIT_FAILURE);
+        }
+    }
+    else
+    {
+        *addr3 = 0;
+        *size3 = 0;
+    }
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::cmem_exit
+*   Finalizes the CMEM module; prints an error and exits if CMEM_exit()
+*   returns -1.
+******************************************************************************/
+void shmem_cmem_persistent::cmem_exit()
+{
+    const int status = CMEM_exit();
+    ERR(status == -1, "Failed to finalize CMEM");
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::configure
+*   Records the region and maps it into the host address space with CMEM_map.
+*   Addresses in [0xA0000000, 0xFFFFFFFF) are first rebased onto
+*   0x820000000 to form the address handed to CMEM. A failed map prints an
+*   error and terminates the process.
+******************************************************************************/
+void shmem_cmem_persistent::configure(DSPDevicePtr64 dsp_addr, uint64_t size)
+{
+    p_dsp_addr = dsp_addr;
+    p_size     = size;
+
+    const bool in_window = (p_dsp_addr >= 0xA0000000 && p_dsp_addr < 0xFFFFFFFF);
+    const DSPDevicePtr64 cmem_addr =
+        in_window ? p_dsp_addr - 0xA0000000 + 0x820000000ULL : p_dsp_addr;
+
+    p_host_addr = CMEM_map(cmem_addr, size);
+    ERR(!p_host_addr,
+        "Cannot map CMEM physical memory into the Host virtual address space.\n"
+        " This is typically due to Linux system memory being near capacity.");
+
+    // Adding a DSP address to this offset yields the matching host address
+    p_xlate_dsp_to_host_offset = (int64_t)p_host_addr - dsp_addr;
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::~shmem_cmem_persistent
+*   Unmaps the host view (if any) and frees the CMEM physical block. Does
+*   nothing when no region was configured (p_dsp_addr == 0).
+******************************************************************************/
+shmem_cmem_persistent::~shmem_cmem_persistent()
+{
+    if (p_dsp_addr == 0) return;
+
+    if (p_host_addr != NULL) CMEM_unmap(p_host_addr, p_size);
+
+    CMEM_AllocParams params = CMEM_DEFAULTPARAMS;
+    params.flags = CMEM_CACHED;
+
+    // Must be the same window test as configure(): with ">" a region that
+    // starts exactly at 0xA0000000 was mapped at the rebased address but freed
+    // at the raw one.
+    DSPDevicePtr64 cmem_addr = p_dsp_addr;
+    if (p_dsp_addr >= 0xA0000000 && p_dsp_addr < 0xFFFFFFFF)
+        cmem_addr = p_dsp_addr - 0xA0000000 + 0x820000000ULL;
+
+    CMEM_freePhys(cmem_addr, &params);
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::map: dsp_addr (phys) -> host_addr (virt)
+*   Translates with the offset computed in configure(). A request made before
+*   a successful configure(), or outside the configured region, is reported
+*   through ERR. When is_read is set, the host cache for the range is
+*   invalidated before the address is returned.
+******************************************************************************/
+void *shmem_cmem_persistent::map(DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read)
+{
+    const bool usable = p_host_addr
+                     && dsp_addr >= p_dsp_addr
+                     && dsp_addr + size <= p_dsp_addr + p_size;
+    if (!usable)
+    {
+        ERR(1, "Attempting to cmem_map a region outside a defined area");
+        return NULL;
+    }
+
+    void *host_addr = dsp_addr + (char*)p_xlate_dsp_to_host_offset;
+
+    if (!is_read) return host_addr;
+
+    CMEM_cacheInv(host_addr, size);
+    return host_addr;
+}
+
+/******************************************************************************
+* shmem_cmem_persistent::unmap: flush host side writes
+*   Writes back the host cache for the range when is_write is set; the mapping
+*   itself is left in place.
+******************************************************************************/
+void shmem_cmem_persistent::unmap(void* host_addr, uint32_t size, bool is_write)
+{
+    if (!host_addr || !is_write) return;
+
+    CMEM_cacheWb(host_addr, size);
+}
+
+
+/******************************************************************************
+* shmem_cmem_ondemand::configure
+*   Only records the region start and size; nothing is opened or mapped here.
+******************************************************************************/
+void shmem_cmem_ondemand::configure(DSPDevicePtr64 dsp_addr, uint64_t size)
+{
+ p_dsp_addr = dsp_addr;
+ p_size = size;
+}
+
+/******************************************************************************
+* shmem_cmem_ondemand::map: dsp_addr (phys) -> host_addr (virt)
+*   Maps the range with CMEM_map on every call; a failed map is reported
+*   through ERR. When is_read is set, the host cache for the range is
+*   invalidated before the address is returned.
+******************************************************************************/
+void *shmem_cmem_ondemand::map(DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read)
+{
+    void *host_addr = CMEM_map(dsp_addr, size);
+    ERR(!host_addr, "Failed to map CMEM address (ondemand)");
+
+    if (!is_read) return host_addr;
+
+    CMEM_cacheInv(host_addr, size);
+    return host_addr;
+}
+
+/******************************************************************************
+* shmem_cmem_ondemand::unmap: flush host side writes, then release the mapping
+*   A null host_addr is ignored.
+******************************************************************************/
+void shmem_cmem_ondemand::unmap(void* host_addr, uint32_t size, bool is_write)
+{
+    if (!host_addr) return;
+
+    if (is_write) CMEM_cacheWb(host_addr, size);
+    CMEM_unmap(host_addr, size);
+}
+
+/******************************************************************************
+* shmem_cmem_ondemand::cmem_malloc: allocate CMEM physical address
+*   64-bit size: could be allocating a buffer, then accessing smaller subbuffers
+*   Allocates cached heap memory from CMEM block 0 and returns its physical
+*   address. Prints a message and terminates the process on failure.
+******************************************************************************/
+DSPDevicePtr64 shmem_cmem_ondemand::cmem_malloc(uint64_t size)
+{
+    CMEM_AllocParams params = CMEM_DEFAULTPARAMS;
+    params.flags = CMEM_CACHED;
+    params.type  = CMEM_HEAP;
+
+    DSPDevicePtr64 addr = CMEM_allocPhys2(0, size, &params);
+    if (!addr)
+    {
+        // uint64_t is not unsigned long long on every ABI; cast to match %llx
+        printf("Failed to allocate space 0x%llx from CMem\n",
+               (unsigned long long)size);
+        exit(EXIT_FAILURE);
+    }
+    return addr;
+}
+
+/******************************************************************************
+* shmem_cmem_ondemand::cmem_free: free allocated CMEM physical address
+*   Uses the same cached-heap parameters that cmem_malloc() allocates with.
+******************************************************************************/
+void shmem_cmem_ondemand::cmem_free(DSPDevicePtr64 addr)
+{
+    CMEM_AllocParams heap_params = CMEM_DEFAULTPARAMS;
+    heap_params.type  = CMEM_HEAP;
+    heap_params.flags = CMEM_CACHED;
+
+    CMEM_freePhys(addr, &heap_params);
+}
+
diff --git a/src/core/dsp/shmem.h b/src/core/dsp/shmem.h
new file mode 100644
index 0000000..03504a0
--- /dev/null
+++ b/src/core/dsp/shmem.h
@@ -0,0 +1,134 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <stdint.h>
+#ifndef _SHMEM_H
+#define _SHMEM_H
+
+extern "C"
+{
+ #include <mpm_transport.h>
+}
+#include "dspmem.h"
+
+/*=============================================================================
+* Abstract class for Shared memory
+*============================================================================*/
+// Base class for a window of DSP memory made visible to the host.
+// configure() sets the window up; map()/unmap() translate a DSP address range
+// to a host pointer and release it again.
+class shmem
+{
+  public:
+    shmem ();
+    virtual ~shmem ();
+
+    // Opens the mpm_transport device for dsp_addr and records the region.
+    virtual void  configure_base(DSPDevicePtr64 dsp_addr, uint64_t size);
+    virtual void  configure     (DSPDevicePtr64 dsp_addr, uint64_t size) = 0;
+    virtual void *map           (DSPDevicePtr64 dsp_addr, uint32_t size,
+                                 bool is_read=false) = 0;
+    virtual void  unmap         (void* host_addr, uint32_t size,
+                                 bool is_write=false) = 0;
+
+    // Defined inline: this accessor was declared but shmem.cpp never
+    // provided a body, so any call would have failed to link.
+    uint32_t       page_size () { return p_page_size; }
+    DSPDevicePtr64 start     () { return p_dsp_addr; }
+    int64_t        size      () { return p_size; }
+
+  protected:
+    DSPDevicePtr64  p_dsp_addr;             // start of the configured region
+    int64_t         p_size;                 // size of the configured region
+    uint32_t        p_page_size;            // sysconf(_SC_PAGE_SIZE) at construction
+    int32_t         p_mmap_fd;              // -1 unless a descriptor is open
+    mpm_transport_h p_mpm_transport_handle; // NULL until configure_base() opens it
+
+};
+
+/*=============================================================================
+* Persistent implementation of shmem
+*============================================================================*/
+// Maps the whole region once in configure(); map() then only translates
+// addresses and unmap() does nothing. The destructor releases the mapping.
+class shmem_persistent : public shmem
+{
+ public:
+ shmem_persistent ();
+ ~shmem_persistent ();
+ void configure(DSPDevicePtr64 dsp_addr, uint64_t size);
+ virtual void *map (DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read=false);
+ virtual void unmap (void* host_addr, uint32_t size, bool is_write=false);
+
+ private:
+ void * p_host_addr; // host address of the mapping, 0 when not mapped
+ void * p_xlate_dsp_to_host_offset; // host address minus DSP address, kept in a pointer
+};
+
+/*=============================================================================
+* On Demand implementation of shmem
+*============================================================================*/
+// configure() only opens the device; each map() call creates its own mapping
+// through mpm_transport.
+class shmem_ondemand : public shmem
+{
+ public:
+ shmem_ondemand ();
+ ~shmem_ondemand ();
+ void configure(DSPDevicePtr64 dsp_addr, uint64_t size);
+ virtual void *map (DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read=false);
+ virtual void unmap (void* host_addr, uint32_t size, bool is_write=false);
+};
+
+/*=============================================================================
+* Persistent implementation of shmem using CMem
+*============================================================================*/
+// CMEM-backed region that is mapped once in configure(). map() translates an
+// address and invalidates the cache for reads; unmap() writes the cache back
+// for writes.
+class shmem_cmem_persistent : public shmem
+{
+ public:
+ shmem_cmem_persistent (int cmem_block);
+ ~shmem_cmem_persistent ();
+ void configure(DSPDevicePtr64 dsp_addr, uint64_t size);
+ virtual void *map (DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read=false);
+ virtual void unmap (void* host_addr, uint32_t size, bool is_write=false);
+
+ // Process-wide CMEM setup and teardown. cmem_init reports block 0 through
+ // addr1/size1, block 1 through addr2/size2 and block 2 (or 0/0) through
+ // addr3/size3.
+ static void cmem_init(DSPDevicePtr64* addr1, uint64_t* size1,
+ DSPDevicePtr* addr2, uint32_t* size2,
+ DSPDevicePtr64* addr3, uint64_t* size3);
+ static void cmem_exit();
+
+ private:
+ void * p_host_addr; // host address returned by CMEM_map in configure()
+ int64_t p_xlate_dsp_to_host_offset; // host address minus DSP address
+ int p_cmem_block; // block index given to the constructor
+};
+
+/*=============================================================================
+* Ondemand implementation of shmem using CMem
+*============================================================================*/
+// CMEM-backed region where every map() call makes a new CMEM mapping and
+// unmap() releases it again.
+class shmem_cmem_ondemand : public shmem
+{
+ public:
+ shmem_cmem_ondemand () {}
+ ~shmem_cmem_ondemand () {}
+ void configure(DSPDevicePtr64 dsp_addr, uint64_t size);
+ virtual void *map (DSPDevicePtr64 dsp_addr, uint32_t size, bool is_read=false);
+ virtual void unmap (void* host_addr, uint32_t size, bool is_write=false);
+
+ // Allocate / free CMEM physical memory (cached heap allocations).
+ static DSPDevicePtr64 cmem_malloc(uint64_t size);
+ static void cmem_free (DSPDevicePtr64 addr);
+};
+
+#endif // _SHMEM_H
diff --git a/src/core/dsp/source_cache.h b/src/core/dsp/source_cache.h
new file mode 100644
index 0000000..66b4400
--- /dev/null
+++ b/src/core/dsp/source_cache.h
@@ -0,0 +1,114 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _source_cache_
+#define _source_cache_
+
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/crc.hpp>
+
+#include <sys/stat.h>
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <stdint.h>
+#include "u_locks_pthread.h"
+#include "database.h"
+
+/*-----------------------------------------------------------------------------
+* Singleton that records program sources in a per-user database file
+* (/tmp/opencl_source_<USER>), one row per source keyed by its CRC-32.
+*----------------------------------------------------------------------------*/
+class source_cache
+{
+  public:
+    /*-------------------------------------------------------------------------
+    * Insert source into the programs table together with its CRC-32.
+    * The text is written as a single-quoted SQL string literal with embedded
+    * quotes doubled, so sources containing quote characters no longer break
+    * (or alter) the statement.
+    * NOTE(review): assumes the Database backend accepts standard SQL string
+    * literals -- confirm against database.h.
+    *------------------------------------------------------------------------*/
+    void remember(std::string source)
+    {
+        uint32_t hash = get_crc(source);
+        std::string query("insert into programs(hash, source) values("
+                          + boost::lexical_cast<std::string>(hash)
+                          + ", "
+                          + sql_quote(source)
+                          + ");");
+
+        p_database.query(query.c_str());
+    }
+
+    /*-------------------------------------------------------------------------
+    * Thread safe instance function for singleton behavior
+    *------------------------------------------------------------------------*/
+    static source_cache* instance ()
+    {
+        static Mutex Cache_instance_mutex;
+        source_cache* tmp = pInstance;
+
+        __sync_synchronize();
+
+        if (tmp == 0)
+        {
+            ScopedLock lck(Cache_instance_mutex);
+
+            tmp = pInstance;
+            if (tmp == 0)
+            {
+                // USER may be unset (e.g. under a service manager); building a
+                // std::string from a null pointer is undefined behavior.
+                const char *user = getenv("USER");
+                std::string db_name("/tmp/opencl_source_");
+                db_name += user ? user : "unknown";
+
+                tmp = new source_cache(db_name);
+                __sync_synchronize();
+                pInstance = tmp;
+            }
+        }
+        return tmp;
+    }
+
+
+  private:
+    static source_cache* pInstance;
+    std::string          p_dbname;
+    Database             p_database;
+
+  private:
+    // Opens the database at db_name and makes sure the programs table exists.
+    source_cache(std::string db_name) : p_dbname(db_name), p_database(db_name.c_str())
+    {
+        p_database.query("create table if not exists "
+                         "programs(hash integer, source string);");
+    }
+
+    // CRC-32 over the bytes of my_string.
+    uint32_t get_crc(std::string& my_string)
+    {
+        boost::crc_32_type result;
+        result.process_bytes(my_string.data(), my_string.length());
+        return result.checksum();
+    }
+
+    // Wrap text in single quotes, doubling every embedded single quote.
+    static std::string sql_quote(const std::string& text)
+    {
+        std::string quoted;
+        quoted.reserve(text.length() + 2);
+        quoted += '\'';
+        for (std::string::size_type i = 0; i < text.length(); ++i)
+        {
+            if (text[i] == '\'') quoted += '\'';
+            quoted += text[i];
+        }
+        quoted += '\'';
+        return quoted;
+    }
+
+    source_cache(const source_cache&);            // copy ctor disallowed
+    source_cache& operator=(const source_cache&); // assignment disallowed
+};
+
+#endif // _source_cache_
+
+
diff --git a/src/core/dsp/u_concurrent_map.h b/src/core/dsp/u_concurrent_map.h
new file mode 100644
index 0000000..014c0b6
--- /dev/null
+++ b/src/core/dsp/u_concurrent_map.h
@@ -0,0 +1,137 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**************************************************************************//**
+*
+* @file u_concurrent_map.h
+* @brief TI implementation class that implements a thread safe map.
+*
+******************************************************************************/
+#ifndef _U_CONCURRENT_MAP_H_
+#define _U_CONCURRENT_MAP_H_
+
+#include <iostream>
+#include <map>
+#include "u_lockable.h"
+
+/**************************************************************************//**
+* @class concurrent_map
+*
+* @brief A thread safe map implementation
+*
+* @details This implementation wraps a standard stl map with some locking
+* capability to make the member functions mutually exclusive
+* regions. It derives from the class Lockable, which defines a type
+* Lock. Declaring a Lock object in a scope makes the remainder of
+* that scope (or the part up to a call to unlock) a mutually
+* exclusive region.
+*
+******************************************************************************/
+template<typename I, typename T>
+class concurrent_map : public Lockable
+{
+public:
+    concurrent_map() : M() {}
+    ~concurrent_map() {}
+
+    /**********************************************************************//**
+    * @brief Place an object in the map.
+    * @details If an item is already stored under @p index it is replaced, and
+    *          the element count does not change.
+    * @param index is the key to store the item under
+    * @param data is the item to store in the map
+    ***************************************************************************/
+    void push(I index, T const data)
+    {
+        Lock lock(this);
+        M[index] = data;
+    }
+
+    /**********************************************************************//**
+    * @brief How many elements are in the map.
+    * @details The count is taken from the underlying map, so it cannot drift
+    *          when the same index is pushed more than once.
+    * @returns The number of elements in the map.
+    ***************************************************************************/
+    int size() const
+    {
+        Lock lock(this);
+        return static_cast<int>(M.size());
+    }
+
+    /**********************************************************************//**
+    * @brief Determine if the map is empty.
+    * @returns true if the map is empty, otherwise false.
+    ***************************************************************************/
+    bool empty() const
+    {
+        Lock lock(this);
+        return M.empty();
+    }
+
+    /**********************************************************************//**
+    * @brief Attempt to remove the item stored under an index.
+    * @param idx is the key to look up
+    * @param popped_value is an output parameter that receives the removed
+    *        item; it is left untouched when nothing is stored under @p idx.
+    * @returns true if a value is popped, otherwise false
+    ***************************************************************************/
+    bool try_pop(I idx, T& popped_value)
+    {
+        Lock lock(this);
+
+        typename std::map<I,T>::iterator it = M.find(idx);
+        if (it == M.end()) return false;
+
+        popped_value = it->second;
+        M.erase(it);
+        return true;
+    }
+
+    /**********************************************************************//**
+    * @brief Print every "index ==> item" pair to std::cout.
+    * @details The lock is held while iterating so that a push or try_pop
+    *          from another thread cannot modify the map mid-traversal.
+    ***************************************************************************/
+    void dump()
+    {
+        Lock lock(this);
+        for (typename std::map<I,T>::const_iterator i = M.begin(); i != M.end(); ++i)
+            std::cout << i->first << " ==> " << i->second << std::endl;
+    }
+
+    /*-------------------------------------------------------------------------
+    * The class's data
+    *------------------------------------------------------------------------*/
+private:
+    std::map<I,T> M;  //!< standard stl map
+
+    /*-------------------------------------------------------------------------
+    * Prevent copy construction and assignment
+    *------------------------------------------------------------------------*/
+private:
+    concurrent_map(const concurrent_map&);
+    concurrent_map& operator=(const concurrent_map&);
+};
+
+#endif //_U_CONCURRENT_MAP_H_
diff --git a/src/core/dsp/u_concurrent_stack.h b/src/core/dsp/u_concurrent_stack.h
new file mode 100644
index 0000000..6e9755b
--- /dev/null
+++ b/src/core/dsp/u_concurrent_stack.h
@@ -0,0 +1,124 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**************************************************************************//**
+*
+* @file u_concurrent_stack.h
+* @brief TI implementation class that implements a thread safe stack.
+*
+******************************************************************************/
+#ifndef _U_CONCURRENT_STACK_H_
+#define _U_CONCURRENT_STACK_H_
+
+#include <iostream>
+#include <stack>
+#include "u_lockable.h"
+
+/**************************************************************************//**
+* @class concurrent_stack
+*
+* @brief A thread safe stack implementation
+*
+* @details This implementation wraps a standard stl stack with some locking
+* capability to make the member functions mutually exclusive
+* regions. It derives from the class Lockable, which defines a type
+* Lock. Declaring a Lock object in a scope makes the remainder of
+* that scope (or the part up to a call to unlock) a mutually
+* exclusive region.
+*
+******************************************************************************/
+template<typename T>
+class concurrent_stack : public Lockable
+{
+public:
+    concurrent_stack() : S(), num_elements(0) {}
+    ~concurrent_stack() {}
+
+    /**********************************************************************//**
+    * @brief Push a copy of an item onto the top of the stack.
+    * @param data is the item to push
+    ***************************************************************************/
+    void push(T const data)
+    {
+        Lock guard(this);
+        S.push(data);
+        ++num_elements;
+    }
+
+    /**********************************************************************//**
+    * @brief Report the current element count.
+    * @returns The number of elements held by the stack.
+    ***************************************************************************/
+    int size() const
+    {
+        Lock guard(this);
+        return num_elements;
+    }
+
+    /**********************************************************************//**
+    * @brief Report whether the stack holds no elements.
+    * @returns true when the element count is zero, otherwise false.
+    ***************************************************************************/
+    bool empty() const
+    {
+        Lock guard(this);
+        const bool nothing_stored = (num_elements == 0);
+        return nothing_stored;
+    }
+
+    /**********************************************************************//**
+    * @brief Remove the top item, if there is one.
+    * @param popped_value receives the removed item; it is not written when
+    *        the stack is empty.
+    * @returns true if an item was removed, false if the stack was empty
+    ***************************************************************************/
+    bool pop(T& popped_value)
+    {
+        Lock guard(this);
+
+        const bool have_item = (num_elements != 0);
+        if (have_item)
+        {
+            popped_value = S.top();
+            S.pop();
+            --num_elements;
+        }
+        return have_item;
+    }
+
+    /*-------------------------------------------------------------------------
+    * Data members
+    *------------------------------------------------------------------------*/
+private:
+    std::stack<T> S;             //!< underlying stl stack
+    int           num_elements;  //!< count of items currently in S
+
+    /*-------------------------------------------------------------------------
+    * Copying and assignment are declared but not defined
+    *------------------------------------------------------------------------*/
+private:
+    concurrent_stack(const concurrent_stack&);
+    concurrent_stack& operator=(const concurrent_stack&);
+};
+
+#endif //_U_CONCURRENT_STACK_H_
diff --git a/src/core/dsp/u_lockable.h b/src/core/dsp/u_lockable.h
new file mode 100644
index 0000000..803197f
--- /dev/null
+++ b/src/core/dsp/u_lockable.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+* The Loki Library
+* Copyright (c) 2001 by Andrei Alexandrescu
+* Copyright (c) 2010-2014, Texas Instruments Incorporated
+*
+* This code accompanies the book:
+* Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
+* Patterns Applied". Copyright (c) 2001. Addison-Wesley.
+* Permission to use, copy, modify, distribute and sell this software for any
+* purpose is hereby granted without fee, provided that the above copyright
+* notice appear in all copies and that both that copyright notice and this
+* permission notice appear in supporting documentation.
+* The author or Addison-Wesley Longman make no representations about the
+* suitability of this software for any purpose. It is provided "as is"
+* without express or implied warranty.
+******************************************************************************/
+
+/**************************************************************************//**
+*
+* @file u_lockable.h
+*
+* @brief Defines a base class that provides a derived class with a Lock type.
+*
+* @version 1.00.00
+*
+* @note The Lockable class is a modified version of the ObjectLevelLockable
+* class from the LOKI library. The copyright from that library is
+* included at the top of this file.
+*
+******************************************************************************/
+#ifndef _U_LOCKABLE_H_
+#define _U_LOCKABLE_H_
+#include "u_locks_pthread.h"
+
+/**************************************************************************//**
+* @brief used as a base class to give your derived class a Lock type.
+* @details Have a class derive from this class and you can lock member
+* functions of your class by defining a lock like this
+* Lock lock(this);
+******************************************************************************/
+class Lockable
+{
+  public:
+    Lockable() : mutex() {}                //!< Default Constructor
+    Lockable(const Lockable&) : mutex() {} //!< Copy Constructor: the copy gets its own new mutex
+    ~Lockable() {}                         //!< Destructor
+
+    /**********************************************************************//**
+    * @brief The Lock type defined by inheriting from Lockable.
+    * @details A Lock locks the host object's mutex when constructed and
+    *          releases it when destroyed, or earlier if unlock() is called.
+    *          The guard remembers whether it still holds the mutex, so an
+    *          explicit unlock() followed by the destructor (or a second
+    *          unlock()) releases the mutex only once.
+    **************************************************************************/
+    class Lock
+    {
+      public:
+
+        /*******************************************************************//**
+        * @brief Constructing a Lock object will lock the parent object's mutex
+        * @param host_ is the object whose mutex is locked; must not be null
+        ***********************************************************************/
+        explicit Lock(const Lockable* host_) : host(*host_), locked(true)
+        { host.mutex.Lock(); }
+
+        /*******************************************************************//**
+        * @brief Destructing a Lock object will unlock the parent object's mutex
+        *        unless unlock() has already released it.
+        ***********************************************************************/
+        ~Lock() { unlock(); }
+
+        /*******************************************************************//**
+        * @brief Unlock the parent object's mutex
+        * @details Does nothing if this guard has already released the mutex.
+        ***********************************************************************/
+        void unlock()
+        {
+            if (locked)
+            {
+                locked = false;
+                host.mutex.Unlock();
+            }
+        }
+
+        /*******************************************************************//**
+        * @brief Return a raw pointer to the parent object's mutex
+        ***********************************************************************/
+        Mutex* raw() { return &host.mutex; }
+
+      private:
+        const Lockable& host;   //!< a reference back to the parent object
+        bool            locked; //!< true while this guard still holds the mutex
+
+      private: // prevent copy construction and assignment
+        Lock(const Lock&);
+        Lock& operator=(const Lock&);
+    };
+
+  protected:
+    mutable Mutex mutex; //!< mutable so that const member functions can lock
+};
+
+/*-----------------------------------------------------------------------------
+* Can be used to turn off locking without changing client code using Lockable
+*----------------------------------------------------------------------------*/
+class Lockable_off
+{
+  public:
+    Lockable_off() {}
+
+    /*-------------------------------------------------------------------------
+    * Lock type whose constructor and unlock() have empty bodies.
+    *------------------------------------------------------------------------*/
+    class Lock
+    {
+      public:
+
+        explicit Lock(const Lockable_off* /* host_ */)
+        {
+        }
+
+        void unlock()
+        {
+        }
+
+      private: // copying and assignment are declared but not defined
+        Lock(const Lock&);
+        Lock& operator=(const Lock&);
+    };
+};
+
+#endif
diff --git a/src/core/dsp/u_locks_pthread.h b/src/core/dsp/u_locks_pthread.h
new file mode 100644
index 0000000..4663a57
--- /dev/null
+++ b/src/core/dsp/u_locks_pthread.h
@@ -0,0 +1,137 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**************************************************************************//**
+*
+* @file u_locks_pthread.h
+*
+* @brief TI implementation classes for mutual exclusion and locking.
+*
+* @ingroup Utilities
+*
+* @version 1.00.00
+*
+******************************************************************************/
+#ifndef _U_LOCKS_PTHREAD_H_
+#define _U_LOCKS_PTHREAD_H_
+
+#include <pthread.h>
+
+/**************************************************************************//**
+* @brief Simple mutex implemented using the pthreads library
+*
+* @details This mutex is simply a wrapper around a pthread mutex. Two regions
+* of code cannot have the mutex locked at the same time.
+*
+******************************************************************************/
+class Mutex
+{
+  public:
+    //! Construct a mutex (initialised with a null attribute pointer)
+    Mutex()
+    {
+        pthread_mutex_init(&native, 0);
+    }
+
+    //! Destruct a mutex
+    ~Mutex()
+    {
+        pthread_mutex_destroy(&native);
+    }
+
+    //! Lock a mutex
+    void Lock()
+    {
+        pthread_mutex_lock(&native);
+    }
+
+    //! Unlock a mutex
+    void Unlock()
+    {
+        pthread_mutex_unlock(&native);
+    }
+
+    //! Return raw ptr to the underlying pthread mutex
+    pthread_mutex_t* raw()
+    {
+        return &native;
+    }
+
+  private:
+    pthread_mutex_t native; //!< The underlying pthread mutex
+
+  private: // copy construction and assignment are declared but not defined
+    Mutex(const Mutex &);
+    Mutex & operator = (const Mutex &);
+};
+
+/**************************************************************************//**
+* @brief Simple condition variable implemented using the pthreads library.
+*
+* @details Condition variables are synchronization primitives that enable
+* threads to wait until a particular condition occurs. Condition
+* variables enable threads to atomically release a lock and sleep.
+* Condition variables support operations that "wake one" or
+* "wake all" waiting threads. After a thread is woken, it
+* re-acquires the lock it released when the thread entered the
+* sleeping state.
+*
+******************************************************************************/
+class CondVar
+{
+  public:
+
+    //! Constructor (initialised with a null attribute pointer)
+    CondVar()
+    {
+        pthread_cond_init(&native, 0);
+    }
+
+    //! Destructor
+    ~CondVar()
+    {
+        pthread_cond_destroy(&native);
+    }
+
+    /**********************************************************************//**
+    * @brief Signal the condition variable (pthread_cond_signal)
+    **************************************************************************/
+    void notify_one()
+    {
+        pthread_cond_signal(&native);
+    }
+
+    /**********************************************************************//**
+    * @brief Broadcast on the condition variable (pthread_cond_broadcast)
+    **************************************************************************/
+    void notify_all()
+    {
+        pthread_cond_broadcast(&native);
+    }
+
+    /**********************************************************************//**
+    * @brief Wait on the condition variable, passing the given mutex's
+    *        underlying pthread mutex to pthread_cond_wait.
+    * @param m is the mutex handed to the wait; must not be null
+    **************************************************************************/
+    void wait(Mutex* m)
+    {
+        pthread_mutex_t* held = m->raw();
+        pthread_cond_wait(&native, held);
+    }
+
+  private:
+    pthread_cond_t native; //!< The underlying pthread condition variable
+
+  private: // copy construction and assignment are declared but not defined
+    CondVar(CondVar&);
+    CondVar& operator=(CondVar&);
+};
+
+/**************************************************************************//**
+* @brief Objects of this type lock the remainder of the enclosing scope.
+*
+* @details Declare one of these in a scope and pass a mutex reference and the
+* mutex will be locked for the remainder of the scope. This is a
+* safer way to lock and unlock a mutex, because the mutex will
+* automatically be unlocked when the scope level is exited. This
+* helps prevent a mutex from being left locked when an exception
+* is thrown or an early function return is forgotten.
+*
+******************************************************************************/
+class ScopedLock
+{
+  public:
+    //! Constructor: locks the given mutex
+    ScopedLock(Mutex &m) : held(m)
+    {
+        held.Lock();
+    }
+
+    //! Destructor: unlocks the mutex given to the constructor
+    ~ScopedLock()
+    {
+        held.Unlock();
+    }
+
+  private:
+    Mutex& held; //!< Reference to the mutex being managed
+
+  private: // copy construction and assignment are declared but not defined
+    ScopedLock(const ScopedLock&);
+    ScopedLock& operator=(const ScopedLock&);
+};
+
+#endif
diff --git a/src/core/dsp/utils.h b/src/core/dsp/utils.h
new file mode 100644
index 0000000..f125ebd
--- /dev/null
+++ b/src/core/dsp/utils.h
@@ -0,0 +1,85 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __UTILS_H
+#define __UTILS_H
+
+/**
+ * \brief Increment a n-component vector given a maximum value
+ *
+ * This function increments a vector in which each element has its own maximum
+ * value: an element that goes past its maximum wraps to zero and the next
+ * element is incremented in turn.
+ *
+ * For example, if \p dims is \c 3, \p vec starts at <tt>{0, 0, 0}</tt> and
+ * \p maxs is <tt>{2, 3, 1}</tt>, repeatedly calling this function with the
+ * same vector will produce the following results (element 0 is incremented
+ * first and carries into the next element when it exceeds its maximum) :
+ *
+ * \code
+ * {1, 0, 0}
+ * {2, 0, 0}
+ * {0, 1, 0}
+ * {1, 1, 0}
+ * {2, 1, 0}
+ * {0, 2, 0}
+ * {1, 2, 0}
+ * {2, 2, 0}
+ * ...
+ * \endcode
+ *
+ * Until \p vec reaches <tt>{2, 3, 1}</tt>.
+ *
+ * \param dims number of elements in the vectors
+ * \param vec vector whose elements will be incremented
+ * \param maxs vector containing a maximum value above which each corresponding
+ * element of \p vec cannot go.
+ * \return false if the increment was ok, true if \p vec was already at its
+ * maximum value and couldn't be further incremented (in which case every
+ * element has been reset to zero).
+ */
+template<typename T>
+bool incVec(unsigned long dims, T *vec, T *maxs)
+{
+    // Stays false when dims is 0 or when some element absorbs the increment;
+    // ends up true only when every element went past its maximum.
+    bool wrapped = false;
+    unsigned int pos = 0;
+
+    while (pos < dims)
+    {
+        vec[pos] += 1;
+        wrapped = (vec[pos] > maxs[pos]);
+
+        // This element took the increment without passing its maximum: done.
+        if (!wrapped)
+            break;
+
+        // Past the maximum: reset it and carry into the next element.
+        vec[pos] = 0;
+        ++pos;
+    }
+
+    return wrapped;
+}
+#endif
diff --git a/src/core/dsp/wga.cpp b/src/core/dsp/wga.cpp
new file mode 100644
index 0000000..8269898
--- /dev/null
+++ b/src/core/dsp/wga.cpp
@@ -0,0 +1,464 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "wga.h"
+#include <iostream>
+#include <llvm/Pass.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/BasicBlock.h>
+#include <llvm/IR/DataLayout.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/InstIterator.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "boost/assign/std/set.hpp"
+#include <stdio.h>
+
+using namespace std;
+using namespace boost::assign;
+
+namespace llvm
+{
+
+/******************************************************************************
+* createTIOpenclWorkGroupAggregationPass
+*   Factory: returns a heap-allocated TIOpenclWorkGroupAggregation pass
+*   configured for the requested mode (pocl or original).
+******************************************************************************/
+Pass *createTIOpenclWorkGroupAggregationPass(bool is_pocl_mode)
+{
+    return new TIOpenclWorkGroupAggregation(is_pocl_mode);
+}
+
+/**************************************************************************
+* Constructor
+*   Records the mode and clears every per-dimension induction variable slot.
+**************************************************************************/
+TIOpenclWorkGroupAggregation::TIOpenclWorkGroupAggregation(bool pocl_mode)
+    : FunctionPass(ID), is_pocl_mode(pocl_mode)
+{
+    unsigned int d = 0;
+    while (d < MAX_DIMENSIONS)
+    {
+        IVPhi[d] = 0;
+        ++d;
+    }
+}
+
+/**************************************************************************
+* Get index variable
+*    1. Original mode, only one loop inserted: return IVPhi[]
+*    2. pocl mode, multiple loops inserted: return a new LoadInst
+*
+* call is a get_local_id(dim) call inside F.
+*
+* Constant dim: the dimension is clamped to MAX_DIMENSIONS-1 (the same
+* clamping findNeededLoopNest applies). Original mode returns the recorded
+* IVPhi[dim]; pocl mode returns a new, not yet inserted, load of the
+* matching _local_id_{x,y,z} global.
+*
+* Variable dim: compares and selects are inserted before call to compute
+*     (dim == 2) ? z : ((dim == 1) ? y : x)
+* Original mode selects between the IVPhi values and returns the final
+* (already inserted) select. pocl mode selects between the addresses of the
+* _local_id_* globals and returns a new, not yet inserted, load through the
+* selected address.
+*
+* In pocl mode the returned instruction is therefore always un-inserted
+* (suitable for ReplaceInstWithInst); in original mode it is always already
+* part of F (suitable for ReplaceInstWithValue).
+**************************************************************************/
+llvm::Instruction* TIOpenclWorkGroupAggregation::get_IV(Function &F,
+                                                        CallInst *call)
+{
+    static const char *const id_globals[MAX_DIMENSIONS] =
+        { "_local_id_x", "_local_id_y", "_local_id_z" };
+
+    Module *M   = F.getParent();
+    Value  *arg = call->getArgOperand(0);
+
+    /*---------------------------------------------------------------------
+    * Constant dimension: resolve directly.
+    *--------------------------------------------------------------------*/
+    if (ConstantInt * constInt = dyn_cast<ConstantInt>(arg))
+    {
+        // Clamp so an out-of-range constant can neither index past IVPhi[]
+        // nor leave the global unselected.
+        unsigned int dim = constInt->getSExtValue();
+        dim = min(MAX_DIMENSIONS-1, dim);
+
+        if (!is_pocl_mode) return IVPhi[dim];
+
+        GlobalVariable *iv = M->getNamedGlobal(id_globals[dim]);
+        assert(iv && "pocl mode expects the _local_id_* globals to exist");
+        return new LoadInst(iv);
+    }
+
+    /*---------------------------------------------------------------------
+    * Variable dimension: pick the operands to select between.
+    *--------------------------------------------------------------------*/
+    llvm::Value *ivx, *ivy, *ivz;
+    if (is_pocl_mode)
+    {
+        ivx = M->getNamedGlobal(id_globals[0]);
+        ivy = M->getNamedGlobal(id_globals[1]);
+        ivz = M->getNamedGlobal(id_globals[2]);
+    }
+    else
+    {
+        ivx = IVPhi[0];
+        ivy = IVPhi[1];
+        ivz = IVPhi[2];
+    }
+
+    // not constant arg: (arg == 2) ? ivz : (arg == 1 ? ivy : ivx)
+    Type  *Int32 = Type::getInt32Ty(F.getContext());
+    Value *one   = ConstantInt::get(Int32, 1);
+    Value *two   = ConstantInt::get(Int32, 2);
+
+    llvm::Value *is_one = new ICmpInst(call, ICmpInst::ICMP_EQ, arg, one);
+    llvm::Value *sel_yx = SelectInst::Create(is_one, ivy, ivx, "", call);
+    llvm::Value *is_two = new ICmpInst(call, ICmpInst::ICMP_EQ, arg, two);
+    Instruction *sel_zyx = SelectInst::Create(is_two, ivz, sel_yx, "", call);
+
+    // In pocl mode the select yields the address of a _local_id_* global;
+    // the id itself is obtained by loading through it.
+    if (is_pocl_mode) return new LoadInst(sel_zyx);
+
+    return sel_zyx;
+}
+
+/**************************************************************************
+* runOnFunction(Function &F)
+*   Original mode: wraps F in one loop per referenced work-item dimension,
+*                  then rewrites the OpenCL work-item query calls.
+*   pocl mode    : rewrites the allocas left by pocl's work-group
+*                  aggregation, then rewrites the work-item query calls.
+*   Returns true if any of these steps modified F.
+**************************************************************************/
+bool TIOpenclWorkGroupAggregation::runOnFunction(Function &F)
+{
+    bool changed = false;
+
+    if (is_pocl_mode)
+    {
+        /*-----------------------------------------------------------------
+        * rewrite the alloca() generated during pocl llvm work-group
+        * aggregation
+        *----------------------------------------------------------------*/
+        changed |= rewrite_allocas(F);
+    }
+    else
+    {
+        /*-----------------------------------------------------------------
+        * Determine how many dimensions are referenced using OpenCL getXXX
+        * functions, and add a loop nest for each dimension that requires
+        * a workitem id. Every added loop changes the CFG of F.
+        *----------------------------------------------------------------*/
+        const unsigned int dims = findNeededLoopNest(F);
+        for (unsigned int i = 0; i < dims; ++i) add_loop(F, i);
+        changed |= (dims > 0);
+    }
+
+    /*---------------------------------------------------------------------
+    * rewrite the OpenCL getXXX dimension query functions to reference the
+    * info packet for the workgroup. Always runs, whatever happened above.
+    *--------------------------------------------------------------------*/
+    changed |= rewrite_ocl_funcs(F);
+
+    return changed;
+}
+
+/******************************************************************************
+* getAnalysisUsage(AnalysisUsage &Info) const
+*   Declares the passes that must have run before this one; the only
+*   requirement registered here is UnifyFunctionExitNodes.
+******************************************************************************/
+void TIOpenclWorkGroupAggregation::getAnalysisUsage(AnalysisUsage &Info) const
+{
+ /*-------------------------------------------------------------------------
+ * This will ensure that all returns go through a single exit node, which
+ * our WGA loop generation algorithm depends on (findExitBlock asserts if
+ * it sees more than one block without successors).
+ *------------------------------------------------------------------------*/
+ Info.addRequired<UnifyFunctionExitNodes>();
+}
+
+/**************************************************************************
+* findNeededLoopNest(Function &F)
+*   Scans F for direct calls to get_local_id / get_global_id and returns
+*   how many loop levels are needed: the highest constant dimension index
+*   seen (clamped to MAX_DIMENSIONS-1) plus one, 0 if there is no such
+*   call, or 3 as soon as a call with a non-constant dimension is found.
+**************************************************************************/
+unsigned int TIOpenclWorkGroupAggregation::findNeededLoopNest(Function &F)
+{
+    unsigned int loopDepth = 0;
+
+    for (inst_iterator it = inst_begin(&F), last = inst_end(&F);
+         it != last; ++it)
+    {
+        CallInst *ci = dyn_cast<CallInst>(&*it);
+        if (!ci) continue;
+
+        // Indirect calls have no known callee; ignore them.
+        if (!ci->getCalledFunction()) continue;
+
+        string calleeName(ci->getCalledFunction()->getName());
+        if (calleeName != "get_local_id" && calleeName != "get_global_id")
+            continue;
+
+        ConstantInt *dimConst = dyn_cast<ConstantInt>(ci->getArgOperand(0));
+
+        /*-----------------------------------------------------------------
+        * A variable dimension argument could name any dimension, so
+        * assume the worst case and ask for all 3 loop levels.
+        *----------------------------------------------------------------*/
+        if (!dimConst) return 3;
+
+        unsigned int dimIdx = dimConst->getSExtValue();
+        dimIdx    = min(MAX_DIMENSIONS-1, dimIdx);
+        loopDepth = max(loopDepth, dimIdx + 1);
+    }
+
+    return loopDepth;
+}
+
+/**************************************************************************
+* createLoadGlobal
+*     Create an aligned 32 bit load from a global address.
+*
+*     The address is element idx of the module-level global
+*     "kernel_config_l2", an array of 64 i32 that is declared in M if it
+*     does not exist yet.
+*
+*     idx    : element index, must be in [0, 64)
+*     M      : module that owns (or will own) kernel_config_l2
+*     before : instruction to insert the load in front of; when null the
+*              load is returned un-inserted
+*     name   : optional name for the load, may be null
+**************************************************************************/
+Instruction* TIOpenclWorkGroupAggregation::createLoadGlobal
+    (int32_t idx, Module* M, Instruction *before, const char *name)
+{
+    const int32_t config_words = 64;
+    assert(idx >= 0 && idx < config_words &&
+           "kernel_config_l2 index out of range");
+
+    // Use the module's own context, like the other IR builders in this
+    // file, so the load's type always matches the values it is combined
+    // with.
+    Type *Int32 = Type::getInt32Ty(M->getContext());
+
+    // Make sure the global is declared, then look it up by name.
+    M->getOrInsertGlobal("kernel_config_l2",
+                         ArrayType::get(Int32, config_words));
+    GlobalVariable* global = M->getNamedGlobal("kernel_config_l2");
+
+    // &kernel_config_l2[0][idx]
+    std::vector<Value*> indices;
+    indices.push_back(ConstantInt::get(Int32, 0));
+    indices.push_back(ConstantInt::get(Int32, idx));
+
+    Constant* gep = ConstantExpr::getInBoundsGetElementPtr (global, indices);
+    LoadInst* ld  = new LoadInst(gep, name, before);
+
+    ld->setAlignment(4);
+    return ld;
+}
+
+/******************************************************************************
+* findDim
+*   Returns the constant value of the call's first argument, or 100 when
+*   that argument is not a compile-time constant.
+******************************************************************************/
+unsigned int TIOpenclWorkGroupAggregation::findDim(class CallInst* call)
+{
+    ConstantInt *dimConst = dyn_cast<ConstantInt>(call->getArgOperand(0));
+
+    if (!dimConst) return 100; // who knows
+
+    return dimConst->getSExtValue();
+}
+
+/**************************************************************************
+* rewrite allocas to _wg_alloca(sizeinbytes)
+*
+* Replaces every AllocaInst in F with a call to _wg_alloca(size) whose i32
+* result is converted (inttoptr) to the alloca's pointer type; size is the
+* element store size multiplied by the alloca's array-size operand.
+*
+* At the first non-PHI instruction of the entry block it then stores
+*     kernel_config_l2[16] + __core_num() * kernel_config_l2[17]
+* into the i32 global _wg_alloca_start (placed in section "far"), and
+* records the accumulated size on F as the attribute string
+* "_wi_alloca_size=<n>".
+*
+* Returns false, leaving F untouched, when F contains no allocas; true
+* otherwise.
+**************************************************************************/
+bool TIOpenclWorkGroupAggregation::rewrite_allocas(Function &F)
+{
+ int wi_alloca_size = 0; // running total reported in the function attribute
+ Module *M = F.getParent();
+ AllocaInst *alloca;
+
+ std::vector<AllocaInst *> allocas; // collected first: the loop below replaces instructions
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I)
+ if ((alloca = dyn_cast<AllocaInst>(&*I)) != NULL)
+ allocas.push_back(alloca);
+ if (allocas.empty()) return false;
+
+ DataLayout dataLayout(M);
+ FunctionType *ft = FunctionType::get // i32 _wg_alloca(i32)
+ (/*Result=*/ IntegerType::get(M->getContext(), 32),
+ /*Params=*/ IntegerType::get(M->getContext(), 32),
+ /*isVarArg=*/ false);
+ Function *wg_alloca = dyn_cast<Function>( // NOTE(review): null if _wg_alloca already exists with another type - confirm this cannot happen
+ M->getOrInsertFunction("_wg_alloca", ft));
+ Type *Int32 = Type::getInt32Ty(M->getContext());
+
+ for (std::vector<AllocaInst *>::iterator I = allocas.begin();
+ I != allocas.end(); ++I)
+ {
+ alloca = *I;
+
+ // get number of elements, element type size, compute total size
+ Value *numElems = alloca->getArraySize();
+ // YUAN TODO: skip regular constant numElems?
+
+ Type *elementType = alloca->getAllocatedType();
+ // getTypeSizeInBits(), what about uchar3 type?
+ uint64_t esBytes = dataLayout.getTypeStoreSize(elementType);
+ Value *esize = ConstantInt::get(Int32, (uint32_t) esBytes);
+ Instruction *alloca_size = BinaryOperator::Create( // esize * numElems, inserted before the alloca
+ Instruction::Mul, esize, numElems, "", alloca);
+ SmallVector<Value *, 4> args;
+ args.push_back(alloca_size);
+
+ // create function call: _wg_alloca(alloca_size)
+ CallInst *f_alloca = CallInst::Create(
+ wg_alloca, ArrayRef<Value *>(args), "", alloca);
+
+ // cast to alloca type
+ Instruction * new_alloca = new IntToPtrInst( // left un-inserted; placed by ReplaceInstWithInst below
+ f_alloca, alloca->getType());
+
+ // replace AllocaInst with new _wg_alloca()
+ ReplaceInstWithInst(alloca, new_alloca);
+
+ // accumulate element type size
+ unsigned align = dataLayout.getPrefTypeAlignment(elementType);
+ wi_alloca_size = (wi_alloca_size + align - 1) & (~(align-1)); // round the total up to this element's preferred alignment
+ wi_alloca_size += esBytes; // NOTE(review): adds one element's store size per alloca; numElems is not factored in - confirm intended
+ }
+
+ // initialize _wg_alloca_start and _wg_alloca_size
+ // _wg_alloca_size = load(packetaddr+offset);
+ // _wg_alloca_start = load(packetaddr+offset) + __core_num() * _wg_alloca_size;
+ Instruction *inspt = F.getEntryBlock().getFirstNonPHI(); // everything below is inserted before this instruction
+ FunctionType *core_num_ft = FunctionType::get // i32 __core_num()
+ (/*Result=*/ IntegerType::get(M->getContext(), 32),
+ /*isVarArg=*/ false);
+ Function *core_num = dyn_cast<Function>(
+ M->getOrInsertFunction("__core_num", core_num_ft));
+ Instruction *f_core_num = CallInst::Create(core_num, "", inspt);
+
+ Instruction *wg_alloca_size = createLoadGlobal(17, M, inspt); // kernel_config_l2[17]
+
+ Instruction *shift = BinaryOperator::Create(Instruction::Mul, f_core_num,
+ wg_alloca_size, "", inspt);
+
+ Instruction *start = createLoadGlobal(16, M, inspt); // kernel_config_l2[16]
+
+ Instruction *core_start = BinaryOperator::Create(
+ Instruction::Add, start, shift, "", inspt);
+ Value *gv = M->getOrInsertGlobal("_wg_alloca_start", Int32);
+ GlobalVariable *wg_gv = M->getNamedGlobal("_wg_alloca_start");
+ wg_gv->setSection(StringRef("far"));
+ Instruction *store = new StoreInst(core_start, gv, inspt); // result pointer is not used afterwards; the store is inserted by the constructor
+
+ // put total orig_wi_size into attributes data in the function
+ char *s_wi_alloca_size = new char[32]; // we have to leak this
+ snprintf(s_wi_alloca_size, 32, "_wi_alloca_size=%d", wi_alloca_size);
+ F.addFnAttr(StringRef(s_wi_alloca_size));
+
+ return true;
+}
+
+/**************************************************************************
+* rewrite_ocl_funcs
+*   Replaces every direct call to get_local_id / get_local_size in F:
+*     get_local_id   -> the value produced by get_IV()
+*     get_local_size -> a load of kernel_config_l2[4 + dim]
+*   Returns true if at least one call was replaced, false otherwise.
+**************************************************************************/
+bool TIOpenclWorkGroupAggregation::rewrite_ocl_funcs(Function &F)
+{
+    Module *M = F.getParent();
+
+    /*---------------------------------------------------------------------
+    * Collect the calls first; replacing while iterating over the
+    * instructions would invalidate the iterator.
+    *--------------------------------------------------------------------*/
+    std::vector<CallInst *> pending;
+    for (inst_iterator it = inst_begin(&F), last = inst_end(&F);
+         it != last; ++it)
+    {
+        CallInst *ci = dyn_cast<CallInst>(&*it);
+        if (ci == NULL || ci->getCalledFunction() == NULL) continue;
+
+        string callee(ci->getCalledFunction()->getName());
+        if (callee == "get_local_id" || callee == "get_local_size")
+            pending.push_back(ci);
+    }
+    if (pending.empty()) return false;
+
+    for (size_t n = 0; n < pending.size(); ++n)
+    {
+        CallInst *ci = pending[n];
+        string callee(ci->getCalledFunction()->getName());
+
+        if (callee == "get_local_size")
+        {
+            // remaining get_local_size() are generated by pocl,
+            // arguments guaranteed to be constants: 0, 1, or 2
+            ReplaceInstWithInst(ci, createLoadGlobal(4+findDim(ci), M));
+            continue;
+        }
+
+        if (callee != "get_local_id") continue;
+
+        if (is_pocl_mode)
+        {
+            // get_IV() hands back a fresh instruction to put in place of
+            // the call.
+            ReplaceInstWithInst(ci, get_IV(F, ci));
+        }
+        else
+        {
+            // get_IV() hands back a value that is already part of F.
+            BasicBlock::iterator pos(ci);
+            ReplaceInstWithValue(ci->getParent()->getInstList(), pos,
+                                 get_IV(F, ci));
+        }
+    }
+    return true;
+}
+
+/**************************************************************************
+* findExitBlock(Function &F)
+*   Locates the block of F whose terminator has no successors (asserting
+*   if there is more than one) and returns a block that holds nothing but
+*   that terminator, splitting it off when necessary.
+**************************************************************************/
+BasicBlock* TIOpenclWorkGroupAggregation::findExitBlock(Function &F)
+{
+    BasicBlock *exitBlock = 0;
+
+    /*---------------------------------------------------------------------
+    * Find the one block with no successors
+    *--------------------------------------------------------------------*/
+    for (Function::iterator bb = F.begin(), last = F.end(); bb != last; ++bb)
+    {
+        if (bb->getTerminator()->getNumSuccessors() != 0) continue;
+
+        if (exitBlock)
+        {
+            assert(false);
+        }
+        else
+        {
+            exitBlock = &(*bb);
+        }
+    }
+
+    /*---------------------------------------------------------------------
+    * Split the return off into its own block, unless it already is the
+    * only instruction there.
+    *--------------------------------------------------------------------*/
+    Instruction *ret = exitBlock->getTerminator();
+
+    if (ret == &exitBlock->front()) return exitBlock;
+
+    return SplitBlock(exitBlock, ret, this);
+}
+
+/**************************************************************************
+* add_loop(Function &F)
+*
+* Wraps the current body of F in one loop over dimension dimIdx:
+*   .entry   : loads the bound kernel_config_l2[4+dimIdx]; branches to
+*              .bodyTop if bound > 0 (signed compare), else to .exit
+*   .bodyTop : begins with an i32 phi (0 from .entry, inc from .bodyEnd)
+*   .bodyEnd : inc = phi + 1; reloads the bound; branches back to .bodyTop
+*              while inc < bound (signed compare), else to .exit
+*   .exit    : the block split off after .bodyEnd
+* The phi is recorded in IVPhi[dimIdx] and also in every IVPhi[] slot
+* above dimIdx (see the TODO at the end of the function).
+**************************************************************************/
+void TIOpenclWorkGroupAggregation::add_loop(Function &F, int dimIdx)
+{
+ LLVMContext &ctx = F.getContext();
+ Type *Int32 = Type::getInt32Ty(ctx);
+ Value *zero = ConstantInt::get(Int32, 0);
+ Value *one = ConstantInt::get(Int32, 1);
+ Module *M = F.getParent();
+
+ BasicBlock* exit = findExitBlock(F);
+ BasicBlock* entry = &(F.getEntryBlock());
+
+ BasicBlock* bodytop = SplitBlock(entry, &entry->front(), this); // split at the first instruction: the old entry contents move to bodytop
+ BasicBlock* bodyend = exit;
+ exit = SplitBlock(bodyend, &exit->front(), this); // same for the exit block: bodyend keeps only a branch to the new exit
+
+ exit->setName(".exit");
+ entry->setName(".entry");
+ bodytop->setName(".bodyTop");
+ bodyend->setName(".bodyEnd");
+
+ /*----------------------------------------------------------------------
+ * Populate the branch around
+ *---------------------------------------------------------------------*/
+ Instruction *branch = entry->getTerminator();
+ Instruction *ld_upper_bnd = createLoadGlobal(4+dimIdx, M, branch); // loop bound: kernel_config_l2[4+dimIdx]
+
+ Instruction *cmp = CmpInst::Create (Instruction::ICmp, CmpInst::ICMP_SGT,
+ ld_upper_bnd, zero, "", branch);
+
+ Instruction *cbr = BranchInst::Create(bodytop, exit, cmp); // skip the body entirely when bound <= 0
+ ReplaceInstWithInst(branch, cbr);
+
+ /*----------------------------------------------------------------------
+ * Add the phi node to the top of the body
+ *---------------------------------------------------------------------*/
+ PHINode *phi = PHINode::Create(Int32, 0, "", &bodytop->front());
+ phi->addIncoming(zero, entry);
+
+ /*----------------------------------------------------------------------
+ * Add the loop control to the bottom of the bodyend
+ *---------------------------------------------------------------------*/
+ branch = bodyend->getTerminator();
+ Instruction *inc = BinaryOperator::Create(Instruction::Add,
+ phi, one, Twine(), branch);
+
+ Instruction *ld_upper_bnd2 = createLoadGlobal(4+dimIdx, M, branch); // bound is loaded again rather than reusing ld_upper_bnd
+ Instruction *cmp2 = CmpInst::Create (Instruction::ICmp, CmpInst::ICMP_SLT,
+ inc, ld_upper_bnd2, "", branch);
+
+ Instruction *cbr2 = BranchInst::Create(bodytop, exit, cmp2); // back edge while inc < bound
+ ReplaceInstWithInst(branch, cbr2);
+
+ phi->addIncoming(inc, bodyend);
+ IVPhi[dimIdx] = phi;
+
+ // YUAN TODO: maybe handled better later
+ if (dimIdx < 1) IVPhi[1] = phi; // higher slots get this phi too until add_loop runs for them
+ if (dimIdx < 2) IVPhi[2] = phi;
+}
+
+char TIOpenclWorkGroupAggregation::ID = 0; // static member handed to FunctionPass(ID) by the constructor
+static RegisterPass<TIOpenclWorkGroupAggregation>
+ X("wga", "Work Group Aggregation", false, false); // registers the pass with argument "wga" and the given description
+
+}
diff --git a/src/core/dsp/wga.h b/src/core/dsp/wga.h
new file mode 100644
index 0000000..8728fea
--- /dev/null
+++ b/src/core/dsp/wga.h
@@ -0,0 +1,72 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __TIOPENCLWORKGROUPAGGREGATIONPASS_H
+#define __TIOPENCLWORKGROUPAGGREGATIONPASS_H
+
+#include <string>
+#include <set>
+#include "boost/tuple/tuple.hpp"
+#include <llvm/Pass.h>
+#include <llvm/IR/Instruction.h>
+
+#define MAX_DIMENSIONS 3u
+
+namespace llvm
+{
+
+class TIOpenclWorkGroupAggregation : public FunctionPass // function pass that rewrites OpenCL work-item queries per work-group (see wga.cpp)
+{
+ public:
+ static char ID; // pass identification, defined in wga.cpp
+
+ TIOpenclWorkGroupAggregation(bool pocl_mode = false); // pocl_mode selects the alloca-rewriting path instead of loop insertion
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &Info) const; // requires UnifyFunctionExitNodes
+
+ private:
+ Instruction* IVPhi[MAX_DIMENSIONS]; // per-dimension loop induction phi, filled in by add_loop()
+ bool is_pocl_mode; // set once by the constructor
+
+ private:
+ Instruction* createLoadGlobal(int32_t idx, Module* m, Instruction *before=0, // load of kernel_config_l2[idx]; un-inserted when before is 0
+ const char *name=0);
+
+ BasicBlock* findExitBlock (Function &F); // block holding only the terminator that has no successors
+ unsigned int findNeededLoopNest(Function &F); // number of loop levels needed by get_local_id/get_global_id calls
+ unsigned int findDim (class CallInst* call); // constant first argument of call, or 100 if not constant
+ bool rewrite_ocl_funcs (Function &F); // replaces get_local_id / get_local_size calls
+ void add_loop (Function &F, int dimIdx); // wraps F's body in a loop for dimension dimIdx
+ Instruction* get_IV(Function &F, CallInst *call); // replacement value for a get_local_id call
+ bool rewrite_allocas(Function &F); // replaces allocas with _wg_alloca() calls
+};
+
+Pass *createTIOpenclWorkGroupAggregationPass(bool is_pocl_mode = false); // factory: returns a newly allocated TIOpenclWorkGroupAggregation
+
+}
+
+#endif // __TIOPENCLWORKGROUPAGGREGATIONPASS_H
diff --git a/src/core/dsp/worker.cpp b/src/core/dsp/worker.cpp
new file mode 100644
index 0000000..79223f0
--- /dev/null
+++ b/src/core/dsp/worker.cpp
@@ -0,0 +1,519 @@
+/******************************************************************************
+ * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "device.h"
+#include "buffer.h"
+#include "kernel.h"
+#include "driver.h"
+
+#include "../commandqueue.h"
+#include "../events.h"
+#include "../memobject.h"
+#include "../kernel.h"
+
+#include <stdlib.h>
+#include <iostream>
+#include <string.h>
+
+#include "u_locks_pthread.h"
+
+using namespace Coal;
+
+#define ERR(status, msg) if (status) { printf("ERROR: %s\n", msg); exit(-1); }
+
+/******************************************************************************
+* handle_event_completion
+*   Reads one completion message from the device, looks up the pending
+*   event with that kernel id, frees its temporary buffers and marks it
+*   Complete (recording the end time first when the owning queue has
+*   profiling enabled). Exits the process if no pending event matches.
+******************************************************************************/
+void handle_event_completion(DSPDevice *device)
+{
+    int k_id = device->mail_from();
+
+    /*-------------------------------------------------------------------------
+    * A negative id is a false completion message (printf traffic, etc.):
+    * nothing to complete.
+    *------------------------------------------------------------------------*/
+    if (k_id < 0) return;
+
+    Event* event;
+    if (!device->get_complete_pending(k_id, event))
+    {
+        std::cout << "Completion status received for kernel Id " << k_id <<
+                     " but no pending event found for that id" << std::endl;
+        exit(-1);
+    }
+
+    /*-------------------------------------------------------------------------
+    * Release the temporary buffers held by the device side of the event
+    *------------------------------------------------------------------------*/
+    KernelEvent    *kernel_event = (KernelEvent *) event;
+    DSPKernelEvent *dsp_event    = (DSPKernelEvent *)kernel_event->deviceData();
+    dsp_event->free_tmp_bufs();
+
+    /*-------------------------------------------------------------------------
+    * Find out whether the owning queue (if any) wants profiling data
+    *------------------------------------------------------------------------*/
+    CommandQueue *queue = 0;
+    event->info(CL_EVENT_COMMAND_QUEUE, sizeof(CommandQueue *), &queue, 0);
+
+    cl_command_queue_properties queue_props = 0;
+    if (queue)
+    {
+        queue->info(CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties),
+                    &queue_props, 0);
+    }
+
+    // The end time must be recorded before the status change: an event
+    // may be released once it is Complete.
+    const bool profiling = (queue_props & CL_QUEUE_PROFILING_ENABLE) != 0;
+    if (profiling) event->updateTiming(Event::End);
+
+    event->setStatus(Event::Complete);
+}
+
+
+/******************************************************************************
+* handle_event_dispatch
+******************************************************************************/
+bool handle_event_dispatch(DSPDevice *device)
+{
+ bool stop = false;
+ cl_int errcode;
+ Event * event;
+
+ event = device->getEvent(stop);
+
+ /*---------------------------------------------------------------------
+ * Ensure we have a good event and we don't have to stop
+ *--------------------------------------------------------------------*/
+ if (stop) return true;
+ if (!event) return false;
+
+ /*---------------------------------------------------------------------
+ * Get info about the event and its command queue
+ *--------------------------------------------------------------------*/
+ Event::Type t = event->type();
+ CommandQueue * queue = 0;
+ cl_command_queue_properties queue_props = 0;
+
+ errcode = CL_SUCCESS;
+
+ event->info(CL_EVENT_COMMAND_QUEUE, sizeof(CommandQueue *), &queue, 0);
+
+ if (queue)
+ queue->info(CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties),
+ &queue_props, 0);
+
+ if (queue_props & CL_QUEUE_PROFILING_ENABLE)
+ event->updateTiming(Event::Start);
+
+ /*---------------------------------------------------------------------
+ * Execute the action
+ *--------------------------------------------------------------------*/
+ switch (t)
+ {
+ case Event::ReadBuffer:
+ case Event::WriteBuffer:
+ {
+ ReadWriteBufferEvent *e = (ReadWriteBufferEvent *)event;
+
+ if (e->buffer()->flags() & CL_MEM_USE_HOST_PTR)
+ {
+ if (t == Event::ReadBuffer)
+ memcpy(e->ptr(), e->buffer()->host_ptr(), e->cb());
+ else memcpy(e->buffer()->host_ptr(), e->ptr(), e->cb());
+ break;
+ }
+
+ DSPBuffer *buf = (DSPBuffer *)e->buffer()->deviceBuffer(device);
+ DSPDevicePtr64 data = (DSPDevicePtr64)buf->data() + e->offset();
+
+ if (t == Event::ReadBuffer)
+ Driver::instance()->read(device->dspID(), data,
+ (uint8_t*)e->ptr(), e->cb());
+
+ else
+ Driver::instance()->write(device->dspID(), data,
+ (uint8_t*)e->ptr(), e->cb());
+
+ break;
+ }
+
+ case Event::CopyBuffer:
+ {
+#ifdef DSPC868X
+ std::cerr << "Event type not yet supported" << std::endl;
+#else
+ CopyBufferEvent *e = (CopyBufferEvent *)event;
+
+ DSPDevicePtr64 src_addr;
+ DSPDevicePtr64 dst_addr;
+
+ void *psrc;
+ void *pdst;
+
+ if (e->source()->flags() & CL_MEM_USE_HOST_PTR)
+ psrc = (char*)e->source()->host_ptr() + e->src_offset();
+ else
+ {
+ DSPBuffer *src = (DSPBuffer*)e->source()->deviceBuffer(device);
+ src_addr = (DSPDevicePtr64)src->data() + e->src_offset();
+ psrc = Driver::instance()->map(src_addr, e->cb(), true);
+ }
+
+ if (e->destination()->flags() & CL_MEM_USE_HOST_PTR)
+ pdst = (char *)e->destination()->host_ptr() + e->dst_offset();
+ else
+ {
+ DSPBuffer *dst = (DSPBuffer*)e->destination()->deviceBuffer(device);
+ dst_addr = (DSPDevicePtr64)dst->data() + e->dst_offset();
+ pdst = Driver::instance()->map(dst_addr, e->cb(), false);
+ }
+
+ memcpy(pdst, psrc, e->cb());
+
+ if (!(e->source()->flags() & CL_MEM_USE_HOST_PTR))
+ Driver::instance()->unmap(psrc, src_addr, e->cb(), false);
+
+ if (!(e->destination()->flags() & CL_MEM_USE_HOST_PTR))
+ Driver::instance()->unmap(pdst, dst_addr, e->cb(), true);
+#endif
+ break;
+ }
+
+ case Event::ReadBufferRect:
+ case Event::WriteBufferRect:
+ {
+ ReadWriteBufferRectEvent *e = (ReadWriteBufferRectEvent *)event;
+
+ // Calculate the start points for each block of memory referenced
+ DSPDevicePtr64 buf_start;
+ uint8_t * host_start;
+
+ if (e->buffer()->flags() & CL_MEM_USE_HOST_PTR)
+ buf_start = (DSPDevicePtr64)e->buffer()->host_ptr();
+ else
+ buf_start = ((DSPBuffer *)e->source()->deviceBuffer(device))
+ ->data();
+
+ buf_start += e->src_origin(2) * e->src_slice_pitch() +
+ e->src_origin(1) * e->src_row_pitch() +
+ e->src_origin(0);
+
+ host_start = (uint8_t *)e->ptr() +
+ e->dst_origin(2) * e->dst_slice_pitch() +
+ e->dst_origin(1) * e->dst_row_pitch() +
+ e->dst_origin(0);
+
+ // Map the device/host buffers to the appopriate src/dst operands
+ // based on the requested operation (read vs write)
+ DSPDevicePtr64 src_start, dst_start;
+
+ size_t src_row_pitch, dst_row_pitch;
+ size_t src_slice_pitch, dst_slice_pitch;
+
+ if (t == Event::ReadBufferRect)
+ {
+ src_start = buf_start;
+ src_row_pitch = e->src_row_pitch();
+ src_slice_pitch = e->src_slice_pitch();
+
+ dst_start = (DSPDevicePtr64) host_start;
+ dst_row_pitch = e->dst_row_pitch();
+ dst_slice_pitch = e->dst_slice_pitch();
+ }
+ else
+ {
+ src_start = (DSPDevicePtr64) host_start;
+ src_row_pitch = e->dst_row_pitch();
+ src_slice_pitch = e->dst_slice_pitch();
+
+ dst_start = buf_start;
+ dst_row_pitch = e->src_row_pitch();
+ dst_slice_pitch = e->src_slice_pitch();
+ }
+
+ // The dimensions of the region to be copied gives us our
+ // loop boundaries for copying
+ cl_ulong xdim = e->region(0);
+ cl_ulong ydim = e->region(1);
+ cl_ulong zdim = e->region(2);
+
+ // Set up the start point
+ DSPDevicePtr64 src_cur_slice = src_start;
+ DSPDevicePtr64 dst_cur_slice = dst_start;
+
+ // The outer loop handles each z-axis slice
+ // For 2-D copy, will only iterate once (zdim=1)
+ for(cl_uint z = 0; z < zdim; z++)
+ {
+ DSPDevicePtr64 src_cur_row = src_cur_slice;
+ DSPDevicePtr64 dst_cur_row = dst_cur_slice;
+
+ // The inner loop handles each row of the current slice
+ for(cl_uint y = 0; y < ydim; y++)
+ {
+ // Copy a row
+ if (e->buffer()->flags() & CL_MEM_USE_HOST_PTR)
+ memcpy((void *)dst_cur_row, (void *)src_cur_row, xdim);
+ else
+ {
+ if (t == Event::ReadBufferRect)
+ Driver::instance()->read(device->dspID(),
+ src_cur_row, (uint8_t *)dst_cur_row, xdim);
+ else
+ Driver::instance()->write(device->dspID(),
+ dst_cur_row, (uint8_t *)src_cur_row, xdim);
+ }
+
+ // Proceed to next row
+ src_cur_row += src_row_pitch;
+ dst_cur_row += dst_row_pitch;
+ }
+
+ // Proceed to next slice
+ src_cur_slice += src_slice_pitch;
+ dst_cur_slice += dst_slice_pitch;
+ }
+ break;
+ }
+
+ case Event::CopyBufferRect:
+ {
+#ifdef DSPC868X
+ std::cerr << "Event type not yet supported" << std::endl;
+#else
+ CopyBufferRectEvent *e = (CopyBufferRectEvent *)event;
+
+ // Calculate the offsets into each buffer
+ size_t src_offset, dst_offset;
+
+ src_offset = e->src_origin(2) * e->src_slice_pitch() +
+ e->src_origin(1) * e->src_row_pitch() +
+ e->src_origin(0);
+
+ dst_offset = e->dst_origin(2) * e->dst_slice_pitch() +
+ e->dst_origin(1) * e->dst_row_pitch() +
+ e->dst_origin(0);
+
+ // Set up start points for the copy. If it is a DSP buffer, we'll
+ // need to map the buffer before copying (done in copy loop below)
+ DSPDevicePtr64 src_start, dst_start;
+
+ if (e->source()->flags() & CL_MEM_USE_HOST_PTR)
+ src_start = (DSPDevicePtr64)e->source()->host_ptr() + src_offset;
+ else
+ {
+ DSPBuffer *src = (DSPBuffer*)e->source()->deviceBuffer(device);
+ src_start = src->data() + src_offset;
+ }
+
+ if (e->destination()->flags() & CL_MEM_USE_HOST_PTR)
+ dst_start = (DSPDevicePtr64)e->destination()->host_ptr() + dst_offset;
+ else
+ {
+ DSPBuffer *dst=(DSPBuffer*)e->destination()->deviceBuffer(device);
+ dst_start = dst->data() + dst_offset;
+ }
+
+ // The dimensions of the region to be copied
+ cl_ulong xdim = e->region(0);
+ cl_ulong ydim = e->region(1);
+ cl_ulong zdim = e->region(2);
+
+ // If we need to map memory we will currently map a slice
+ // at a time. So determine the size of a 2D slice
+ size_t src_slice_size = ydim * e->src_row_pitch()-e->src_origin(0);
+ size_t dst_slice_size = ydim * e->dst_row_pitch()-e->dst_origin(0);
+
+ // Set up the initial copy point
+ DSPDevicePtr64 src_cur_slice = src_start;
+ DSPDevicePtr64 dst_cur_slice = dst_start;
+
+ // The outer loop handles each z-axis slice
+ // For 2-D copy, will only iterate once (zdim=1)
+ for(cl_ulong z = 0; z < zdim; z++)
+ {
+ uint8_t *src_cur_row = (uint8_t *)src_cur_slice;
+ uint8_t *dst_cur_row = (uint8_t *)dst_cur_slice;
+ uint8_t *src_cur_mslice, *dst_cur_mslice;
+
+ // If necessary, memory map a slice of buffer
+ if (!(e->source()->flags() & CL_MEM_USE_HOST_PTR))
+ src_cur_row = src_cur_mslice = (uint8_t *)
+ Driver::instance()->map(src_cur_slice, src_slice_size,true);
+
+ if (!(e->destination()->flags() & CL_MEM_USE_HOST_PTR))
+ dst_cur_row = dst_cur_mslice = (uint8_t *)
+ Driver::instance()->map(dst_cur_slice, dst_slice_size,false);
+
+ // The inner loop handles each row of the current slice
+ for(cl_ulong y = 0; y < ydim; y++)
+ {
+ // Copy current row
+ memcpy(dst_cur_row, src_cur_row, xdim);
+
+ // Proceed to next row
+ src_cur_row += e->src_row_pitch();
+ dst_cur_row += e->dst_row_pitch();
+ }
+
+ // If necessary, unmap the current slice
+ if (!(e->source()->flags() & CL_MEM_USE_HOST_PTR))
+ Driver::instance()->unmap(src_cur_mslice, src_cur_slice,
+ src_slice_size, false);
+
+ if (!(e->destination()->flags() & CL_MEM_USE_HOST_PTR))
+ Driver::instance()->unmap(dst_cur_mslice, dst_cur_slice,
+ dst_slice_size, true);
+
+ // Proceed to next slice
+ src_cur_slice += e->src_slice_pitch();
+ dst_cur_slice += e->dst_slice_pitch();
+ }
+#endif
+ break;
+ }
+
+ case Event::ReadImage:
+ case Event::WriteImage:
+ case Event::CopyImage:
+ case Event::CopyBufferToImage:
+ case Event::CopyImageToBuffer:
+ case Event::MapImage:
+ {
+ std::cerr << "Images are not supported" << std::endl;
+ break;
+ }
+
+ case Event::MapBuffer:
+ {
+#ifdef DSPC868X
+ std::cerr << "Event type not yet supported" << std::endl;
+#endif
+ MapBufferEvent *e = (MapBufferEvent *)event;
+
+ /*-----------------------------------------------------------
+ * for USE_HOST_PTR, the buffer store is already on the host and
+ * map should not be needed.
+ -----------------------------------------------------------*/
+ if (e->buffer()->flags() & CL_MEM_USE_HOST_PTR) break;
+
+ clRetainEvent((cl_event) e);
+ if(! e->buffer()->addMapEvent(e))
+ ERR(1, "MapBuffer: Range conflicts with previous maps");
+ if ((e->flags() & CL_MAP_READ) != 0)
+ {
+ DSPBuffer *buf = (DSPBuffer *)e->buffer()->deviceBuffer(device);
+ DSPDevicePtr64 data = (DSPDevicePtr64)buf->data() + e->offset();
+ Driver::instance()->map(data, e->cb(), true);
+ }
+ break;
+ }
+ case Event::UnmapMemObject:
+ {
+#ifdef DSPC868X
+ std::cerr << "Event type not yet supported" << std::endl;
+#endif
+ UnmapBufferEvent *e = (UnmapBufferEvent *)event;
+
+ /*-----------------------------------------------------------
+ * for USE_HOST_PTR, the buffer store is already on the host and
+ * unmap should not be needed.
+ -----------------------------------------------------------*/
+ if (e->buffer()->flags() & CL_MEM_USE_HOST_PTR) break;
+
+ if (e->buffer()->type() != Coal::MemObject::Buffer &&
+ e->buffer()->type() != Coal::MemObject::SubBuffer)
+ ERR(1, "UnmapMemObject: MapImage/Unmap not support yet");
+ MapBufferEvent *mbe = (MapBufferEvent *)
+ e->buffer()->removeMapEvent(e->mapping());
+ if (mbe == NULL)
+ ERR(1, "UnmapMemObject: host_ptr not from previous maps");
+
+ if ((mbe->flags() & CL_MAP_WRITE) != 0)
+ {
+ DSPBuffer *buf = (DSPBuffer *)e->buffer()->deviceBuffer(device);
+ DSPDevicePtr64 buf_dsp_addr = (DSPDevicePtr64)buf->data();
+ Driver::instance()->unmap(e->mapping(), buf_dsp_addr,
+ mbe->cb(), true);
+ }
+ if (queue) queue->releaseEvent(mbe);
+ break;
+ }
+
+ case Event::NativeKernel:
+ {
+ std::cerr << "Native Kernels not supported on the DSP" << std::endl;
+ break;
+ }
+
+ case Event::NDRangeKernel:
+ case Event::TaskKernel:
+ {
+ KernelEvent *e = (KernelEvent *) event;
+ DSPKernelEvent *ke = (DSPKernelEvent *)e->deviceData();
+
+ errcode = ke->run(t);
+
+ /*-----------------------------------------------------------------
+ * Put the event on a pending completion list and its
+ * completion will be handled asynchronously.
+ *----------------------------------------------------------------*/
+ if (errcode == CL_SUCCESS)
+ {
+ device->push_complete_pending(ke->kernel_id(), e);
+ return false;
+ }
+ break;
+ }
+ default: break;
+ }
+
+ /*---------------------------------------------------------------------
+ * Cleanup
+ *--------------------------------------------------------------------*/
+
+ // an event may be released once it is Complete
+ if (queue_props & CL_QUEUE_PROFILING_ENABLE)
+ event->updateTiming(Event::End);
+ event->setStatus((errcode == CL_SUCCESS) ? Event::Complete :
+ (Event::Status)errcode);
+
+ return false;
+}
+
+/******************************************************************************
+* dsp_worker
+******************************************************************************/
+/**
+ * \brief Polling loop that services one DSP device
+ *
+ * Each iteration first retires kernels whose completion has been signalled,
+ * then dispatches the next available event. The loop ends once a stop has
+ * been requested (by the device, or by the return value of
+ * handle_event_dispatch()) and no kernel completion is still pending.
+ *
+ * \param data the DSPDevice to service, passed as an opaque pointer
+ * \return always NULL
+ * \note NOTE(review): the signature suggests this is used as a thread start
+ *       routine - confirm against the caller.
+ */
+void *dsp_worker(void *data)
+{
+    DSPDevice *device = (DSPDevice *)data;
+
+    while (true)
+    {
+        if (device->any_complete_pending() && device->mail_query())
+            handle_event_completion(device);
+
+        bool stop = device->stop();
+
+        if (!stop && device->availableEvent())
+            stop |= handle_event_dispatch(device);
+
+        // Keep looping after a stop request until outstanding kernels
+        // have been retired.
+        if (stop && !device->any_complete_pending()) break;
+    }
+
+    // Flowing off the end of a non-void function is undefined behaviour.
+    return NULL;
+}
diff --git a/src/core/events.cpp b/src/core/events.cpp
new file mode 100644
index 0000000..629a0c9
--- /dev/null
+++ b/src/core/events.cpp
@@ -0,0 +1,1519 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file events.cpp
+ * \brief Events inheriting \c Coal::Event
+ */
+
+#include "events.h"
+#include "commandqueue.h"
+#include "memobject.h"
+#include "kernel.h"
+#include "deviceinterface.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+using namespace Coal;
+
+/*
+ * Read/Write buffers
+ */
+
+/**
+ * \brief Common validation for every event that operates on a memory object
+ *
+ * Retains \p buffer, then checks in order: the status left by the Event
+ * constructor, that \p buffer is non-null, that it belongs to the queue's
+ * context, that a sub-buffer offset is aligned for the queue's device, and
+ * that the buffer can be allocated on that device. The first failing check
+ * decides the value written to \p errcode_ret.
+ */
+BufferEvent::BufferEvent(CommandQueue *parent,
+                         MemObject *buffer,
+                         cl_uint num_events_in_wait_list,
+                         const Event **event_wait_list,
+                         cl_int *errcode_ret)
+: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_buffer(buffer)
+{
+    cl_int &status = *errcode_ret;
+
+    // The reference is taken unconditionally; the destructor drops it.
+    clRetainMemObject((cl_mem) p_buffer);
+
+    if (status != CL_SUCCESS) return;
+
+    if (buffer == 0)
+    {
+        status = CL_INVALID_MEM_OBJECT;
+        return;
+    }
+
+    // The buffer and the queue must live in the same context
+    Context *queue_context = 0;
+    status = parent->info(CL_QUEUE_CONTEXT, sizeof(Context *),
+                          &queue_context, 0);
+
+    if (status != CL_SUCCESS) return;
+
+    if (queue_context != (Context *)buffer->parent())
+    {
+        status = CL_INVALID_CONTEXT;
+        return;
+    }
+
+    // Sub-buffer alignment and allocation are both per-device
+    DeviceInterface *queue_device = 0;
+    status = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
+                          &queue_device, 0);
+
+    if (status != CL_SUCCESS) return;
+
+    if (!isSubBufferAligned(buffer, queue_device))
+        status = CL_MISALIGNED_SUB_BUFFER_OFFSET;
+    else if (!buffer->allocate(queue_device))
+        status = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+}
+
+/**
+ * \brief Releases the reference on the buffer taken by the constructor
+ */
+BufferEvent::~BufferEvent()
+{
+ clReleaseMemObject((cl_mem) p_buffer);
+}
+
+/**
+ * \brief Memory object this event operates on
+ * \return the pointer given to the constructor
+ */
+MemObject *BufferEvent::buffer() const
+{
+ return p_buffer;
+}
+
+/**
+ * \brief Check that a sub-buffer's offset is aligned for a device
+ *
+ * Objects that are not sub-buffers are always considered aligned. For a
+ * sub-buffer, the offset is tested against the device's
+ * \c CL_DEVICE_MEM_BASE_ADDR_ALIGN value used as a mask (value - 1), which
+ * assumes the value is a power of two.
+ *
+ * \param buffer memory object to check, must not be null
+ * \param device device whose alignment requirement applies
+ * \return true if aligned (or not a sub-buffer), false if misaligned or if
+ *         the device query fails
+ */
+bool BufferEvent::isSubBufferAligned(const MemObject *buffer,
+                                     const DeviceInterface *device)
+{
+    if (buffer->type() != MemObject::SubBuffer)
+        return true;
+
+    // Size the query from the destination variable rather than from the
+    // non-standard 'uint' typedef, so the two can never disagree.
+    cl_uint align = 0;
+    cl_int rs = device->info(CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(align),
+                             &align, 0);
+
+    if (rs != CL_SUCCESS)
+        return false;
+
+    // An alignment of 0 imposes no constraint
+    size_t mask = 0;
+    if (align != 0) mask = align - 1;
+
+    return (((SubBuffer *)buffer)->offset() & mask) == 0;
+}
+
+/**
+ * \brief Shared constructor for buffer reads and writes
+ *
+ * On top of the BufferEvent checks, rejects a null host pointer and any
+ * range [offset, offset + cb) that does not fit in the buffer, both with
+ * \c CL_INVALID_VALUE.
+ *
+ * \param offset start of the range inside \p buffer
+ * \param cb length of the range
+ * \param ptr host memory to read into or write from, must not be null
+ */
+ReadWriteBufferEvent::ReadWriteBufferEvent(CommandQueue *parent,
+                                           MemObject *buffer,
+                                           size_t offset,
+                                           size_t cb,
+                                           void *ptr,
+                                           cl_uint num_events_in_wait_list,
+                                           const Event **event_wait_list,
+                                           cl_int *errcode_ret)
+: BufferEvent(parent, buffer, num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_offset(offset), p_cb(cb), p_ptr(ptr)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // A host pointer is mandatory
+    if (!ptr)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check for out-of-bounds accesses. Written as a subtraction so that a
+    // huge offset or cb cannot wrap around and slip past the check.
+    const size_t buffer_size = buffer->size();
+
+    if (offset > buffer_size || cb > buffer_size - offset)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+}
+
+/**
+ * \brief Offset into the buffer at which the transfer starts
+ * \return the \c offset given to the constructor
+ */
+size_t ReadWriteBufferEvent::offset() const
+{
+ return p_offset;
+}
+
+/**
+ * \brief Length of the transfer
+ * \return the \c cb given to the constructor
+ */
+size_t ReadWriteBufferEvent::cb() const
+{
+ return p_cb;
+}
+
+/**
+ * \brief Host memory used by the transfer
+ * \return the \c ptr given to the constructor
+ */
+void *ReadWriteBufferEvent::ptr() const
+{
+ return p_ptr;
+}
+
+/**
+ * \brief Buffer read event
+ *
+ * Forwards every argument to ReadWriteBufferEvent, which performs all the
+ * validation; nothing is added here.
+ */
+ReadBufferEvent::ReadBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ size_t offset,
+ size_t cb,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret)
+: ReadWriteBufferEvent(parent, buffer, offset, cb, ptr, num_events_in_wait_list,
+ event_wait_list, errcode_ret)
+{}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::ReadBuffer
+ */
+Event::Type ReadBufferEvent::type() const
+{
+ return Event::ReadBuffer;
+}
+
+/**
+ * \brief Buffer write event
+ *
+ * Forwards every argument to ReadWriteBufferEvent, which performs all the
+ * validation; nothing is added here.
+ */
+WriteBufferEvent::WriteBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ size_t offset,
+ size_t cb,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret)
+: ReadWriteBufferEvent(parent, buffer, offset, cb, ptr, num_events_in_wait_list,
+ event_wait_list, errcode_ret)
+{}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::WriteBuffer
+ */
+Event::Type WriteBufferEvent::type() const
+{
+ return Event::WriteBuffer;
+}
+
+/**
+ * \brief Event mapping a range of a buffer into host memory
+ *
+ * On top of the BufferEvent checks, rejects map flags other than
+ * \c CL_MAP_READ / \c CL_MAP_WRITE and any range [offset, offset + cb) that
+ * does not fit in the buffer, both with \c CL_INVALID_VALUE.
+ *
+ * \param offset start of the mapped range inside \p buffer
+ * \param cb length of the mapped range
+ * \param map_flags combination of \c CL_MAP_READ and \c CL_MAP_WRITE
+ */
+MapBufferEvent::MapBufferEvent(CommandQueue *parent,
+                               MemObject *buffer,
+                               size_t offset,
+                               size_t cb,
+                               cl_map_flags map_flags,
+                               cl_uint num_events_in_wait_list,
+                               const Event **event_wait_list,
+                               cl_int *errcode_ret)
+: BufferEvent(parent, buffer, num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_offset(offset), p_cb(cb), p_map_flags(map_flags)
+{
+    // ptr() must return a defined value even before setPtr() is called
+    p_ptr = 0;
+
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // Check flags
+    if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check for out-of-bounds values. Written as a subtraction so that a
+    // huge offset or cb cannot wrap around and slip past the check.
+    const size_t buffer_size = buffer->size();
+
+    if (offset > buffer_size || cb > buffer_size - offset)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::MapBuffer
+ */
+Event::Type MapBufferEvent::type() const
+{
+ return Event::MapBuffer;
+}
+
+/**
+ * \brief Offset of the mapped range inside the buffer
+ * \return the \c offset given to the constructor
+ */
+size_t MapBufferEvent::offset() const
+{
+ return p_offset;
+}
+
+/**
+ * \brief Length of the mapped range
+ * \return the \c cb given to the constructor
+ */
+size_t MapBufferEvent::cb() const
+{
+ return p_cb;
+}
+
+/**
+ * \brief Map flags requested for this mapping
+ * \return the \c map_flags given to the constructor
+ */
+cl_map_flags MapBufferEvent::flags() const
+{
+ return p_map_flags;
+}
+
+/**
+ * \brief Address associated with the mapping
+ * \return the pointer last stored with setPtr()
+ */
+void *MapBufferEvent::ptr() const
+{
+ return p_ptr;
+}
+
+/**
+ * \brief Record the address associated with the mapping
+ * \param ptr value later returned by ptr()
+ */
+void MapBufferEvent::setPtr(void *ptr)
+{
+ p_ptr = ptr;
+}
+
+/**
+ * \brief Event mapping a region of an image into host memory
+ *
+ * On top of the BufferEvent checks, rejects with \c CL_INVALID_VALUE:
+ * map flags other than \c CL_MAP_READ / \c CL_MAP_WRITE, a null \p region,
+ * a region with a zero component, a 2D image mapped with a non-zero z
+ * origin or a depth other than 1, and a region reaching outside the image.
+ *
+ * \param origin first pixel of the region, in pixels; a null pointer is
+ *        treated as (0, 0, 0)
+ * \param region size of the region, in pixels, must not be null
+ * \note origin(0) and region(0) are stored multiplied by the pixel size.
+ */
+MapImageEvent::MapImageEvent(CommandQueue *parent,
+                             Image2D *image,
+                             cl_map_flags map_flags,
+                             const size_t origin[3],
+                             const size_t region[3],
+                             cl_uint num_events_in_wait_list,
+                             const Event **event_wait_list,
+                             cl_int *errcode_ret)
+: BufferEvent (parent, image, num_events_in_wait_list, event_wait_list, errcode_ret)
+{
+    // Give every member exposed through an accessor a defined value, even
+    // when validation fails below. p_map_flags in particular was never
+    // stored, so flags() returned garbage.
+    p_map_flags = map_flags;
+    p_row_pitch = 0;
+    p_slice_pitch = 0;
+    p_ptr = 0;
+
+    for (unsigned int i=0; i<3; ++i)
+    {
+        p_origin[i] = 0;
+        p_region[i] = 0;
+    }
+
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // Check flags
+    if (map_flags & ~(CL_MAP_READ | CL_MAP_WRITE))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // A region is mandatory
+    if (!region)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Copy the vectors (p_origin is already zeroed for a null origin)
+    if (origin)
+        std::memcpy(&p_origin, origin, 3 * sizeof(size_t));
+
+    for (unsigned int i=0; i<3; ++i)
+    {
+        if (!region[i])
+        {
+            *errcode_ret = CL_INVALID_VALUE;
+            return;
+        }
+
+        p_region[i] = region[i];
+    }
+
+    // Convert the x axis from pixels to bytes
+    p_region[0] *= image->pixel_size();
+    p_origin[0] *= image->pixel_size();
+
+    // A 2D image has a single slice. Use the stored copies: 'origin' itself
+    // may be null.
+    if (image->type() == MemObject::Image2D &&
+        (p_origin[2] != 0 || p_region[2] != 1))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check for out-of-bounds
+    if ((p_origin[0] + p_region[0]) > image->row_pitch() ||
+        (p_origin[1] + p_region[1]) * image->row_pitch() > image->slice_pitch() ||
+        (p_origin[2] + p_region[2]) * image->slice_pitch() > image->size())
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::MapImage
+ */
+Event::Type MapImageEvent::type() const
+{
+ return Event::MapImage;
+}
+
+
+/**
+ * \brief Map flags of this mapping
+ * \return the stored map flags
+ */
+cl_map_flags MapImageEvent::flags() const
+{
+ return p_map_flags;
+}
+
+/**
+ * \brief One component of the mapped region's origin
+ * \param index component to return (0, 1 or 2); not range-checked
+ * \note Component 0 has been multiplied by the pixel size in the constructor.
+ */
+size_t MapImageEvent::origin (unsigned int index) const
+{
+ return p_origin[index];
+}
+
+/**
+ * \brief One component of the mapped region's size
+ * \param index component to return (0, 1 or 2); not range-checked
+ * \note Component 0 has been multiplied by the pixel size in the constructor.
+ */
+size_t MapImageEvent::region (unsigned int index) const
+{
+ return p_region[index];
+}
+
+/**
+ * \brief Row pitch of the mapping
+ * \return the value last stored with setRowPitch()
+ */
+size_t MapImageEvent::row_pitch() const
+{
+ return p_row_pitch;
+}
+
+/**
+ * \brief Slice pitch of the mapping
+ * \return the value last stored with setSlicePitch()
+ */
+size_t MapImageEvent::slice_pitch() const
+{
+ return p_slice_pitch;
+}
+
+/**
+ * \brief Address associated with the mapping
+ * \return the pointer last stored with setPtr()
+ */
+void *MapImageEvent::ptr() const
+{
+ return p_ptr;
+}
+
+/**
+ * \brief Record the row pitch of the mapping
+ * \param row_pitch value later returned by row_pitch()
+ */
+void MapImageEvent::setRowPitch (size_t row_pitch)
+{
+ p_row_pitch = row_pitch;
+}
+
+/**
+ * \brief Record the slice pitch of the mapping
+ * \param slice_pitch value later returned by slice_pitch()
+ */
+void MapImageEvent::setSlicePitch (size_t slice_pitch)
+{
+ p_slice_pitch = slice_pitch;
+}
+
+/**
+ * \brief Record the address associated with the mapping
+ * \param ptr value later returned by ptr()
+ */
+void MapImageEvent::setPtr (void *ptr)
+{
+ p_ptr = ptr;
+}
+
+/**
+ * \brief Event releasing a mapping previously obtained on a memory object
+ *
+ * Beyond the BufferEvent checks, only a null \p mapped_addr is rejected
+ * here (\c CL_INVALID_VALUE).
+ *
+ * \param mapped_addr address of the mapping to release
+ */
+UnmapBufferEvent::UnmapBufferEvent(CommandQueue *parent,
+                                   MemObject *buffer,
+                                   void *mapped_addr,
+                                   cl_uint num_events_in_wait_list,
+                                   const Event **event_wait_list,
+                                   cl_int *errcode_ret)
+: BufferEvent(parent, buffer, num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_mapping(mapped_addr)
+{
+    // TODO: Check that p_mapping is ok (will be done in the drivers)
+    const bool base_ok = (*errcode_ret == CL_SUCCESS);
+
+    if (base_ok && mapped_addr == 0)
+        *errcode_ret = CL_INVALID_VALUE;
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::UnmapMemObject
+ */
+Event::Type UnmapBufferEvent::type() const
+{
+ return Event::UnmapMemObject;
+}
+
+/**
+ * \brief Address of the mapping to release
+ * \return the \c mapped_addr given to the constructor
+ */
+void *UnmapBufferEvent::mapping() const
+{
+ return p_mapping;
+}
+
+/**
+ * \brief Event copying a range of one buffer into another
+ *
+ * The source goes through the BufferEvent checks. The destination is
+ * retained, then checked for: non-null (\c CL_INVALID_MEM_OBJECT), both
+ * ranges inside their buffer (\c CL_INVALID_VALUE), no overlap when source
+ * and destination are the same object (\c CL_MEM_COPY_OVERLAP), sub-buffer
+ * alignment (\c CL_MISALIGNED_SUB_BUFFER_OFFSET) and allocation on the
+ * queue's device (\c CL_MEM_OBJECT_ALLOCATION_FAILURE).
+ *
+ * \param src_offset start of the range in \p source
+ * \param dst_offset start of the range in \p destination
+ * \param cb length of the range
+ */
+CopyBufferEvent::CopyBufferEvent(CommandQueue *parent,
+                                 MemObject *source,
+                                 MemObject *destination,
+                                 size_t src_offset,
+                                 size_t dst_offset,
+                                 size_t cb,
+                                 cl_uint num_events_in_wait_list,
+                                 const Event **event_wait_list,
+                                 cl_int *errcode_ret)
+: BufferEvent(parent, source, num_events_in_wait_list, event_wait_list,
+              errcode_ret), p_destination(destination), p_src_offset(src_offset),
+  p_dst_offset(dst_offset), p_cb(cb)
+{
+    // Taken unconditionally; the destructor drops it.
+    clRetainMemObject((cl_mem) p_destination);
+
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    if (!destination)
+    {
+        *errcode_ret = CL_INVALID_MEM_OBJECT;
+        return;
+    }
+
+    // Check for out-of-bounds. Written as subtractions so that a huge
+    // offset or cb cannot wrap around and slip past the check.
+    const size_t src_size = source->size();
+    const size_t dst_size = destination->size();
+
+    if (src_offset > src_size || cb > src_size - src_offset ||
+        dst_offset > dst_size || cb > dst_size - dst_offset)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check for overlap. Two ranges of length cb intersect when each one
+    // starts before the other ends; this also catches identical offsets,
+    // which a pair of strict '<' comparisons misses. The sums cannot wrap:
+    // both were just bounded by the buffer sizes.
+    if (source == destination &&
+        src_offset < dst_offset + cb &&
+        dst_offset < src_offset + cb)
+    {
+        *errcode_ret = CL_MEM_COPY_OVERLAP;
+        return;
+    }
+
+    // Check alignment of destination (source already checked by BufferEvent)
+    DeviceInterface *device = 0;
+    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
+                                &device, 0);
+
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    if (!isSubBufferAligned(destination, device))
+    {
+        *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
+        return;
+    }
+
+    // Allocate the buffer for the device
+    if (!destination->allocate(device))
+    {
+        *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        return;
+    }
+}
+
+/**
+ * \brief Releases the reference on the destination taken by the constructor
+ */
+CopyBufferEvent::~CopyBufferEvent()
+{
+ clReleaseMemObject((cl_mem) p_destination);
+}
+
+/**
+ * \brief Source of the copy
+ * \return the memory object held by the BufferEvent base
+ */
+MemObject *CopyBufferEvent::source() const
+{
+ return buffer();
+}
+
+/**
+ * \brief Destination of the copy
+ * \return the \c destination given to the constructor
+ */
+MemObject *CopyBufferEvent::destination() const
+{
+ return p_destination;
+}
+
+/**
+ * \brief Offset of the copied range in the source
+ * \return the \c src_offset given to the constructor
+ */
+size_t CopyBufferEvent::src_offset() const
+{
+ return p_src_offset;
+}
+
+/**
+ * \brief Offset of the copied range in the destination
+ * \return the \c dst_offset given to the constructor
+ */
+size_t CopyBufferEvent::dst_offset() const
+{
+ return p_dst_offset;
+}
+
+/**
+ * \brief Length of the copied range
+ * \return the \c cb given to the constructor
+ */
+size_t CopyBufferEvent::cb() const
+{
+ return p_cb;
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::CopyBuffer
+ */
+Event::Type CopyBufferEvent::type() const
+{
+ return Event::CopyBuffer;
+}
+
+/*
+ * Native kernel
+ */
+/**
+ * \brief Event running a host function as a kernel
+ *
+ * Validates the argument combination, checks that the queue's device
+ * reports \c CL_EXEC_NATIVE_KERNEL, then takes a private copy of the
+ * \p cb_args bytes at \p args. Every location listed in \p args_mem_loc is
+ * patched, in the copy, with the native global pointer of the matching
+ * entry of \p mem_list.
+ *
+ * \param user_func host function to call, must not be null
+ * \param args argument block handed to \p user_func (copied)
+ * \param cb_args size of \p args in bytes
+ * \param num_mem_objects number of entries in \p mem_list / \p args_mem_loc
+ * \param mem_list memory objects referenced by the argument block
+ * \param args_mem_loc for each memory object, the address inside \p args
+ *        where its pointer has to be stored
+ *
+ * The copy is owned by the event and freed by the destructor, including
+ * when construction stops on an error after the allocation.
+ */
+NativeKernelEvent::NativeKernelEvent(CommandQueue *parent,
+                                     void (*user_func)(void *),
+                                     void *args,
+                                     size_t cb_args,
+                                     cl_uint num_mem_objects,
+                                     const MemObject **mem_list,
+                                     const void **args_mem_loc,
+                                     cl_uint num_events_in_wait_list,
+                                     const Event **event_wait_list,
+                                     cl_int *errcode_ret)
+: Event (parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_user_func((void *)user_func), p_args(0)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // Parameters sanity
+    if (!user_func)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if (!args && (cb_args || num_mem_objects))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if (args && !cb_args)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if (num_mem_objects && (!mem_list || !args_mem_loc))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if (!num_mem_objects && (mem_list || args_mem_loc))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check that the device can execute a native kernel
+    DeviceInterface *device = 0;
+    cl_device_exec_capabilities caps = 0;
+
+    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
+                                &device, 0);
+
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    *errcode_ret = device->info(CL_DEVICE_EXECUTION_CAPABILITIES,
+                                sizeof(cl_device_exec_capabilities), &caps, 0);
+
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    if ((caps & CL_EXEC_NATIVE_KERNEL) == 0)
+    {
+        *errcode_ret = CL_INVALID_OPERATION;
+        return;
+    }
+
+    // Copy the arguments in a new list
+    if (cb_args)
+    {
+        p_args = std::malloc(cb_args);
+
+        if (!p_args)
+        {
+            *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+            return;
+        }
+
+        std::memcpy((void *)p_args, (void *)args, cb_args);
+
+        // Replace memory objects with global pointers
+        const char *args_begin = (const char *)args;
+
+        for (cl_uint i=0; i<num_mem_objects; ++i)
+        {
+            const MemObject *buffer = mem_list[i];
+            const char *loc = (const char *)args_mem_loc[i];
+
+            if (!buffer)
+            {
+                *errcode_ret = CL_INVALID_MEM_OBJECT;
+                return;
+            }
+
+            // The slot must lie entirely inside the caller's argument
+            // block, otherwise the patch below would write outside of the
+            // private copy.
+            if (!loc || cb_args < sizeof(void *) || loc < args_begin ||
+                (size_t)(loc - args_begin) > cb_args - sizeof(void *))
+            {
+                *errcode_ret = CL_INVALID_VALUE;
+                return;
+            }
+
+            // No device-side storage for this object on the queue's device
+            if (!buffer->deviceBuffer(device))
+            {
+                *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+                return;
+            }
+
+            // Relocation: loc points into args, the same offset is patched
+            // in p_args. memcpy is used because nothing guarantees that
+            // the slot is aligned for a pointer store.
+            void *global_ptr =
+                buffer->deviceBuffer(device)->nativeGlobalPointer();
+
+            std::memcpy((char *)p_args + (loc - args_begin), &global_ptr,
+                        sizeof(void *));
+        }
+    }
+}
+
+/**
+ * \brief Frees the private copy of the argument block, if one was allocated
+ */
+NativeKernelEvent::~NativeKernelEvent()
+{
+ if (p_args)
+ std::free((void *)p_args);
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::NativeKernel
+ */
+Event::Type NativeKernelEvent::type() const
+{
+ return Event::NativeKernel;
+}
+
+/**
+ * \brief Host function to run
+ * \return the \c user_func given to the constructor, as an untyped pointer
+ */
+void *NativeKernelEvent::function() const
+{
+ return p_user_func;
+}
+
+/**
+ * \brief Argument block to hand to the host function
+ * \return the event's private copy of the arguments, or null if none was
+ *         allocated
+ */
+void *NativeKernelEvent::args() const
+{
+ return p_args;
+}
+
+/*
+ * Kernel event
+ */
+/**
+ * \brief Event running a kernel over an N-dimensional range
+ *
+ * Retains \p kernel, then validates the launch against the queue's device:
+ * matching contexts, a device-specific kernel being available, all
+ * arguments set, \p work_dim within the device limit, the kernel's
+ * reqd_work_group_size (if any), non-zero global sizes, local sizes that
+ * are non-zero, divide the global sizes and respect the device maxima,
+ * and finally per-argument constraints (sub-buffer alignment, image
+ * dimensions).
+ *
+ * \param work_dim number of dimensions used
+ * \param global_work_offset per-dimension offsets, or null for all zeros
+ * \param global_work_size per-dimension global sizes, must not be null
+ * \param local_work_size per-dimension work-group sizes, or null to let
+ *        the device kernel pick them
+ * \param errcode_ret receives \c CL_SUCCESS or the code of the first
+ *        failing check
+ *
+ * Dimensions from \p work_dim up to the device maximum are filled with
+ * offset 0, global size 1 and local size 1.
+ */
+KernelEvent::KernelEvent(CommandQueue *parent,
+                         Kernel *kernel,
+                         cl_uint work_dim,
+                         const size_t *global_work_offset,
+                         const size_t *global_work_size,
+                         const size_t *local_work_size,
+                         cl_uint num_events_in_wait_list,
+                         const Event **event_wait_list,
+                         cl_int *errcode_ret)
+: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_work_dim(work_dim), p_kernel(kernel)
+{
+    clRetainKernel((cl_kernel) p_kernel);
+
+    // deviceKernel() must return a defined value even if validation fails
+    p_dev_kernel = 0;
+
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // Sanity checks
+    if (!kernel)
+    {
+        *errcode_ret = CL_INVALID_KERNEL;
+        return;
+    }
+
+    // Check that the kernel was built for parent's device.
+    DeviceInterface *device = 0;
+    Context *k_ctx = 0, *q_ctx = 0;
+    size_t max_work_group_size = 0;
+    cl_uint max_dims = 0;
+
+    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
+                                &device, 0);
+
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    *errcode_ret = parent->info(CL_QUEUE_CONTEXT, sizeof(Context *), &q_ctx, 0);
+    *errcode_ret |= kernel->info(CL_KERNEL_CONTEXT, sizeof(Context *), &k_ctx, 0);
+    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
+                                 &max_work_group_size, 0);
+    // max_dims is a cl_uint: announce that size, not sizeof(size_t)
+    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
+                                 &max_dims, 0);
+    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                 max_dims * sizeof(size_t), p_max_work_item_sizes, 0);
+
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    p_dev_kernel = kernel->deviceDependentKernel(device);
+
+    if (!p_dev_kernel)
+    {
+        *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE;
+        return;
+    }
+
+    // Check that contexts match
+    if (k_ctx != q_ctx)
+    {
+        *errcode_ret = CL_INVALID_CONTEXT;
+        return;
+    }
+
+    // Check args
+    if (!kernel->argsSpecified())
+    {
+        *errcode_ret = CL_INVALID_KERNEL_ARGS;
+        return;
+    }
+
+    // Check dimension
+    if (work_dim == 0 || work_dim > max_dims)
+    {
+        *errcode_ret = CL_INVALID_WORK_DIMENSION;
+        return;
+    }
+
+    // Populate work_offset, work_size and local_work_size
+    size_t work_group_size = 1;
+    boost::tuple <uint,uint,uint> reqd_work_group_size(
+        kernel->reqdWorkGroupSize(kernel->deviceDependentModule(device)));
+
+    uint reqd_x = reqd_work_group_size.get<0>();
+    uint reqd_y = reqd_work_group_size.get<1>();
+    uint reqd_z = reqd_work_group_size.get<2>();
+    bool reqd_any = reqd_x > 0 || reqd_y > 0 || reqd_z > 0;
+
+    if (reqd_any)
+    {
+        // if __attribute__((reqd_work_group_size(X, Y, Z))) is set and local size not specified
+        if (!local_work_size)
+        {
+            *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
+            return;
+        }
+
+        // if __attribute__((reqd_work_group_size(X, Y, Z))) doesn't match
+        if ((                local_work_size[0] != reqd_x) ||
+            (work_dim > 1 && local_work_size[1] != reqd_y) ||
+            (work_dim > 2 && local_work_size[2] != reqd_z))
+        {
+            *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
+            return;
+        }
+    }
+
+    cl_uint i;
+    for (i=0; i<work_dim; ++i)
+    {
+        p_global_work_offset[i] = global_work_offset ? global_work_offset[i] : 0;
+
+        // Stop here: carrying on would dereference a null global_work_size
+        // or launch over an empty dimension.
+        if (!global_work_size || !global_work_size[i])
+        {
+            *errcode_ret = CL_INVALID_GLOBAL_WORK_SIZE;
+            return;
+        }
+        p_global_work_size[i] = global_work_size[i];
+
+        if (!local_work_size)
+        {
+            // Guess the best value according to the device
+            p_local_work_size[i] =
+                p_dev_kernel->guessWorkGroupSize(work_dim, i, global_work_size[i]);
+        }
+        else
+        {
+            // A zero local size can never divide the global size, and
+            // testing it first keeps the modulo below well defined.
+            if (local_work_size[i] == 0)
+            {
+                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
+                return;
+            }
+
+            // Check divisibility
+            if ((global_work_size[i] % local_work_size[i]) != 0)
+            {
+                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
+                return;
+            }
+
+            // Not too big ?
+            if (local_work_size[i] > p_max_work_item_sizes[i])
+            {
+                *errcode_ret = CL_INVALID_WORK_ITEM_SIZE;
+                return;
+            }
+
+            p_local_work_size[i] = local_work_size[i];
+            work_group_size *= local_work_size[i];
+        }
+    }
+    // initialize missing dimensions
+    for (; i < max_dims; i++)
+    {
+        p_global_work_offset[i] = 0;
+        p_global_work_size[i] = 1;
+        p_local_work_size[i] = 1;
+    }
+
+    // Check we don't ask too much to the device
+    if (work_group_size > max_work_group_size)
+    {
+        *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
+        return;
+    }
+
+    // Check arguments (buffer alignment, image size, ...)
+    for (unsigned int arg=0; arg<kernel->numArgs(); ++arg)
+    {
+        const Kernel::Arg *a = kernel->arg(arg);
+
+        if (a->kind() == Kernel::Arg::Buffer && a->file() != Kernel::Arg::Local)
+        {
+            const MemObject *buffer = *(const MemObject **)(a->value(0));
+
+            // NOTE(review): OpenCL allows a NULL buffer argument;
+            // presumably it is stored here as a null pointer - confirm
+            // against Kernel::setArg. Skipping it avoids dereferencing
+            // null in isSubBufferAligned().
+            if (buffer && !BufferEvent::isSubBufferAligned(buffer, device))
+            {
+                *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
+                return;
+            }
+        }
+        else if (a->kind() == Kernel::Arg::Image2D)
+        {
+            const Image2D *image = *(const Image2D **)(a->value(0));
+            size_t maxWidth, maxHeight;
+
+            *errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH,
+                                        sizeof(size_t), &maxWidth, 0);
+            *errcode_ret |= device->info(CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+                                         sizeof(size_t), &maxHeight, 0);
+
+            if (*errcode_ret != CL_SUCCESS)
+                return;
+
+            if (image->width() > maxWidth || image->height() > maxHeight)
+            {
+                *errcode_ret = CL_INVALID_IMAGE_SIZE;
+                return;
+            }
+        }
+        else if (a->kind() == Kernel::Arg::Image3D)
+        {
+            const Image3D *image = *(const Image3D **)a->value(0);
+            size_t maxWidth, maxHeight, maxDepth;
+
+            *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH,
+                                        sizeof(size_t), &maxWidth, 0);
+            *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_HEIGHT,
+                                         sizeof(size_t), &maxHeight, 0);
+            *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_DEPTH,
+                                         sizeof(size_t), &maxDepth, 0);
+
+            if (*errcode_ret != CL_SUCCESS)
+                return;
+
+            if (image->width() > maxWidth || image->height() > maxHeight ||
+                image->depth() > maxDepth)
+            {
+                *errcode_ret = CL_INVALID_IMAGE_SIZE;
+                return;
+            }
+        }
+    }
+}
+
+/**
+ * \brief Releases the reference on the kernel taken by the constructor
+ */
+KernelEvent::~KernelEvent()
+{
+ clReleaseKernel((cl_kernel) p_kernel);
+}
+
+/**
+ * \brief Number of dimensions of the launch
+ * \return the \c work_dim given to the constructor
+ */
+cl_uint KernelEvent::work_dim() const
+{
+ return p_work_dim;
+}
+
+/**
+ * \brief Global offset in one dimension
+ * \param dim dimension to query; not range-checked
+ */
+size_t KernelEvent::global_work_offset(cl_uint dim) const
+{
+ return p_global_work_offset[dim];
+}
+
+/**
+ * \brief Global size in one dimension
+ * \param dim dimension to query; not range-checked
+ */
+size_t KernelEvent::global_work_size(cl_uint dim) const
+{
+ return p_global_work_size[dim];
+}
+
+/**
+ * \brief Work-group size in one dimension
+ * \param dim dimension to query; not range-checked
+ */
+size_t KernelEvent::local_work_size(cl_uint dim) const
+{
+ return p_local_work_size[dim];
+}
+
+/**
+ * \brief Kernel to run
+ * \return the \c kernel given to the constructor
+ */
+Kernel *KernelEvent::kernel() const
+{
+ return p_kernel;
+}
+
+/**
+ * \brief Device-specific kernel looked up by the constructor
+ * \return the result of \c Kernel::deviceDependentKernel() for the queue's
+ *         device
+ */
+DeviceKernel *KernelEvent::deviceKernel() const
+{
+ return p_dev_kernel;
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::NDRangeKernel (TaskEvent defines its own type())
+ */
+Event::Type KernelEvent::type() const
+{
+ return Event::NDRangeKernel;
+}
+
+// Work size of 1, passed by TaskEvent as both the global and the local size
+static size_t one = 1;
+
+/**
+ * \brief Event running a kernel as a single work-item
+ *
+ * Built as a KernelEvent with one dimension, no global offset and a global
+ * and local work size of 1; all validation is done by KernelEvent.
+ */
+TaskEvent::TaskEvent(CommandQueue *parent,
+ Kernel *kernel,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret)
+: KernelEvent(parent, kernel, 1, 0, &one, &one, num_events_in_wait_list,
+ event_wait_list, errcode_ret)
+{
+ // TODO: CL_INVALID_WORK_GROUP_SIZE if
+ // __attribute__((reqd_work_group_size(X, Y, Z))) != (1, 1, 1)
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::TaskKernel
+ */
+Event::Type TaskEvent::type() const
+{
+ return Event::TaskKernel;
+}
+
+/*
+ * User event
+ */
+/**
+ * \brief Event controlled by the application rather than by a queue
+ *
+ * Built with no parent command queue and no wait list, directly in the
+ * \c Submitted state.
+ *
+ * \param context context the event belongs to
+ * \param errcode_ret status, forwarded to the Event constructor
+ */
+UserEvent::UserEvent(Context *context, cl_int *errcode_ret)
+: Event(0, Submitted, 0, 0, errcode_ret), p_context(context)
+{}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::User
+ */
+Event::Type UserEvent::type() const
+{
+ return Event::User;
+}
+
+/**
+ * \brief Context of the event
+ * \return the \c context given to the constructor
+ */
+Context *UserEvent::context() const
+{
+ return p_context;
+}
+
+/*
+ * Rectangular buffer events (ReadWriteCopyBufferRectEvent and derived classes)
+ */
+/**
+ * \brief Choose the pitch to use: the caller's, or the minimum if it gave 0
+ *
+ * \param requested pitch given by the caller, 0 meaning "use the minimum"
+ * \param minimum smallest pitch able to hold the region
+ * \param pitch receives the chosen pitch; holds \p minimum on failure
+ * \return false if \p requested is non-zero but smaller than \p minimum
+ */
+static bool rectEventResolvePitch(size_t requested, size_t minimum,
+                                  size_t *pitch)
+{
+    *pitch = minimum;
+
+    if (requested == 0)
+        return true;
+
+    if (requested < minimum)
+        return false;
+
+    *pitch = requested;
+    return true;
+}
+
+/**
+ * \brief Shared constructor of the rectangular read, write and copy events
+ *
+ * Stores the origins and the region, converts their x component to bytes
+ * using \p bytes_per_element, and resolves the four pitches. A null
+ * \p region, a region with a zero component, or a non-zero pitch smaller
+ * than the region it has to hold yields \c CL_INVALID_VALUE.
+ *
+ * \param src_origin origin in the source, null meaning (0, 0, 0)
+ * \param dst_origin origin in the destination, null meaning (0, 0, 0)
+ * \param region size of the rectangle, must not be null
+ * \param src_row_pitch, dst_row_pitch row pitches, 0 meaning region[0]
+ *        (in bytes)
+ * \param src_slice_pitch, dst_slice_pitch slice pitches, 0 meaning
+ *        region[1] times the matching row pitch
+ * \param bytes_per_element factor applied to the x components
+ */
+ReadWriteCopyBufferRectEvent::ReadWriteCopyBufferRectEvent(CommandQueue *parent,
+                                                           MemObject *source,
+                                                           const size_t src_origin[3],
+                                                           const size_t dst_origin[3],
+                                                           const size_t region[3],
+                                                           size_t src_row_pitch,
+                                                           size_t src_slice_pitch,
+                                                           size_t dst_row_pitch,
+                                                           size_t dst_slice_pitch,
+                                                           unsigned int bytes_per_element,
+                                                           cl_uint num_events_in_wait_list,
+                                                           const Event **event_wait_list,
+                                                           cl_int *errcode_ret)
+: BufferEvent (parent, source, num_events_in_wait_list, event_wait_list,
+               errcode_ret)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // A region is mandatory; it is dereferenced below
+    if (!region)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Copy the vectors
+    if (src_origin)
+        std::memcpy(&p_src_origin, src_origin, 3 * sizeof(size_t));
+    else
+        std::memset(&p_src_origin, 0, 3 * sizeof(size_t));
+
+    if (dst_origin)
+        std::memcpy(&p_dst_origin, dst_origin, 3 * sizeof(size_t));
+    else
+        std::memset(&p_dst_origin, 0, 3 * sizeof(size_t));
+
+    for (unsigned int i=0; i<3; ++i)
+    {
+        if (!region[i])
+        {
+            *errcode_ret = CL_INVALID_VALUE;
+            return;
+        }
+
+        p_region[i] = region[i];
+    }
+
+    // Multiply the elements (for images)
+    p_region[0] *= bytes_per_element;
+    p_src_origin[0] *= bytes_per_element;
+    p_dst_origin[0] *= bytes_per_element;
+
+    // Compute the pitches. Order matters: each slice pitch is derived from
+    // the row pitch resolved just before it.
+    if (!rectEventResolvePitch(src_row_pitch, p_region[0],
+                               &p_src_row_pitch) ||
+        !rectEventResolvePitch(src_slice_pitch, p_region[1] * p_src_row_pitch,
+                               &p_src_slice_pitch) ||
+        !rectEventResolvePitch(dst_row_pitch, p_region[0],
+                               &p_dst_row_pitch) ||
+        !rectEventResolvePitch(dst_slice_pitch, p_region[1] * p_dst_row_pitch,
+                               &p_dst_slice_pitch))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+}
+
+/**
+ * \brief One component of the origin in the source
+ * \param index component to return (0, 1 or 2); not range-checked
+ * \note Component 0 has been multiplied by \c bytes_per_element.
+ */
+size_t ReadWriteCopyBufferRectEvent::src_origin(unsigned int index) const
+{
+ return p_src_origin[index];
+}
+
+/**
+ * \brief One component of the origin in the destination
+ * \param index component to return (0, 1 or 2); not range-checked
+ * \note Component 0 has been multiplied by \c bytes_per_element.
+ */
+size_t ReadWriteCopyBufferRectEvent::dst_origin(unsigned int index) const
+{
+ return p_dst_origin[index];
+}
+
+/**
+ * \brief One component of the size of the rectangle
+ * \param index component to return (0, 1 or 2); not range-checked
+ * \note Component 0 has been multiplied by \c bytes_per_element.
+ */
+size_t ReadWriteCopyBufferRectEvent::region(unsigned int index) const
+{
+ return p_region[index];
+}
+
+/**
+ * \brief Row pitch used for the source, as resolved by the constructor
+ */
+size_t ReadWriteCopyBufferRectEvent::src_row_pitch() const
+{
+ return p_src_row_pitch;
+}
+
+/**
+ * \brief Slice pitch used for the source, as resolved by the constructor
+ */
+size_t ReadWriteCopyBufferRectEvent::src_slice_pitch() const
+{
+ return p_src_slice_pitch;
+}
+
+/**
+ * \brief Row pitch used for the destination, as resolved by the constructor
+ */
+size_t ReadWriteCopyBufferRectEvent::dst_row_pitch() const
+{
+ return p_dst_row_pitch;
+}
+
+/**
+ * \brief Slice pitch used for the destination, as resolved by the constructor
+ */
+size_t ReadWriteCopyBufferRectEvent::dst_slice_pitch() const
+{
+ return p_dst_slice_pitch;
+}
+
+/**
+ * \brief Source memory object
+ * \return the memory object held by the BufferEvent base
+ */
+MemObject *ReadWriteCopyBufferRectEvent::source() const
+{
+ return buffer();
+}
+
+/**
+ * \brief Event copying a rectangle of one buffer into another
+ *
+ * After the ReadWriteCopyBufferRectEvent checks, verifies that
+ * \p destination is non-null (\c CL_INVALID_MEM_OBJECT), that both
+ * rectangles fit in their buffer (\c CL_INVALID_VALUE), that they do not
+ * overlap when source and destination are the same object
+ * (\c CL_MEM_COPY_OVERLAP), that the destination is correctly aligned
+ * (\c CL_MISALIGNED_SUB_BUFFER_OFFSET) and that it can be allocated on the
+ * queue's device (\c CL_MEM_OBJECT_ALLOCATION_FAILURE).
+ *
+ * \note NOTE(review): unlike CopyBufferEvent, no reference is taken on
+ *       \p destination here - confirm that its lifetime is guaranteed
+ *       elsewhere.
+ */
+CopyBufferRectEvent::CopyBufferRectEvent(CommandQueue *parent,
+                                         MemObject *source,
+                                         MemObject *destination,
+                                         const size_t src_origin[3],
+                                         const size_t dst_origin[3],
+                                         const size_t region[3],
+                                         size_t src_row_pitch,
+                                         size_t src_slice_pitch,
+                                         size_t dst_row_pitch,
+                                         size_t dst_slice_pitch,
+                                         unsigned int bytes_per_element,
+                                         cl_uint num_events_in_wait_list,
+                                         const Event **event_wait_list,
+                                         cl_int *errcode_ret)
+: ReadWriteCopyBufferRectEvent(parent, source, src_origin, dst_origin, region,
+                               src_row_pitch, src_slice_pitch, dst_row_pitch,
+                               dst_slice_pitch, bytes_per_element,
+                               num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_destination(destination)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    if (!destination)
+    {
+        *errcode_ret = CL_INVALID_MEM_OBJECT;
+        return;
+    }
+
+    // Check for out-of-bounds
+    if ((p_src_origin[0] + p_region[0]) > p_src_row_pitch ||
+        (p_src_origin[1] + p_region[1]) * p_src_row_pitch > p_src_slice_pitch ||
+        (p_src_origin[2] + p_region[2]) * p_src_slice_pitch > source->size())
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if ((p_dst_origin[0] + p_region[0]) > p_dst_row_pitch ||
+        (p_dst_origin[1] + p_region[1]) * p_dst_row_pitch > p_dst_slice_pitch ||
+        (p_dst_origin[2] + p_region[2]) * p_dst_slice_pitch > destination->size())
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check for overlapping
+    if (source == destination)
+    {
+        unsigned char overlapping_dimensions = 0;
+
+        for (unsigned int i=0; i<3; ++i)
+        {
+            // Two extents of the same length intersect when each starts
+            // before the other ends. This form also counts equal origins,
+            // which two strict '<' comparisons miss (e.g. a copy shifted
+            // by one row only). Regions are never 0 here: the base class
+            // rejects them.
+            if (p_src_origin[i] < p_dst_origin[i] + p_region[i] &&
+                p_dst_origin[i] < p_src_origin[i] + p_region[i])
+                overlapping_dimensions++;
+        }
+
+        if (overlapping_dimensions == 3)
+        {
+            // If all the dimensions are overlapping, the region is overlapping
+            // NOTE(review): this compares coordinates only, so it presumes
+            // source and destination pitches are equal - confirm.
+            *errcode_ret = CL_MEM_COPY_OVERLAP;
+            return;
+        }
+    }
+
+    // Check alignment of destination (source already checked by BufferEvent)
+    DeviceInterface *device = 0;
+    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(DeviceInterface *),
+                                &device, 0);
+
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    if (!isSubBufferAligned(destination, device))
+    {
+        *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
+        return;
+    }
+
+    // Allocate the buffer for the device
+    if (!destination->allocate(device))
+    {
+        *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        return;
+    }
+}
+
+/**
+ * \brief Type of the event
+ * \return \c Event::CopyBufferRect
+ */
+Event::Type CopyBufferRectEvent::type() const
+{
+ return Event::CopyBufferRect;
+}
+
+/**
+ * \brief Destination of the copy
+ * \return the \c destination given to the constructor
+ */
+MemObject *CopyBufferRectEvent::destination() const
+{
+ return p_destination;
+}
+
+/**
+ * \brief Shared constructor of the rectangular buffer reads and writes
+ *
+ * The buffer side is passed to the base class as "source" and the host
+ * side as "destination". On top of the base checks, a null \p ptr or a
+ * buffer-side rectangle that does not fit in \p buffer yields
+ * \c CL_INVALID_VALUE.
+ *
+ * \param ptr host memory to read into or write from, must not be null
+ */
+ReadWriteBufferRectEvent::ReadWriteBufferRectEvent(CommandQueue *parent,
+                                                   MemObject *buffer,
+                                                   const size_t buffer_origin[3],
+                                                   const size_t host_origin[3],
+                                                   const size_t region[3],
+                                                   size_t buffer_row_pitch,
+                                                   size_t buffer_slice_pitch,
+                                                   size_t host_row_pitch,
+                                                   size_t host_slice_pitch,
+                                                   void *ptr,
+                                                   unsigned int bytes_per_element,
+                                                   cl_uint num_events_in_wait_list,
+                                                   const Event **event_wait_list,
+                                                   cl_int *errcode_ret)
+: ReadWriteCopyBufferRectEvent(parent, buffer, buffer_origin, host_origin, region,
+                               buffer_row_pitch, buffer_slice_pitch,
+                               host_row_pitch, host_slice_pitch, bytes_per_element,
+                               num_events_in_wait_list, event_wait_list, errcode_ret),
+  p_ptr(ptr)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // A host pointer is mandatory
+    if (ptr == 0)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Buffer-side bounds, checked from the innermost axis outwards; a
+    // later test is only evaluated if the earlier ones passed.
+    bool out_of_bounds = (p_src_origin[0] + p_region[0]) > p_src_row_pitch;
+
+    if (!out_of_bounds)
+        out_of_bounds = (p_src_origin[1] + p_region[1]) * p_src_row_pitch >
+                        p_src_slice_pitch;
+
+    if (!out_of_bounds)
+        out_of_bounds = (p_src_origin[2] + p_region[2]) * p_src_slice_pitch >
+                        buffer->size();
+
+    if (out_of_bounds)
+        *errcode_ret = CL_INVALID_VALUE;
+}
+
+/** Host pointer that was handed to the constructor. */
+void *ReadWriteBufferRectEvent::ptr() const
+{
+    void *host_ptr = p_ptr;
+    return host_ptr;
+}
+
+/**
+ * Rectangular read event on \p buffer.
+ *
+ * Every argument is forwarded to ReadWriteBufferRectEvent, with 1 passed as
+ * its \c bytes_per_element argument. No extra validation is done here.
+ */
+ReadBufferRectEvent::ReadBufferRectEvent(CommandQueue *parent,
+                                         MemObject *buffer,
+                                         const size_t buffer_origin[3],
+                                         const size_t host_origin[3],
+                                         const size_t region[3],
+                                         size_t buffer_row_pitch,
+                                         size_t buffer_slice_pitch,
+                                         size_t host_row_pitch,
+                                         size_t host_slice_pitch,
+                                         void *ptr,
+                                         cl_uint num_events_in_wait_list,
+                                         const Event **event_wait_list,
+                                         cl_int *errcode_ret)
+: ReadWriteBufferRectEvent(parent, buffer, buffer_origin, host_origin, region,
+                           buffer_row_pitch, buffer_slice_pitch,
+                           host_row_pitch, host_slice_pitch, ptr,
+                           1 /* bytes_per_element */,
+                           num_events_in_wait_list, event_wait_list,
+                           errcode_ret)
+{
+    // Nothing to add to what the base constructors do
+}
+
+/** Reports this event as a rectangular buffer read. */
+Event::Type ReadBufferRectEvent::type() const
+{
+    return Event::ReadBufferRect;
+}
+
+/**
+ * Rectangular write event on \p buffer.
+ *
+ * Every argument is forwarded to ReadWriteBufferRectEvent, with 1 passed as
+ * its \c bytes_per_element argument. No extra validation is done here.
+ */
+WriteBufferRectEvent::WriteBufferRectEvent(CommandQueue *parent,
+                                           MemObject *buffer,
+                                           const size_t buffer_origin[3],
+                                           const size_t host_origin[3],
+                                           const size_t region[3],
+                                           size_t buffer_row_pitch,
+                                           size_t buffer_slice_pitch,
+                                           size_t host_row_pitch,
+                                           size_t host_slice_pitch,
+                                           void *ptr,
+                                           cl_uint num_events_in_wait_list,
+                                           const Event **event_wait_list,
+                                           cl_int *errcode_ret)
+: ReadWriteBufferRectEvent(parent, buffer, buffer_origin, host_origin, region,
+                           buffer_row_pitch, buffer_slice_pitch,
+                           host_row_pitch, host_slice_pitch, ptr,
+                           1 /* bytes_per_element */,
+                           num_events_in_wait_list, event_wait_list,
+                           errcode_ret)
+{
+    // Nothing to add to what the base constructors do
+}
+
+/** Reports this event as a rectangular buffer write. */
+Event::Type WriteBufferRectEvent::type() const
+{
+    return Event::WriteBufferRect;
+}
+
+/**
+ * Builds a read or write event on \p image.
+ *
+ * The image-side pitches and the element size are taken from \p image, while
+ * \p row_pitch and \p slice_pitch are forwarded as the host-side pitches. The
+ * host-side origin is passed to ReadWriteBufferRectEvent as a null pointer.
+ *
+ * When \p image is a 2D image, \p origin[2] must be 0 and \p region[2] must
+ * be 1, otherwise CL_INVALID_VALUE is reported through \p errcode_ret.
+ */
+ReadWriteImageEvent::ReadWriteImageEvent(CommandQueue *parent,
+                                         Image2D *image,
+                                         const size_t origin[3],
+                                         const size_t region[3],
+                                         size_t row_pitch,
+                                         size_t slice_pitch,
+                                         void *ptr,
+                                         cl_uint num_events_in_wait_list,
+                                         const Event **event_wait_list,
+                                         cl_int *errcode_ret)
+: ReadWriteBufferRectEvent(parent, image, origin, 0, region,
+                           image->row_pitch(), image->slice_pitch(),
+                           row_pitch, slice_pitch, ptr,
+                           image->pixel_size(),
+                           num_events_in_wait_list, event_wait_list,
+                           errcode_ret)
+{
+    // Keep the error reported by the base constructors, if any
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    // The third dimension is only constrained for 2D images
+    if (image->type() != MemObject::Image2D)
+        return;
+
+    if (origin[2] == 0 && region[2] == 1)
+        return;
+
+    *errcode_ret = CL_INVALID_VALUE;
+}
+
+/** Image read event; all the arguments go unchanged to ReadWriteImageEvent. */
+ReadImageEvent::ReadImageEvent(CommandQueue *parent,
+                               Image2D *image,
+                               const size_t origin[3],
+                               const size_t region[3],
+                               size_t row_pitch,
+                               size_t slice_pitch,
+                               void *ptr,
+                               cl_uint num_events_in_wait_list,
+                               const Event **event_wait_list,
+                               cl_int *errcode_ret)
+: ReadWriteImageEvent(parent, image, origin, region,
+                      row_pitch, slice_pitch, ptr,
+                      num_events_in_wait_list, event_wait_list, errcode_ret)
+{
+}
+
+/** Reports this event as an image read. */
+Event::Type ReadImageEvent::type() const
+{
+    const Event::Type kind = Event::ReadImage;
+    return kind;
+}
+
+/** Image write event; all the arguments go unchanged to ReadWriteImageEvent. */
+WriteImageEvent::WriteImageEvent(CommandQueue *parent,
+                                 Image2D *image,
+                                 const size_t origin[3],
+                                 const size_t region[3],
+                                 size_t row_pitch,
+                                 size_t slice_pitch,
+                                 void *ptr,
+                                 cl_uint num_events_in_wait_list,
+                                 const Event **event_wait_list,
+                                 cl_int *errcode_ret)
+: ReadWriteImageEvent(parent, image, origin, region,
+                      row_pitch, slice_pitch, ptr,
+                      num_events_in_wait_list, event_wait_list, errcode_ret)
+{
+}
+
+/** Reports this event as an image write. */
+Event::Type WriteImageEvent::type() const
+{
+    const Event::Type kind = Event::WriteImage;
+    return kind;
+}
+
+/**
+ * Two image formats differ as soon as their channel data type or their
+ * channel order differs.
+ */
+static bool operator!=(const cl_image_format &a, const cl_image_format &b)
+{
+    if (a.image_channel_data_type != b.image_channel_data_type)
+        return true;
+
+    return a.image_channel_order != b.image_channel_order;
+}
+
+/**
+ * Builds an image-to-image copy event.
+ *
+ * Pitches are read from the two images and the element size from \p source,
+ * then everything is forwarded to CopyBufferRectEvent. If that succeeded,
+ * the following is reported through \p errcode_ret:
+ *  - CL_INVALID_VALUE when one of the images is 2D while its third origin
+ *    coordinate is not 0 or \p region[2] is not 1;
+ *  - CL_IMAGE_FORMAT_MISMATCH when the formats of the two images differ.
+ */
+CopyImageEvent::CopyImageEvent(CommandQueue *parent,
+                               Image2D *source,
+                               Image2D *destination,
+                               const size_t src_origin[3],
+                               const size_t dst_origin[3],
+                               const size_t region[3],
+                               cl_uint num_events_in_wait_list,
+                               const Event **event_wait_list,
+                               cl_int *errcode_ret)
+: CopyBufferRectEvent(parent, source, destination, src_origin, dst_origin,
+                      region,
+                      source->row_pitch(), source->slice_pitch(),
+                      destination->row_pitch(), destination->slice_pitch(),
+                      source->pixel_size(),
+                      num_events_in_wait_list, event_wait_list, errcode_ret)
+{
+    // Keep the error reported by the base constructors, if any
+    if (*errcode_ret != CL_SUCCESS)
+        return;
+
+    // Third-dimension constraint on the source side
+    if (source->type() == MemObject::Image2D)
+    {
+        if (src_origin[2] != 0 || region[2] != 1)
+        {
+            *errcode_ret = CL_INVALID_VALUE;
+            return;
+        }
+    }
+
+    // Same constraint on the destination side
+    if (destination->type() == MemObject::Image2D)
+    {
+        if (dst_origin[2] != 0 || region[2] != 1)
+        {
+            *errcode_ret = CL_INVALID_VALUE;
+            return;
+        }
+    }
+
+    // Both images must share channel order and channel data type
+    if (source->format() != destination->format())
+        *errcode_ret = CL_IMAGE_FORMAT_MISMATCH;
+}
+
+/** Reports this event as an image-to-image copy. */
+Event::Type CopyImageEvent::type() const
+{
+    const Event::Type kind = Event::CopyImage;
+    return kind;
+}
+
+/**
+ * Builds the event copying a region of the image \p source into the buffer
+ * \p destination, starting at \p dst_offset in that buffer.
+ *
+ * The geometry is forwarded to CopyBufferRectEvent with a null destination
+ * origin and zero destination pitches; \p dst_offset is stored for offset().
+ * If the base constructors succeeded, CL_INVALID_VALUE is reported through
+ * \p errcode_ret when:
+ *  - the copied data does not fit in \p destination after \p dst_offset;
+ *  - \p source is a 2D image and \p src_origin[2] != 0 or \p region[2] != 1.
+ */
+CopyImageToBufferEvent::CopyImageToBufferEvent(CommandQueue *parent,
+                                               Image2D *source,
+                                               MemObject *destination,
+                                               const size_t src_origin[3],
+                                               const size_t region[3],
+                                               size_t dst_offset,
+                                               cl_uint num_events_in_wait_list,
+                                               const Event **event_wait_list,
+                                               cl_int *errcode_ret)
+: CopyBufferRectEvent(parent, source, destination, src_origin, 0, region,
+                      source->row_pitch(), source->slice_pitch(), 0, 0,
+                      source->pixel_size(), num_events_in_wait_list,
+                      event_wait_list, errcode_ret),
+  p_offset(dst_offset)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // Check for buffer overflow. The test is written without computing
+    // dst_offset + dst_cb: that sum wraps around for a huge offset and would
+    // let an out-of-range copy pass the check.
+    const size_t dst_cb = region[2] * region[1] * region[0] * source->pixel_size();
+    const size_t dst_size = destination->size();
+
+    if (dst_cb > dst_size || dst_offset > dst_size - dst_cb)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check validity: a 2D image only has one slice
+    if (source->type() == MemObject::Image2D &&
+        (src_origin[2] != 0 || region[2] != 1))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+}
+
+/** Destination offset that was given to the constructor as dst_offset. */
+size_t CopyImageToBufferEvent::offset() const
+{
+    const size_t dst_offset = p_offset;
+    return dst_offset;
+}
+
+/** Reports this event as an image-to-buffer copy. */
+Event::Type CopyImageToBufferEvent::type() const
+{
+    const Event::Type kind = Event::CopyImageToBuffer;
+    return kind;
+}
+
+/**
+ * Builds the event copying data from the buffer \p source, starting at
+ * \p src_offset, into a region of the image \p destination.
+ *
+ * The geometry is forwarded to CopyBufferRectEvent with a null source origin
+ * and zero source pitches; \p src_offset is stored for offset(). If the base
+ * constructors succeeded, CL_INVALID_VALUE is reported through
+ * \p errcode_ret when:
+ *  - the data to copy does not fit in \p source after \p src_offset;
+ *  - \p destination is a 2D image and \p dst_origin[2] != 0 or
+ *    \p region[2] != 1.
+ */
+CopyBufferToImageEvent::CopyBufferToImageEvent(CommandQueue *parent,
+                                               MemObject *source,
+                                               Image2D *destination,
+                                               size_t src_offset,
+                                               const size_t dst_origin[3],
+                                               const size_t region[3],
+                                               cl_uint num_events_in_wait_list,
+                                               const Event **event_wait_list,
+                                               cl_int *errcode_ret)
+: CopyBufferRectEvent(parent, source, destination, 0, dst_origin, region, 0, 0,
+                      destination->row_pitch(), destination->slice_pitch(),
+                      destination->pixel_size(), num_events_in_wait_list,
+                      event_wait_list, errcode_ret),
+  p_offset(src_offset)
+{
+    if (*errcode_ret != CL_SUCCESS) return;
+
+    // Check for buffer overflow. The test is written without computing
+    // src_offset + src_cb: that sum wraps around for a huge offset and would
+    // let an out-of-range copy pass the check.
+    const size_t src_cb = region[2] * region[1] * region[0] * destination->pixel_size();
+    const size_t src_size = source->size();
+
+    if (src_cb > src_size || src_offset > src_size - src_cb)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check validity: a 2D image only has one slice
+    if (destination->type() == MemObject::Image2D &&
+        (dst_origin[2] != 0 || region[2] != 1))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+}
+
+/** Source offset that was given to the constructor as src_offset. */
+size_t CopyBufferToImageEvent::offset() const
+{
+    const size_t src_offset = p_offset;
+    return src_offset;
+}
+
+/** Reports this event as a buffer-to-image copy. */
+Event::Type CopyBufferToImageEvent::type() const
+{
+    const Event::Type kind = Event::CopyBufferToImage;
+    return kind;
+}
+
+/*
+ * Barrier
+ */
+
+/** Barrier event: hands Queued and an empty (0, 0) wait list to Event. */
+BarrierEvent::BarrierEvent(CommandQueue *parent,
+                           cl_int *errcode_ret)
+: Event(parent, Queued, 0, 0, errcode_ret)
+{
+}
+
+/** Reports this event as a barrier. */
+Event::Type BarrierEvent::type() const
+{
+    const Event::Type kind = Event::Barrier;
+    return kind;
+}
+
+/*
+ * WaitForEvents
+ */
+
+/** Hands Queued and the caller's wait list to Event; nothing else is done. */
+WaitForEventsEvent::WaitForEventsEvent(CommandQueue *parent,
+                                       cl_uint num_events_in_wait_list,
+                                       const Event **event_wait_list,
+                                       cl_int *errcode_ret)
+: Event(parent, Queued,
+        num_events_in_wait_list, event_wait_list,
+        errcode_ret)
+{
+}
+
+/** Reports this event as a wait-for-events command. */
+Event::Type WaitForEventsEvent::type() const
+{
+    const Event::Type kind = Event::WaitForEvents;
+    return kind;
+}
+
+/*
+ * Marker
+ */
+/** Marker event: built exactly like a WaitForEventsEvent on the same list. */
+MarkerEvent::MarkerEvent(CommandQueue *parent,
+                         cl_uint num_events_in_wait_list,
+                         const Event **event_wait_list,
+                         cl_int *errcode_ret)
+: WaitForEventsEvent(parent,
+                     num_events_in_wait_list, event_wait_list,
+                     errcode_ret)
+{
+}
+
+/** Reports this event as a marker. */
+Event::Type MarkerEvent::type() const
+{
+    const Event::Type kind = Event::Marker;
+    return kind;
+}
diff --git a/src/core/events.h b/src/core/events.h
new file mode 100644
index 0000000..2311d92
--- /dev/null
+++ b/src/core/events.h
@@ -0,0 +1,718 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file events.h
+ * \brief All the event-related classes
+ */
+
+#ifndef __EVENTS_H__
+#define __EVENTS_H__
+
+#include "commandqueue.h"
+#include <core/config.h>
+
+#include <vector>
+
+namespace Coal
+{
+
+class MemObject;
+class Image2D;
+class Kernel;
+class DeviceKernel;
+class DeviceInterface;
+
+/**
+ * \brief Buffer-related event, base class of the read, write, map, unmap and copy events
+ */
+class BufferEvent : public Event
+{
+ public:
+ BufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+ virtual ~BufferEvent();
+
+ MemObject *buffer() const; /*!< \brief Buffer on which to operate */
+
+ /**
+ * \brief Check that a buffer is correctly aligned for a device
+ *
+ * OpenCL supports sub-buffers of buffers (\c Coal::SubBuffer). They
+ * have to be aligned on a certain device-dependent boundary.
+ *
+ * This function checks that \p buffer is correctly aligned for
+ * \p device. If \p buffer is not a \c Coal::SubBuffer, this function
+ * returns true.
+ *
+ * \return true if the buffer is aligned or not a \c Coal::SubBuffer
+ */
+ static bool isSubBufferAligned(const MemObject *buffer,
+ const DeviceInterface *device);
+
+ private:
+ MemObject *p_buffer; /*!< \brief Presumably what buffer() returns; its definition is in events.cpp (TODO confirm) */
+};
+
+/**
+ * \brief Reading or writing to a buffer, base class of the read and write events
+ */
+class ReadWriteBufferEvent : public BufferEvent
+{
+ public:
+ ReadWriteBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ size_t offset,
+ size_t cb,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ size_t offset() const; /*!< \brief Offset in the buffer of the operation, in bytes */
+ size_t cb() const; /*!< \brief Number of bytes to read or write */
+ void *ptr() const; /*!< \brief Pointer in host memory at which to put (read) or take (write) the data */
+
+ private:
+ size_t p_offset, p_cb;
+ void *p_ptr;
+};
+
+/**
+ * \brief Reading a buffer (see \c Coal::ReadWriteBufferEvent for the accessors)
+ */
+class ReadBufferEvent : public ReadWriteBufferEvent
+{
+ public:
+ ReadBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ size_t offset,
+ size_t cb,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::ReadBuffer one */
+};
+
+/**
+ * \brief Writing a buffer (see \c Coal::ReadWriteBufferEvent for the accessors)
+ */
+class WriteBufferEvent : public ReadWriteBufferEvent
+{
+ public:
+ WriteBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ size_t offset,
+ size_t cb,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::WriteBuffer one */
+};
+
+/**
+ * \brief Mapping a region (offset and size, in bytes) of a buffer
+ */
+class MapBufferEvent : public BufferEvent
+{
+ public:
+ MapBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ size_t offset,
+ size_t cb,
+ cl_map_flags map_flags,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::MapBuffer one */
+
+ size_t offset() const; /*!< \brief Offset in the buffer at which the mapping begins, in bytes */
+ size_t cb() const; /*!< \brief Number of bytes to map */
+ cl_map_flags flags() const; /*!< \brief Flags of the mapping */
+ void *ptr() const; /*!< \brief Pointer at which the data has been mapped */
+
+ /**
+ * \brief Set the memory location at which the data has been mapped
+ *
+ * This function is called by the device when it has successfully mapped
+ * the buffer. It must be called inside
+ * \c Coal::DeviceInterface::initEventDeviceData().
+ *
+ * \param ptr the address at which the buffer has been mapped
+ */
+ void setPtr(void *ptr);
+
+ private:
+ size_t p_offset, p_cb;
+ cl_map_flags p_map_flags;
+ void *p_ptr; /*!< \brief Presumably the address given to setPtr() and returned by ptr() (TODO confirm in events.cpp) */
+};
+
+/**
+ * \brief Mapping a region (origin and size, in pixels) of an image
+ */
+class MapImageEvent : public BufferEvent
+{
+ public:
+ MapImageEvent(CommandQueue *parent,
+ Image2D *image,
+ cl_map_flags map_flags,
+ const size_t origin[3],
+ const size_t region[3],
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::MapImage one */
+
+ /**
+ * \brief Origin of the mapping, in pixels, for the given dimension
+ * \param index dimension for which the origin is retrieved
+ * \return origin of the mapping for the given dimension
+ */
+ size_t origin(unsigned int index) const;
+
+ /**
+ * \brief Region of the mapping, in pixels, for the given dimension
+ * \param index dimension for which the region is retrieved
+ * \return region of the mapping for the given dimension
+ */
+ size_t region(unsigned int index) const;
+ cl_map_flags flags() const; /*!< \brief Flags of the mapping */
+
+ void *ptr() const; /*!< \brief Pointer at which the data is mapped */
+ size_t row_pitch() const; /*!< \brief Row pitch of the mapped data */
+ size_t slice_pitch() const; /*!< \brief Slice pitch of the mapped data */
+
+ /**
+ * \brief Set the memory location at which the image is mapped
+ *
+ * This function must be called by
+ * \c Coal::DeviceInterface::initEventDeviceData(). Row and slice pitches
+ * must also be set by this function by calling \c setRowPitch() and
+ * \c setSlicePitch().
+ *
+ * \param ptr pointer at which the data is available
+ */
+ void setPtr(void *ptr);
+ void setRowPitch(size_t row_pitch); /*!< \brief Set row pitch */
+ void setSlicePitch(size_t slice_pitch); /*!< \brief Set slice pitch */
+
+ private:
+ cl_map_flags p_map_flags;
+ size_t p_origin[3], p_region[3];
+ void *p_ptr;
+ size_t p_slice_pitch, p_row_pitch; /*!< \brief Note: declared slice pitch first, unlike the accessors above */
+};
+
+/**
+ * \brief Unmapping a memory object
+ */
+class UnmapBufferEvent : public BufferEvent
+{
+ public:
+ UnmapBufferEvent(CommandQueue *parent,
+ MemObject *buffer,
+ void *mapped_addr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::UnmapBuffer one */
+
+ void *mapping() const; /*!< \brief Mapped address to unmap */
+
+ private:
+ void *p_mapping; /*!< \brief Presumably the \c mapped_addr given to the constructor (TODO confirm in events.cpp) */
+};
+
+/**
+ * \brief Copying a linear range of bytes between two buffers
+ */
+class CopyBufferEvent : public BufferEvent
+{
+ public:
+ CopyBufferEvent(CommandQueue *parent,
+ MemObject *source,
+ MemObject *destination,
+ size_t src_offset,
+ size_t dst_offset,
+ size_t cb,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+ ~CopyBufferEvent();
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::CopyBuffer one */
+
+ MemObject *source() const; /*!< \brief Source buffer, equivalent to \c Coal::BufferEvent::buffer() */
+ MemObject *destination() const; /*!< \brief Destination buffer */
+ size_t src_offset() const; /*!< \brief Offset in the source buffer, in bytes */
+ size_t dst_offset() const; /*!< \brief Offset in the destination buffer, in bytes */
+ size_t cb() const; /*!< \brief Number of bytes to copy */
+
+ private:
+ MemObject *p_destination;
+ size_t p_src_offset, p_dst_offset, p_cb;
+};
+
+/**
+ * \brief Events related to rectangular (or cubic) memory regions
+ *
+ * This event is the base for all the *BufferRect events, and the Image ones.
+ * Its geometry members are protected because the constructors of the derived
+ * classes read them directly for their own bounds checks.
+ */
+class ReadWriteCopyBufferRectEvent : public BufferEvent
+{
+ public:
+ ReadWriteCopyBufferRectEvent(CommandQueue *parent,
+ MemObject *source,
+ const size_t src_origin[3],
+ const size_t dst_origin[3],
+ const size_t region[3],
+ size_t src_row_pitch,
+ size_t src_slice_pitch,
+ size_t dst_row_pitch,
+ size_t dst_slice_pitch,
+ unsigned int bytes_per_element,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ size_t src_origin(unsigned int index) const; /*!< \brief Source origin for the \p index dimension */
+ size_t dst_origin(unsigned int index) const; /*!< \brief Destination origin for the \p index dimension */
+ size_t region(unsigned int index) const; /*!< \brief Region to copy for the \p index dimension */
+ size_t src_row_pitch() const; /*!< \brief Source row pitch */
+ size_t src_slice_pitch() const; /*!< \brief Source slice pitch */
+ size_t dst_row_pitch() const; /*!< \brief Destination row pitch */
+ size_t dst_slice_pitch() const; /*!< \brief Destination slice pitch */
+ MemObject *source() const; /*!< \brief Source of the copy, for readability. Calls \c Coal::BufferEvent::buffer(). */
+
+ protected:
+ size_t p_src_origin[3], p_dst_origin[3], p_region[3]; /*!< \brief Origins and region, one entry per dimension */
+ size_t p_src_row_pitch, p_src_slice_pitch; /*!< \brief Source-side pitches */
+ size_t p_dst_row_pitch, p_dst_slice_pitch; /*!< \brief Destination-side pitches */
+};
+
+/**
+ * \brief Copying a rectangular (or cubic) region between two buffers
+ */
+class CopyBufferRectEvent : public ReadWriteCopyBufferRectEvent
+{
+ public:
+ CopyBufferRectEvent(CommandQueue *parent,
+ MemObject *source,
+ MemObject *destination,
+ const size_t src_origin[3],
+ const size_t dst_origin[3],
+ const size_t region[3],
+ size_t src_row_pitch,
+ size_t src_slice_pitch,
+ size_t dst_row_pitch,
+ size_t dst_slice_pitch,
+ unsigned int bytes_per_element,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ virtual Type type() const; /*!< \brief Say the event is a \c Coal::Event::CopyBufferRect one */
+ MemObject *destination() const; /*!< \brief Destination buffer */
+
+ private:
+ MemObject *p_destination; /*!< \brief Value returned by destination() */
+};
+
+/**
+ * \brief Reading or writing a rectangular (or cubic) region of a buffer
+ */
+class ReadWriteBufferRectEvent : public ReadWriteCopyBufferRectEvent
+{
+ public:
+ ReadWriteBufferRectEvent(CommandQueue *parent,
+ MemObject *buffer,
+ const size_t buffer_origin[3],
+ const size_t host_origin[3],
+ const size_t region[3],
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ void *ptr,
+ unsigned int bytes_per_element,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ void *ptr() const; /*!< \brief Pointer in host memory given to the constructor; a null one is rejected with CL_INVALID_VALUE */
+
+ private:
+ void *p_ptr; /*!< \brief Host pointer given to the constructor, returned by ptr() */
+};
+
+/**
+ * \brief Reading a rectangular (or cubic) region of a buffer
+ */
+class ReadBufferRectEvent : public ReadWriteBufferRectEvent
+{
+ public:
+ ReadBufferRectEvent(CommandQueue *parent,
+ MemObject *buffer,
+ const size_t buffer_origin[3],
+ const size_t host_origin[3],
+ const size_t region[3],
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::ReadBufferRect one */
+};
+
+/**
+ * \brief Writing a rectangular (or cubic) region of a buffer
+ */
+class WriteBufferRectEvent : public ReadWriteBufferRectEvent
+{
+ public:
+ WriteBufferRectEvent(CommandQueue *parent,
+ MemObject *buffer,
+ const size_t buffer_origin[3],
+ const size_t host_origin[3],
+ const size_t region[3],
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::WriteBufferRect one */
+};
+
+/**
+ * \brief Reading or writing images
+ *
+ * This class mostly converts the arguments given to its constructor
+ * to those of \c Coal::ReadWriteBufferRectEvent. For example, the source row
+ * and slice pitches are read from the \c Coal::Image2D object.
+ *
+ * Its constructor also reports CL_INVALID_VALUE when the image is a 2D one
+ * and \c origin[2] is not 0 or \c region[2] is not 1.
+ */
+class ReadWriteImageEvent : public ReadWriteBufferRectEvent
+{
+ public:
+ ReadWriteImageEvent(CommandQueue *parent,
+ Image2D *image,
+ const size_t origin[3],
+ const size_t region[3],
+ size_t row_pitch,
+ size_t slice_pitch,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+};
+
+/**
+ * \brief Reading an image; the constructor only forwards to \c Coal::ReadWriteImageEvent
+ */
+class ReadImageEvent : public ReadWriteImageEvent
+{
+ public:
+ ReadImageEvent(CommandQueue *parent,
+ Image2D *image,
+ const size_t origin[3],
+ const size_t region[3],
+ size_t row_pitch,
+ size_t slice_pitch,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::ReadImage one */
+};
+
+/**
+ * \brief Writing to an image; the constructor only forwards to \c Coal::ReadWriteImageEvent
+ */
+class WriteImageEvent : public ReadWriteImageEvent
+{
+ public:
+ WriteImageEvent(CommandQueue *parent,
+ Image2D *image,
+ const size_t origin[3],
+ const size_t region[3],
+ size_t row_pitch,
+ size_t slice_pitch,
+ void *ptr,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::WriteImage one */
+};
+
+/**
+ * \brief Copying between two images of the same format (CL_IMAGE_FORMAT_MISMATCH otherwise)
+ */
+class CopyImageEvent : public CopyBufferRectEvent
+{
+ public:
+ CopyImageEvent(CommandQueue *parent,
+ Image2D *source,
+ Image2D *destination,
+ const size_t src_origin[3],
+ const size_t dst_origin[3],
+ const size_t region[3],
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::CopyImage one */
+};
+
+/**
+ * \brief Copying an image to a buffer
+ */
+class CopyImageToBufferEvent : public CopyBufferRectEvent
+{
+ public:
+ CopyImageToBufferEvent(CommandQueue *parent,
+ Image2D *source,
+ MemObject *destination,
+ const size_t src_origin[3],
+ const size_t region[3],
+ size_t dst_offset,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ size_t offset() const; /*!< \brief Offset in the destination buffer at which the image data is written */
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::CopyImageToBuffer one */
+
+ private:
+ size_t p_offset; /*!< \brief \c dst_offset given to the constructor, returned by offset() */
+};
+
+/**
+ * \brief Copying a buffer to an image
+ */
+class CopyBufferToImageEvent : public CopyBufferRectEvent
+{
+ public:
+ CopyBufferToImageEvent(CommandQueue *parent,
+ MemObject *source,
+ Image2D *destination,
+ size_t src_offset,
+ const size_t dst_origin[3],
+ const size_t region[3],
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ size_t offset() const; /*!< \brief Offset in the source buffer at which the copy starts */
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::CopyBufferToImage one */
+
+ private:
+ size_t p_offset; /*!< \brief \c src_offset given to the constructor, returned by offset() */
+};
+
+/**
+ * \brief Executing a native function as a kernel
+ *
+ * This event builds an argument list to give to the native function. It needs
+ * for example to replace all occurrences of a \c Coal::MemObject with a pointer
+ * to data the host CPU can actually access, using
+ * \c Coal::DeviceBuffer::nativeGlobalPointer().
+ */
+class NativeKernelEvent : public Event
+{
+ public:
+ NativeKernelEvent(CommandQueue *parent,
+ void (*user_func)(void *),
+ void *args,
+ size_t cb_args,
+ cl_uint num_mem_objects,
+ const MemObject **mem_list,
+ const void **args_mem_loc,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+ ~NativeKernelEvent();
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::NativeKernel one */
+
+ void *function() const; /*!< \brief Host function to call */
+ void *args() const; /*!< \brief Args to give to the host function */
+
+ private:
+ void *p_user_func; /*!< \brief Held as \c void* although the constructor takes a \c void(*)(void*) */
+ void *p_args;
+};
+
+/**
+ * \brief Executing a compiled kernel over a range of up to MAX_WORK_DIMS dimensions
+ */
+class KernelEvent : public Event
+{
+ public:
+ KernelEvent(CommandQueue *parent,
+ Kernel *kernel,
+ cl_uint work_dim,
+ const size_t *global_work_offset,
+ const size_t *global_work_size,
+ const size_t *local_work_size,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+ ~KernelEvent();
+
+ cl_uint work_dim() const; /*!< \brief Number of working dimensions */
+ size_t global_work_offset(cl_uint dim) const; /*!< \brief Global work offset for the \p dim dimension */
+ size_t global_work_size(cl_uint dim) const; /*!< \brief Global work size for the \p dim dimension */
+ size_t local_work_size(cl_uint dim) const; /*!< \brief Number of work-items per work-group for the \p dim dimension */
+ Kernel *kernel() const; /*!< \brief \c Coal::Kernel object to run */
+ DeviceKernel *deviceKernel() const; /*!< \brief \c Coal::DeviceKernel for the kernel and device of this event */
+
+ virtual Type type() const; /*!< \brief Say the event is a \c Coal::Event::NDRangeKernel one */
+
+ private:
+ cl_uint p_work_dim;
+ size_t p_global_work_offset[MAX_WORK_DIMS],
+ p_global_work_size[MAX_WORK_DIMS],
+ p_local_work_size[MAX_WORK_DIMS],
+ p_max_work_item_sizes[MAX_WORK_DIMS];
+ Kernel *p_kernel;
+ DeviceKernel *p_dev_kernel;
+};
+
+/**
+ * \brief Executing a task kernel
+ *
+ * This event is simply a \c Coal::KernelEvent with:
+ *
+ * - \c work_dim() set to 1
+ * - \c global_work_offset() set to {0}
+ * - \c global_work_size() set to {1}
+ * - \c local_work_size() set to {1}
+ *
+ * It's in fact a \c Coal::KernelEvent containing only one single work-item.
+ */
+class TaskEvent : public KernelEvent
+{
+ public:
+ TaskEvent(CommandQueue *parent,
+ Kernel *kernel,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::TaskKernel one */
+};
+
+/**
+ * \brief User event
+ *
+ * This event is a bit special as it is created by a call to
+ * \c clCreateUserEvent() and doesn't belong to an event queue. Thus, a means had
+ * to be found for everything to work.
+ *
+ * The solution is the \c addDependentCommandQueue() function, called every time
+ * the user event is added to a command queue. When this event becomes completed,
+ * \c flushQueues() is called to allow all the \c Coal::CommandQueue objects
+ * containing this event to push more events on their device.
+ *
+ * This way, command queues are not blocked by user events.
+ */
+class UserEvent : public Event
+{
+ public:
+ UserEvent(Context *context, cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::User one */
+ Context *context() const; /*!< \brief Context of this event */
+
+ private:
+ Context *p_context;
+};
+
+/**
+ * \brief Barrier event, built without any wait list
+ */
+class BarrierEvent : public Event
+{
+ public:
+ BarrierEvent(CommandQueue *parent,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::Barrier one */
+};
+
+/**
+ * \brief Event waiting for others to complete before being completed
+ */
+class WaitForEventsEvent : public Event
+{
+ public:
+ WaitForEventsEvent(CommandQueue *parent,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ virtual Type type() const; /*!< \brief Say the event is a \c Coal::Event::WaitForEvents one (redefined by \c Coal::MarkerEvent) */
+};
+
+/**
+ * \brief Marker event, a \c Coal::WaitForEventsEvent reporting a different type()
+ */
+class MarkerEvent : public WaitForEventsEvent
+{
+ public:
+ MarkerEvent(CommandQueue *parent,
+ cl_uint num_events_in_wait_list,
+ const Event **event_wait_list,
+ cl_int *errcode_ret);
+
+ Type type() const; /*!< \brief Say the event is a \c Coal::Event::Marker one */
+};
+
+}
+
+#endif
diff --git a/src/core/icd.cpp b/src/core/icd.cpp
new file mode 100644
index 0000000..2c62035
--- /dev/null
+++ b/src/core/icd.cpp
@@ -0,0 +1,145 @@
+/******************************************************************************
+ * Copyright (c) 2011-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include "CL/cl.h"
+#include "platform.h"
+#include "icd.h"
+
+/*-----------------------------------------------------------------------------
+ * Table of OpenCL entry points, each stored as a void*.
+ *
+ * Slots set to 0 are entry points this table does not provide; the trailing
+ * comment on each names the function the slot stands for.
+ *
+ * NOTE(review): the slot order presumably has to match the dispatch layout
+ * expected by the Khronos ICD loader -- confirm against the loader headers
+ * before inserting, removing or reordering entries.
+ *---------------------------------------------------------------------------*/
+void * dispatch_table[] =
+{
+ (void*) clGetPlatformIDs,
+ (void*) clGetPlatformInfo,
+ (void*) clGetDeviceIDs,
+ (void*) clGetDeviceInfo,
+ (void*) clCreateContext,
+ (void*) clCreateContextFromType,
+ (void*) clRetainContext,
+ (void*) clReleaseContext,
+ (void*) clGetContextInfo,
+ (void*) clCreateCommandQueue,
+ (void*) clRetainCommandQueue,
+ (void*) clReleaseCommandQueue,
+ (void*) clGetCommandQueueInfo,
+ (void*) 0, //clSetCommandQueueProperty,
+ (void*) clCreateBuffer,
+ (void*) clCreateImage2D,
+ (void*) clCreateImage3D,
+ (void*) clRetainMemObject,
+ (void*) clReleaseMemObject,
+ (void*) clGetSupportedImageFormats,
+ (void*) clGetMemObjectInfo,
+ (void*) clGetImageInfo,
+ (void*) clCreateSampler,
+ (void*) clRetainSampler,
+ (void*) clReleaseSampler,
+ (void*) clGetSamplerInfo,
+ (void*) clCreateProgramWithSource,
+ (void*) clCreateProgramWithBinary,
+ (void*) clRetainProgram,
+ (void*) clReleaseProgram,
+ (void*) clBuildProgram,
+ (void*) clUnloadCompiler,
+ (void*) clGetProgramInfo,
+ (void*) clGetProgramBuildInfo,
+ (void*) clCreateKernel,
+ (void*) clCreateKernelsInProgram,
+ (void*) clRetainKernel,
+ (void*) clReleaseKernel,
+ (void*) clSetKernelArg,
+ (void*) clGetKernelInfo,
+ (void*) clGetKernelWorkGroupInfo,
+ (void*) clWaitForEvents,
+ (void*) clGetEventInfo,
+ (void*) clRetainEvent,
+ (void*) clReleaseEvent,
+ (void*) clGetEventProfilingInfo,
+ (void*) clFlush,
+ (void*) clFinish,
+ (void*) clEnqueueReadBuffer,
+ (void*) clEnqueueWriteBuffer,
+ (void*) clEnqueueCopyBuffer,
+ (void*) clEnqueueReadImage,
+ (void*) clEnqueueWriteImage,
+ (void*) clEnqueueCopyImage,
+ (void*) clEnqueueCopyImageToBuffer,
+ (void*) clEnqueueCopyBufferToImage,
+ (void*) clEnqueueMapBuffer,
+ (void*) clEnqueueMapImage,
+ (void*) clEnqueueUnmapMemObject,
+ (void*) clEnqueueNDRangeKernel,
+ (void*) clEnqueueTask,
+ (void*) clEnqueueNativeKernel,
+ (void*) clEnqueueMarker,
+ (void*) clEnqueueWaitForEvents,
+ (void*) clEnqueueBarrier,
+ (void*) clGetExtensionFunctionAddress,
+ (void*) 0, //clCreateFromGLBuffer,
+ (void*) 0, //clCreateFromGLTexture2D,
+ (void*) 0, //clCreateFromGLTexture3D,
+ (void*) 0, //clCreateFromGLRenderbuffer,
+ (void*) 0, //clGetGLObjectInfo,
+ (void*) 0, //clGetGLTextureInfo,
+ (void*) 0, //clEnqueueAcquireGLObjects,
+ (void*) 0, //clEnqueueReleaseGLObjects,
+ (void*) 0, //clGetGLContextInfoKHR,
+ (void*) 0, //clGetDeviceIDsFromD3D10KHR,
+ (void*) 0, //clCreateFromD3D10BufferKHR,
+ (void*) 0, //clCreateFromD3D10Texture2DKHR,
+ (void*) 0, //clCreateFromD3D10Texture3DKHR,
+ (void*) 0, //clEnqueueAcquireD3D10ObjectsKHR,
+ (void*) 0, //clEnqueueReleaseD3D10ObjectsKHR,
+ (void*) clSetEventCallback,
+ (void*) clCreateSubBuffer,
+ (void*) clSetMemObjectDestructorCallback,
+ (void*) clCreateUserEvent,
+ (void*) clSetUserEventStatus,
+ (void*) clEnqueueReadBufferRect,
+ (void*) clEnqueueWriteBufferRect,
+ (void*) clEnqueueCopyBufferRect,
+ (void*) 0, //clCreateSubDevicesEXT,
+ (void*) 0, //clRetainDeviceEXT,
+ (void*) 0, //clReleaseDeviceEXT
+};
+
+
+/*-----------------------------------------------------------------------------
+ * clIcdGetPlatformIDsKHR - report the single platform of this implementation
+ *
+ * num_entries   : capacity of the platforms array; must be non-zero when
+ *                 platforms is given
+ * platforms     : optional output, receives the address of the_platform
+ * num_platforms : optional output, always set to 1 when given
+ *
+ * Returns CL_INVALID_VALUE when both outputs are NULL, or when platforms is
+ * given with num_entries == 0; CL_SUCCESS otherwise.
+ *---------------------------------------------------------------------------*/
+cl_int CL_API_CALL
+clIcdGetPlatformIDsKHR(cl_uint          num_entries,
+                       cl_platform_id * platforms,
+                       cl_uint *        num_platforms)
+{
+    const bool want_ids = (platforms != 0);
+
+    // The count is written before num_entries is validated.
+    if (num_platforms)
+        *num_platforms = 1;
+    else if (!want_ids)
+        return CL_INVALID_VALUE;
+
+    if (want_ids)
+    {
+        if (num_entries == 0)
+            return CL_INVALID_VALUE;
+
+        // Only one "default" platform
+        *platforms = &the_platform;
+    }
+
+    return CL_SUCCESS;
+}
diff --git a/src/core/icd.h b/src/core/icd.h
new file mode 100644
index 0000000..591aed6
--- /dev/null
+++ b/src/core/icd.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * Copyright (c) 2011-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef _ICD_H
+#define _ICD_H
+#include "CL/cl.h"
+
+/* Array-of-void* type of the entry point table; the table itself is defined
+ * in icd.cpp as dispatch_table. */
+typedef void *(KHRicdVendorDispatch)[];
+extern KHRicdVendorDispatch dispatch_table;
+
+/**
+ * Holds a pointer to dispatch_table as its only data member.
+ *
+ * NOTE(review): presumably meant to be the leading member/base of the objects
+ * handed out through the OpenCL API, so the ICD loader finds the table at
+ * offset 0 -- confirm where this class is used.
+ */
+class Dispatch
+{
+ public:
+ Dispatch() : dispatch(&dispatch_table) {}
+ private:
+ KHRicdVendorDispatch *dispatch; /* always points at dispatch_table */
+};
+
+#endif // _ICD_H
+
diff --git a/src/core/kernel.cpp b/src/core/kernel.cpp
new file mode 100644
index 0000000..4c53576
--- /dev/null
+++ b/src/core/kernel.cpp
@@ -0,0 +1,637 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file core/kernel.cpp
+ * \brief Kernel
+ */
+
+#include "kernel.h"
+#include "propertylist.h"
+#include "program.h"
+#include "memobject.h"
+#include "sampler.h"
+#include "deviceinterface.h"
+
+#include <string>
+#include <iostream>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <boost/tuple/tuple.hpp>
+
+#include <llvm/Support/Casting.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Type.h>
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/DataLayout.h>
+
+
+using namespace Coal;
+/**
+ * Builds an empty kernel attached to \p program: no name, no arguments, no
+ * per-device data. Also zeroes \c null_dep, the record handed back by
+ * \c deviceDependent() when no device matches.
+ */
+Kernel::Kernel(Program *program)
+: Object(Object::T_Kernel, program), p_has_locals(false), wi_alloca_size(0)
+{
+    // TODO: Say a kernel is attached to the program (that becomes unalterable)
+
+    p_name = "";
+
+    null_dep.module   = 0;
+    null_dep.function = 0;
+    null_dep.kernel   = 0;
+    null_dep.device   = 0;
+}
+
+/**
+ * Releases everything the kernel allocated: the device-specific kernel object
+ * of every registered device, and the Arg descriptors created with \c new in
+ * \c addFunction().
+ *
+ * NOTE(review): deleting the Arg objects assumes nothing else keeps the
+ * pointers returned by \c arg() beyond the kernel's lifetime -- confirm
+ * against the device back-ends.
+ */
+Kernel::~Kernel()
+{
+    while (p_device_dependent.size())
+    {
+        delete p_device_dependent.back().kernel;
+        p_device_dependent.pop_back();
+    }
+
+    // The descriptors are heap-allocated in addFunction() and only stored in
+    // p_args; without this they leak with every released kernel.
+    for (size_t i = 0; i < p_args.size(); ++i)
+        delete p_args[i];
+    p_args.clear();
+}
+
+/**
+ * Read-only lookup of the per-device record for \p device.
+ *
+ * A NULL \p device selects the only record when exactly one device is
+ * registered. \c null_dep is returned when nothing matches.
+ */
+const Kernel::DeviceDependent &Kernel::deviceDependent(DeviceInterface *device) const
+{
+    const size_t count = p_device_dependent.size();
+
+    // "Default device" shortcut: unambiguous only with a single record
+    if (!device && count == 1)
+        return p_device_dependent[0];
+
+    for (size_t n = 0; n < count; ++n)
+    {
+        if (p_device_dependent[n].device == device)
+            return p_device_dependent[n];
+    }
+
+    return null_dep;
+}
+
+/**
+ * Mutable lookup of the per-device record for \p device.
+ *
+ * A NULL \p device selects the only record when exactly one device is
+ * registered. \c null_dep is returned when nothing matches.
+ */
+Kernel::DeviceDependent &Kernel::deviceDependent(DeviceInterface *device)
+{
+    const size_t count = p_device_dependent.size();
+
+    // "Default device" shortcut: unambiguous only with a single record
+    if (!device && count == 1)
+        return p_device_dependent[0];
+
+    for (size_t n = 0; n < count; ++n)
+    {
+        if (p_device_dependent[n].device == device)
+            return p_device_dependent[n];
+    }
+
+    return null_dep;
+}
+
+/******************************************************************************
+* cl_int Kernel::addFunction
+*
+* Registers the LLVM function implementing this kernel for one device.
+*
+* The first call builds the argument list (p_args) from the function
+* signature. Later calls, for other devices, only verify that the new
+* signature describes the same arguments.
+*
+* device   : device the function was built for
+* function : LLVM function of the kernel
+* module   : LLVM module containing the function
+*
+* Returns CL_SUCCESS, or CL_INVALID_KERNEL_DEFINITION when an argument type is
+* not recognized or the signature differs from the one already registered.
+******************************************************************************/
+cl_int Kernel::addFunction(DeviceInterface *device, llvm::Function *function,
+                           llvm::Module *module)
+{
+    llvm::DataLayout TD(module);
+
+#if 0 // Uncomment to see the Function IR being generated:
+    function->dump();
+#endif
+
+    p_name = function->getName().str();
+
+    // Get wi_alloca_size, to be used for computing wg_alloca_size
+    static const char wi_alloca_key[] = "_wi_alloca_size=";
+    std::string fattrs = function->getAttributes().getAsString(
+                                          llvm::AttributeSet::FunctionIndex);
+    std::size_t found = fattrs.find(wi_alloca_key);
+    if (found != std::string::npos)
+        wi_alloca_size = atoi(fattrs.c_str() + found + sizeof(wi_alloca_key) - 1);
+
+    /*-------------------------------------------------------------------------
+    * Add a device dependent
+    *------------------------------------------------------------------------*/
+    DeviceDependent dep;
+
+    dep.device = device;
+    dep.function = function;
+    dep.module = module;
+
+    /*-------------------------------------------------------------------------
+    * Build the arg list of the kernel (or verify it if a previous function
+    * was already registered)
+    *------------------------------------------------------------------------*/
+    llvm::FunctionType *f = function->getFunctionType();
+    bool append = (p_args.size() == 0);
+
+    if (!append && p_args.size() != f->getNumParams())
+        return CL_INVALID_KERNEL_DEFINITION;
+
+    int i = 0;
+    for (llvm::Function::arg_iterator I = function->arg_begin(),
+         E = function->arg_end(); I != E; ++I, i++)
+    {
+        llvm::Argument *arg = &*I;
+        Arg::Kind kind = Arg::Invalid;
+        Arg::File file = Arg::Private;
+        unsigned short vec_dim = 1;
+
+        llvm::Type *arg_type = arg->getType();
+        const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
+
+        // LLVM IR writes parameters passed by value as pointers:
+        if (llvm::isa<llvm::PointerType>(arg_type) && arg->hasByValAttr()) {
+            arg_type = llvm::cast<llvm::PointerType>(arg_type)->getElementType();
+        }
+
+        llvm::Type *itype = TD.getSmallestLegalIntType(module->getContext(), arg_store_size * 8);
+        llvm::Type *target_type = (itype != NULL && arg_type->isIntegerTy()) ? itype : arg_type;
+
+        unsigned target_align = TD.getABITypeAlignment(target_type);
+
+#if 0 // Uncomment to see arg info
+        unsigned target_size = TD.getTypeStoreSize(target_type);
+        arg_type->dump(); std::cout << " Size: " << target_size << " Align: " << target_align << std::endl ;
+#endif
+
+        if (arg_type->isPointerTy())
+        {
+            // It's a pointer, dereference it
+            llvm::PointerType *p_type = llvm::cast<llvm::PointerType>(arg_type);
+
+            file = (Arg::File)p_type->getAddressSpace();
+            arg_type = p_type->getElementType();
+
+            // If it's a __local argument, we'll have to allocate memory at run time
+            if (file == Arg::Local)
+                p_has_locals = true;
+
+            kind = Arg::Buffer;
+
+            // If it's a struct, get its name
+            if (arg_type->isStructTy())
+            {
+                llvm::StructType *struct_type =
+                    llvm::cast<llvm::StructType>(arg_type);
+                std::string struct_name = struct_type->getName().str();
+
+                if (struct_name.compare(0, 14, "struct.image2d") == 0)
+                {
+                    kind = Arg::Image2D;
+                    file = Arg::Global;
+                }
+                else if (struct_name.compare(0, 14, "struct.image3d") == 0)
+                {
+                    kind = Arg::Image3D;
+                    file = Arg::Global;
+                }
+            }
+        }
+        else
+        {
+            if (arg_type->isVectorTy())
+            {
+                // It's a vector, we need its element's type
+                llvm::VectorType *v_type = llvm::cast<llvm::VectorType>(arg_type);
+
+                vec_dim = v_type->getNumElements();
+                arg_type = v_type->getElementType();
+            }
+
+            // Get type kind
+            if (arg_type->isFloatTy())
+            {
+                kind = Arg::Float;
+            }
+            else if (arg_type->isDoubleTy())
+            {
+                kind = Arg::Double;
+            }
+            else if (arg_type->isIntegerTy())
+            {
+                llvm::IntegerType *i_type = llvm::cast<llvm::IntegerType>(arg_type);
+
+                if (i_type->getBitWidth() == 8)
+                {
+                    kind = Arg::Int8;
+                }
+                else if (i_type->getBitWidth() == 16)
+                {
+                    kind = Arg::Int16;
+                }
+                else if (i_type->getBitWidth() == 32)
+                {
+                    // NOTE: May also be a sampler, check done in setArg
+                    kind = Arg::Int32;
+                }
+                else if (i_type->getBitWidth() == 64)
+                {
+                    kind = Arg::Int64;
+                }
+            }
+        }
+
+        // Check if we recognized the type
+        if (kind == Arg::Invalid)
+            return CL_INVALID_KERNEL_DEFINITION;
+
+        // Create arg
+        Arg *a = new Arg(vec_dim, file, kind, target_align);
+
+        // First registration: the descriptor becomes part of the kernel
+        if (append)
+        {
+            p_args.push_back(a);
+            continue;
+        }
+
+        // A function is already registered: check for signature compliance.
+        // Compare the descriptors themselves, not their addresses (a fresh
+        // allocation never equals a stored pointer), and free the temporary
+        // descriptor, which is only needed for the comparison.
+        const bool mismatch = (*a != *p_args[i]);
+        delete a;
+
+        if (mismatch)
+            return CL_INVALID_KERNEL_DEFINITION;
+    }
+
+    dep.kernel = device->createDeviceKernel(this, dep.function);
+    p_device_dependent.push_back(dep);
+
+    return CL_SUCCESS;
+}
+
+/**
+ * LLVM function registered for \p device, or the \c function field of
+ * \c null_dep (set to 0 by the constructor) when the device is unknown.
+ */
+llvm::Function *Kernel::function(DeviceInterface *device) const
+{
+    return deviceDependent(device).function;
+}
+
+/******************************************************************************
+* cl_int Kernel::setArg
+*
+* Stores the value of one kernel argument.
+*
+* index : position of the argument, must be below numArgs()
+* size  : number of bytes pointed to by value (size of the buffer to allocate
+*         at run time for __local arguments)
+* value : pointer to the argument data; must be NULL for __local arguments and
+*         may be NULL for buffer/image arguments
+*
+* Returns CL_SUCCESS, CL_INVALID_ARG_INDEX, CL_INVALID_ARG_SIZE,
+* CL_INVALID_ARG_VALUE or CL_OUT_OF_HOST_MEMORY.
+******************************************************************************/
+cl_int Kernel::setArg(cl_uint index, size_t size, const void *value)
+{
+    // Valid indices are 0 .. size()-1
+    if (index >= p_args.size())
+        return CL_INVALID_ARG_INDEX;
+
+    Arg *arg = p_args[index];
+
+    /*-------------------------------------------------------------------------
+    * Special case for __local pointers
+    *------------------------------------------------------------------------*/
+    if (arg->file() == Arg::Local)
+    {
+        if (size == 0) return CL_INVALID_ARG_SIZE;
+        if (value != 0) return CL_INVALID_ARG_VALUE;
+
+        arg->setAllocAtKernelRuntime(size);
+        return CL_SUCCESS;
+    }
+
+    /*-------------------------------------------------------------------------
+    * Check that size corresponds to the arg type
+    *------------------------------------------------------------------------*/
+    size_t arg_size = arg->valueSize() * arg->vecDim();
+
+    /*-------------------------------------------------------------------------
+    * Special case for samplers (pointers in C++, uint32 in OpenCL).
+    * value and the handle it points to are checked before being dereferenced.
+    *
+    * NOTE(review): when sizeof(cl_sampler) == 4, the bytes of any other
+    * 4-byte argument are still interpreted as an object pointer here.
+    *------------------------------------------------------------------------*/
+    if (value != 0 && size == sizeof(cl_sampler) && arg_size == 4 &&
+        *(Object **)value != 0 && (*(Object **)value)->isA(T_Sampler))
+    {
+        unsigned int bitfield = (*(Sampler **)value)->bitfield();
+
+        arg->refineKind(Arg::Sampler);
+        arg->alloc();
+        if (!arg->data()) return CL_OUT_OF_HOST_MEMORY;
+
+        // Only the bitfield is stored: copy its size, not the handle's size
+        arg->loadData(&bitfield, sizeof(bitfield));
+
+        return CL_SUCCESS;
+    }
+
+    // LLVM IR redefines function parameter types to fit the smallest integer type width for the ABI
+    // eg: <2xi8> (2 bytes) may actually be pushed as an i32 (4 bytes!), but this knowledge is
+    // not known to shamrock.  But, we do know the parameter type alignment in addFunction().
+    // So allow sizes less than or equal to the target alignment to succeed the size test:
+    if ((size != arg_size) && (size > arg->targetAlignment())) return CL_INVALID_ARG_SIZE;
+
+    /*-------------------------------------------------------------------------
+    * Check for null values
+    *------------------------------------------------------------------------*/
+    cl_mem null_mem = 0;
+
+    if (!value)
+    {
+        switch (arg->kind())
+        {
+            /*-------------------------------------------------------------
+            * Special case buffers : value can be 0 (or point to 0)
+            *------------------------------------------------------------*/
+            case Arg::Buffer:
+            case Arg::Image2D:
+            case Arg::Image3D:
+                value = &null_mem;
+                break;
+
+            default:
+                return CL_INVALID_ARG_VALUE;
+        }
+    }
+
+    /*-------------------------------------------------------------------------
+    * Copy just the data actually passed. Expect LLVM to do the signext/zeroext.
+    * The size test above accepts sizes up to the target alignment, which may
+    * exceed the storage allocated by alloc(): never copy more than arg_size.
+    *------------------------------------------------------------------------*/
+    arg->alloc();
+    if (!arg->data()) return CL_OUT_OF_HOST_MEMORY;
+
+    arg->loadData(value, size < arg_size ? size : arg_size);
+
+    return CL_SUCCESS;
+}
+
+/** Number of argument descriptors held in \c p_args. */
+unsigned int Kernel::numArgs() const
+{
+    const unsigned int count = p_args.size();
+
+    return count;
+}
+
+/**
+ * Descriptor of the argument at \p index. The lookup goes through \c at(),
+ * so an out-of-range index is not silently accepted.
+ */
+const Kernel::Arg *Kernel::arg(unsigned int index) const
+{
+    const Arg *descriptor = p_args.at(index);
+
+    return descriptor;
+}
+
+/** True when every argument reports \c defined() (also true with no args). */
+bool Kernel::argsSpecified() const
+{
+    const size_t count = p_args.size();
+    size_t n = 0;
+
+    // Stop at the first argument that has not been given a value yet
+    while (n < count && p_args[n]->defined())
+        ++n;
+
+    return n == count;
+}
+
+/* True once addFunction() has seen at least one pointer argument whose
+ * address space is Arg::Local. */
+bool Kernel::hasLocals() const
+{
+ return p_has_locals;
+}
+
+/**
+ * Device-specific kernel object created for \p device by \c addFunction(),
+ * or the \c kernel field of \c null_dep (0) when the device is unknown.
+ */
+DeviceKernel *Kernel::deviceDependentKernel(DeviceInterface *device) const
+{
+    return deviceDependent(device).kernel;
+}
+
+/**
+ * LLVM module registered for \p device by \c addFunction(), or the \c module
+ * field of \c null_dep (0) when the device is unknown.
+ */
+llvm::Module *Kernel::deviceDependentModule(DeviceInterface *device) const
+{
+    return deviceDependent(device).module;
+}
+
+/*
+ * Answers a kernel information query for the five cl_kernel_info names
+ * handled in the switch below.
+ *
+ * param_name           : information requested
+ * param_value_size     : capacity in bytes of param_value
+ * param_value          : optional output buffer
+ * param_value_size_ret : optional output, receives the size of the answer
+ *
+ * Returns CL_INVALID_VALUE for an unhandled param_name or when param_value is
+ * given but too small, CL_SUCCESS otherwise.
+ */
+cl_int Kernel::info(cl_kernel_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ void *value = 0;
+ size_t value_length = 0;
+
+ // NOTE(review): presumably scratch storage written by SIMPLE_ASSIGN (the
+ // <type>_var naming suggests so); the macro is defined outside this file
+ // -- confirm.
+ union {
+ cl_uint cl_uint_var;
+ cl_program cl_program_var;
+ cl_context cl_context_var;
+ };
+
+ // MEM_ASSIGN / SIMPLE_ASSIGN are expected to set value and value_length,
+ // which are read after the switch -- confirm against their definition.
+ switch (param_name)
+ {
+ case CL_KERNEL_FUNCTION_NAME:
+ MEM_ASSIGN(p_name.size() + 1, p_name.c_str());
+ break;
+
+ case CL_KERNEL_NUM_ARGS:
+ SIMPLE_ASSIGN(cl_uint, p_args.size());
+ break;
+
+ case CL_KERNEL_REFERENCE_COUNT:
+ SIMPLE_ASSIGN(cl_uint, references());
+ break;
+
+ case CL_KERNEL_CONTEXT:
+ SIMPLE_ASSIGN(cl_context, parent()->parent());
+ break;
+
+ case CL_KERNEL_PROGRAM:
+ SIMPLE_ASSIGN(cl_program, parent());
+ break;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ // A destination buffer was supplied but cannot hold the answer
+ if (param_value && param_value_size < value_length)
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+/**
+ * Reads the \c reqd_work_group_size metadata attached to this kernel.
+ *
+ * \param module LLVM module whose \c opencl.kernels metadata is searched
+ * \return the (x, y, z) size requested by the kernel, or (0, 0, 0) when the
+ *         module has no kernel metadata, the kernel is not listed, or it
+ *         carries no \c reqd_work_group_size entry
+ */
+boost::tuple<uint,uint,uint> Kernel::reqdWorkGroupSize(llvm::Module *module) const
+{
+    llvm::NamedMDNode *kernels = module->getNamedMetadata("opencl.kernels");
+
+    boost::tuple<uint,uint,uint> zeros(0,0,0);
+
+    if (!kernels) return zeros;
+
+    for (unsigned int i=0; i<kernels->getNumOperands(); ++i)
+    {
+        llvm::MDNode *node = kernels->getOperand(i);
+
+        /*---------------------------------------------------------------------
+        * Each node has only one operand : a llvm::Function
+        *--------------------------------------------------------------------*/
+        llvm::Value *value = node->getOperand(0);
+
+        /*---------------------------------------------------------------------
+        * Bug somewhere, don't crash
+        *--------------------------------------------------------------------*/
+        if (!llvm::isa<llvm::Function>(value)) continue;
+
+        llvm::Function *f = llvm::cast<llvm::Function>(value);
+        if(f->getName().str() != p_name) continue;
+
+        if (node->getNumOperands() <= 1) return zeros;
+
+        llvm::MDNode *meta = llvm::cast<llvm::MDNode>(node->getOperand(1));
+        if (meta->getNumOperands() == 4 &&
+            meta->getOperand(0)->getName().str() == std::string("reqd_work_group_size"))
+        {
+            uint x = llvm::cast<llvm::ConstantInt> (meta->getOperand(1))->getValue().getLimitedValue();
+            uint y = llvm::cast<llvm::ConstantInt> (meta->getOperand(2))->getValue().getLimitedValue();
+            uint z = llvm::cast<llvm::ConstantInt> (meta->getOperand(3))->getValue().getLimitedValue();
+
+            return boost::tuple<uint,uint,uint> (x,y,z);
+        }
+        return zeros;
+    }
+
+    // No metadata node names this kernel: without this return, control would
+    // fall off the end of a non-void function (undefined behavior).
+    return zeros;
+}
+
+
+/**
+ * Answers a work-group information query for this kernel on \p device.
+ *
+ * \param device device queried; may be NULL when the kernel is registered
+ *        for exactly one device
+ * \param param_name information requested
+ * \param param_value_size capacity in bytes of \p param_value
+ * \param param_value optional output buffer
+ * \param param_value_size_ret optional output, receives the answer's size
+ * \return \c CL_INVALID_DEVICE when \p device is NULL with several devices
+ *         registered, or when no function is registered for it;
+ *         \c CL_INVALID_VALUE for an unhandled \p param_name or a too small
+ *         \p param_value; \c CL_SUCCESS otherwise
+ */
+cl_int Kernel::workGroupInfo(DeviceInterface *device,
+                             cl_kernel_work_group_info param_name,
+                             size_t param_value_size,
+                             void *param_value,
+                             size_t *param_value_size_ret) const
+{
+    void *value = 0;
+    size_t value_length = 0;
+
+    union {
+        size_t size_t_var;
+        size_t three_size_t[3];
+        cl_ulong cl_ulong_var;
+    };
+
+    // BUG? Shouldn't we check if the kernel is associated with
+    // the default device ?
+    if (!device && p_device_dependent.size() > 1)
+        return CL_INVALID_DEVICE;
+
+    const DeviceDependent &dep = deviceDependent(device);
+
+    // deviceDependent() hands back null_dep (all fields 0) for a device this
+    // kernel was never built for; every query below dereferences dep.kernel
+    // or dep.module, so reject that case up front.
+    if (!dep.kernel)
+        return CL_INVALID_DEVICE;
+
+    switch (param_name)
+    {
+        case CL_KERNEL_WORK_GROUP_SIZE:
+            SIMPLE_ASSIGN(size_t, dep.kernel->workGroupSize());
+            break;
+
+        case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
+            {
+               boost::tuple<uint,uint,uint> res(reqdWorkGroupSize(dep.module));
+               three_size_t[0] = res.get<0>();
+               three_size_t[1] = res.get<1>();
+               three_size_t[2] = res.get<2>();
+               value = &three_size_t;
+               value_length = sizeof(three_size_t);
+            }
+            break;
+
+        case CL_KERNEL_LOCAL_MEM_SIZE:
+            SIMPLE_ASSIGN(cl_ulong, dep.kernel->localMemSize());
+            break;
+
+        case CL_KERNEL_PRIVATE_MEM_SIZE:
+            SIMPLE_ASSIGN(cl_ulong, dep.kernel->privateMemSize());
+            break;
+
+        case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
+            SIMPLE_ASSIGN(size_t, dep.kernel->preferredWorkGroupSizeMultiple());
+            break;
+
+        default:
+            return CL_INVALID_VALUE;
+    }
+
+    if (param_value && param_value_size < value_length)
+        return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+        *param_value_size_ret = value_length;
+
+    if (param_value)
+        std::memcpy(param_value, value, value_length);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Kernel::Arg
+ *
+ * The constructor records the description of one argument; no value storage
+ * is allocated and the argument starts undefined. Initializers are listed in
+ * the order the members are declared in kernel.h, which is the order they are
+ * initialized in anyway.
+ */
+Kernel::Arg::Arg(unsigned short vec_dim, File file, Kind kind, size_t targ_align)
+    : p_vec_dim(vec_dim),
+      p_file(file),
+      p_kind(kind),
+      p_data(0),
+      p_defined(false),
+      p_runtime_alloc(0),
+      p_targ_align(targ_align)
+{ }
+
+/* Releases the value storage obtained by alloc(), if any. */
+Kernel::Arg::~Arg()
+{
+    // free() accepts a null pointer, so no guard is needed
+    std::free(p_data);
+}
+
+/*
+ * Allocates zero-filled storage for vecDim() elements of valueSize() bytes.
+ * Does nothing when storage already exists.
+ */
+void Kernel::Arg::alloc()
+{
+    if (p_data)
+        return;
+
+    p_data = std::calloc(p_vec_dim, valueSize());
+}
+
+/*
+ * Copies size bytes from data into the storage obtained by alloc() and marks
+ * the argument as defined. size must not exceed the allocated capacity
+ * (checked by assert only).
+ */
+void Kernel::Arg::loadData(const void *data, size_t size)
+{
+    const size_t capacity = p_vec_dim * valueSize();
+
+    assert (size <= capacity);
+    std::memcpy(p_data, data, size);
+    p_defined = true;
+}
+
+/*
+ * Records the number of bytes to allocate at kernel run time for this
+ * argument and marks it as defined.
+ */
+void Kernel::Arg::setAllocAtKernelRuntime(size_t size)
+{
+    p_defined = true;
+    p_runtime_alloc = size;
+}
+
+/* Overwrites the kind of this argument; setArg() uses it to turn an argument
+ * into Arg::Sampler once a sampler object is passed for it. */
+void Kernel::Arg::refineKind (Kernel::Arg::Kind kind)
+{
+ p_kind = kind;
+}
+
+/*
+ * Two arguments differ when their vector dimension, address space or kind
+ * differ. The target alignment and any loaded value are not compared.
+ */
+bool Kernel::Arg::operator!=(const Arg &b)
+{
+    return (p_vec_dim != b.p_vec_dim) ||
+           (p_file    != b.p_file)    ||
+           (p_kind    != b.p_kind);
+}
+
+/*
+ * Size in bytes of one element of this argument, derived from its kind.
+ * Returns 0 for Invalid (and for any value outside the enum).
+ */
+size_t Kernel::Arg::valueSize() const
+{
+    switch (p_kind)
+    {
+        case Int8:
+            return 1;
+
+        case Int16:
+            return 2;
+
+        case Int32:
+        case Sampler:
+            return 4;
+
+        case Int64:
+            return 8;
+
+        case Float:
+            return sizeof(cl_float);
+
+        case Double:
+            return sizeof(double);
+
+        case Buffer:
+        case Image2D:
+        case Image3D:
+            return sizeof(cl_mem);
+
+        case Invalid:
+            break;
+    }
+
+    return 0;
+}
+
+/* Plain read accessors: each one returns the member of the same meaning. */
+
+unsigned short Kernel::Arg::vecDim() const
+{
+    return p_vec_dim;
+}
+
+Kernel::Arg::File Kernel::Arg::file() const
+{
+    return p_file;
+}
+
+Kernel::Arg::Kind Kernel::Arg::kind() const
+{
+    return p_kind;
+}
+
+size_t Kernel::Arg::targetAlignment() const
+{
+    return p_targ_align;
+}
+
+bool Kernel::Arg::defined() const
+{
+    return p_defined;
+}
+
+const void *Kernel::Arg::data() const
+{
+    return p_data;
+}
+
+size_t Kernel::Arg::allocAtKernelRuntime() const
+{
+    return p_runtime_alloc;
+}
+
+/*
+ * Address of vector element index inside the value storage: the storage
+ * pointer advanced by index * valueSize() bytes. No bounds check is done.
+ */
+const void *Kernel::Arg::value(unsigned short index) const
+{
+    const unsigned int byte_offset = index * valueSize();
+    const char *base = (const char *)p_data;
+
+    return (const void *)(base + byte_offset);
+}
+
diff --git a/src/core/kernel.h b/src/core/kernel.h
new file mode 100644
index 0000000..80672ea
--- /dev/null
+++ b/src/core/kernel.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file core/kernel.h
+ * \brief Kernel
+ */
+
+#ifndef __KERNEL_H__
+#define __KERNEL_H__
+
+#include "object.h"
+
+#include <CL/cl.h>
+
+#include <vector>
+#include <string>
+#include <boost/tuple/tuple.hpp>
+
+namespace llvm
+{
+ class Function;
+ class Module;
+}
+
+namespace Coal
+{
+
+class Program;
+class DeviceInterface;
+class DeviceKernel;
+
+/**
+ * \brief Kernel
+ *
+ * A kernel represents a LLVM function that can be run on a device. As
+ * \c Coal::Kernel objects are device-independent, they in fact represent only
+ * the name of a kernel and the arguments the application wants to pass to it,
+ * but it also contains a list of LLVM functions for each device for which its
+ * parent \c Coal::Program has been built
+ */
+class Kernel : public Object
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param program Parent \c Coal::Program
+ */
+ Kernel(Program *program);
+ ~Kernel();
+
+ /**
+ * \brief Kernel argument
+ *
+ * This class holds OpenCL-related information about the arguments of
+ * a kernel. It is also used to check that a kernel takes the same
+ * arguments on every device on which it has been built.
+ */
+ class Arg
+ {
+ public:
+ /**
+ * \brief Memory address space qualifier
+ */
+ enum File
+ {
+ Private = 0, /*!< \brief __private */
+#if 1
+ Global = 1, /*!< \brief __global */
+ Constant = 2, /*!< \brief __constant */
+ Local = 3 /*!< \brief __local */
+#else
+ /* using clang defaults */
+ Global = 0xFFFF00, /*!< \brief __global */
+ Local = 0xFFFF01, /*!< \brief __local */
+ Constant = 0xFFFF02 /*!< \brief __constant */
+#endif
+ };
+
+ /**
+ * \brief Kind of argument (its datatype)
+ */
+ enum Kind
+ {
+ Invalid, /*!< \brief Invalid argument */
+ Int8, /*!< \brief \c uchar or \c char, \c i8 in LLVM */
+ Int16, /*!< \brief \c ushort or \c short, \c i16 in LLVM */
+ Int32, /*!< \brief \c uint or \c int, \c i32 in LLVM */
+ Int64, /*!< \brief \c ulong or \c long, \c i64 in LLVM */
+ Float, /*!< \brief \c float, \c float in LLVM */
+ Double, /*!< \brief \c double, \c double in LLVM */
+ Buffer, /*!< \brief \c Coal::Buffer or \c Coal::SubBuffer, <tt>type*</tt> in LLVM */
+ Image2D, /*!< \brief \c Coal::Image2D, <tt>\%struct.image2d*</tt> in LLVM */
+ Image3D, /*!< \brief \c Coal::Image3D, <tt>\%struct.image3d*</tt> in LLVM */
+ Sampler /*!< \brief \c Coal::Sampler::bitfield(), \c i32 in LLVM, see \c Coal::Kernel::setArg() */
+ };
+
+ /**
+ * \brief Constructor
+ * \param vec_dim vector dimension of the argument, 1 if not a vector
+ * \param file \c File of the argument
+ * \param kind \c Kind of the argument
+ * \param targ_align alignment of the argument type (ABI specific)
+ */
+ Arg(unsigned short vec_dim, File file, Kind kind, size_t targ_align);
+ ~Arg();
+
+ /**
+ * \brief Allocate the argument
+ *
+ * This function must be called before \c loadData(). It
+ * allocates a buffer in which the argument value can be stored.
+ *
+ * \sa valueSize()
+ */
+ void alloc();
+
+ /**
+ * \brief Load a value into the argument
+ * \note \c alloc() must have been called before this function.
+ * \sa valueSize()
+ */
+ void loadData(const void *data, size_t size);
+
+ /**
+ * \brief Set the number of bytes that must be allocated at run-time
+ *
+ * \c __local arguments don't take a value given by the host
+ * application, but take pointers allocated on the device
+ * for each work-group.
+ *
+ * This function allows to set the size of the device-allocated
+ * memory buffer used by this argument.
+ *
+ * \param size size in byte of the buffer the device has to
+ * allocate for each work-group of this kernel
+ */
+ void setAllocAtKernelRuntime(size_t size);
+
+ /**
+ * \brief Changes the \c Kind of this argument
+ * \param kind new \c Kind
+ */
+ void refineKind(Kind kind);
+
+ /**
+ * \brief Compares this argument with another
+ *
+ * They are different if their \c vec_dim, \c file or \c kind are
+ * not the same.
+ *
+ * \param b other argument to compare
+ * \return true if this argument doesn't match \p b
+ */
+ bool operator !=(const Arg &b);
+
+ /**
+ * \brief Size of a field of this arg
+ *
+ * This function returns the size of this argument based on its
+ * \c Kind
+ *
+ * \note This size is not multiplied by \c vecDim(), you must do
+ * this by yourself to find the total space taken by this
+ * arg.
+ * \return the size of this argument, in bytes, without any padding
+ */
+ size_t valueSize() const;
+ unsigned short vecDim() const; /*!< \brief Vector dimension */
+ File file() const; /*!< \brief File */
+ Kind kind() const; /*!< \brief Kind */
+ bool defined() const; /*!< \brief Has the value of this argument already been loaded by the host application ? */
+ size_t targetAlignment() const; /*!< \brief Get alignment (bytes) of arg type */
+ size_t allocAtKernelRuntime() const; /*!< \brief Size of the \c __local buffer to allocate at kernel runtime */
+ const void *value(unsigned short index) const; /*!< \brief Pointer to the value of this argument, for the \p index vector element */
+ const void *data() const; /*!< \brief Pointer to the data of this arg, equivalent to <tt>value(0)</tt> */
+
+ private:
+ unsigned short p_vec_dim;
+ File p_file;
+ Kind p_kind;
+ void *p_data;
+ bool p_defined;
+ size_t p_runtime_alloc;
+ size_t p_targ_align;
+ };
+
+ /**
+ * \brief Add a \c llvm::Function to this kernel
+ *
+ * This function adds a \c llvm::Function to this kernel for the
+ * specified \p device. It also has the responsibility to find the
+ * \c Arg::Kind of each of the function's arguments.
+ *
+ * LLVM provides a \c llvm::Type for each argument:
+ *
+ * - If it is a pointer, the kind of the argument is \c Arg::Buffer and
+ * its field is a simple cast from a LLVM \c addrspace to \c Arg::File.
+ * - If it is a pointer to a struct whose name is either
+ * <tt>\%struct.image2d</tt> or <tt>\%struct.image3d</tt>, kind is set
+ * to \c Arg::Image2D or \c Arg::Image3D, respectively.
+ * - If it is a vector, \c vec_dim is set to the vector size, and the
+ * rest of the computations are done on the element type
+ * - Then we translate the LLVM type to an \c Arg::Kind. For instance,
+ * \c i32 becomes \c Arg::Int32
+ *
+ * Samplers aren't detected at this stage because they are plain \c i32
+ * types on the LLVM side. They are detected in \c setArg() when the
+ * value being set to the argument appears to be a \c Coal::Sampler.
+ *
+ * \param device device for which the function is added
+ * \param function function to add
+ * \param module LLVM module of this function
+ */
+ cl_int addFunction(DeviceInterface *device, llvm::Function *function,
+ llvm::Module *module);
+
+ /**
+ * \brief Get the LLVM function for a specified \p device
+ * \param device the device for which a LLVM function is needed
+ * \return the LLVM function for the given \p device
+ */
+ llvm::Function *function(DeviceInterface *device) const;
+
+ /**
+ * \brief Set the value of an argument
+ *
+ * See the constructor's documentation for a note on the
+ * \c Coal::Sampler objects
+ *
+ * \param index index of the argument
+ * \param size size of the value being stored in the argument, must match
+ * <tt>Arg::valueSize() * Arg::vecDim()</tt>
+ * \param value pointer to the data that will be copied in the argument
+ * \return \c CL_SUCCESS if success, an error code otherwise
+ */
+ cl_int setArg(cl_uint index, size_t size, const void *value);
+
+ unsigned int numArgs() const; /*!< \brief Number of arguments of this kernel */
+ const Arg *arg(unsigned int index) const; /*!< \brief \c Arg at the given \p index */
+
+ /*! \brief \c Coal::DeviceKernel for the specified \p device */
+ DeviceKernel *deviceDependentKernel(DeviceInterface *device) const;
+ llvm::Module *deviceDependentModule(DeviceInterface *device) const;
+
+ bool argsSpecified() const; /*!< \brief true if all the arguments have been set through \c setArg() */
+ bool hasLocals() const; /*!< \brief true if one or more argument is in file \c Arg::Local */
+
+ /**
+ * \brief Get information about this kernel
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int info(cl_kernel_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+
+ /**
+ * \brief Get performance hints and device-specific data about this kernel
+ * \copydetails Coal::DeviceInterface::info
+ * \param device \c Coal::DeviceInterface on which the kernel will be run
+ */
+ cl_int workGroupInfo(DeviceInterface *device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ boost::tuple<uint,uint,uint> reqdWorkGroupSize(llvm::Module *module) const;
+
+ /*! \brief Current value of the private \c wi_alloca_size member */
+ int get_wi_alloca_size() { return wi_alloca_size; }
+
+ std::string p_name;
+ private:
+ bool p_has_locals;
+ int wi_alloca_size;
+
+ /**
+ * \brief Record tying one device to its kernel, LLVM function and LLVM module
+ *
+ * One such record is kept per entry of \c p_device_dependent.
+ */
+ struct DeviceDependent
+ {
+ DeviceInterface *device;
+ DeviceKernel *kernel;
+ llvm::Function *function;
+ llvm::Module *module;
+ };
+
+ std::vector<DeviceDependent> p_device_dependent;
+ std::vector<Arg *> p_args;
+ DeviceDependent null_dep;
+
+ const DeviceDependent &deviceDependent(DeviceInterface *device) const;
+ DeviceDependent &deviceDependent(DeviceInterface *device);
+
+};
+
+}
+
+/*! \brief \c _cl_kernel defined as an empty subclass of \c Coal::Kernel, so a pointer to it can be used as a \c Coal::Kernel pointer */
+struct _cl_kernel : public Coal::Kernel
+{};
+
+#endif
diff --git a/src/core/memobject.cpp b/src/core/memobject.cpp
new file mode 100644
index 0000000..5501ac1
--- /dev/null
+++ b/src/core/memobject.cpp
@@ -0,0 +1,960 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file memobject.cpp
+ * \brief Memory objects
+ */
+
+#include "CL/cl_ext.h"
+#include "memobject.h"
+#include "context.h"
+#include "deviceinterface.h"
+#include "propertylist.h"
+#include "events.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+using namespace Coal;
+
+/*
+ * MemObject
+ */
+
+/**
+ * \brief Store the arguments and validate \p flags against \p host_ptr
+ *
+ * Nothing is allocated here. When a check fails, \p errcode_ret receives the
+ * error code and the constructor returns early; when every check passes,
+ * \p errcode_ret is left untouched.
+ *
+ * \param ctx parent context
+ * \param flags memory object flags
+ * \param host_ptr host pointer, required by \c CL_MEM_USE_HOST_PTR and
+ *        \c CL_MEM_COPY_HOST_PTR and forbidden otherwise
+ * \param errcode_ret receives \c CL_INVALID_VALUE or \c CL_INVALID_HOST_PTR
+ */
+MemObject::MemObject(Context *ctx, cl_mem_flags flags, void *host_ptr,
+                     cl_int *errcode_ret)
+: Object(Object::T_MemObject, ctx), p_num_devices(0), p_flags(flags),
+  p_host_ptr(host_ptr), p_devicebuffers(0), p_dtor_callback_stack()
+{
+    // Every flag bit accepted by this implementation
+    const cl_mem_flags known_flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY |
+                                     CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR |
+                                     CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR |
+                                     CL_MEM_USE_MSMC_TI;
+
+    const bool read_write = (flags & CL_MEM_READ_WRITE) != 0;
+    const bool read_only  = (flags & CL_MEM_READ_ONLY) != 0;
+    const bool write_only = (flags & CL_MEM_WRITE_ONLY) != 0;
+    const bool use_host   = (flags & CL_MEM_USE_HOST_PTR) != 0;
+    const bool alloc_host = (flags & CL_MEM_ALLOC_HOST_PTR) != 0;
+    const bool copy_host  = (flags & CL_MEM_COPY_HOST_PTR) != 0;
+
+    // At most one access qualifier may be given
+    const bool access_clash = (read_write && read_only) ||
+                              (read_write && write_only) ||
+                              (read_only && write_only);
+    const bool unknown_bits = (flags & ~known_flags) != 0;
+    // USE_HOST_PTR excludes both ALLOC_HOST_PTR and COPY_HOST_PTR
+    const bool hostptr_clash = use_host && (alloc_host || copy_host);
+
+    if (access_clash || unknown_bits || hostptr_clash)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // host_ptr must be present exactly when a flag consumes it
+    const bool wants_host_ptr = use_host || copy_host;
+    const bool has_host_ptr = (host_ptr != 0);
+
+    if (wants_host_ptr != has_host_ptr)
+    {
+        *errcode_ret = CL_INVALID_HOST_PTR;
+        return;
+    }
+}
+
+/**
+ * \brief Run the registered destructor callbacks, then delete the
+ *        per-device buffers and free their table
+ */
+MemObject::~MemObject()
+{
+    // Drain the callback stack; pop() reports whether it produced an entry
+    for (;;)
+    {
+        if (p_dtor_callback_stack.empty())
+            break;
+
+        dtor_callback_t entry;
+        if (!p_dtor_callback_stack.pop(entry))
+            continue;
+
+        entry.first((cl_mem)this, entry.second);
+    }
+
+    if (!p_devicebuffers)
+        return;
+
+    // Delete each DeviceBuffer, then release the table that held them
+    DeviceBuffer **const table_end = p_devicebuffers + p_num_devices;
+    for (DeviceBuffer **slot = p_devicebuffers; slot != table_end; ++slot)
+        delete *slot;
+
+    std::free((void *)p_devicebuffers);
+}
+
+/**
+ * \brief Create the per-device \c DeviceBuffer objects of this memory object
+ *
+ * Queries the parent context for its devices, builds a table holding one
+ * \c DeviceBuffer per device and, when the context has exactly one device,
+ * allocates that buffer right away.
+ *
+ * \return \c CL_SUCCESS on success; otherwise the error of the context query,
+ *         \c CL_OUT_OF_HOST_MEMORY, the error of the last device when every
+ *         device rejected the buffer, or \c CL_MEM_OBJECT_ALLOCATION_FAILURE
+ */
+cl_int MemObject::init()
+{
+    // Get the device list of the context
+    DeviceInterface **devices = 0;
+    cl_int rs;
+
+    rs = ((Context *)parent())->info(CL_CONTEXT_NUM_DEVICES,
+                                     sizeof(unsigned int),
+                                     &p_num_devices, 0);
+
+    if (rs != CL_SUCCESS)
+        return rs;
+
+    p_devices_to_allocate = p_num_devices;
+    devices = (DeviceInterface **)std::malloc(p_num_devices *
+                                              sizeof(DeviceInterface *));
+
+    if (!devices)
+        return CL_OUT_OF_HOST_MEMORY;
+
+    rs = ((Context *)parent())->info(CL_CONTEXT_DEVICES,
+                                     p_num_devices * sizeof(DeviceInterface *),
+                                     devices, 0);
+
+    if (rs != CL_SUCCESS)
+    {
+        std::free((void *)devices);
+        return rs;
+    }
+
+    // Allocate a table of DeviceBuffers. It is zero-filled because the
+    // destructor deletes every slot: if we return early below, the slots that
+    // were never assigned must be null and not indeterminate pointers.
+    p_devicebuffers = (DeviceBuffer **)std::calloc(p_num_devices,
+                                                   sizeof(DeviceBuffer *));
+
+    if (!p_devicebuffers)
+    {
+        std::free((void *)devices);
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+
+    // If we have more than one device, the allocation on the devices is
+    // deferred to first use, so host_ptr can become invalid. So, copy it in
+    // a RAM location and keep it. Also, set a flag telling CPU devices that
+    // they don't need to reallocate and re-copy host_ptr
+    // SubBuffer should simply reuse Buffer data
+    if (p_num_devices > 1 && (p_flags & CL_MEM_COPY_HOST_PTR)
+        && type() != SubBuffer)
+    {
+        void *tmp_hostptr = std::malloc(size());
+
+        if (!tmp_hostptr)
+        {
+            std::free((void *)devices);
+            return CL_OUT_OF_HOST_MEMORY;
+        }
+
+        std::memcpy(tmp_hostptr, p_host_ptr, size());
+
+        p_host_ptr = tmp_hostptr;
+        // Now, the client application can safely std::free() its host_ptr
+    }
+
+    // Create a DeviceBuffer for each device
+    unsigned int failed_devices = 0;
+
+    for (unsigned int i=0; i<p_num_devices; ++i)
+    {
+        DeviceInterface *device = devices[i];
+
+        rs = CL_SUCCESS;
+        p_devicebuffers[i] = device->createDeviceBuffer(this, &rs);
+
+        if (rs != CL_SUCCESS)
+        {
+            // Keep a null slot for a device that rejected the buffer
+            p_devicebuffers[i] = 0;
+            failed_devices++;
+        }
+    }
+
+    // The device list is not needed past this point
+    std::free((void *)devices);
+    devices = 0;
+
+    if (failed_devices == p_num_devices)
+    {
+        // Each device found a reason to reject the buffer, so it's invalid
+        return rs;
+    }
+
+    // If we have only one device, already allocate the buffer
+    if (p_num_devices == 1)
+    {
+        if (!p_devicebuffers[0]->allocate())
+            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+    }
+
+    return CL_SUCCESS;
+}
+
+/**
+ * \brief Allocate this memory object on \p device if not already done
+ * \param device device whose \c DeviceBuffer must be allocated
+ * \return true if the buffer is allocated when the function returns, false
+ *         if the allocation failed or no \c DeviceBuffer exists for \p device
+ */
+bool MemObject::allocate(DeviceInterface *device)
+{
+    DeviceBuffer *buffer = deviceBuffer(device);
+
+    // deviceBuffer() returns 0 when no DeviceBuffer matches the device
+    if (!buffer)
+        return false;
+
+    if (!buffer->allocated())
+    {
+        return buffer->allocate();
+    }
+
+    return true;
+}
+
+/**
+ * \brief Memory flags of this object
+ * \return the current value of \c p_flags
+ */
+cl_mem_flags MemObject::flags() const
+{
+    const cl_mem_flags current = p_flags;
+    return current;
+}
+
+/**
+ * \brief Host pointer of this memory object
+ *
+ * A sub-buffer has no host pointer of its own: its value is the parent's
+ * host pointer advanced by the sub-buffer offset, or 0 when the parent has
+ * none.
+ *
+ * \return the host pointer, possibly 0
+ */
+void *MemObject::host_ptr() const
+{
+    if (type() == SubBuffer)
+    {
+        const class SubBuffer *self = (const class SubBuffer *)this;
+        char *base = (char *)self->parent()->host_ptr();
+
+        if (!base)
+            return 0;
+
+        return (void *)(base + self->offset());
+    }
+
+    return p_host_ptr;
+}
+
+/**
+ * \brief Find the \c DeviceBuffer created for \p device
+ * \param device device to look up
+ * \return the matching \c DeviceBuffer, or 0 if there is none
+ */
+DeviceBuffer *MemObject::deviceBuffer(DeviceInterface *device) const
+{
+    // The table does not exist before init() has allocated it
+    if (!p_devicebuffers)
+        return 0;
+
+    for (unsigned int i=0; i<p_num_devices; ++i)
+    {
+        DeviceBuffer *candidate = p_devicebuffers[i];
+
+        // init() stores 0 for a device whose DeviceBuffer creation failed
+        if (candidate && candidate->device() == device)
+            return candidate;
+    }
+
+    return 0;
+}
+
+/**
+ * \brief Record that one more device buffer has been allocated
+ *
+ * Decrements the number of devices still to allocate. When that count
+ * reaches 0 on a multi-device object flagged \c CL_MEM_COPY_HOST_PTR, the
+ * stored host pointer is freed and reset to 0.
+ *
+ * \param buffer unused
+ */
+void MemObject::deviceAllocated(DeviceBuffer *buffer)
+{
+    (void) buffer;
+
+    --p_devices_to_allocate;
+
+    const bool all_allocated = (p_devices_to_allocate == 0);
+    const bool multi_device_copy = (p_num_devices > 1) &&
+                                   ((p_flags & CL_MEM_COPY_HOST_PTR) != 0);
+
+    if (!all_allocated || !multi_device_copy)
+        return;
+
+    // Nobody needs the kept host_ptr any more
+    std::free(p_host_ptr);
+    p_host_ptr = 0;
+}
+
+/**
+ * \brief Register a callback that the destructor will invoke
+ * \param pfn_notify function to call with this object and \p user_data
+ * \param user_data opaque pointer handed back to \p pfn_notify
+ */
+void MemObject::setDestructorCallback(void (CL_CALLBACK *pfn_notify)
+                                          (cl_mem memobj, void *user_data),
+                                      void *user_data)
+{
+    const dtor_callback_t entry(pfn_notify, user_data);
+
+    p_dtor_callback_stack.push(entry);
+}
+
+// HACK for the union: single-identifier alias of "void *", so that it can be
+// used as the type name in SIMPLE_ASSIGN(void_p, ...) and in the union member
+// void_p_var of MemObject::info()
+typedef void * void_p;
+
+/**
+ * \brief Get information about this memory object
+ *
+ * \param param_name information to retrieve
+ * \param param_value_size size in bytes of the buffer at \p param_value
+ * \param param_value buffer receiving the value, may be 0
+ * \param param_value_size_ret if not 0, receives the size of the value
+ * \return \c CL_SUCCESS, or \c CL_INVALID_VALUE if \p param_name is unknown
+ *         or \p param_value is given but too small
+ */
+cl_int MemObject::info(cl_mem_info param_name,
+                       size_t param_value_size,
+                       void *param_value,
+                       size_t *param_value_size_ret) const
+{
+    void *value = 0;
+    size_t value_length = 0;
+    // Only dereferenced in the branches where type() == SubBuffer
+    class SubBuffer *subbuf = (class SubBuffer *)this;
+
+    // Storage for the value being returned, one member per possible type
+    union {
+        cl_mem_object_type cl_mem_object_type_var;
+        cl_mem_flags cl_mem_flags_var;
+        size_t size_t_var;
+        void_p void_p_var;
+        cl_uint cl_uint_var;
+        cl_context cl_context_var;
+        cl_mem cl_mem_var;
+    };
+
+    switch (param_name)
+    {
+        case CL_MEM_TYPE:
+            switch (type())
+            {
+                case Buffer:
+                case SubBuffer:
+                    cl_mem_object_type_var = CL_MEM_OBJECT_BUFFER;
+                    break;
+
+                case Image2D:
+                    cl_mem_object_type_var = CL_MEM_OBJECT_IMAGE2D;
+                    break;
+
+                case Image3D:
+                    cl_mem_object_type_var = CL_MEM_OBJECT_IMAGE3D;
+                    break;
+            }
+            value = (void *)&cl_mem_object_type_var;
+            value_length = sizeof(cl_mem_object_type);
+            break;
+
+        case CL_MEM_FLAGS:
+            SIMPLE_ASSIGN(cl_mem_flags, p_flags);
+            break;
+
+        case CL_MEM_SIZE:
+            SIMPLE_ASSIGN(size_t, size());
+            break;
+
+        case CL_MEM_HOST_PTR:
+            SIMPLE_ASSIGN(void_p, host_ptr());
+            break;
+
+        case CL_MEM_MAP_COUNT:
+            SIMPLE_ASSIGN(cl_uint, 0); // TODO
+            break;
+
+        case CL_MEM_REFERENCE_COUNT:
+            SIMPLE_ASSIGN(cl_uint, references());
+            break;
+
+        case CL_MEM_CONTEXT:
+            SIMPLE_ASSIGN(cl_context, parent());
+            break;
+
+        case CL_MEM_ASSOCIATED_MEMOBJECT:
+            if (type() != SubBuffer)
+                SIMPLE_ASSIGN(cl_mem, 0)
+            else
+                SIMPLE_ASSIGN(cl_mem, subbuf->parent());
+            break;
+
+        case CL_MEM_OFFSET:
+            // The offset is a size_t, not a cl_mem handle: report it with
+            // the size and representation of a size_t
+            if (type() != SubBuffer)
+                SIMPLE_ASSIGN(size_t, 0)
+            else
+                SIMPLE_ASSIGN(size_t, subbuf->offset());
+            break;
+
+        default:
+            return CL_INVALID_VALUE;
+    }
+
+    if (param_value && param_value_size < value_length)
+        return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+        *param_value_size_ret = value_length;
+
+    if (param_value)
+        std::memcpy(param_value, value, value_length);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Buffer
+ */
+
+/**
+ * \brief Build a buffer of \p size bytes
+ *
+ * \p errcode_ret is set to \c CL_INVALID_BUFFER_SIZE when \p size is 0 or
+ * exceeds the built-in limit (512 MiB when \c __arm__ is defined, 1 GiB
+ * otherwise). When neither \c CL_MEM_READ_ONLY nor \c CL_MEM_WRITE_ONLY is
+ * given, \c CL_MEM_READ_WRITE is added to the stored flags.
+ */
+Buffer::Buffer(Context *ctx, size_t size, void *host_ptr, cl_mem_flags flags,
+               cl_int *errcode_ret)
+: MemObject(ctx, flags, host_ptr, errcode_ret), p_size(size)
+{
+#if defined(__arm__)
+    const size_t max_size = 512*1024*1024;
+#else
+    const size_t max_size = 1*1024*1024*1024;
+#endif
+
+    if (size == 0 || size > max_size)
+    {
+        *errcode_ret = CL_INVALID_BUFFER_SIZE;
+        return;
+    }
+
+    // CL_MEM_READ_WRITE is default if not specified {READ,WRITE}_ONLY
+    const cl_mem_flags one_way = CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY;
+
+    if ((flags & one_way) == 0)
+        p_flags |= CL_MEM_READ_WRITE;
+}
+
+/**
+ * \brief Size of the buffer
+ * \return the size in bytes given at construction
+ */
+size_t Buffer::size() const
+{
+    const size_t bytes = p_size;
+    return bytes;
+}
+
+/**
+ * \brief Kind of memory object
+ * \return always \c MemObject::Buffer
+ */
+MemObject::Type Buffer::type() const
+{
+    const MemObject::Type kind = MemObject::Buffer;
+    return kind;
+}
+
+/*----------------------------------------------------------------------------
+ * Record a new mapping in the list of currently mapped events.
+ * mapped_event: MapBufferEvent when the Map is on a Buffer
+ * RETURN: true if successful, false if fail
+ * Traverse the currently mapped event list and fail if the new region overlaps
+ * an existing one while either of the two is mapped with CL_MAP_WRITE.
+ * Otherwise insert the event so that the list stays in increasing order of
+ * offset (offsets of sub-buffer mappings are made relative to the parent).
+ * TODO: do we need to lock the list for operation???
+ *---------------------------------------------------------------------------*/
+bool Buffer::addMapEvent(BufferEvent *mapped_event)
+{
+    MapBufferEvent *mbe = (MapBufferEvent *) mapped_event;
+    size_t mbe_offset = mbe->offset();
+    if (mbe->buffer()->type() == SubBuffer)
+        mbe_offset += ((class SubBuffer *) mbe->buffer())->offset();
+
+    std::list<BufferEvent *>::iterator it, it_insert = p_mapped_events.end();
+    for (it = p_mapped_events.begin(); it != p_mapped_events.end(); ++it)
+    {
+        MapBufferEvent *e = (MapBufferEvent *) (*it);
+        size_t e_offset = e->offset();
+        if (e->buffer()->type() == SubBuffer)
+            e_offset += ((class SubBuffer *) e->buffer())->offset();
+
+        // Remember only the FIRST entry with a larger offset: inserting in
+        // front of a later one would break the ordering of the list
+        if (it_insert == p_mapped_events.end() && mbe_offset < e_offset)
+            it_insert = it;
+
+        // The scan goes on after the insertion point is known, because every
+        // entry must still be checked for a conflicting overlap
+        if (   mbe_offset <= e_offset + e->cb() - 1
+            && e_offset <= mbe_offset + mbe->cb() - 1)
+            if ((mbe->flags() & CL_MAP_WRITE) ||
+                (e->flags() & CL_MAP_WRITE))
+                return false;
+    }
+
+    p_mapped_events.insert(it_insert, mapped_event);
+    return true;
+}
+
+/*----------------------------------------------------------------------------
+ * Take a mapping out of the list of currently mapped events.
+ * mapped_ptr: mapped pointer from previous MapBuffer/MapImage Event
+ * RETURN: first event in the list whose ptr() equals mapped_ptr, after it
+ *         has been removed from the list; NULL if there is no such event
+ * TODO: do we need to lock the list for operation???
+ *---------------------------------------------------------------------------*/
+BufferEvent* Buffer::removeMapEvent(void *mapped_ptr)
+{
+    std::list<BufferEvent *>::iterator pos = p_mapped_events.begin();
+
+    while (pos != p_mapped_events.end())
+    {
+        MapBufferEvent *candidate = (MapBufferEvent *) (*pos);
+
+        if (candidate->ptr() == mapped_ptr)
+        {
+            p_mapped_events.erase(pos);
+            return candidate;
+        }
+
+        ++pos;
+    }
+
+    return NULL;
+}
+
+/*
+ * SubBuffer
+ */
+
+/**
+ * \brief Build a sub-buffer covering \p size bytes of \p parent from \p offset
+ *
+ * The parent is retained for the lifetime of the sub-buffer. \p errcode_ret
+ * is set to \c CL_INVALID_BUFFER_SIZE when the region is empty or does not
+ * fit in the parent, and to \c CL_INVALID_VALUE when \p flags carries a
+ * host-pointer flag or an access qualifier incompatible with the parent's.
+ * Access qualifiers (when none is given) and host-pointer flags are
+ * inherited from the parent.
+ */
+SubBuffer::SubBuffer(class Buffer *parent, size_t offset, size_t size,
+                     cl_mem_flags flags, cl_int *errcode_ret)
+: MemObject((Context *)parent->parent(), flags, 0, errcode_ret), p_offset(offset),
+  p_size(size), p_parent(parent)
+{
+    // Released again in ~SubBuffer(), whatever the outcome of the checks
+    clRetainMemObject((cl_mem) p_parent);
+
+    if (size == 0)
+    {
+        *errcode_ret = CL_INVALID_BUFFER_SIZE;
+        return;
+    }
+
+    // Same test as "offset + size > parent->size()", written so that the sum
+    // cannot wrap around and let an out-of-range region through
+    if (offset > parent->size() || size > parent->size() - offset)
+    {
+        *errcode_ret = CL_INVALID_BUFFER_SIZE;
+        return;
+    }
+
+    // Check the compatibility of flags and parent->flags()
+    const cl_mem_flags wrong_flags =
+        CL_MEM_ALLOC_HOST_PTR |
+        CL_MEM_USE_HOST_PTR |
+        CL_MEM_COPY_HOST_PTR;
+
+    if (flags & wrong_flags)
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if ((parent->flags() & CL_MEM_WRITE_ONLY) &&
+        (flags & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    if ((parent->flags() & CL_MEM_READ_ONLY) &&
+        (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)))
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // OpenCL 1.2: SubBuffer should inherit some of parent Buffer flags
+    cl_mem_flags parent_rw_flags = parent->flags()
+           & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
+    cl_mem_flags my_rw_flags = p_flags
+           & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
+    // parent be READ_WRITE, subBuffer be READ_ONLY/WRITE_ONLY (Spec allows)
+    if (! my_rw_flags) p_flags |= parent_rw_flags;
+    cl_mem_flags parent_hostptr_flags = parent->flags()
+       & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR);
+    if (parent_hostptr_flags) p_flags |= parent_hostptr_flags;
+}
+
+/**
+ * \brief Release the reference taken on the parent buffer by the constructor
+ */
+SubBuffer::~SubBuffer()
+{
+    const cl_mem parent_handle = (cl_mem) p_parent;
+
+    clReleaseMemObject(parent_handle);
+}
+
+/**
+ * \brief Size of the sub-buffer
+ * \return the size in bytes given at construction
+ */
+size_t SubBuffer::size() const
+{
+    const size_t bytes = p_size;
+    return bytes;
+}
+
+/**
+ * \brief Kind of memory object
+ * \return always \c MemObject::SubBuffer
+ */
+MemObject::Type SubBuffer::type() const
+{
+    const MemObject::Type kind = MemObject::SubBuffer;
+    return kind;
+}
+
+/**
+ * \brief Allocate the parent buffer on \p device
+ *
+ * A sub-buffer always uses the data of its parent \c Buffer, so the request
+ * is passed on to it.
+ *
+ * \param device device on which to allocate
+ * \return the result of the parent's \c allocate()
+ */
+bool SubBuffer::allocate(DeviceInterface *device)
+{
+    const bool parent_ok = p_parent->allocate(device);
+    return parent_ok;
+}
+
+/**
+ * \brief Start of the sub-buffer inside its parent
+ * \return the offset given at construction
+ */
+size_t SubBuffer::offset() const
+{
+    const size_t start = p_offset;
+    return start;
+}
+
+/**
+ * \brief Buffer this sub-buffer was created from
+ * \return the parent given at construction
+ */
+Buffer *SubBuffer::parent() const
+{
+    Buffer *const owner = p_parent;
+    return owner;
+}
+
+/**
+ * \brief Record a mapping; the bookkeeping is done by the parent buffer
+ * \param mapped_event event to record
+ * \return the result of the parent's \c addMapEvent()
+ */
+bool SubBuffer::addMapEvent(BufferEvent *mapped_event)
+{
+    const bool recorded = p_parent->addMapEvent(mapped_event);
+    return recorded;
+}
+
+/**
+ * \brief Remove a mapping; the bookkeeping is done by the parent buffer
+ * \param mapped_ptr pointer identifying the mapping
+ * \return the result of the parent's \c removeMapEvent()
+ */
+BufferEvent* SubBuffer::removeMapEvent(void *mapped_ptr)
+{
+    BufferEvent *const removed = p_parent->removeMapEvent(mapped_ptr);
+    return removed;
+}
+
+/*
+ * Image2D
+ */
+
+/**
+ * \brief Build a 2D image and validate its size, format and row pitch
+ *
+ * \p errcode_ret is set to \c CL_INVALID_IMAGE_SIZE for a zero dimension or
+ * an unusable \p row_pitch, and to \c CL_INVALID_IMAGE_FORMAT_DESCRIPTOR for
+ * a missing or inconsistent \p format. The row pitch defaults to
+ * <tt>width * pixel_size()</tt> when \p row_pitch is 0.
+ */
+Image2D::Image2D(Context *ctx, size_t width, size_t height, size_t row_pitch,
+                 const cl_image_format *format, void *host_ptr,
+                 cl_mem_flags flags, cl_int *errcode_ret)
+: MemObject(ctx, flags, host_ptr, errcode_ret),
+  p_width(width), p_height(height), p_row_pitch(row_pitch)
+{
+    if (!width || !height)
+    {
+        *errcode_ret = CL_INVALID_IMAGE_SIZE;
+        return;
+    }
+
+    if (!format)
+    {
+        *errcode_ret = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+        return;
+    }
+
+    p_format = *format;
+
+    // Check format descriptor: the packed data types are only allowed with
+    // the CL_RGB and CL_RGBx channel orders. The two comparisons must be
+    // joined with &&; with || the test is true for every channel order.
+    switch (p_format.image_channel_data_type)
+    {
+        case CL_UNORM_INT_101010:
+        case CL_UNORM_SHORT_555:
+        case CL_UNORM_SHORT_565:
+            if (p_format.image_channel_order != CL_RGB &&
+                p_format.image_channel_order != CL_RGBx)
+            {
+                *errcode_ret = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+                return;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    // Conversely, some channel orders restrict the usable data types
+    switch (p_format.image_channel_order)
+    {
+        case CL_LUMINANCE:
+        case CL_INTENSITY:
+            switch (p_format.image_channel_data_type)
+            {
+                case CL_UNORM_INT8:
+                case CL_UNORM_INT16:
+                case CL_SNORM_INT8:
+                case CL_SNORM_INT16:
+                case CL_HALF_FLOAT:
+                case CL_FLOAT:
+                    break;
+                default:
+                    *errcode_ret = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+                    return;
+            }
+            break;
+
+        case CL_RGB:
+        case CL_RGBx:
+            switch (p_format.image_channel_data_type)
+            {
+                case CL_UNORM_SHORT_555:
+                case CL_UNORM_SHORT_565:
+                case CL_UNORM_INT_101010:
+                    break;
+                default:
+                    *errcode_ret = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+                    return;
+            }
+            break;
+
+        case CL_ARGB:
+        case CL_BGRA:
+            switch (p_format.image_channel_data_type)
+            {
+                case CL_UNORM_INT8:
+                case CL_SNORM_INT8:
+                case CL_SIGNED_INT8:
+                case CL_UNSIGNED_INT8:
+                    break;
+                default:
+                    *errcode_ret = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+                    return;
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    // pixel_size() is 0 when the channel order or the data type is not one
+    // it knows about. Such a format is unusable, and it would make the
+    // modulo below divide by zero.
+    const size_t px_size = pixel_size(p_format);
+
+    if (px_size == 0)
+    {
+        *errcode_ret = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+        return;
+    }
+
+    // Row pitch
+    p_row_pitch = width * px_size;
+
+    if (row_pitch)
+    {
+        if (!host_ptr)
+        {
+            // row_pitch must be 0 if host_ptr is null
+            *errcode_ret = CL_INVALID_IMAGE_SIZE;
+            return;
+        }
+        if (row_pitch < p_row_pitch)
+        {
+            *errcode_ret = CL_INVALID_IMAGE_SIZE;
+            return;
+        }
+        if (row_pitch % px_size != 0)
+        {
+            *errcode_ret = CL_INVALID_IMAGE_SIZE;
+            return;
+        }
+
+        p_row_pitch = row_pitch;
+    }
+}
+
+/**
+ * \brief Size of the image
+ * \return <tt>height() * row_pitch()</tt>
+ */
+size_t Image2D::size() const
+{
+    const size_t total = height() * row_pitch();
+    return total;
+}
+
+/**
+ * \brief Kind of memory object
+ * \return always \c MemObject::Image2D
+ */
+MemObject::Type Image2D::type() const
+{
+    const MemObject::Type kind = MemObject::Image2D;
+    return kind;
+}
+
+/**
+ * \brief Width of the image
+ * \return the width given at construction
+ */
+size_t Image2D::width() const
+{
+    const size_t w = p_width;
+    return w;
+}
+
+/**
+ * \brief Height of the image
+ * \return the height given at construction
+ */
+size_t Image2D::height() const
+{
+    const size_t h = p_height;
+    return h;
+}
+
+/**
+ * \brief Row pitch of the image
+ * \return the current value of \c p_row_pitch
+ */
+size_t Image2D::row_pitch() const
+{
+    const size_t pitch = p_row_pitch;
+    return pitch;
+}
+
+/**
+ * \brief Slice pitch of the image
+ *
+ * An Image2D is made of only one slice, so the slice pitch is \c size().
+ *
+ * \return the value of \c size()
+ */
+size_t Image2D::slice_pitch() const
+{
+    const size_t whole_image = size();
+    return whole_image;
+}
+
+/**
+ * \brief Format of the image
+ * \return reference to the stored copy of the format given at construction
+ */
+const cl_image_format &Image2D::format() const
+{
+    const cl_image_format &stored = p_format;
+    return stored;
+}
+
+/**
+ * \brief Get image-specific information about this image
+ *
+ * \param param_name information to retrieve
+ * \param param_value_size size in bytes of the buffer at \p param_value
+ * \param param_value buffer receiving the value, may be 0
+ * \param param_value_size_ret if not 0, receives the size of the value
+ * \return \c CL_SUCCESS, or \c CL_INVALID_VALUE if \p param_name is unknown
+ *         or \p param_value is given but too small
+ */
+cl_int Image2D::imageInfo(cl_image_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const
+{
+ void *value = 0;
+ size_t value_length = 0;
+ // Only dereferenced in the branches guarded by type() == Image3D
+ class Image3D *image3D = (class Image3D *)this;
+
+ // Storage for the value being returned, one member per possible type
+ union {
+ cl_image_format cl_image_format_var;
+ size_t size_t_var;
+ };
+
+ // NOTE(review): SIMPLE_ASSIGN is defined outside this file; presumably it
+ // stores its second argument in the <type>_var union member and points
+ // value / value_length at it -- confirm against its definition
+ switch (param_name)
+ {
+ case CL_IMAGE_FORMAT:
+ SIMPLE_ASSIGN(cl_image_format, format());
+ break;
+
+ case CL_IMAGE_ELEMENT_SIZE:
+ SIMPLE_ASSIGN(size_t, element_size(p_format));
+ break;
+
+ case CL_IMAGE_ROW_PITCH:
+ // TODO: What was given when the image was created or width*size ?
+ SIMPLE_ASSIGN(size_t, row_pitch());
+ break;
+
+ case CL_IMAGE_SLICE_PITCH:
+ // Reported as 0 for anything that is not an Image3D
+ if (type() == Image3D)
+ SIMPLE_ASSIGN(size_t, image3D->slice_pitch())
+ else
+ SIMPLE_ASSIGN(size_t, 0);
+ break;
+
+ case CL_IMAGE_WIDTH:
+ SIMPLE_ASSIGN(size_t, width());
+ break;
+
+ case CL_IMAGE_HEIGHT:
+ SIMPLE_ASSIGN(size_t, height());
+ break;
+
+ case CL_IMAGE_DEPTH:
+ // Reported as 0 for anything that is not an Image3D
+ if (type() == Image3D)
+ SIMPLE_ASSIGN(size_t, image3D->depth())
+ else
+ SIMPLE_ASSIGN(size_t, 0);
+ break;
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+ // A destination buffer was given but cannot hold the value
+ if (param_value && param_value_size < value_length)
+ return CL_INVALID_VALUE;
+
+ if (param_value_size_ret)
+ *param_value_size_ret = value_length;
+
+ if (param_value)
+ std::memcpy(param_value, value, value_length);
+
+ return CL_SUCCESS;
+}
+
+/**
+ * \brief Size in bytes of one channel element of \p format
+ * \param format image format whose channel data type is examined
+ * \return 1, 2 or 4 (\c sizeof(float) for \c CL_FLOAT) depending on the
+ *         data type, 0 if the data type is not handled
+ */
+size_t Image2D::element_size(const cl_image_format &format)
+{
+    size_t bytes = 0;
+
+    switch (format.image_channel_data_type)
+    {
+        // 8-bit types
+        case CL_SNORM_INT8:
+        case CL_UNORM_INT8:
+        case CL_SIGNED_INT8:
+        case CL_UNSIGNED_INT8:
+            bytes = 1;
+            break;
+
+        // 16-bit types, including the half float and the packed 16-bit ones
+        case CL_SNORM_INT16:
+        case CL_UNORM_INT16:
+        case CL_SIGNED_INT16:
+        case CL_UNSIGNED_INT16:
+        case CL_HALF_FLOAT:
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+            bytes = 2;
+            break;
+
+        // 32-bit integer types, including the packed 10-10-10 one
+        case CL_SIGNED_INT32:
+        case CL_UNSIGNED_INT32:
+        case CL_UNORM_INT_101010:
+            bytes = 4;
+            break;
+
+        case CL_FLOAT:
+            bytes = sizeof(float);
+            break;
+
+        default:
+            bytes = 0;
+            break;
+    }
+
+    return bytes;
+}
+
+/**
+ * \brief Number of channel elements stored per pixel for \p format
+ * \param format image format whose channel order is examined
+ * \return 1, 2 or 4 depending on the channel order, 0 if the channel order
+ *         is not handled
+ */
+unsigned int Image2D::channels(const cl_image_format &format)
+{
+    unsigned int count = 0;
+
+    switch (format.image_channel_order)
+    {
+        case CL_R:
+        case CL_Rx:
+        case CL_A:
+        case CL_INTENSITY:
+        case CL_LUMINANCE:
+        // Only special data types allowed (565, 555, etc)
+        case CL_RGBx:
+        case CL_RGB:
+            count = 1;
+            break;
+
+        case CL_RG:
+        case CL_RGx:
+        case CL_RA:
+            count = 2;
+            break;
+
+        case CL_RGBA:
+        case CL_ARGB:
+        case CL_BGRA:
+            count = 4;
+            break;
+
+        default:
+            count = 0;
+            break;
+    }
+
+    return count;
+}
+
+/**
+ * \brief Size in bytes of one pixel of \p format
+ *
+ * The packed data types have a fixed pixel size; for every other data type
+ * the size is <tt>channels(format) * element_size(format)</tt>.
+ *
+ * \param format image format to measure
+ * \return the pixel size in bytes
+ */
+size_t Image2D::pixel_size(const cl_image_format &format)
+{
+    if (format.image_channel_data_type == CL_UNORM_SHORT_565 ||
+        format.image_channel_data_type == CL_UNORM_SHORT_555)
+        return 2;
+
+    if (format.image_channel_data_type == CL_UNORM_INT_101010)
+        return 4;
+
+    return channels(format) * element_size(format);
+}
+
+/**
+ * \brief Size in bytes of one channel element of this image
+ * \return \c element_size() applied to the format of this image
+ */
+size_t Image2D::element_size() const
+{
+    const size_t bytes = element_size(p_format);
+    return bytes;
+}
+
+/**
+ * \brief Size in bytes of one pixel of this image
+ * \return \c pixel_size() applied to the format of this image
+ */
+size_t Image2D::pixel_size() const
+{
+    const size_t bytes = pixel_size(p_format);
+    return bytes;
+}
+
+/**
+ * \brief Number of channel elements per pixel of this image
+ * \return \c channels() applied to the format of this image
+ */
+unsigned int Image2D::channels() const
+{
+    const unsigned int count = channels(p_format);
+    return count;
+}
+
+/*
+ * Image3D
+ */
+
+/**
+ * \brief Build a 3D image and validate its depth and slice pitch
+ *
+ * The 2D part (size, format, row pitch) is validated by the \c Image2D
+ * constructor. \p errcode_ret is set to \c CL_INVALID_IMAGE_SIZE when
+ * \p depth is not greater than 1 or \p slice_pitch is unusable. The slice
+ * pitch defaults to <tt>height * row_pitch()</tt> when \p slice_pitch is 0.
+ */
+Image3D::Image3D(Context *ctx, size_t width, size_t height, size_t depth,
+                 size_t row_pitch, size_t slice_pitch,
+                 const cl_image_format *format, void *host_ptr,
+                 cl_mem_flags flags, cl_int *errcode_ret)
+: Image2D(ctx, width, height, row_pitch, format, host_ptr, flags, errcode_ret),
+  p_depth(depth)
+{
+    // Default slice pitch, computed before any early return so that the
+    // member never stays uninitialized
+    p_slice_pitch = height * this->row_pitch();
+
+    if (depth <= 1)
+    {
+        *errcode_ret = CL_INVALID_IMAGE_SIZE;
+        return;
+    }
+
+    if (slice_pitch)
+    {
+        if (!host_ptr)
+        {
+            // slice_pitch must be 0 if host_ptr is null
+            *errcode_ret = CL_INVALID_IMAGE_SIZE;
+            return;
+        }
+        if (slice_pitch < p_slice_pitch)
+        {
+            *errcode_ret = CL_INVALID_IMAGE_SIZE;
+            return;
+        }
+        // row_pitch() can be 0 when the Image2D constructor returned early
+        // on an error; test it first so the modulo never divides by zero
+        if (this->row_pitch() == 0 || slice_pitch % this->row_pitch() != 0)
+        {
+            *errcode_ret = CL_INVALID_IMAGE_SIZE;
+            return;
+        }
+
+        p_slice_pitch = slice_pitch;
+    }
+}
+
+/**
+ * \brief Size of the image
+ * \return <tt>depth() * slice_pitch()</tt>
+ */
+size_t Image3D::size() const
+{
+    const size_t total = depth() * slice_pitch();
+    return total;
+}
+
+/**
+ * \brief Kind of memory object
+ * \return always \c MemObject::Image3D
+ */
+MemObject::Type Image3D::type() const
+{
+    const MemObject::Type kind = MemObject::Image3D;
+    return kind;
+}
+
+/**
+ * \brief Depth of the image
+ * \return the depth given at construction
+ */
+size_t Image3D::depth() const
+{
+    const size_t d = p_depth;
+    return d;
+}
+
+/**
+ * \brief Slice pitch of the image
+ * \return the current value of \c p_slice_pitch
+ */
+size_t Image3D::slice_pitch() const
+{
+    const size_t pitch = p_slice_pitch;
+    return pitch;
+}
diff --git a/src/core/memobject.h b/src/core/memobject.h
new file mode 100644
index 0000000..82cbfab
--- /dev/null
+++ b/src/core/memobject.h
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file memobject.h
+ * \brief Memory objects
+ */
+
+#ifndef __MEMOBJECT_H__
+#define __MEMOBJECT_H__
+
+#include "object.h"
+#include "dsp/u_concurrent_stack.h"
+
+#include <CL/cl.h>
+
+namespace Coal
+{
+
+class DeviceBuffer;
+class Context;
+class DeviceInterface;
+class BufferEvent;
+
+/**
+ * \brief Base class for all the memory objects
+ *
+ * Holds what is common to buffers, sub-buffers and images: the memory
+ * flags, the host pointer, one \c Coal::DeviceBuffer per device of the
+ * parent context, the destructor callbacks and the list of mapped events.
+ */
+class MemObject : public Object
+{
+ public:
+ /**
+ * \brief Type of memory object
+ */
+ enum Type
+ {
+ Buffer,
+ SubBuffer,
+ Image2D,
+ Image3D
+ };
+
+ /**
+ * \brief Constructor
+ * \param ctx parent \c Coal::Context
+ * \param flags memory object flags
+ * \param host_ptr host pointer used by some flags (see the OpenCL spec)
+ * \param errcode_ret return value
+ * \note Don't do any initialization here, but in \c init(). We only fill
+ * the private variables and check the values passed in argument.
+ * \sa init
+ */
+ MemObject(Context *ctx, cl_mem_flags flags, void *host_ptr,
+ cl_int *errcode_ret);
+ virtual ~MemObject();
+
+ /**
+ * \brief Initialize the memory object
+ *
+ * Memory objects are device-independent classes. This function creates
+ * one \c Coal::DeviceBuffer per device present in the context by
+ * calling \c Coal::DeviceInterface::createDeviceBuffer().
+ *
+ * If there is only one device, its \c Coal::DeviceBuffer is directly
+ * allocated. If there are more than one device, the allocation is
+ * deferred until a \c Coal::Event is pushed for this device.
+ *
+ * \return \c CL_SUCCESS if success, an error code otherwise
+ */
+ virtual cl_int init();
+ virtual bool allocate(DeviceInterface *device); /*!< \brief Allocate this memory object on the given \p device */
+ virtual size_t size() const = 0; /*!< \brief Device-independent size of the memory object */
+ virtual Type type() const = 0; /*!< \brief Type of the memory object */
+
+ cl_mem_flags flags() const; /*!< \brief Flags */
+ void *host_ptr() const; /*!< \brief Host pointer; for a sub-buffer, the parent's host pointer plus the sub-buffer offset */
+ DeviceBuffer *deviceBuffer(DeviceInterface *device) const; /*!< \brief \c Coal::DeviceBuffer for the given \p device, 0 if none matches */
+
+ void deviceAllocated(DeviceBuffer *buffer); /*!< \brief Notification that one \c Coal::DeviceBuffer has been allocated; \p buffer itself is not used */
+
+ /**
+ * \brief Set a destructor callback for this memory object
+ *
+ * This callback is called when this memory object is deleted. It is
+ * currently called from the destructor, so the memory object is already
+ * invalid, but as OpenCL objects are immutable, the callback cannot
+ * use its \c memobj parameter except in a pointer comparison, and there
+ * is no problem.
+ *
+ * \param pfn_notify function to call when the memory object is deleted
+ * \param user_data user data to pass to this function
+ */
+ void setDestructorCallback(void (CL_CALLBACK *pfn_notify)(cl_mem memobj,
+ void *user_data),
+ void *user_data);
+
+ /**
+ * \brief Get information about this memory object
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int info(cl_mem_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ /*! \brief Record a mapping of this object; this base version records nothing and returns false */
+ virtual bool addMapEvent(BufferEvent *mapped_event) { return false; }
+ /*! \brief Remove the mapping identified by \p mapped_ptr; this base version returns NULL */
+ virtual BufferEvent* removeMapEvent(void *mapped_ptr) { return NULL; }
+
+ protected:
+ cl_mem_flags p_flags; /*!< \brief Memory flags; subclasses may add inherited or default flags */
+ // NOTE(review): std::list is used here but <list> is not included by this
+ // header itself -- presumably it comes in through object.h or
+ // dsp/u_concurrent_stack.h; confirm
+ std::list<BufferEvent *> p_mapped_events; /*!< \brief Events of the currently mapped regions */
+
+ private:
+ // p_num_devices: number of devices of the parent context (set by init())
+ // p_devices_to_allocate: devices whose allocation has not been notified yet
+ unsigned int p_num_devices, p_devices_to_allocate;
+ void *p_host_ptr;
+ // Table of p_num_devices entries, allocated by init()
+ DeviceBuffer **p_devicebuffers;
+
+ // A destructor callback together with its user data
+ typedef std::pair<void (CL_CALLBACK *)(cl_mem memobj, void *user_data), void*> dtor_callback_t;
+ concurrent_stack<dtor_callback_t> p_dtor_callback_stack;
+
+ //void (CL_CALLBACK *p_dtor_callback)(cl_mem memobj, void *user_data);
+ //void *p_dtor_userdata;
+};
+
+/**
+ * \brief Simple buffer object
+ *
+ * Also keeps track of the regions currently mapped on the buffer and on
+ * its sub-buffers.
+ */
+class Buffer : public MemObject
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param ctx parent \c Coal::Context
+ * \param size size of the buffer, in bytes
+ * \param host_ptr host pointer
+ * \param flags memory flags
+ * \param errcode_ret return code
+ */
+ Buffer(Context *ctx, size_t size, void *host_ptr, cl_mem_flags flags,
+ cl_int *errcode_ret);
+
+ size_t size() const; /*!< \brief Size of the buffer, in bytes */
+ Type type() const; /*!< \brief Return that we are a \c Coal::MemObject::Buffer */
+
+ /*! \brief Record a mapping; returns false if it overlaps an existing mapping and either of the two is a write mapping */
+ bool addMapEvent(BufferEvent *mapped_event);
+ /*! \brief Remove and return the first recorded mapping whose pointer is \p mapped_ptr, NULL if there is none */
+ BufferEvent* removeMapEvent(void *mapped_ptr);
+ private:
+ size_t p_size; /*!< \brief Size in bytes, as given to the constructor */
+
+};
+
+/**
+ * \brief Sub-buffer
+ */
+class SubBuffer : public MemObject
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param parent parent \c Coal::Buffer
+ * \param offset offset in \p parent of the start of this sub-buffer
+ * \param size size of the sub-buffer
+ * \param flags memory flags (must be compatible with the \p parent's ones)
+ * \param errcode_ret return code
+ */
+ SubBuffer(class Buffer *parent, size_t offset, size_t size,
+ cl_mem_flags flags, cl_int *errcode_ret);
+ ~SubBuffer();
+
+ size_t size() const; /*!< \brief Size */
+ Type type() const; /*!< \brief Return that we are a \c Coal::MemObject::SubBuffer */
+ bool allocate(DeviceInterface *device); /*!< \brief Allocate the \b parent \c Coal::Buffer */
+
+ size_t offset() const; /*!< \brief Offset in bytes */
+ class Buffer *parent() const; /*!< \brief Parent \c Coal::Buffer */
+
+ /*
+ * Overrides of the virtual MemObject::addMapEvent() / removeMapEvent()
+ * hooks (whose base versions return false / NULL). The overriding
+ * definitions are not part of this header.
+ */
+ bool addMapEvent(BufferEvent *mapped_event);
+ BufferEvent* removeMapEvent(void *mapped_ptr);
+ private:
+ /* NOTE(review): presumably the values reported by offset() and size() - confirm in the implementation file */
+ size_t p_offset, p_size;
+ /* NOTE(review): presumably the buffer returned by parent() - confirm in the implementation file */
+ class Buffer *p_parent;
+};
+
+/**
+ * \brief 2D image
+ */
+class Image2D : public MemObject
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param ctx parent \c Coal::Context
+ * \param width width of the image
+ * \param height height of the image
+ * \param row_pitch number of bytes in a row of pixels. If 0, defaults to <tt>width * pixel_size()</tt>
+ * \param format image format
+ * \param host_ptr host pointer
+ * \param flags memory flags
+ * \param errcode_ret return code
+ */
+ Image2D(Context *ctx, size_t width, size_t height, size_t row_pitch,
+ const cl_image_format *format, void *host_ptr,
+ cl_mem_flags flags, cl_int *errcode_ret);
+
+ /* size(), type() and slice_pitch() are virtual: Coal::Image3D redeclares all three. */
+ virtual size_t size() const; /*!< \brief Size in bytes */
+ virtual Type type() const; /*!< \brief Return that we are a \c Coal::MemObject::Image2D */
+
+ size_t width() const; /*!< \brief Width */
+ size_t height() const; /*!< \brief Height */
+ size_t row_pitch() const; /*!< \brief Size in bytes of a row of pixels */
+ virtual size_t slice_pitch() const; /*!< \brief Size in bytes of the image */
+ const cl_image_format &format() const; /*!< \brief Image format descriptor */
+
+ /**
+ * \brief Information about this image object
+ *
+ * This function is also usable for \c Coal::Image3D objects as it does
+ * casting when necessary in order to give information when needed.
+ *
+ * \copydetails Coal::DeviceInterface::info
+ */
+ cl_int imageInfo(cl_image_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ /* Static helpers take an explicit format; the member overloads below take no argument and describe this image. */
+ static size_t element_size(const cl_image_format &format); /*!< \brief Size in bytes of each channel of \p format */
+ static unsigned int channels(const cl_image_format &format);/*!< \brief Number of channels of \p format */
+ static size_t pixel_size(const cl_image_format &format); /*!< \brief Size in bytes of a pixel in \p format */
+ size_t pixel_size() const; /*!< \brief Pixel size of this image */
+ size_t element_size() const; /*!< \brief Channel size of this image */
+ unsigned int channels() const; /*!< \brief Number of channels of this image */
+
+ private:
+ /* NOTE(review): presumably the values reported by width(), height() and row_pitch() - confirm in the implementation file */
+ size_t p_width, p_height, p_row_pitch;
+ /* NOTE(review): presumably the descriptor returned by format() - confirm in the implementation file */
+ cl_image_format p_format;
+};
+
+/**
+ * \brief 3D image
+ */
+class Image3D : public Image2D
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param ctx parent \c Coal::Context
+ * \param width width of the image
+ * \param height height of the image
+ * \param depth depth of the image
+ * \param row_pitch number of bytes in a row of pixels. If 0, defaults to <tt>width * pixel_size()</tt>
+ * \param slice_pitch number of bytes in a 2D slice. If 0, defaults to <tt>height * row_pitch()</tt>
+ * \param format image format
+ * \param host_ptr host pointer
+ * \param flags memory flags
+ * \param errcode_ret return code
+ */
+ Image3D(Context *ctx, size_t width, size_t height, size_t depth,
+ size_t row_pitch, size_t slice_pitch,
+ const cl_image_format *format, void *host_ptr,
+ cl_mem_flags flags, cl_int *errcode_ret);
+
+ /* size(), type() and slice_pitch() override the virtual functions declared in Coal::Image2D. */
+ size_t size() const; /*!< \brief Size in bytes of this image */
+ Type type() const; /*!< \brief Return that we are a \c Coal::MemObject::Image3D */
+
+ size_t depth() const; /*!< \brief Depth of the image */
+ size_t slice_pitch() const; /*!< \brief Size in bytes of a 2D slice */
+
+ private:
+ /* NOTE(review): presumably the values reported by depth() and slice_pitch() - confirm in the implementation file */
+ size_t p_depth, p_slice_pitch;
+};
+
+}
+
+/*
+ * Definition of the _cl_mem struct in terms of the runtime's own class: it
+ * derives from Coal::MemObject and adds no members of its own.
+ * NOTE(review): presumably this is the struct the OpenCL headers' cl_mem
+ * handle points to - confirm against CL/cl.h.
+ */
+struct _cl_mem : public Coal::MemObject
+{};
+
+#endif
diff --git a/src/core/object.cpp b/src/core/object.cpp
new file mode 100644
index 0000000..be44279
--- /dev/null
+++ b/src/core/object.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file object.cpp
+ * \brief Reference-counted object tree
+ */
+
+#include "object.h"
+
+using namespace Coal;
+
+/**
+ * \brief Access the process-wide registry of live \c Coal::Object instances
+ *
+ * The list is a function-local static, so it is constructed on the first
+ * call rather than at an unspecified point during static initialization.
+ *
+ * \return reference to the registry list
+ */
+static std::list<Object *>& getKnownObjects()
+{
+    static std::list<Object *> registry;
+
+    return registry;
+}
+
+
+/**
+ * \brief Build an object with one reference and register it as "known"
+ *
+ * Takes a reference on \p parent (when there is one) and records this object
+ * at the front of the known-objects list, remembering the list position in
+ * \c p_it so the destructor can unregister it.
+ *
+ * \param type type of the concrete class
+ * \param parent parent object, may be null
+ */
+Object::Object(Type type, Object *parent)
+: p_references(1), p_parent(parent), p_type(type), p_release_parent(true)
+{
+    if (p_parent)
+        p_parent->reference();
+
+    // Register ourselves; begin() is our own entry right after push_front()
+    std::list<Object *> &registry = getKnownObjects();
+    registry.push_front(this);
+    p_it = registry.begin();
+}
+
+/**
+ * \brief Drop the reference held on the parent and unregister this object
+ *
+ * The parent is always dereferenced; it is deleted only when that was its
+ * last reference and \c p_release_parent is still true.
+ */
+Object::~Object()
+{
+    if (p_parent)
+    {
+        const bool parent_unreferenced = p_parent->dereference();
+
+        if (parent_unreferenced && p_release_parent)
+            delete p_parent;
+    }
+
+    // This object is no longer a valid target for isA()
+    std::list<Object *> &registry = getKnownObjects();
+    registry.erase(p_it);
+}
+
+/** \brief Add one to the reference counter (plain, non-atomic increment) */
+void Object::reference()
+{
+    ++p_references;
+}
+
+/**
+ * \brief Remove one from the reference counter
+ * \return true when the counter is 0 after the decrement
+ */
+bool Object::dereference()
+{
+    --p_references;
+
+    const bool last_reference_gone = (p_references == 0);
+    return last_reference_gone;
+}
+
+/**
+ * \brief Choose whether the destructor may \b delete the parent
+ * \param release true to delete the parent when its count reaches 0
+ */
+void Object::setReleaseParent (bool release)
+{
+    this->p_release_parent = release;
+}
+
+/** \brief Current value of the reference counter */
+unsigned int Object::references() const
+{
+    return this->p_references;
+}
+
+/** \brief Parent given at construction time (may be null) */
+Object *Object::parent() const
+{
+    return this->p_parent;
+}
+
+/** \brief Type tag given at construction time */
+Object::Type Object::type() const
+{
+    return this->p_type;
+}
+
+/**
+ * \brief Tell whether \c this is a live object of the requested \p type
+ *
+ * The pointer is only dereferenced after it has been found in the list of
+ * known objects, so garbage or already-freed pointers yield false.
+ *
+ * NOTE(review): calling a member function through a null pointer is undefined
+ * behaviour in C++, so an optimizing compiler is allowed to drop the null test
+ * below. Removing that hazard needs a static/free-function entry point, which
+ * would change the interface used by callers.
+ *
+ * \param type type the object must have
+ * \return true if the object is registered and has type \p type
+ */
+bool Object::isA(Object::Type type) const
+{
+    // Null handles are never valid objects
+    if (this == 0)
+        return false;
+
+    // Linear search of the registry: only a registered pointer may be read
+    const std::list<Object *> &registry = getKnownObjects();
+
+    for (std::list<Object *>::const_iterator cur = registry.begin();
+         cur != registry.end(); ++cur)
+    {
+        if (*cur != this)
+            continue;
+
+        // Registered, hence safe to look at the object itself
+        return this->type() == type;
+    }
+
+    return false;
+}
diff --git a/src/core/object.h b/src/core/object.h
new file mode 100644
index 0000000..d83e326
--- /dev/null
+++ b/src/core/object.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file object.h
+ * \brief Object tree
+ */
+
+#ifndef __REFCOUNTED_H__
+#define __REFCOUNTED_H__
+
+#include <list>
+
+namespace Coal
+{
+
+/**
+ * \brief Base class of all the Clover objects
+ *
+ * This class implements functions needed by all the Clover objects, like
+ * reference counting, the object tree (parents/children), etc.
+ *
+ * It also uses a special list of known objects, used to check that a pointer
+ * passed by the user to an OpenCL function actually is an object of the correct
+ * type. See \c isA().
+ */
+class Object
+{
+ public:
+ /**
+ * \brief Type of object the inherited class actually is
+ */
+ enum Type
+ {
+ T_Device, /*!< \brief \c Coal::DeviceInterface */
+ T_CommandQueue, /*!< \brief \c Coal::CommandQueue */
+ T_Event, /*!< \brief \c Coal::Event */
+ T_Context, /*!< \brief \c Coal::Context */
+ T_Kernel, /*!< \brief \c Coal::Kernel */
+ T_MemObject, /*!< \brief \c Coal::MemObject */
+ T_Program, /*!< \brief \c Coal::Program */
+ T_Sampler /*!< \brief \c Coal::Sampler */
+ };
+
+ /**
+ * \brief Constructor
+ * \param type type of the child class calling this constructor
+ * \param parent parent object
+ */
+ Object(Type type, Object *parent = 0);
+ virtual ~Object();
+
+ /**
+ * \brief Increments the reference counter
+ */
+ void reference();
+
+ /**
+ * \brief Decrements the reference counter
+ * \return true if the reference counter has reached 0
+ */
+ bool dereference();
+
+ /**
+ * \brief Reference counter
+ * \return the number of references of this class currently in use
+ */
+ unsigned int references() const;
+
+ /**
+ * \brief Set if the parent object has to be deleted if its reference count reaches 0
+ *
+ * The destructor of \c Coal::Object dereferences its parent object.
+ * This is done in order to correctly free objects when no object has
+ * a reference to it anymore.
+ *
+ * Some objects such as \c Coal::CommandQueue need to do some operations
+ * before being deleted. This function tells \c Coal::Object to
+ * dereference its parent object, but not to call \b delete on it.
+ *
+ * \param release true to have \b delete called on the parent object
+ * when its reference count reaches 0, false to keep it
+ */
+ void setReleaseParent(bool release);
+
+ Object *parent() const; /*!< \brief Parent object */
+ Type type() const; /*!< \brief Type */
+
+ /**
+ * \brief Returns whether this object is an instance of \p type
+ * \note This function begins with a NULL-check on the \c this pointer,
+ * so it's safe to use even when \c this is not guaranteed not to
+ * be NULL.
+ * \param type type this object must have for the check to pass
+ * \return true if this object exists and has the correct type
+ */
+ bool isA(Type type) const;
+
+ private:
+ /* Reference counter; a plain unsigned int, not an atomic type. */
+ unsigned int p_references;
+ /* Parent object passed to the constructor (0 by default). */
+ Object *p_parent;
+ /* Type tag passed to the constructor by the concrete class. */
+ Type p_type;
+ /* NOTE(review): presumably this object's position in the list of known objects mentioned in the class description - confirm in object.cpp */
+ std::list<Object *>::iterator p_it;
+ /* Flag written by setReleaseParent(); see that function's description. */
+ bool p_release_parent;
+};
+
+}
+
+#endif
diff --git a/src/core/platform.cpp b/src/core/platform.cpp
new file mode 100644
index 0000000..1af6153
--- /dev/null
+++ b/src/core/platform.cpp
@@ -0,0 +1,227 @@
+/******************************************************************************
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <list>
+#include <iostream>
+
+#include "CL/cl.h"
+#include "CL/cl_ext.h"
+#include "platform.h"
+#include "propertylist.h"
+#include "object.h"
+#include "cpu/device.h"
+#ifndef SHAMROCK_BUILD
+#include "dsp/device.h"
+#include "dsp/driver.h"
+#endif
+
+/*-----------------------------------------------------------------------------
+* For the lock file
+*----------------------------------------------------------------------------*/
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+using namespace Coal;
+
+/******************************************************************************
+* begin_file_lock_crit_section
+******************************************************************************/
+/**
+ * Open (creating it if needed) the lock file \p fname and take an exclusive
+ * flock() on it, so only 1 OpenCL program can progress at a time.
+ *
+ * A non-blocking attempt is made first; when the lock is held elsewhere a
+ * message is printed and the call blocks until the lock is obtained. Any
+ * failure to open or lock the file prints a message and terminates the
+ * process with exit(-1).
+ *
+ * \param fname path of the lock file
+ * \return file descriptor holding the lock
+ */
+static int begin_file_lock_crit_section(char* fname)
+{
+    const int fd = open(fname, O_CREAT,
+                        S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
+
+    const std::string lock_name(fname);
+
+    if (fd < 0)
+    {
+        std::cout << "Can not open lock file " << lock_name << ", Aborting !" << std::endl;
+        exit(-1);
+    }
+
+    // Uncontended case: the non-blocking attempt succeeds immediately
+    if (flock(fd, LOCK_EX|LOCK_NB) != -1)
+        return fd;
+
+    // Held elsewhere: tell the user and wait for it
+    if (errno == EWOULDBLOCK)
+    {
+        std::cout << "Waiting on lock " << lock_name << " ..." << std::endl;
+
+        if (flock(fd, LOCK_EX) != -1)
+        {
+            std::cout << "Acquired lock " << lock_name << ", Proceeding!" << std::endl;
+            return fd;
+        }
+    }
+
+    // Either attempt failed for a reason other than contention
+    std::cout << "Error Locking file " << lock_name << ", Aborting !" << std::endl;
+    exit(-1);
+}
+
+namespace Coal
+{
+ /**
+  * Take the lock on /var/lock/opencl, then create the device list: one
+  * CPUDevice, followed (except in SHAMROCK builds) by one DSPDevice per DSP
+  * reported by the Driver singleton.
+  */
+ Platform::Platform() : dispatch(&dispatch_table)
+ {
+     char lock_path[] = "/var/lock/opencl";
+     p_lock_fd = begin_file_lock_crit_section(lock_path);
+
+     cl_device_id cpu = (_cl_device_id*)new Coal::CPUDevice;
+     p_devices.push_back(cpu);
+
+     // Driver class only exists for the DSPDevice, so need this guard:
+#ifndef SHAMROCK_BUILD
+     for (int dsp = 0; dsp < Driver::instance()->num_dsps(); dsp++)
+     {
+         cl_device_id accel = (_cl_device_id*)new Coal::DSPDevice(dsp);
+         p_devices.push_back(accel);
+     }
+#endif
+ }
+
+ /**
+  * Destroy every device created by the constructor, then release and close
+  * the lock file.
+  *
+  * The devices are deleted while the lock is still held (reverse order of
+  * acquisition): unlocking first would let another OpenCL process proceed
+  * while this one is still tearing its devices down.
+  */
+ Platform::~Platform()
+ {
+     for (size_t i = 0; i < p_devices.size(); i++)
+         delete p_devices[i];
+     p_devices.clear();
+
+     flock(p_lock_fd, LOCK_UN);
+     close(p_lock_fd);
+ }
+
+ /**
+  * Count the devices whose CL_DEVICE_TYPE matches \p device_type and, when
+  * \p devices is non-null, store up to \p num_entries of them in it.
+  *
+  * CL_DEVICE_TYPE_DEFAULT is mapped to CPU in SHAMROCK builds and to
+  * ACCELERATOR otherwise.
+  *
+  * \param device_type bitfield of requested device types
+  * \param num_entries capacity of \p devices
+  * \param devices output array, may be null to only count
+  * \return total number of matching devices (may exceed \p num_entries)
+  */
+ cl_uint Platform::getDevices(cl_device_type device_type,
+                              cl_uint num_entries, cl_device_id * devices)
+ {
+     if (device_type == CL_DEVICE_TYPE_DEFAULT)
+     {
+#ifdef SHAMROCK_BUILD
+         device_type = CL_DEVICE_TYPE_CPU;
+#else
+         device_type = CL_DEVICE_TYPE_ACCELERATOR;
+#endif
+     }
+
+     cl_uint matched = 0;
+
+     for (size_t idx = 0; idx < p_devices.size(); ++idx)
+     {
+         cl_device_type type;
+         p_devices[idx]->info(CL_DEVICE_TYPE, sizeof(cl_device_type), &type,0);
+
+         if (!(type & device_type))
+             continue;
+
+         // Store only while there is room, but keep counting past the end
+         if (devices && matched < num_entries)
+             devices[matched] = p_devices[idx];
+
+         ++matched;
+     }
+
+     return matched;
+ }
+
+ /**
+  * Return one of the platform description strings.
+  *
+  * The parameter type is cl_platform_info, matching the declaration in
+  * platform.h.
+  *
+  * \param param_name which CL_PLATFORM_* value to return
+  * \param param_value_size size in bytes of \p param_value
+  * \param param_value destination buffer, may be null to only query the size
+  * \param param_value_size_ret receives the size of the value, may be null
+  * \return CL_SUCCESS, or CL_INVALID_VALUE for an unknown \p param_name or a
+  *         destination buffer that is too small
+  */
+ cl_int Platform::info(cl_platform_info param_name,
+                       size_t param_value_size,
+                       void *param_value,
+                       size_t *param_value_size_ret) const
+ {
+     // STRING_ASSIGN comes from propertylist.h; these two locals keep the
+     // names the original code declared for it.
+     void *value = 0;
+     size_t value_length = 0;
+
+     switch (param_name)
+     {
+         case CL_PLATFORM_PROFILE:
+             STRING_ASSIGN("FULL_PROFILE");
+             break;
+
+         case CL_PLATFORM_VERSION:
+#ifdef SHAMROCK_BUILD
+             STRING_ASSIGN("OpenCL 1.1 Shamrock ");
+#else
+             STRING_ASSIGN("OpenCL 1.1 TI ");
+#endif
+             break;
+
+         case CL_PLATFORM_NAME:
+#ifdef SHAMROCK_BUILD
+             STRING_ASSIGN("Shamrock OpenCL for Arm");
+#else
+#if defined(__arm__)
+             STRING_ASSIGN("TI OpenCL for Arm + Dsp");
+#else
+             STRING_ASSIGN("TI OpenCL for Advantech DSPC868x");
+#endif
+#endif
+             break;
+
+         case CL_PLATFORM_VENDOR:
+#ifdef SHAMROCK_BUILD
+             STRING_ASSIGN("Open Source Software");
+#else
+             STRING_ASSIGN("Texas Instruments, Inc.");
+#endif
+             break;
+
+         case CL_PLATFORM_EXTENSIONS:
+             // TODO add cl_khr_icd when it works
+#ifdef SHAMROCK_BUILD
+             STRING_ASSIGN("cl_khr_byte_addressable_store cl_khr_fp64");
+#else
+             STRING_ASSIGN("cl_khr_byte_addressable_store cl_khr_fp64 cl_ti_msmc_buffers");
+#endif
+             break;
+
+         case CL_PLATFORM_ICD_SUFFIX_KHR:
+             // In SHAMROCK builds nothing is assigned: the value stays empty
+#ifndef SHAMROCK_BUILD
+             STRING_ASSIGN("TI");
+#endif
+             break;
+
+         default:
+             return CL_INVALID_VALUE;
+     }
+
+     if (param_value && param_value_size < value_length)
+         return CL_INVALID_VALUE;
+
+     if (param_value_size_ret)
+         *param_value_size_ret = value_length;
+
+     // Skip the copy for an empty value: memcpy() must not be given the
+     // null source pointer left behind when no string was assigned.
+     if (param_value && value && value_length)
+         std::memcpy(param_value, value, value_length);
+
+     return CL_SUCCESS;
+ }
+};
+
+/*
+ * Global platform object (declared extern in platform.h). Being a
+ * namespace-scope object, it is constructed during static initialization,
+ * which runs Coal::Platform::Platform(): the lock file is taken and the
+ * devices are created before main() starts.
+ */
+_cl_platform_id the_platform;
diff --git a/src/core/platform.h b/src/core/platform.h
new file mode 100644
index 0000000..809d12c
--- /dev/null
+++ b/src/core/platform.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#ifndef __PLATFORM_H__
+#define __PLATFORM_H__
+
+#include <CL/cl.h>
+#include <vector>
+#include <cstring>
+#include "icd.h"
+
+namespace Coal
+{
+
+/**
+ * \brief Platform object: keeps a list of cl_device_id handles and a lock-file descriptor
+ */
+class Platform
+{
+ public:
+ Platform();
+ ~Platform();
+
+ /**
+ * \brief Look up devices by type
+ * \param device_type requested device type(s)
+ * \param num_entries capacity of \p devices
+ * \param devices output array
+ * \return a device count (see the definition in platform.cpp for the exact semantics)
+ */
+ cl_uint getDevices(cl_device_type device_type,
+ cl_uint num_entries, cl_device_id * devices);
+
+ /**
+ * \brief Get information about this platform
+ * \param param_name which piece of information to return
+ * \param param_value_size size in bytes of \p param_value
+ * \param param_value destination buffer
+ * \param param_value_size_ret receives the size of the value
+ * \return an OpenCL status code
+ */
+ cl_int info(cl_platform_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ private:
+ /* NOTE(review): presumably kept as the first data member so an ICD loader can find the dispatch table through the handle - confirm against icd.h */
+ KHRicdVendorDispatch *dispatch;
+ /* Device handles kept by this platform. */
+ std::vector <cl_device_id> p_devices;
+ /* NOTE(review): presumably the descriptor of the lock file mentioned in platform.cpp - confirm there */
+ int p_lock_fd;
+};
+
+}
+
+/*
+ * Definition of the _cl_platform_id struct in terms of the runtime's own
+ * class: it derives from Coal::Platform and adds no members of its own.
+ */
+struct _cl_platform_id : public Coal::Platform
+{};
+
+/* The global platform object; only declared here, defined elsewhere. */
+extern _cl_platform_id the_platform;
+#endif
diff --git a/src/core/program.cpp b/src/core/program.cpp
new file mode 100644
index 0000000..5f6e99f
--- /dev/null
+++ b/src/core/program.cpp
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file core/program.cpp
+ * \brief Program
+ */
+
+#include "program.h"
+#include "context.h"
+#include "compiler.h"
+#include "kernel.h"
+#include "propertylist.h"
+#include "deviceinterface.h"
+
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <set>
+#include <algorithm>
+
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/Casting.h>
+#include <llvm/Support/ErrorOr.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/IPO.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Linker/Linker.h>
+#include <llvm/PassManager.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Function.h>
+#include <llvm/Analysis/Passes.h>
+#include <llvm/Transforms/IPO.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/InstIterator.h>
+
+#include <runtime/stdlib.c.bc.embed.h>
+
+
+/*-----------------------------------------------------------------------------
+* temporary for source file cacheing, remove from product releases
+*----------------------------------------------------------------------------*/
+//#include "dsp/source_cache.h"
+//source_cache * source_cache::pInstance = 0;
+
+using namespace Coal;
+using namespace llvm;
+
+/**
+ * \brief Create an empty program attached to \p ctx
+ *
+ * Also zeroes the pointers of \c p_null_device_dependent, the placeholder
+ * entry returned by deviceDependent() when no entry matches.
+ *
+ * \param ctx parent \c Coal::Context
+ */
+Program::Program(Context *ctx)
+: Object(Object::T_Program, ctx), p_type(Invalid), p_state(Empty)
+{
+    DeviceDependent &none = p_null_device_dependent;
+
+    none.device        = 0;
+    none.program       = 0;
+    none.compiler      = 0;
+    none.linked_module = 0;
+}
+
+/** \brief Free every per-device resource owned by this program */
+Program::~Program()
+{
+    this->resetDeviceDependent();
+}
+
+/**
+ * \brief Empty the per-device table, from the back
+ *
+ * For each entry the compiler, the device program and the linked module are
+ * deleted (in that order) before the entry itself is removed.
+ */
+void Program::resetDeviceDependent()
+{
+    for (; !p_device_dependent.empty(); p_device_dependent.pop_back())
+    {
+        DeviceDependent &last = p_device_dependent.back();
+
+        delete last.compiler;
+        delete last.program;
+        delete last.linked_module;
+    }
+}
+
+/**
+ * \brief Size the per-device table and initialise one entry per device
+ *
+ * Each entry gets its device, a device program created by that device, a
+ * fresh \c Compiler, no linked module, and is marked as not being a native
+ * binary.
+ *
+ * \param num_devices number of entries in \p devices
+ * \param devices devices this program is for
+ */
+void Program::setDevices(cl_uint num_devices, DeviceInterface * const*devices)
+{
+    p_device_dependent.resize(num_devices);
+
+    cl_uint idx = 0;
+
+    while (idx < num_devices)
+    {
+        DeviceDependent &entry = p_device_dependent[idx];
+
+        entry.device = devices[idx];
+        entry.program = entry.device->createDeviceProgram(this);
+        entry.is_native_binary = false;
+        entry.linked_module = 0;
+        entry.compiler = new Compiler(entry.device);
+
+        ++idx;
+    }
+}
+
+/**
+ * \brief Find the per-device entry for \p device
+ *
+ * A null \p device selects the only entry when the table holds exactly one.
+ *
+ * \param device device to look up, may be null
+ * \return the matching entry, or \c p_null_device_dependent when none matches
+ */
+Program::DeviceDependent &Program::deviceDependent(DeviceInterface *device)
+{
+    const size_t count = p_device_dependent.size();
+    const bool take_sole_entry = (!device && count == 1);
+
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        DeviceDependent &entry = p_device_dependent[idx];
+
+        if (take_sole_entry || entry.device == device)
+            return entry;
+    }
+
+    return p_null_device_dependent;
+}
+
+/**
+ * \brief Read-only lookup of the per-device entry for \p device
+ *
+ * A null \p device selects the only entry when the table holds exactly one.
+ *
+ * \param device device to look up, may be null
+ * \return the matching entry, or \c p_null_device_dependent when none matches
+ */
+const Program::DeviceDependent &Program::deviceDependent(DeviceInterface *device) const
+{
+    const size_t count = p_device_dependent.size();
+    const bool take_sole_entry = (!device && count == 1);
+
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        const DeviceDependent &entry = p_device_dependent[idx];
+
+        if (take_sole_entry || entry.device == device)
+            return entry;
+    }
+
+    return p_null_device_dependent;
+}
+
+/**
+ * \brief Device program stored in the entry for \p device
+ * \param device device to look up
+ * \return the entry's program pointer; null when the lookup falls back to
+ *         the placeholder entry
+ */
+DeviceProgram *Program::deviceDependentProgram(DeviceInterface *device) const
+{
+    return deviceDependent(device).program;
+}
+
+/**
+ * \brief Compiler options recorded for \p device
+ *
+ * When no entry matches, deviceDependent() returns the placeholder entry
+ * whose \c compiler pointer is null; an empty string is returned in that
+ * case instead of dereferencing the null pointer.
+ *
+ * \param device device to look up
+ * \return the options of the entry's compiler, or an empty string when the
+ *         entry has no compiler
+ */
+std::string Program::deviceDependentCompilerOptions(DeviceInterface *device) const
+{
+    const DeviceDependent &dep = deviceDependent(device);
+
+    if (!dep.compiler)
+        return std::string();
+
+    return dep.compiler->options();
+}
+
+/**
+ * \brief List the kernel functions of the module linked for \p dep
+ *
+ * The functions are read from the "opencl.kernels" named metadata of
+ * \c dep.linked_module: operand 0 of each node is taken as the function.
+ * An entry without a linked module (as setDevices() leaves it), a module
+ * without that metadata, or a node that is null, empty, or whose first
+ * operand is not a function is skipped rather than dereferenced.
+ *
+ * \param dep per-device entry to inspect
+ * \return the kernel functions found, possibly empty
+ */
+std::vector<llvm::Function *> Program::kernelFunctions(DeviceDependent &dep)
+{
+    std::vector<llvm::Function *> rs;
+
+    // Nothing has been linked for this device yet
+    if (!dep.linked_module) return rs;
+
+    llvm::NamedMDNode *kernels =
+                 dep.linked_module->getNamedMetadata("opencl.kernels");
+
+    if (!kernels) return rs;
+
+    for (unsigned int i=0; i<kernels->getNumOperands(); ++i)
+    {
+        llvm::MDNode *node = kernels->getOperand(i);
+
+        /*---------------------------------------------------------------------
+        * Malformed node: nothing to read from it
+        *--------------------------------------------------------------------*/
+        if (!node || node->getNumOperands() == 0) continue;
+
+        /*---------------------------------------------------------------------
+        * Bug somewhere, don't crash: dyn_cast_or_null yields null both for a
+        * null operand and for an operand that is not a function
+        *--------------------------------------------------------------------*/
+        llvm::Function *f =
+                 llvm::dyn_cast_or_null<llvm::Function>(node->getOperand(0));
+        if (!f) continue;
+
+        rs.push_back(f);
+    }
+
+    return rs;
+}
+
+/******************************************************************************
+* Kernel *Program::createKernel(const std::string &name, cl_int *errcode_ret)
+*
+* Return the kernel called \p name, in this order of preference:
+*   1. a kernel already present in kernelList;
+*   2. a kernel found in kernelReleasedList, which is moved back to kernelList;
+*   3. a new Kernel to which the matching function of every device is added.
+*
+* A new kernel is registered in kernelList exactly once, and only after every
+* device accepted it. On failure (*errcode_ret != CL_SUCCESS) the returned
+* kernel is therefore not referenced by kernelList.
+*
+* \param name name of the kernel function
+* \param errcode_ret receives CL_SUCCESS, CL_INVALID_KERNEL_NAME when a device
+*        has no function called \p name, or the error of Kernel::addFunction.
+*        It is left untouched when the program has no device entry.
+* \return the kernel (also returned on failure, as before)
+******************************************************************************/
+Kernel *Program::createKernel(const std::string &name, cl_int *errcode_ret)
+{
+    Kernel *rs = NULL;
+
+    for (size_t i=0; i < kernelList.size(); i++)
+    {
+        if (kernelList[i]->p_name.compare(name) == 0)
+        {
+            *errcode_ret = CL_SUCCESS;
+            return kernelList[i];
+        }
+    }
+
+    /* Now check the previously released list */
+    for (size_t i=0; i < kernelReleasedList.size(); i++)
+    {
+        if (kernelReleasedList[i]->p_name.compare(name) == 0)
+        {
+            *errcode_ret = CL_SUCCESS;
+            rs = kernelReleasedList[i];
+            kernelReleasedList.erase(kernelReleasedList.begin() + i);
+            kernelList.push_back(rs);
+
+            return rs;
+        }
+    }
+
+    rs = new Kernel(this);
+
+    /*-------------------------------------------------------------------------
+    * Add a function definition for each device
+    *------------------------------------------------------------------------*/
+    for (size_t i=0; i < p_device_dependent.size(); ++i)
+    {
+        bool found = false;
+        DeviceDependent &dep = p_device_dependent[i];
+        const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
+
+        /*---------------------------------------------------------------------
+        * Find the one with the good name
+        *--------------------------------------------------------------------*/
+        for (size_t j=0; j < kernels.size(); ++j)
+        {
+            llvm::Function *func = kernels[j];
+
+            if (func->getName().str().compare(name) == 0)
+            {
+                found = true;
+                *errcode_ret = rs->addFunction(dep.device, func,
+                                               dep.linked_module);
+                if (*errcode_ret != CL_SUCCESS) return rs;
+                break;
+            }
+        }
+
+        /*---------------------------------------------------------------------
+        * Kernel unavailable for this device
+        *--------------------------------------------------------------------*/
+        if (!found)
+        {
+            *errcode_ret = CL_INVALID_KERNEL_NAME;
+            return rs;
+        }
+    }
+
+    /*-------------------------------------------------------------------------
+    * Every device accepted the kernel: register it once. (As before, nothing
+    * is registered when there is no device at all.)
+    *------------------------------------------------------------------------*/
+    if (!p_device_dependent.empty())
+        kernelList.push_back(rs);
+
+    return rs;
+}
+
+/**
+ * \brief Create the kernels of the program and return the one called \p name
+ *
+ * A kernel already present in kernelList or kernelReleasedList is returned
+ * directly (a released one is moved back to kernelList). Otherwise a kernel
+ * is created, through createKernel(), for every kernel function of the first
+ * device entry.
+ *
+ * A kernel whose creation failed is deleted and never looked at again; it is
+ * first removed from kernelList in case createKernel() had registered it.
+ *
+ * \param name name of the kernel to return
+ * \param errcode_ret receives CL_SUCCESS, the error of a failed kernel
+ *        creation, or CL_INVALID_KERNEL_NAME when every creation succeeded
+ *        but none is called \p name. It is left untouched when the program
+ *        has no device entry.
+ * \return the requested kernel, or NULL
+ */
+Kernel * Program::createKernelsAndReturnKernel(const std::string &name, cl_int *errcode_ret)
+{
+    Kernel *rs = NULL;
+    /*-------------------------------------------------------------------------
+    * We should never go here
+    *------------------------------------------------------------------------*/
+    if (p_device_dependent.size() == 0) return rs;
+
+
+    for (size_t i=0; i < kernelList.size(); i++)
+    {
+        if (kernelList[i]->p_name.compare(name) == 0)
+        {
+            *errcode_ret = CL_SUCCESS;
+            return kernelList[i];
+        }
+    }
+    /* Now check the previously released list */
+    for (size_t i=0; i < kernelReleasedList.size(); i++)
+    {
+        if (kernelReleasedList[i]->p_name.compare(name) == 0)
+        {
+            *errcode_ret = CL_SUCCESS;
+            rs = kernelReleasedList[i];
+            kernelReleasedList.erase(kernelReleasedList.begin() + i);
+            kernelList.push_back(rs);
+
+            return rs;
+        }
+    }
+
+    /*-------------------------------------------------------------------------
+    * Take the list of kernels for the first device dependent
+    *------------------------------------------------------------------------*/
+    DeviceDependent &dep = p_device_dependent[0];
+    const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
+
+    /*-------------------------------------------------------------------------
+    * Create the kernel for each function name
+    * It returns an error if the signature is not the same for every device
+    * or if the kernel isn't found on all the devices.
+    *------------------------------------------------------------------------*/
+    *errcode_ret = CL_SUCCESS;
+
+    for (size_t i=0; i < kernels.size(); ++i)
+    {
+        cl_int result = CL_SUCCESS;
+        Kernel *kernel = createKernel(kernels[i]->getName().str(), &result);
+
+        if (result != CL_SUCCESS)
+        {
+            *errcode_ret = result;
+
+            /*-----------------------------------------------------------------
+            * Make sure no dangling pointer stays in kernelList, then drop the
+            * kernel. It must not be touched after this point.
+            *----------------------------------------------------------------*/
+            kernelList.erase(std::remove(kernelList.begin(), kernelList.end(),
+                                         kernel),
+                             kernelList.end());
+            delete kernel;
+            continue;
+        }
+
+        if (kernel->p_name.compare(name) == 0)
+        {
+            rs = kernel;
+            *errcode_ret = result;
+        }
+    }
+
+    if (!rs && (*errcode_ret == CL_SUCCESS))
+        *errcode_ret = CL_INVALID_KERNEL_NAME;
+
+    return rs;
+}
+
+/*-----------------------------------------------------------------------------
+ * Return every kernel of the program, creating them on first use.
+ *
+ * Previously released kernels are first moved back into kernelList.  If
+ * kernelList is then non-empty it is returned as is; otherwise createKernel()
+ * is called for each kernel function of the first device-dependent entry.
+ *
+ * errcode_ret is only written when a createKernel() call fails; it is not
+ * set to CL_SUCCESS here.  An empty vector is returned when the program has
+ * no device-dependent data.
+ *---------------------------------------------------------------------------*/
+std::vector<Kernel *> Program::createKernels(cl_int *errcode_ret)
+{
+    std::vector<Kernel *> rs;
+
+    /*-------------------------------------------------------------------------
+    * We should never go here
+    *------------------------------------------------------------------------*/
+    if (p_device_dependent.size() == 0) return rs;
+
+    /*
+     * Resurrect any released kernels back to the kernel list.  This handles the
+     * case where clCreateKernelsInProgram() is asking only for a count of kernels in
+     * the currently built program.  In that case, kernelList.size() must be the actual
+     * number of kernels compiled into the program (even if they were previously released).
+     *
+     * The whole list is moved in one step: erasing element i while also
+     * advancing i would skip every other entry.
+     */
+    kernelList.insert(kernelList.end(),
+                      kernelReleasedList.begin(), kernelReleasedList.end());
+    kernelReleasedList.clear();
+
+    if (kernelList.size()) return kernelList;
+
+    /*-------------------------------------------------------------------------
+    * Take the list of kernels for the first device dependent
+    *------------------------------------------------------------------------*/
+    DeviceDependent &dep = p_device_dependent[0];
+    const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
+
+    /*-------------------------------------------------------------------------
+    * Create the kernel for each function name
+    * It returns an error if the signature is not the same for every device
+    * or if the kernel isn't found on all the devices.
+    *------------------------------------------------------------------------*/
+    for (size_t i=0; i < kernels.size(); ++i)
+    {
+        cl_int result = CL_SUCCESS;
+        Kernel *kernel = createKernel(kernels[i]->getName().str(), &result);
+
+        if (result == CL_SUCCESS)
+        {
+            /* NOTE(review): if createKernel() itself appends to kernelList,
+             * this push_back records the kernel twice - confirm. */
+            kernelList.push_back(kernel);
+        }
+        else
+        {
+            *errcode_ret = result;
+            delete kernel;
+        }
+    }
+
+    return kernelList;
+}
+
+/*-----------------------------------------------------------------------------
+ * Concatenate `count` source strings into p_source and mark the program as a
+ * loaded, source-based program.
+ *
+ * A string's length comes from lengths[i] when `lengths` is given and the
+ * entry is non-zero; otherwise the string is treated as NUL-terminated.
+ * Returns CL_INVALID_VALUE as soon as a NULL string pointer is met (the
+ * strings merged so far stay in p_source), CL_SUCCESS otherwise.
+ *---------------------------------------------------------------------------*/
+cl_int Program::loadSources(cl_uint count, const char **strings,
+                            const size_t *lengths)
+{
+    // Start from an empty source
+    p_source = std::string("");
+
+    for (cl_uint idx = 0; idx < count; ++idx)
+    {
+        const char *text = strings[idx];
+
+        if (!text)
+            return CL_INVALID_VALUE;
+
+        // Explicit length when the caller supplied one, strlen() otherwise
+        size_t size = (lengths && lengths[idx]) ? lengths[idx]
+                                                : std::strlen(text);
+
+        // Drop trailing \0's: they do not belong in source text and show up
+        // when the client application passes a length that includes them
+        while (size > 0 && text[size - 1] == 0)
+            --size;
+
+        p_source += std::string(text, size);
+    }
+
+    /*-------------------------------------------------------------------------
+    * temporary for source file cacheing, remove from product releases
+    *------------------------------------------------------------------------*/
+    //source_cache::instance()->remember(p_source);
+
+    p_type = Source;
+    p_state = Loaded;
+
+    return CL_SUCCESS;
+}
+
+/*-----------------------------------------------------------------------------
+ * Load one binary per device and parse each into an LLVM module.
+ *
+ * Each binary is either native code with LLVM bitcode embedded (detected via
+ * ExtractMixedBinary(), dep.is_native_binary stays true) or plain LLVM
+ * bitcode (dep.is_native_binary is cleared).  On success the program type is
+ * set to Binary and its state to Loaded.
+ *
+ * binary_status, when non-NULL, receives CL_SUCCESS for every binary that
+ * parsed and CL_INVALID_VALUE for the entry that stopped the load.
+ *
+ * Returns CL_INVALID_VALUE for a NULL data/lengths array, a NULL binary or a
+ * zero length, CL_INVALID_BINARY when the bitcode cannot be parsed,
+ * CL_OUT_OF_HOST_MEMORY when no memory buffer could be created, CL_SUCCESS
+ * otherwise.
+ *---------------------------------------------------------------------------*/
+cl_int Program::loadBinaries(const unsigned char **data, const size_t *lengths,
+                             cl_int *binary_status, cl_uint num_devices,
+                             DeviceInterface * const*device_list)
+{
+    // Set device infos
+    setDevices(num_devices, device_list);
+
+    // Load the data
+    for (cl_uint i=0; i<num_devices; ++i)
+    {
+        // Reject missing or empty binaries before they reach the
+        // std::string(pointer, length) constructor below.
+        if (!data || !lengths || !data[i] || lengths[i] == 0)
+        {
+            if (binary_status) binary_status[i] = CL_INVALID_VALUE;
+            return CL_INVALID_VALUE;
+        }
+
+        DeviceDependent &dep = deviceDependent(device_list[i]);
+        dep.unlinked_binary = std::string((const char *)data[i], lengths[i]);
+        dep.is_native_binary = true;
+
+        /*--------------------------------------------------------------------
+        * Loaded binary is either native code with LLVM bitcode embedded,
+        * or LLVM bitcode itself
+        *--------------------------------------------------------------------*/
+        std::string bitcode;
+        if (! dep.program->ExtractMixedBinary(&dep.unlinked_binary, &bitcode,
+                                              NULL))
+        {
+            bitcode = dep.unlinked_binary;
+            dep.is_native_binary = false;
+        }
+
+        const llvm::StringRef s_data(bitcode);
+        const llvm::StringRef s_name("<binary>");
+
+        // NOTE(review): `buffer` is never released in this function; check
+        // whether parseBitcodeFile() takes ownership in the LLVM version used.
+        llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(
+                                         s_data, s_name, false);
+
+        if (!buffer)
+            return CL_OUT_OF_HOST_MEMORY;
+
+        // Make a module of it
+        ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(buffer,
+                                            llvm::getGlobalContext());
+        if (ModuleOrErr) {
+            dep.linked_module = ModuleOrErr.get();
+        }
+        else {
+            dep.linked_module = NULL;
+            if (binary_status) binary_status[i] = CL_INVALID_VALUE;
+            return CL_INVALID_BINARY;
+        }
+
+        if (binary_status) binary_status[i] = CL_SUCCESS;
+    }
+
+    p_type = Binary;
+    p_state = Loaded;
+
+    return CL_SUCCESS;
+}
+
+/*-----------------------------------------------------------------------------
+ * Build the program for its devices.
+ *
+ * For every device-dependent entry this function:
+ *   1. compiles p_source when the program is source-based,
+ *   2. links the embedded stdlib bitcode when the binary is not native and
+ *      the device program asks for it (linkStdLib()),
+ *   3. for non-native binaries, internalizes everything except the kernel
+ *      functions and runs the optimization passes,
+ *   4. hands the module to the device program's build().
+ *
+ * p_state is set to Failed on entry and to Built only when every device
+ * succeeded.  pfn_notify, when given, is called once before returning, on
+ * success and on compile/link/device-build failure.
+ *
+ * Returns CL_SUCCESS, CL_INVALID_BUILD_OPTIONS, CL_BUILD_PROGRAM_FAILURE or
+ * CL_OUT_OF_HOST_MEMORY.
+ *---------------------------------------------------------------------------*/
+cl_int Program::build(const char *options,
+                      void (CL_CALLBACK *pfn_notify)(cl_program program,
+                                                     void *user_data),
+                      void *user_data, cl_uint num_devices,
+                      DeviceInterface * const*device_list)
+{
+    // If we've already built this program and are re-building
+    // (for example, with different user options) then clear out the
+    // device dependent information in preparation for building again.
+    if (p_state == Built) resetDeviceDependent();
+
+    p_state = Failed;
+
+    // Set device infos
+    if (!p_device_dependent.size())
+    {
+        setDevices(num_devices, device_list);
+    }
+
+    // ASW TODO - optimize to compile for each device type only once.
+    for (cl_uint i=0; i<p_device_dependent.size(); ++i)
+    {
+        // Look the entry up through device_list only while that list really
+        // has an element i; otherwise use the stored entry directly so a
+        // NULL or shorter device_list is never indexed.
+        DeviceDependent &dep = (device_list && i < num_devices)
+                             ? deviceDependent(device_list[i])
+                             : p_device_dependent[i];
+
+        // Do we need to compile the source for each device ?
+        if (p_type == Source)
+        {
+            // Load source
+            const llvm::StringRef s_data(p_source);
+            const llvm::StringRef s_name("<source>");
+
+            llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(
+                                             s_data, s_name);
+
+            // Compile
+            int compile_result = dep.compiler->compile(options ? options : std::string(), buffer);
+            if (compile_result)
+            {
+                if (pfn_notify)
+                    pfn_notify((cl_program)this, user_data);
+                if (compile_result == CL_INVALID_BUILD_OPTIONS)
+                    return CL_INVALID_BUILD_OPTIONS;
+                else
+                    return CL_BUILD_PROGRAM_FAILURE;
+            }
+
+            // Get module and its bitcode
+            dep.linked_module = dep.compiler->module();
+
+            llvm::raw_string_ostream ostream(dep.unlinked_binary);
+            llvm::WriteBitcodeToFile(dep.linked_module, ostream);
+            ostream.flush();
+        }
+
+        // Link p_linked_module with the stdlib if the device needs that
+        if (! dep.is_native_binary && dep.program->linkStdLib())
+        {
+            // Load the stdlib bitcode
+            const llvm::StringRef s_data(embed_stdlib_c_bc,
+                                         sizeof(embed_stdlib_c_bc) - 1);
+            const llvm::StringRef s_name("stdlib.bc");
+            std::string errMsg;
+
+            llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(
+                                             s_data, s_name, false);
+
+            if (!buffer)
+                return CL_OUT_OF_HOST_MEMORY;
+
+            ErrorOr<Module *> ModuleOrErr =
+                parseBitcodeFile(buffer, llvm::getGlobalContext());
+            Module *stdlib = NULL;
+            if (ModuleOrErr) {
+                stdlib = ModuleOrErr.get();
+            }
+            else {
+                std::error_code EC = ModuleOrErr.getError();
+                errMsg = EC.message();
+            }
+
+            // Link
+            if (!stdlib ||
+                llvm::Linker::LinkModules(dep.linked_module, stdlib,
+                                          llvm::Linker::DestroySource, &errMsg))
+            {
+                dep.compiler->appendLog("link error: ");
+                dep.compiler->appendLog(errMsg);
+                dep.compiler->appendLog("\n");
+
+                // DEBUG
+                std::cout << dep.compiler->log() << std::endl;
+
+                if (pfn_notify)
+                    pfn_notify((cl_program)this, user_data);
+
+                return CL_BUILD_PROGRAM_FAILURE;
+            }
+        }
+
+        if (! dep.is_native_binary)
+        {
+            // Get list of kernels to strip other unused functions.
+            // api_s owns the names; api only points into it.  api_s is filled
+            // completely first so that no later push_back can move the
+            // strings the pointers refer to.
+            std::vector<std::string> api_s;
+            std::vector<const char *> api;
+            const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
+
+            api_s.reserve(kernels.size());
+            for (size_t j=0; j<kernels.size(); ++j)
+                api_s.push_back(kernels[j]->getName().str());
+
+            api.reserve(api_s.size());
+            for (size_t j=0; j<api_s.size(); ++j)
+                api.push_back(api_s[j].c_str());
+
+            // determine if module has barrier() function calls
+            bool hasBarrier = false;
+            llvm::CallInst* call;
+            for (llvm::Module::iterator F = dep.linked_module->begin(),
+                 EF = dep.linked_module->end(); !hasBarrier && F != EF; ++F)
+                for (llvm::inst_iterator I = inst_begin(*F),
+                     E = inst_end(*F); I != E; ++I)
+                {
+                    if (!(call = llvm::dyn_cast<llvm::CallInst>(&*I))) continue;
+                    if (!call->getCalledFunction()) continue;
+                    std::string name(call->getCalledFunction()->getName());
+                    if (name == "barrier")
+                    {
+                        hasBarrier = true;
+                        break;
+                    }
+                }
+
+            // Optimize code
+            llvm::PassManager *manager = new llvm::PassManager();
+
+            // Common passes (primary goal : remove unused stdlib functions)
+            manager->add(llvm::createTypeBasedAliasAnalysisPass());
+            manager->add(llvm::createBasicAliasAnalysisPass());
+            manager->add(llvm::createInternalizePass(api));
+            manager->add(llvm::createIPSCCPPass());
+            manager->add(llvm::createGlobalOptimizerPass());
+            manager->add(llvm::createConstantMergePass());
+            manager->add(llvm::createAlwaysInlinerPass());
+
+            dep.program->createOptimizationPasses(manager,
+                                                  dep.compiler->optimize(), hasBarrier);
+
+            manager->add(llvm::createGlobalDCEPass());
+
+            manager->run(*dep.linked_module);
+            delete manager;
+        }
+
+        // Now that the LLVM module is built, build the device-specific
+        // representation
+        if (!dep.program->build(dep.linked_module, &dep.unlinked_binary))
+        {
+            if (pfn_notify)
+                pfn_notify((cl_program)this, user_data);
+
+            return CL_BUILD_PROGRAM_FAILURE;
+        }
+    }
+
+    // TODO: Asynchronous compile
+    if (pfn_notify)
+        pfn_notify((cl_program)this, user_data);
+
+    p_state = Built;
+
+    return CL_SUCCESS;
+}
+
+/* Accessor: the program's type (Invalid, Source or Binary). */
+Program::Type Program::type() const
+{
+    return this->p_type;
+}
+
+/* Accessor: the program's state (Empty, Loaded, Built or Failed). */
+Program::State Program::state() const
+{
+    return this->p_state;
+}
+
+/*-----------------------------------------------------------------------------
+ * Answer a program info query.
+ *
+ * param_value may be NULL to query only the required size, which is written
+ * to *param_value_size_ret when that pointer is non-NULL.
+ *
+ * CL_PROGRAM_NUM_DEVICES and CL_PROGRAM_DEVICES are answered from the
+ * device-dependent data when there is any, and are otherwise forwarded to
+ * the parent context.
+ *
+ * CL_PROGRAM_BINARIES is special: param_value is an array of caller-allocated
+ * buffers, one per device; each non-NULL buffer receives that device's binary.
+ *
+ * Returns CL_INVALID_VALUE for an unknown param_name or when param_value is
+ * given but param_value_size is too small, CL_SUCCESS otherwise.
+ *---------------------------------------------------------------------------*/
+cl_int Program::info(cl_program_info param_name,
+                     size_t param_value_size,
+                     void *param_value,
+                     size_t *param_value_size_ret) const
+{
+    void *value = 0;
+    size_t value_length = 0;
+    llvm::SmallVector<size_t, 4> binary_sizes;
+    llvm::SmallVector<DeviceInterface *, 4> devices;
+
+    // Storage the SIMPLE_ASSIGN macro writes to (<type>_var)
+    union {
+        cl_uint cl_uint_var;
+        cl_context cl_context_var;
+    };
+
+    switch (param_name)
+    {
+        case CL_PROGRAM_REFERENCE_COUNT:
+            SIMPLE_ASSIGN(cl_uint, references());
+            break;
+
+        case CL_PROGRAM_NUM_DEVICES:
+            // Use devices associated with any built kernels, otherwise use
+            // the devices associated with the program context
+            if (p_device_dependent.size() != 0)
+            { SIMPLE_ASSIGN(cl_uint, p_device_dependent.size()); }
+            else
+                return ((Context *)parent())->info(CL_CONTEXT_NUM_DEVICES,
+                    param_value_size, param_value, param_value_size_ret);
+            break;
+
+        case CL_PROGRAM_DEVICES:
+            // Use devices associated with any built kernels, otherwise use
+            // the devices associated with the program context
+            if (p_device_dependent.size() != 0)
+            {
+                for (size_t i=0; i<p_device_dependent.size(); ++i)
+                {
+                    const DeviceDependent &dep = p_device_dependent[i];
+
+                    devices.push_back(dep.device);
+                }
+
+                value = devices.data();
+                value_length = devices.size() * sizeof(DeviceInterface *);
+            }
+            else
+                return ((Context *)parent())->info(CL_CONTEXT_DEVICES,
+                    param_value_size, param_value, param_value_size_ret);
+            break;
+
+        case CL_PROGRAM_CONTEXT:
+            SIMPLE_ASSIGN(cl_context, parent());
+            break;
+
+        case CL_PROGRAM_SOURCE:
+            // +1: the terminating NUL is part of the returned string
+            MEM_ASSIGN(p_source.size() + 1, p_source.c_str());
+            break;
+
+        case CL_PROGRAM_BINARY_SIZES:
+            for (size_t i=0; i<p_device_dependent.size(); ++i)
+            {
+                const DeviceDependent &dep = p_device_dependent[i];
+
+                binary_sizes.push_back(dep.unlinked_binary.size());
+            }
+
+            value = binary_sizes.data();
+            value_length = binary_sizes.size() * sizeof(size_t);
+            break;
+
+        case CL_PROGRAM_BINARIES:
+        {
+            // Special case : param_value points to an array of
+            // application-allocated unsigned char* pointers, one per device.
+            // Check it's good and std::memcpy the data
+
+            unsigned char **binaries = (unsigned char **)param_value;
+            value_length = p_device_dependent.size() * sizeof(unsigned char *);
+
+            // A pointer array that is too small is an error, exactly as for
+            // every other query; it must not be reported as success.
+            if (param_value && param_value_size < value_length)
+                return CL_INVALID_VALUE;
+
+            if (param_value)
+                for (size_t i=0; i<p_device_dependent.size(); ++i)
+                {
+                    const DeviceDependent &dep = p_device_dependent[i];
+                    unsigned char *dest = binaries[i];
+
+                    // A NULL entry means the caller does not want this binary
+                    if (!dest)
+                        continue;
+
+                    std::memcpy(dest, dep.unlinked_binary.data(),
+                                dep.unlinked_binary.size());
+                }
+
+            if (param_value_size_ret)
+                *param_value_size_ret = value_length;
+
+            return CL_SUCCESS;
+        }
+
+        default:
+            return CL_INVALID_VALUE;
+    }
+
+    if (param_value && param_value_size < value_length)
+        return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+        *param_value_size_ret = value_length;
+
+    if (param_value)
+        std::memcpy(param_value, value, value_length);
+
+    return CL_SUCCESS;
+}
+
+/*-----------------------------------------------------------------------------
+ * Answer a per-device build info query (status, options or log).
+ *
+ * The build status is derived from p_state; options and log are read from
+ * the compiler of the device-dependent entry for `device`.  The usual
+ * info-query protocol applies: param_value may be NULL to query the size
+ * only, and a too-small param_value_size yields CL_INVALID_VALUE.
+ *---------------------------------------------------------------------------*/
+cl_int Program::buildInfo(DeviceInterface *device,
+                          cl_program_build_info param_name,
+                          size_t param_value_size,
+                          void *param_value,
+                          size_t *param_value_size_ret) const
+{
+    const void *value = 0;
+    size_t value_length = 0;
+    const DeviceDependent &dep = deviceDependent(device);
+
+    // Storage the SIMPLE_ASSIGN macro writes to
+    union {
+        cl_build_status cl_build_status_var;
+    };
+
+    if (param_name == CL_PROGRAM_BUILD_STATUS)
+    {
+        // TODO: CL_BUILD_IN_PROGRESS
+        if (p_state == Empty || p_state == Loaded)
+        {
+            SIMPLE_ASSIGN(cl_build_status, CL_BUILD_NONE);
+        }
+        else if (p_state == Built)
+        {
+            SIMPLE_ASSIGN(cl_build_status, CL_BUILD_SUCCESS);
+        }
+        else if (p_state == Failed)
+        {
+            SIMPLE_ASSIGN(cl_build_status, CL_BUILD_ERROR);
+        }
+    }
+    else if (param_name == CL_PROGRAM_BUILD_OPTIONS)
+    {
+        // +1: the terminating NUL is part of the returned string
+        value = dep.compiler->options().c_str();
+        value_length = dep.compiler->options().size() + 1;
+    }
+    else if (param_name == CL_PROGRAM_BUILD_LOG)
+    {
+        value = dep.compiler->log().c_str();
+        value_length = dep.compiler->log().size() + 1;
+    }
+    else
+    {
+        return CL_INVALID_VALUE;
+    }
+
+    const bool buffer_too_small = param_value && param_value_size < value_length;
+    if (buffer_too_small)
+        return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+        *param_value_size_ret = value_length;
+
+    if (param_value)
+        std::memcpy(param_value, value, value_length);
+
+    return CL_SUCCESS;
+}
diff --git a/src/core/program.h b/src/core/program.h
new file mode 100644
index 0000000..a06b452
--- /dev/null
+++ b/src/core/program.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file core/program.h
+ * \brief Program
+ */
+
+#ifndef __PROGRAM_H__
+#define __PROGRAM_H__
+
+#include "object.h"
+
+#include <CL/cl.h>
+#include <string>
+#include <vector>
+
+namespace llvm
+{
+ class MemoryBuffer;
+ class Module;
+ class Function;
+}
+
+namespace Coal
+{
+
+class Context;
+class Compiler;
+class DeviceInterface;
+class DeviceProgram;
+class Kernel;
+
+/**
+ * \brief Program object
+ *
+ * This class compiles and links a source or binaries into LLVM modules for each
+ * \c Coal::DeviceInterface for which the program is built.
+ *
+ * It then contains functions to get the list of kernels available in the
+ * program, using \c Coal::Kernel objects.
+ */
+class Program : public Object
+{
+    public:
+        /**
+         * \brief Constructor
+         * \param ctx parent \c Coal::Context
+         */
+        Program(Context *ctx);
+        ~Program();
+
+        /**
+         * \brief Program type
+         */
+        enum Type
+        {
+            Invalid, /*!< Invalid or unknown, type of a program not already loaded */
+            Source,  /*!< Program made of sources that must be compiled and linked */
+            Binary   /*!< Program made of pre-built binaries that only need to be (transformed)/linked */
+        };
+
+        /**
+         * \brief Program state
+         */
+        enum State
+        {
+            Empty,  /*!< Just created */
+            Loaded, /*!< Source or binary loaded */
+            Built,  /*!< Built */
+            Failed, /*!< Build failed */
+        };
+
+        /**
+         * \brief Load sources into the program
+         *
+         * This function loads the source-code given in \p strings into the
+         * program and sets its type to \c Source.
+         *
+         * \param count number of strings in \p strings
+         * \param strings array of pointers to strings, either null-terminated
+         *        or of length given in \p lengths
+         * \param lengths lengths of the strings. If a field is 0, the
+         *        corresponding string is null-terminated. If \p lengths is
+         *        0, all the strings are null-terminated
+         * \return \c CL_SUCCESS if success, an error code otherwise
+         */
+        cl_int loadSources(cl_uint count, const char **strings,
+                           const size_t *lengths);
+
+        /**
+         * \brief Load binaries into the program
+         *
+         * This function allows client application to load a source, retrieve
+         * binaries using \c info() (\c CL_PROGRAM_BINARIES), and then
+         * re-create the same program (after a restart for example) by giving
+         * it a precompiled binary.
+         *
+         * This function loads the binaries for each device and parses them
+         * into LLVM modules, then sets the program type to \c Binary. Whether
+         * a binary is native code or plain LLVM bitcode is recorded per
+         * device, not in the program type.
+         *
+         * \param data array of pointers to binaries, one for each device
+         * \param lengths lengths of the binaries pointed to by \p data
+         * \param binary_status array that will be filled by this function with
+         *        the status of each loaded binary (\c CL_SUCCESS if success)
+         * \param num_devices number of devices for which a binary is loaded
+         * \param device_list list of devices for which the binaries are loaded
+         * \return \c CL_SUCCESS if success, an error code otherwise
+         */
+        cl_int loadBinaries(const unsigned char **data, const size_t *lengths,
+                            cl_int *binary_status, cl_uint num_devices,
+                            DeviceInterface * const*device_list);
+
+        /**
+         * \brief Build the program
+         *
+         * This function compiles the sources, if any, and then links the
+         * resulting binaries if the devices for which they are compiled ask
+         * \c Coal::Program to do so, using \c Coal::DeviceProgram::linkStdLib().
+         *
+         * \param options options to pass to the compiler, see the OpenCL
+         *        specification.
+         * \param pfn_notify callback function called at the end of the build
+         * \param user_data user data given to \p pfn_notify
+         * \param num_devices number of devices for which binaries are being
+         *        built. If it's a source-based program, this can be 0.
+         * \param device_list list of devices for which the program will be built.
+         * \return \c CL_SUCCESS if success, an error code otherwise
+         */
+        cl_int build(const char *options,
+                     void (CL_CALLBACK *pfn_notify)(cl_program program,
+                                                    void *user_data),
+                     void *user_data, cl_uint num_devices,
+                     DeviceInterface * const*device_list);
+
+        Type type() const;   /*!< \brief Type of the program */
+        State state() const; /*!< \brief State of the program */
+
+        /**
+         * \brief Create a kernel given a \p name
+         * \param name name of the kernel to be created
+         * \param errcode_ret return code (\c CL_SUCCESS if success)
+         * \return a \c Coal::Kernel object corresponding to the given \p name
+         */
+        Kernel *createKernel(const std::string &name, cl_int *errcode_ret);
+
+        /**
+         * \brief Create the kernels of the program and return the one named \p name
+         * \param name name of the kernel to be returned
+         * \param errcode_ret return code (\c CL_SUCCESS if success)
+         * \return a \c Coal::Kernel object corresponding to the given \p name
+         */
+        Kernel *createKernelsAndReturnKernel(const std::string &name, cl_int *errcode_ret);
+
+        /**
+         * \brief Create all the kernels of the program
+         * \param errcode_ret return code, written when a kernel cannot be created
+         * \return the list of \c Coal::Kernel objects of this program
+         */
+        std::vector<Kernel *> createKernels(cl_int *errcode_ret);
+
+        /**
+         * \brief Device-specific program
+         * \param device device for which the device-specific program is needed
+         * \return the device-specific program requested, 0 if not found
+         */
+        DeviceProgram *deviceDependentProgram(DeviceInterface *device) const;
+
+        /**
+         * \brief Compiler options of the device-specific program
+         * \param device device for which the options are needed
+         */
+        std::string deviceDependentCompilerOptions(DeviceInterface *device) const;
+
+        /**
+         * \brief Get information about this program
+         * \copydetails Coal::DeviceInterface::info
+         */
+        cl_int info(cl_program_info param_name,
+                    size_t param_value_size,
+                    void *param_value,
+                    size_t *param_value_size_ret) const;
+
+        /**
+         * \brief Get build info about this program (status, options, log)
+         * \copydetails Coal::DeviceInterface::info
+         * \param device \c Coal::DeviceInterface for which info is needed
+         */
+        cl_int buildInfo(DeviceInterface *device,
+                         cl_program_build_info param_name,
+                         size_t param_value_size,
+                         void *param_value,
+                         size_t *param_value_size_ret) const;
+
+        /** \brief Copy of the source text loaded with \c loadSources() */
+        std::string source() const { return p_source; }
+
+        std::vector<Kernel *> kernelList;         /*!< Kernels of this program that are currently in use */
+        std::vector<Kernel *> kernelReleasedList; /*!< Kernels that were released and can be handed out again */
+
+    private:
+        Type p_type;
+        State p_state;
+        std::string p_source;
+
+        /** Everything the program keeps for one device. */
+        struct DeviceDependent
+        {
+            DeviceInterface *  device;
+            DeviceProgram *    program;
+            std::string        unlinked_binary;  // bytes returned for CL_PROGRAM_BINARIES
+            bool               is_native_binary; // llvm kernel bitcode vs final native binary
+            llvm::Module *     linked_module;
+            Compiler *         compiler;
+        };
+
+        std::vector<DeviceDependent> p_device_dependent;
+        // NOTE(review): presumably what deviceDependent() returns for a
+        // device that is not in p_device_dependent - confirm in program.cpp.
+        DeviceDependent p_null_device_dependent;
+
+        void setDevices(cl_uint num_devices, DeviceInterface * const*devices);
+        void resetDeviceDependent();
+        DeviceDependent &deviceDependent(DeviceInterface *device);
+        const DeviceDependent &deviceDependent(DeviceInterface *device) const;
+        std::vector<llvm::Function *> kernelFunctions(DeviceDependent &dep);
+};
+
+}
+
+// Concrete type behind the opaque cl_program handle: it adds nothing to
+// Coal::Program (program.cpp casts a Program* to cl_program directly).
+struct _cl_program : public Coal::Program
+{};
+
+#endif
diff --git a/src/core/propertylist.h b/src/core/propertylist.h
new file mode 100644
index 0000000..8d32397
--- /dev/null
+++ b/src/core/propertylist.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file propertylist.h
+ * \brief Helper macros for \c info() functions
+ *
+ * The OpenCL API is full of functions like \c clGetXXXInfo(). They all take
+ * the same arguments and are handled the same way. This file contains macros
+ * easing the implementation of these info functions.
+ *
+ * An info function using these macros looks like this:
+ *
+ * \code
+ * cl_int Foo::info(cl_foo_info param_name,
+ * size_t param_value_size,
+ * void *param_value,
+ * size_t *param_value_size_ret) const
+ * {
+ * void *value = 0;
+ * size_t value_length = 0;
+ *
+ * union {
+ * cl_uint cl_uint_var;
+ * cl_context cl_context_var;
+ * };
+ *
+ * switch (param_name)
+ * {
+ * case CL_UINT_PARAM:
+ * SIMPLE_ASSIGN(cl_uint, the_value);
+ * break;
+ * case CL_CONTEXT_PARAM:
+ * SIMPLE_ASSIGN(cl_context, a_call());
+ * break;
+ * case CL_STRING_PARAM:
+ * STRING_ASSIGN("This is a string");
+ * break;
+ * case CL_BINARY_PARAM:
+ * MEM_ASSIGN(sizeof(something), something);
+ * break;
+ * default:
+ * return CL_INVALID_VALUE;
+ * }
+ *
+ * if (param_value && param_value_size < value_length)
+ * return CL_INVALID_VALUE;
+ *
+ * if (param_value_size_ret)
+ * *param_value_size_ret = value_length;
+ *
+ * if (param_value)
+ * std::memcpy(param_value, value, value_length);
+ *
+ * return CL_SUCCESS;
+ * }
+ * \endcode
+ */
+
+#ifndef __PROPERTYLIST_H__
+#define __PROPERTYLIST_H__
+
+/**
+ * \brief Assign a value of a given type to the return value
+ *
+ * Expects \c value, \c value_length and a variable named <tt>type##_var</tt>
+ * (usually a member of an anonymous union) to be in scope.
+ *
+ * \note The expansion already ends with a semicolon, and some existing
+ *       callers rely on it. Wrap the macro in braces when it is the body of
+ *       an \c if that has an \c else.
+ * \param type type of the argument
+ * \param _value value to assign; may be any expression
+ */
+#define SIMPLE_ASSIGN(type, _value) do {    \
+    value_length = sizeof(type);            \
+    type##_var = (type)(_value);            \
+    value = & type##_var;                   \
+} while (0);
+
+/**
+ * \brief Assign a string to the return value
+ *
+ * The string is copied into a function-local <tt>static const char[]</tt>,
+ * so \c value stays valid after the enclosing function returns.
+ * \c value_length is \c sizeof that array, i.e. it includes the terminating
+ * NUL.
+ *
+ * \note The expansion already ends with a semicolon.
+ * \param string the string to assign, as a constant (it initializes a char
+ *        array, so it must be a string literal)
+ */
+#define STRING_ASSIGN(string) do { \
+ static const char str[] = string; \
+ value_length = sizeof(str); \
+ value = (void *)str; \
+} while (0);
+
+/**
+ * \brief Assign a memory buffer to the return value
+ * \note the buffer must remain valid after the end of the \c info() call
+ * \note The expansion already ends with a semicolon.
+ * \param size size of the buffer; may be any expression
+ * \param buf buffer (of type <tt>void *</tt> for instance); may be any
+ *        pointer expression
+ */
+#define MEM_ASSIGN(size, buf) do {  \
+    value_length = (size);          \
+    value = (void *)(buf);          \
+} while (0);
+
+#endif
diff --git a/src/core/sampler.cpp b/src/core/sampler.cpp
new file mode 100644
index 0000000..71fca86
--- /dev/null
+++ b/src/core/sampler.cpp
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file core/sampler.cpp
+ * \brief Sampler
+ */
+
+#include "sampler.h"
+#include "context.h"
+#include "deviceinterface.h"
+#include "propertylist.h"
+
+#include <cstring>
+#include <cstdlib>
+
+using namespace Coal;
+
+/**
+ * Build a sampler from the host-side OpenCL parameters by translating them
+ * into the CLK_* bitfield stored in p_bitfield.
+ *
+ * *errcode_ret is set to CL_INVALID_VALUE for an unknown addressing or filter
+ * mode (construction stops there), otherwise to the result of
+ * checkImageAvailability().
+ */
+Sampler::Sampler(Context *ctx,
+                 cl_bool normalized_coords,
+                 cl_addressing_mode addressing_mode,
+                 cl_filter_mode filter_mode,
+                 cl_int *errcode_ret)
+: Object(Object::T_Sampler, ctx), p_bitfield(0)
+{
+    // Coordinate normalization
+    p_bitfield |= normalized_coords ? CLK_NORMALIZED_COORDS_TRUE
+                                    : CLK_NORMALIZED_COORDS_FALSE;
+
+    // Addressing mode
+    unsigned int address_bits = 0;
+
+    switch (addressing_mode)
+    {
+        case CL_ADDRESS_NONE:            address_bits = CLK_ADDRESS_NONE;            break;
+        case CL_ADDRESS_MIRRORED_REPEAT: address_bits = CLK_ADDRESS_MIRRORED_REPEAT; break;
+        case CL_ADDRESS_REPEAT:          address_bits = CLK_ADDRESS_REPEAT;          break;
+        case CL_ADDRESS_CLAMP_TO_EDGE:   address_bits = CLK_ADDRESS_CLAMP_TO_EDGE;   break;
+        case CL_ADDRESS_CLAMP:           address_bits = CLK_ADDRESS_CLAMP;           break;
+
+        default:
+            *errcode_ret = CL_INVALID_VALUE;
+            return;
+    }
+
+    p_bitfield |= address_bits;
+
+    // Filter mode
+    if (filter_mode == CL_FILTER_NEAREST)
+    {
+        p_bitfield |= CLK_FILTER_NEAREST;
+    }
+    else if (filter_mode == CL_FILTER_LINEAR)
+    {
+        p_bitfield |= CLK_FILTER_LINEAR;
+    }
+    else
+    {
+        *errcode_ret = CL_INVALID_VALUE;
+        return;
+    }
+
+    // Check that images are available on all the devices
+    *errcode_ret = checkImageAvailability();
+}
+
+/**
+ * Build a sampler directly from an already encoded CLK_* bitfield.
+ * checkImageAvailability() is still called, but there is no error output
+ * here, so its result is discarded.
+ */
+Sampler::Sampler(Context *ctx, unsigned int bitfield)
+: Object(Object::T_Sampler, ctx), p_bitfield(bitfield)
+{
+    (void)checkImageAvailability();
+}
+
+/**
+ * Check that every device of the parent context supports images.
+ *
+ * Returns CL_SUCCESS when all devices report CL_DEVICE_IMAGE_SUPPORT,
+ * CL_INVALID_OPERATION as soon as one does not, CL_OUT_OF_HOST_MEMORY when
+ * the temporary device array cannot be allocated, or the error of whichever
+ * info() query failed.
+ */
+cl_int Sampler::checkImageAvailability() const
+{
+    cl_uint num_devices;
+
+    // How many devices does the context have?
+    cl_int status = ((Context *)parent())->info(CL_CONTEXT_NUM_DEVICES,
+                                                sizeof(unsigned int),
+                                                &num_devices, 0);
+    if (status != CL_SUCCESS)
+        return status;
+
+    const size_t list_bytes = num_devices * sizeof(DeviceInterface *);
+    DeviceInterface **list = (DeviceInterface **)std::malloc(list_bytes);
+
+    if (!list)
+        return CL_OUT_OF_HOST_MEMORY;
+
+    // From here on there is a single exit, so the array is freed in one place
+    status = ((Context *)parent())->info(CL_CONTEXT_DEVICES, list_bytes,
+                                         list, 0);
+
+    for (unsigned int d = 0; status == CL_SUCCESS && d < num_devices; ++d)
+    {
+        cl_bool image_support;
+
+        status = list[d]->info(CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),
+                               &image_support, 0);
+
+        if (status == CL_SUCCESS && !image_support)
+            status = CL_INVALID_OPERATION;
+    }
+
+    std::free((void *)list);
+
+    return status;
+}
+
+/** Accessor: the encoded CLK_* bitfield of this sampler. */
+unsigned int Sampler::bitfield() const
+{
+    return this->p_bitfield;
+}
+
+/**
+ * Answer a sampler info query by decoding p_bitfield back into the host-side
+ * OpenCL values.
+ *
+ * param_value may be NULL to query only the required size, which is written
+ * to *param_value_size_ret when that pointer is non-NULL.  Returns
+ * CL_INVALID_VALUE for an unknown param_name or a too-small
+ * param_value_size, CL_SUCCESS otherwise.
+ */
+cl_int Sampler::info(cl_sampler_info param_name,
+                     size_t param_value_size,
+                     void *param_value,
+                     size_t *param_value_size_ret) const
+{
+    void *value = 0;
+    size_t value_length = 0;
+
+    // Storage the SIMPLE_ASSIGN macro writes to (<type>_var)
+    union {
+        cl_uint cl_uint_var;
+        cl_context cl_context_var;
+        cl_bool cl_bool_var;
+        cl_addressing_mode cl_addressing_mode_var;
+        cl_filter_mode cl_filter_mode_var;
+    };
+
+    switch (param_name)
+    {
+        case CL_SAMPLER_REFERENCE_COUNT:
+            SIMPLE_ASSIGN(cl_uint, references());
+            break;
+
+        case CL_SAMPLER_CONTEXT:
+            SIMPLE_ASSIGN(cl_context, parent());
+            break;
+
+        case CL_SAMPLER_NORMALIZED_COORDS:
+            // Braces keep the if/else well-formed whether or not the macro
+            // expansion carries its own trailing semicolon.
+            if (p_bitfield & CLK_NORMALIZED_COORDS_MASK)
+            {
+                SIMPLE_ASSIGN(cl_bool, true);
+            }
+            else
+            {
+                SIMPLE_ASSIGN(cl_bool, false);
+            }
+            break;
+
+        case CL_SAMPLER_ADDRESSING_MODE:
+            switch (p_bitfield & CLK_ADDRESS_MODE_MASK)
+            {
+                case CLK_ADDRESS_CLAMP:
+                    SIMPLE_ASSIGN(cl_addressing_mode, CL_ADDRESS_CLAMP);
+                    break;
+                case CLK_ADDRESS_CLAMP_TO_EDGE:
+                    SIMPLE_ASSIGN(cl_addressing_mode, CL_ADDRESS_CLAMP_TO_EDGE);
+                    break;
+                case CLK_ADDRESS_MIRRORED_REPEAT:
+                    SIMPLE_ASSIGN(cl_addressing_mode, CL_ADDRESS_MIRRORED_REPEAT);
+                    break;
+                case CLK_ADDRESS_REPEAT:
+                    SIMPLE_ASSIGN(cl_addressing_mode, CL_ADDRESS_REPEAT);
+                    break;
+                case CLK_ADDRESS_NONE:
+                    SIMPLE_ASSIGN(cl_addressing_mode, CL_ADDRESS_NONE);
+                    break;
+            }
+            break;
+
+        case CL_SAMPLER_FILTER_MODE:
+            switch (p_bitfield & CLK_FILTER_MASK)
+            {
+                case CLK_FILTER_LINEAR:
+                    SIMPLE_ASSIGN(cl_filter_mode, CL_FILTER_LINEAR);
+                    break;
+                case CLK_FILTER_NEAREST:
+                    SIMPLE_ASSIGN(cl_filter_mode, CL_FILTER_NEAREST);
+                    break;
+            }
+            // Without this break the case fell through to `default` and the
+            // query always failed with CL_INVALID_VALUE.
+            break;
+
+        default:
+            return CL_INVALID_VALUE;
+    }
+
+    if (param_value && param_value_size < value_length)
+        return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+        *param_value_size_ret = value_length;
+
+    if (param_value)
+        std::memcpy(param_value, value, value_length);
+
+    return CL_SUCCESS;
+}
diff --git a/src/core/sampler.h b/src/core/sampler.h
new file mode 100644
index 0000000..1ff1f1f
--- /dev/null
+++ b/src/core/sampler.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file sampler.h
+ * \brief Sampler object
+ */
+
+#ifndef __SAMPLER_H__
+#define __SAMPLER_H__
+
+#include <CL/cl.h>
+#include "object.h"
+
+// WARNING: Keep in sync with stdlib.h
+// NOTE(review): presumably the project's kernel-side stdlib.h rather than the
+// C library header of the same name - confirm.
+//
+// Sampler bitfield layout: each property has its own 4-bit field, selected by
+// the matching *_MASK below:
+//   bits 0-3  : normalized coordinates (CLK_NORMALIZED_COORDS_*)
+//   bits 4-7  : addressing mode        (CLK_ADDRESS_*)
+//   bits 8-11 : filter mode            (CLK_FILTER_*)
+
+#define CLK_NORMALIZED_COORDS_FALSE 0x00000000
+#define CLK_NORMALIZED_COORDS_TRUE 0x00000001
+#define CLK_ADDRESS_NONE 0x00000000
+#define CLK_ADDRESS_MIRRORED_REPEAT 0x00000010
+#define CLK_ADDRESS_REPEAT 0x00000020
+#define CLK_ADDRESS_CLAMP_TO_EDGE 0x00000030
+#define CLK_ADDRESS_CLAMP 0x00000040
+#define CLK_FILTER_NEAREST 0x00000000
+#define CLK_FILTER_LINEAR 0x00000100
+
+// Masks isolating one property of the bitfield
+#define CLK_NORMALIZED_COORDS_MASK 0x0000000f
+#define CLK_ADDRESS_MODE_MASK 0x000000f0
+#define CLK_FILTER_MASK 0x00000f00
+
+namespace Coal
+{
+
+class Context;
+
+/**
+ * \brief Sampler
+ *
+ * This object doesn't do anything interesting, it only converts a set of
+ * host OpenCL constants to constants that will be used by the kernels and
+ * the image reading and writing built-in functions.
+ */
+class Sampler : public Object
+{
+ public:
+ /**
+ * \brief Constructor
+ * \param ctx parent \c Coal::Context
+ * \param normalized_coords true if the coords given to the built-in
+ * image functions are normalized, false otherwise
+ * \param addressing_mode addressing mode used to read images
+ * \param filter_mode filter mode used to read images
+ * \param errcode_ret return code (\c CL_SUCCESS if all is good)
+ */
+ Sampler(Context *ctx,
+ cl_bool normalized_coords,
+ cl_addressing_mode addressing_mode,
+ cl_filter_mode filter_mode,
+ cl_int *errcode_ret);
+
+ /**
+ * \brief Simpler constructor
+ * \param ctx parent \c Coal::Context
+ * \param bitfield bitfield already calculated
+ */
+ Sampler(Context *ctx,
+ unsigned int bitfield);
+
+ unsigned int bitfield() const; /*!< \brief Bitfield value usable by the kernels */
+
+ /**
+ * \brief Get information about the sampler
+ * \copydetails Coal::DeviceInterface::info
+ * \return \c CL_SUCCESS or \c CL_INVALID_VALUE
+ */
+ cl_int info(cl_sampler_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret) const;
+
+ private:
+ /** \brief Sampler state packed with the \c CLK_* values; \c info() decodes it through the \c CLK_*_MASK masks */
+ unsigned int p_bitfield;
+
+ /**
+ * \brief NOTE(review): definition not visible in this header; presumably
+ * checks whether image support is available for this sampler - confirm
+ */
+ cl_int checkImageAvailability() const;
+};
+
+}
+
+/**
+ * \brief Global-namespace \c _cl_sampler type: a \c Coal::Sampler with no
+ * additional member
+ *
+ * NOTE(review): presumably the struct behind the \c cl_sampler handle
+ * declared in <CL/cl.h> - confirm.
+ */
+struct _cl_sampler : public Coal::Sampler
+{};
+
+#endif
diff --git a/src/core/util.cpp b/src/core/util.cpp
new file mode 100644
index 0000000..afeb564
--- /dev/null
+++ b/src/core/util.cpp
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**
+ * \file core/util.cpp
+ * \brief misc utils
+ */
+
+#include <stdint.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util.h"
+
+/******************************************************************************
+* Parse the first line of a file and read the unsigned integer that follows a
+* given string.
+*
+* fname:       path of the file to read; only its first line is examined
+* sname:       string searched for in that line
+* default_val: value returned when no number can be extracted
+*
+* Returns the first run of decimal digits found after the first occurrence of
+* sname. Returns default_val if either string argument is NULL, the file
+* cannot be opened, no line can be read, sname is absent, no digit follows it,
+* or the number does not fit in a uint32_t.
+******************************************************************************/
+uint32_t parse_file_line_value(const char *fname, const char *sname,
+                               uint32_t default_val)
+{
+    uint32_t val  = default_val;
+    FILE    *fp   = NULL;
+    char    *line = NULL;
+    size_t   len  = 0;
+
+    if (fname == NULL || sname == NULL) return val;
+    if ((fp = fopen(fname, "r")) == NULL) return val;
+
+    if (getline(&line, &len, fp) != -1)
+    {
+        char *str = strstr(line, sname);
+
+        if (str != NULL)
+        {
+            str += strlen(sname);
+
+            // <ctype.h> functions require an unsigned char value; passing a
+            // plain (possibly negative) char is undefined behavior
+            while (*str != '\0' && !isdigit((unsigned char)*str)) str++;
+
+            if (*str != '\0')
+            {
+                // strtoull rather than atoi: the result is unsigned, and atoi
+                // is undefined once the number no longer fits in an int
+                unsigned long long parsed = strtoull(str, NULL, 10);
+
+                if (parsed <= 0xFFFFFFFFULL) val = (uint32_t)parsed;
+            }
+        }
+    }
+
+    fclose(fp);
+    free(line);   // getline may allocate even on failure; free(NULL) is a no-op
+    return val;
+}
+
diff --git a/src/core/util.h b/src/core/util.h
new file mode 100644
index 0000000..f2c1609
--- /dev/null
+++ b/src/core/util.h
@@ -0,0 +1,41 @@
+/******************************************************************************
+ * Copyright (c) 2012-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+/**
+ * \file core/util.h
+ * \brief misc utils
+ */
+
+#ifndef _UTIL_H
+#define _UTIL_H
+
+#include <stdint.h> // uint32_t: keeps this header usable without a prior include
+
+/**
+ * \brief Parse first line in a file, read integer immediately following a string
+ * \param fname path of the file to read; only its first line is examined
+ * \param sname string after which the integer is looked for
+ * \param default_val value returned if the file cannot be opened or no
+ *        integer is found after \p sname on the first line
+ * \return the parsed integer, or \p default_val
+ */
+uint32_t parse_file_line_value(const char *fname, const char *sname,
+                               uint32_t default_val);
+
+#endif // _UTIL_H
+