about | summary | refs | log | tree | commit | diff
path: root/parallel-libs
diff options
context:
space:
mode:
author Jason Henline <jhen@google.com> 2016-08-24 21:31:53 +0000
committer Jason Henline <jhen@google.com> 2016-08-24 21:31:53 +0000
commit bcc77b6249a6f193f7322dbb3fdf77482d67bbee (patch)
tree 0cc0ad0c30836983a7421f25febc7794a2ed55b4 /parallel-libs
parent 571a6478538ba4802ae19b488c3b3f1e0ae710c6 (diff)
[StreamExecutor] Rename Executor to Device
Summary: This more clearly describes what the class is.
Reviewers: jlebar
Subscribers: jprice, parallel_libs-commits
Differential Revision: https://reviews.llvm.org/D23851
llvm-svn: 279669
Diffstat (limited to 'parallel-libs')
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/Device.h (renamed from parallel-libs/streamexecutor/include/streamexecutor/Executor.h) 46
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/Kernel.h 22
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h 15
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/Stream.h 41
-rw-r--r-- parallel-libs/streamexecutor/lib/CMakeLists.txt 2
-rw-r--r-- parallel-libs/streamexecutor/lib/Device.cpp (renamed from parallel-libs/streamexecutor/lib/Executor.cpp) 16
-rw-r--r-- parallel-libs/streamexecutor/lib/Kernel.cpp 12
-rw-r--r-- parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp 2
-rw-r--r-- parallel-libs/streamexecutor/lib/Stream.cpp 3
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt 8
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp 476
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp 478
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp 20
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp 14
14 files changed, 575 insertions, 580 deletions
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h b/parallel-libs/streamexecutor/include/streamexecutor/Device.h
index 6b0bc185d900..34bba80859dc 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/Device.h
@@ -1,4 +1,4 @@
-//===-- Executor.h - The Executor class -------------------------*- C++ -*-===//
+//===-- Device.h - The Device class -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// The Executor class which represents a single device of a specific platform.
+/// The Device class which represents a single device of a specific platform.
///
//===----------------------------------------------------------------------===//
-#ifndef STREAMEXECUTOR_EXECUTOR_H
-#define STREAMEXECUTOR_EXECUTOR_H
+#ifndef STREAMEXECUTOR_DEVICE_H
+#define STREAMEXECUTOR_DEVICE_H
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
@@ -24,10 +24,10 @@ namespace streamexecutor {
class KernelInterface;
class Stream;
-class Executor {
+class Device {
public:
- explicit Executor(PlatformExecutor *PExecutor);
- virtual ~Executor();
+ explicit Device(PlatformDevice *PDevice);
+ virtual ~Device();
/// Gets the kernel implementation for the underlying platform.
virtual Expected<std::unique_ptr<KernelInterface>>
@@ -42,7 +42,7 @@ public:
template <typename T>
Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
Expected<GlobalDeviceMemoryBase> MaybeBase =
- PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
+ PDevice->allocateDeviceMemory(ElementCount * sizeof(T));
if (!MaybeBase)
return MaybeBase.takeError();
return GlobalDeviceMemory<T>(*MaybeBase);
@@ -50,7 +50,7 @@ public:
/// Frees memory previously allocated with allocateDeviceMemory.
template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) {
- return PExecutor->freeDeviceMemory(Memory);
+ return PDevice->freeDeviceMemory(Memory);
}
/// Allocates an array of ElementCount entries of type T in host memory.
@@ -59,7 +59,7 @@ public:
/// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
Expected<void *> MaybeMemory =
- PExecutor->allocateHostMemory(ElementCount * sizeof(T));
+ PDevice->allocateHostMemory(ElementCount * sizeof(T));
if (!MaybeMemory)
return MaybeMemory.takeError();
return static_cast<T *>(*MaybeMemory);
@@ -67,7 +67,7 @@ public:
/// Frees memory previously allocated with allocateHostMemory.
template <typename T> Error freeHostMemory(T *Memory) {
- return PExecutor->freeHostMemory(Memory);
+ return PDevice->freeHostMemory(Memory);
}
/// Registers a previously allocated host array of type T for asynchronous
@@ -77,15 +77,15 @@ public:
/// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
template <typename T>
Error registerHostMemory(T *Memory, size_t ElementCount) {
- return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T));
+ return PDevice->registerHostMemory(Memory, ElementCount * sizeof(T));
}
/// Unregisters host memory previously registered by registerHostMemory.
template <typename T> Error unregisterHostMemory(T *Memory) {
- return PExecutor->unregisterHostMemory(Memory);
+ return PDevice->unregisterHostMemory(Memory);
}
- /// \anchor ExecutorHostSyncCopyGroup
+ /// \anchor DeviceHostSyncCopyGroup
/// \name Host-synchronous device memory copying functions
///
/// These methods block the calling host thread while copying data to or from
@@ -125,9 +125,9 @@ public:
return make_error(
"copying too many elements, " + llvm::Twine(ElementCount) +
", to a host array of element count " + llvm::Twine(Dst.size()));
- return PExecutor->synchronousCopyD2H(
- Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0,
- ElementCount * sizeof(T));
+ return PDevice->synchronousCopyD2H(Src.getBaseMemory(),
+ Src.getElementOffset() * sizeof(T),
+ Dst.data(), 0, ElementCount * sizeof(T));
}
template <typename T>
@@ -179,9 +179,9 @@ public:
llvm::Twine(ElementCount) +
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
- return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
- Dst.getElementOffset() * sizeof(T),
- ElementCount * sizeof(T));
+ return PDevice->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
+ Dst.getElementOffset() * sizeof(T),
+ ElementCount * sizeof(T));
}
template <typename T>
@@ -234,7 +234,7 @@ public:
llvm::Twine(ElementCount) +
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
- return PExecutor->synchronousCopyD2D(
+ return PDevice->synchronousCopyD2D(
Src.getBaseMemory(), Src.getElementOffset() * sizeof(T),
Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T),
ElementCount * sizeof(T));
@@ -292,9 +292,9 @@ public:
///@} End host-synchronous device memory copying functions
private:
- PlatformExecutor *PExecutor;
+ PlatformDevice *PDevice;
};
} // namespace streamexecutor
-#endif // STREAMEXECUTOR_EXECUTOR_H
+#endif // STREAMEXECUTOR_DEVICE_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h b/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h
index b8171620db95..4a2eeb4b9151 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h
@@ -54,13 +54,13 @@
/// function as follows:
/// \code
/// namespace ccn = compiler_cuda_namespace;
-/// // Assumes Executor is a pointer to the StreamExecutor on which to
-/// // launch the kernel.
+/// // Assumes Device is a pointer to the Device on which to launch the
+/// // kernel.
/// //
/// // See KernelSpec.h for details on how the compiler can create a
/// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below.
/// Expected<ccn::SaxpyKernel> MaybeKernel =
-/// ccn::SaxpyKernel::create(Executor, ccn::SaxpyKernelLoaderSpec);
+/// ccn::SaxpyKernel::create(Device, ccn::SaxpyKernelLoaderSpec);
/// if (!MaybeKernel) { /* Handle error */ }
/// ccn::SaxpyKernel SaxpyKernel = *MaybeKernel;
/// Launch(SaxpyKernel, A, X, Y);
@@ -84,7 +84,7 @@
namespace streamexecutor {
-class Executor;
+class Device;
class KernelInterface;
/// The base class for device kernel functions.
@@ -100,13 +100,13 @@ public:
KernelBase &operator=(KernelBase &&) = default;
~KernelBase();
- /// Creates a kernel object from an Executor and a MultiKernelLoaderSpec.
+ /// Creates a kernel object from a Device and a MultiKernelLoaderSpec.
///
- /// The Executor knows which platform it belongs to and the
+ /// The Device knows which platform it belongs to and the
/// MultiKernelLoaderSpec knows how to find the kernel code for different
/// platforms, so the combined information is enough to get the kernel code
/// for the appropriate platform.
- static Expected<KernelBase> create(Executor *ParentExecutor,
+ static Expected<KernelBase> create(Device *Dev,
const MultiKernelLoaderSpec &Spec);
const std::string &getName() const { return Name; }
@@ -116,11 +116,11 @@ public:
KernelInterface *getImplementation() { return Implementation.get(); }
private:
- KernelBase(Executor *ParentExecutor, const std::string &Name,
+ KernelBase(Device *Dev, const std::string &Name,
const std::string &DemangledName,
std::unique_ptr<KernelInterface> Implementation);
- Executor *ParentExecutor;
+ Device *TheDevice;
std::string Name;
std::string DemangledName;
std::unique_ptr<KernelInterface> Implementation;
@@ -136,9 +136,9 @@ public:
TypedKernel &operator=(TypedKernel &&) = default;
/// Parameters here have the same meaning as in KernelBase::create.
- static Expected<TypedKernel> create(Executor *ParentExecutor,
+ static Expected<TypedKernel> create(Device *Dev,
const MultiKernelLoaderSpec &Spec) {
- auto MaybeBase = KernelBase::create(ParentExecutor, Spec);
+ auto MaybeBase = KernelBase::create(Dev, Spec);
if (!MaybeBase) {
return MaybeBase.takeError();
}
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h
index 2c8fce390785..b7737e82e7d3 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/PlatformInterfaces.h
@@ -31,7 +31,7 @@
namespace streamexecutor {
-class PlatformExecutor;
+class PlatformDevice;
/// Methods supported by device kernel function objects on all platforms.
class KernelInterface {
@@ -41,15 +41,14 @@ class KernelInterface {
/// Platform-specific stream handle.
class PlatformStreamHandle {
public:
- explicit PlatformStreamHandle(PlatformExecutor *PExecutor)
- : PExecutor(PExecutor) {}
+ explicit PlatformStreamHandle(PlatformDevice *PDevice) : PDevice(PDevice) {}
virtual ~PlatformStreamHandle();
- PlatformExecutor *getExecutor() { return PExecutor; }
+ PlatformDevice *getDevice() { return PDevice; }
private:
- PlatformExecutor *PExecutor;
+ PlatformDevice *PDevice;
};
/// Raw executor methods that must be implemented by each platform.
@@ -57,11 +56,11 @@ private:
/// This class defines the platform interface that supports executing work on a
/// device.
///
-/// The public Executor and Stream classes have the type-safe versions of the
+/// The public Device and Stream classes have the type-safe versions of the
/// functions in this interface.
-class PlatformExecutor {
+class PlatformDevice {
public:
- virtual ~PlatformExecutor();
+ virtual ~PlatformDevice();
virtual std::string getName() const = 0;
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h b/parallel-libs/streamexecutor/include/streamexecutor/Stream.h
index 3293053cbaf3..0e6e898b4731 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/Stream.h
@@ -12,19 +12,18 @@
/// A Stream instance represents a queue of sequential, host-asynchronous work
/// to be performed on a device.
///
-/// To enqueue work on a device, first create a Executor instance for a
-/// given device and then use that Executor to create a Stream instance.
-/// The Stream instance will perform its work on the device managed by the
-/// Executor that created it.
+/// To enqueue work on a device, first create a Device instance then use that
+/// Device to create a Stream instance. The Stream instance will perform its
+/// work on the device managed by the Device object that created it.
///
/// The various "then" methods of the Stream object, such as thenCopyH2D and
/// thenLaunch, may be used to enqueue work on the Stream, and the
/// blockHostUntilDone() method may be used to block the host code until the
/// Stream has completed all its work.
///
-/// Multiple Stream instances can be created for the same Executor. This
-/// allows several independent streams of computation to be performed
-/// simultaneously on a single device.
+/// Multiple Stream instances can be created for the same Device. This allows
+/// several independent streams of computation to be performed simultaneously on
+/// a single device.
///
//===----------------------------------------------------------------------===//
@@ -94,8 +93,8 @@ public:
const ParameterTs &... Arguments) {
auto ArgumentArray =
make_kernel_argument_pack<ParameterTs...>(Arguments...);
- setError(PExecutor->launch(ThePlatformStream.get(), BlockSize, GridSize,
- Kernel, ArgumentArray));
+ setError(PDevice->launch(ThePlatformStream.get(), BlockSize, GridSize,
+ Kernel, ArgumentArray));
return *this;
}
@@ -105,13 +104,13 @@ public:
/// return without waiting for the operation to complete.
///
/// Any host memory used as a source or destination for one of these
- /// operations must be allocated with Executor::allocateHostMemory or
- /// registered with Executor::registerHostMemory. Otherwise, the enqueuing
- /// operation may block until the copy operation is fully complete.
+ /// operations must be allocated with Device::allocateHostMemory or registered
+ /// with Device::registerHostMemory. Otherwise, the enqueuing operation may
+ /// block until the copy operation is fully complete.
///
/// The arguments and bounds checking for these methods match the API of the
- /// \ref ExecutorHostSyncCopyGroup
- /// "host-synchronous device memory copying functions" of Executor.
+ /// \ref DeviceHostSyncCopyGroup
+ /// "host-synchronous device memory copying functions" of Device.
///@{
template <typename T>
@@ -125,9 +124,9 @@ public:
setError("copying too many elements, " + llvm::Twine(ElementCount) +
", to a host array of element count " + llvm::Twine(Dst.size()));
else
- setError(PExecutor->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
- Src.getElementOffset() * sizeof(T),
- Dst.data(), 0, ElementCount * sizeof(T)));
+ setError(PDevice->copyD2H(ThePlatformStream.get(), Src.getBaseMemory(),
+ Src.getElementOffset() * sizeof(T), Dst.data(),
+ 0, ElementCount * sizeof(T)));
return *this;
}
@@ -182,7 +181,7 @@ public:
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
else
- setError(PExecutor->copyH2D(
+ setError(PDevice->copyH2D(
ThePlatformStream.get(), Src.data(), 0, Dst.getBaseMemory(),
Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
return *this;
@@ -238,7 +237,7 @@ public:
", to a device array of element count " +
llvm::Twine(Dst.getElementCount()));
else
- setError(PExecutor->copyD2D(
+ setError(PDevice->copyD2D(
ThePlatformStream.get(), Src.getBaseMemory(),
Src.getElementOffset() * sizeof(T), Dst.getBaseMemory(),
Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)));
@@ -322,8 +321,8 @@ private:
ErrorMessage = Message.str();
}
- /// The PlatformExecutor that supports the operations of this stream.
- PlatformExecutor *PExecutor;
+ /// The PlatformDevice that supports the operations of this stream.
+ PlatformDevice *PDevice;
/// The platform-specific stream handle for this instance.
std::unique_ptr<PlatformStreamHandle> ThePlatformStream;
diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt
index 7f5cb201956c..cf7baf9c7e29 100644
--- a/parallel-libs/streamexecutor/lib/CMakeLists.txt
+++ b/parallel-libs/streamexecutor/lib/CMakeLists.txt
@@ -6,7 +6,7 @@ add_library(
add_library(
streamexecutor
$<TARGET_OBJECTS:utils>
- Executor.cpp
+ Device.cpp
Kernel.cpp
KernelSpec.cpp
PackedKernelArgumentArray.cpp
diff --git a/parallel-libs/streamexecutor/lib/Executor.cpp b/parallel-libs/streamexecutor/lib/Device.cpp
index f103a76a4d98..4a5ec11997d5 100644
--- a/parallel-libs/streamexecutor/lib/Executor.cpp
+++ b/parallel-libs/streamexecutor/lib/Device.cpp
@@ -1,4 +1,4 @@
-//===-- Executor.cpp - Executor implementation ----------------------------===//
+//===-- Device.cpp - Device implementation --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,11 +8,11 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// Implementation of Executor class internals.
+/// Implementation of Device class internals.
///
//===----------------------------------------------------------------------===//
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
#include <cassert>
@@ -23,17 +23,17 @@
namespace streamexecutor {
-Executor::Executor(PlatformExecutor *PExecutor) : PExecutor(PExecutor) {}
+Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
-Executor::~Executor() = default;
+Device::~Device() = default;
-Expected<std::unique_ptr<Stream>> Executor::createStream() {
+Expected<std::unique_ptr<Stream>> Device::createStream() {
Expected<std::unique_ptr<PlatformStreamHandle>> MaybePlatformStream =
- PExecutor->createStream();
+ PDevice->createStream();
if (!MaybePlatformStream) {
return MaybePlatformStream.takeError();
}
- assert((*MaybePlatformStream)->getExecutor() == PExecutor &&
+ assert((*MaybePlatformStream)->getDevice() == PDevice &&
"an executor created a stream with a different stored executor");
return llvm::make_unique<Stream>(std::move(*MaybePlatformStream));
}
diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp
index 9e99e91ef91f..fa0992003a6f 100644
--- a/parallel-libs/streamexecutor/lib/Kernel.cpp
+++ b/parallel-libs/streamexecutor/lib/Kernel.cpp
@@ -13,31 +13,31 @@
//===----------------------------------------------------------------------===//
#include "streamexecutor/Kernel.h"
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
#include "streamexecutor/PlatformInterfaces.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
namespace streamexecutor {
-KernelBase::KernelBase(Executor *ParentExecutor, const std::string &Name,
+KernelBase::KernelBase(Device *Dev, const std::string &Name,
const std::string &DemangledName,
std::unique_ptr<KernelInterface> Implementation)
- : ParentExecutor(ParentExecutor), Name(Name), DemangledName(DemangledName),
+ : TheDevice(Dev), Name(Name), DemangledName(DemangledName),
Implementation(std::move(Implementation)) {}
KernelBase::~KernelBase() = default;
-Expected<KernelBase> KernelBase::create(Executor *ParentExecutor,
+Expected<KernelBase> KernelBase::create(Device *Dev,
const MultiKernelLoaderSpec &Spec) {
- auto MaybeImplementation = ParentExecutor->getKernelImplementation(Spec);
+ auto MaybeImplementation = Dev->getKernelImplementation(Spec);
if (!MaybeImplementation) {
return MaybeImplementation.takeError();
}
std::string Name = Spec.getKernelName();
std::string DemangledName =
llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr);
- KernelBase Instance(ParentExecutor, Name, DemangledName,
+ KernelBase Instance(Dev, Name, DemangledName,
std::move(*MaybeImplementation));
return std::move(Instance);
}
diff --git a/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp b/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp
index e0ae644e5485..770cd170c4f0 100644
--- a/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp
+++ b/parallel-libs/streamexecutor/lib/PlatformInterfaces.cpp
@@ -18,6 +18,6 @@ namespace streamexecutor {
PlatformStreamHandle::~PlatformStreamHandle() = default;
-PlatformExecutor::~PlatformExecutor() = default;
+PlatformDevice::~PlatformDevice() = default;
} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp
index 40f52f9b63d5..20a817c2715f 100644
--- a/parallel-libs/streamexecutor/lib/Stream.cpp
+++ b/parallel-libs/streamexecutor/lib/Stream.cpp
@@ -17,8 +17,7 @@
namespace streamexecutor {
Stream::Stream(std::unique_ptr<PlatformStreamHandle> PStream)
- : PExecutor(PStream->getExecutor()), ThePlatformStream(std::move(PStream)) {
-}
+ : PDevice(PStream->getDevice()), ThePlatformStream(std::move(PStream)) {}
Stream::~Stream() = default;
diff --git a/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt b/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt
index 244312ff12c1..3b414e342d95 100644
--- a/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt
+++ b/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt
@@ -1,12 +1,12 @@
add_executable(
- executor_test
- ExecutorTest.cpp)
+ device_test
+ DeviceTest.cpp)
target_link_libraries(
- executor_test
+ device_test
streamexecutor
${GTEST_BOTH_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT})
-add_test(ExecutorTest executor_test)
+add_test(DeviceTest device_test)
add_executable(
kernel_test
diff --git a/parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp b/parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp
new file mode 100644
index 000000000000..cb34b8b92d57
--- /dev/null
+++ b/parallel-libs/streamexecutor/lib/unittests/DeviceTest.cpp
@@ -0,0 +1,476 @@
+//===-- DeviceTest.cpp - Tests for Device ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the unit tests for Device code.
+///
+//===----------------------------------------------------------------------===//
+
+#include <cstdlib>
+#include <cstring>
+
+#include "streamexecutor/Device.h"
+#include "streamexecutor/PlatformInterfaces.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+class MockPlatformDevice : public se::PlatformDevice {
+public:
+ ~MockPlatformDevice() override {}
+
+ std::string getName() const override { return "MockPlatformDevice"; }
+
+ se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
+ createStream() override {
+ return se::make_error("not implemented");
+ }
+
+ se::Expected<se::GlobalDeviceMemoryBase>
+ allocateDeviceMemory(size_t ByteCount) override {
+ return se::GlobalDeviceMemoryBase(std::malloc(ByteCount));
+ }
+
+ se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
+ std::free(const_cast<void *>(Memory.getHandle()));
+ return se::Error::success();
+ }
+
+ se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
+ return std::malloc(ByteCount);
+ }
+
+ se::Error freeHostMemory(void *Memory) override {
+ std::free(Memory);
+ return se::Error::success();
+ }
+
+ se::Error registerHostMemory(void *, size_t) override {
+ return se::Error::success();
+ }
+
+ se::Error unregisterHostMemory(void *) override {
+ return se::Error::success();
+ }
+
+ se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
+ size_t SrcByteOffset, void *HostDst,
+ size_t DstByteOffset,
+ size_t ByteCount) override {
+ std::memcpy(static_cast<char *>(HostDst) + DstByteOffset,
+ static_cast<const char *>(DeviceSrc.getHandle()) +
+ SrcByteOffset,
+ ByteCount);
+ return se::Error::success();
+ }
+
+ se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
+ se::GlobalDeviceMemoryBase DeviceDst,
+ size_t DstByteOffset,
+ size_t ByteCount) override {
+ std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
+ DstByteOffset,
+ static_cast<const char *>(HostSrc) + SrcByteOffset, ByteCount);
+ return se::Error::success();
+ }
+
+ se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
+ size_t DstByteOffset,
+ const se::GlobalDeviceMemoryBase &DeviceSrc,
+ size_t SrcByteOffset,
+ size_t ByteCount) override {
+ std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
+ DstByteOffset,
+ static_cast<const char *>(DeviceSrc.getHandle()) +
+ SrcByteOffset,
+ ByteCount);
+ return se::Error::success();
+ }
+};
+
+/// Test fixture to hold objects used by tests.
+class DeviceTest : public ::testing::Test {
+public:
+ DeviceTest()
+ : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
+ HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
+ DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
+ DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
+ DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
+ DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
+ Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
+ Device(&PDevice) {}
+
+ // Device memory is backed by host arrays.
+ int HostA5[5];
+ int HostB5[5];
+ int HostA7[7];
+ int HostB7[7];
+ se::GlobalDeviceMemory<int> DeviceA5;
+ se::GlobalDeviceMemory<int> DeviceB5;
+ se::GlobalDeviceMemory<int> DeviceA7;
+ se::GlobalDeviceMemory<int> DeviceB7;
+
+ // Host memory to be used as actual host memory.
+ int Host5[5];
+ int Host7[7];
+
+ MockPlatformDevice PDevice;
+ se::Device Device;
+};
+
+#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E))
+#define EXPECT_ERROR(E) \
+ do { \
+ se::Error E__ = E; \
+ EXPECT_TRUE(static_cast<bool>(E__)); \
+ consumeError(std::move(E__)); \
+ } while (false)
+
+using llvm::ArrayRef;
+using llvm::MutableArrayRef;
+
+TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) {
+ se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
+ Device.allocateDeviceMemory<int>(10);
+ EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+ EXPECT_NO_ERROR(Device.freeDeviceMemory(*MaybeMemory));
+}
+
+TEST_F(DeviceTest, AllocateAndFreeHostMemory) {
+ se::Expected<int *> MaybeMemory = Device.allocateHostMemory<int>(10);
+ EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+ EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory));
+}
+
+TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) {
+ std::vector<int> Data(10);
+ EXPECT_NO_ERROR(Device.registerHostMemory(Data.data(), 10));
+ EXPECT_NO_ERROR(Device.unregisterHostMemory(Data.data()));
+}
+
+// D2H tests
+
+TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostB5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HToPointer) {
+ EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
+ EXPECT_NO_ERROR(Device.synchronousCopyD2H(
+ DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
+ for (int I = 1; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
+ MutableArrayRef<int>(Host5), 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostB5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(),
+ MutableArrayRef<int>(Host5), 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+ MutableArrayRef<int>(Host7), 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+ MutableArrayRef<int>(Host5), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) {
+ EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
+ MutableArrayRef<int>(Host5)));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA7[I + 1], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
+ MutableArrayRef<int>(Host5)));
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+ MutableArrayRef<int>(Host7)));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) {
+ EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
+ Host5 + 1, 4));
+ for (int I = 1; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
+}
+
+// H2D tests
+
+TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) {
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostB5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DToArrayRef) {
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DToPointer) {
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) {
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(
+ ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
+ for (int I = 1; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(
+ ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostB5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) {
+ EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], Host5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
+}
+
+// D2D tests
+
+TEST_F(DeviceTest, SyncCopyD2DByCount) {
+ EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], HostB5[I]);
+ }
+
+ EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostA7[I], HostB7[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2D) {
+ EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], HostB5[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DByCount) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4));
+ for (int I = 0; I < 4; ++I) {
+ EXPECT_EQ(HostA5[I + 1], HostB5[I]);
+ }
+
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostA7[I], HostB7[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2D) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA7[I], HostB5[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2DSliceByCount) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], HostB7[I + 2]);
+ }
+
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostA7[I], HostB7[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2DSlice) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], HostB7[I]);
+ }
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
+
+ EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], HostB5[I]);
+ }
+
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
+ for (int I = 0; I < 2; ++I) {
+ EXPECT_EQ(HostA7[I], HostB7[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DSlice) {
+ EXPECT_NO_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
+ for (int I = 0; I < 5; ++I) {
+ EXPECT_EQ(HostA5[I], HostB5[I]);
+ }
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
+
+ EXPECT_ERROR(
+ Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
+}
+
+} // namespace
diff --git a/parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp b/parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp
deleted file mode 100644
index b6719d303ec0..000000000000
--- a/parallel-libs/streamexecutor/lib/unittests/ExecutorTest.cpp
+++ /dev/null
@@ -1,478 +0,0 @@
-//===-- ExecutorTest.cpp - Tests for Executor -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the unit tests for Executor code.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cstdlib>
-#include <cstring>
-
-#include "streamexecutor/Executor.h"
-#include "streamexecutor/PlatformInterfaces.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-namespace se = ::streamexecutor;
-
-class MockPlatformExecutor : public se::PlatformExecutor {
-public:
- ~MockPlatformExecutor() override {}
-
- std::string getName() const override { return "MockPlatformExecutor"; }
-
- se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
- createStream() override {
- return se::make_error("not implemented");
- }
-
- se::Expected<se::GlobalDeviceMemoryBase>
- allocateDeviceMemory(size_t ByteCount) override {
- return se::GlobalDeviceMemoryBase(std::malloc(ByteCount));
- }
-
- se::Error freeDeviceMemory(se::GlobalDeviceMemoryBase Memory) override {
- std::free(const_cast<void *>(Memory.getHandle()));
- return se::Error::success();
- }
-
- se::Expected<void *> allocateHostMemory(size_t ByteCount) override {
- return std::malloc(ByteCount);
- }
-
- se::Error freeHostMemory(void *Memory) override {
- std::free(Memory);
- return se::Error::success();
- }
-
- se::Error registerHostMemory(void *, size_t) override {
- return se::Error::success();
- }
-
- se::Error unregisterHostMemory(void *) override {
- return se::Error::success();
- }
-
- se::Error synchronousCopyD2H(const se::GlobalDeviceMemoryBase &DeviceSrc,
- size_t SrcByteOffset, void *HostDst,
- size_t DstByteOffset,
- size_t ByteCount) override {
- std::memcpy(static_cast<char *>(HostDst) + DstByteOffset,
- static_cast<const char *>(DeviceSrc.getHandle()) +
- SrcByteOffset,
- ByteCount);
- return se::Error::success();
- }
-
- se::Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
- se::GlobalDeviceMemoryBase DeviceDst,
- size_t DstByteOffset,
- size_t ByteCount) override {
- std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
- DstByteOffset,
- static_cast<const char *>(HostSrc) + SrcByteOffset, ByteCount);
- return se::Error::success();
- }
-
- se::Error synchronousCopyD2D(se::GlobalDeviceMemoryBase DeviceDst,
- size_t DstByteOffset,
- const se::GlobalDeviceMemoryBase &DeviceSrc,
- size_t SrcByteOffset,
- size_t ByteCount) override {
- std::memcpy(static_cast<char *>(const_cast<void *>(DeviceDst.getHandle())) +
- DstByteOffset,
- static_cast<const char *>(DeviceSrc.getHandle()) +
- SrcByteOffset,
- ByteCount);
- return se::Error::success();
- }
-};
-
-/// Test fixture to hold objects used by tests.
-class ExecutorTest : public ::testing::Test {
-public:
- ExecutorTest()
- : HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
- HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
- DeviceA5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA5, 5)),
- DeviceB5(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB5, 5)),
- DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
- DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
- Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
- Executor(&PExecutor) {}
-
- // Device memory is backed by host arrays.
- int HostA5[5];
- int HostB5[5];
- int HostA7[7];
- int HostB7[7];
- se::GlobalDeviceMemory<int> DeviceA5;
- se::GlobalDeviceMemory<int> DeviceB5;
- se::GlobalDeviceMemory<int> DeviceA7;
- se::GlobalDeviceMemory<int> DeviceB7;
-
- // Host memory to be used as actual host memory.
- int Host5[5];
- int Host7[7];
-
- MockPlatformExecutor PExecutor;
- se::Executor Executor;
-};
-
-#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E))
-#define EXPECT_ERROR(E) \
- do { \
- se::Error E__ = E; \
- EXPECT_TRUE(static_cast<bool>(E__)); \
- consumeError(std::move(E__)); \
- } while (false)
-
-using llvm::ArrayRef;
-using llvm::MutableArrayRef;
-
-TEST_F(ExecutorTest, AllocateAndFreeDeviceMemory) {
- se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
- Executor.allocateDeviceMemory<int>(10);
- EXPECT_TRUE(static_cast<bool>(MaybeMemory));
- EXPECT_NO_ERROR(Executor.freeDeviceMemory(*MaybeMemory));
-}
-
-TEST_F(ExecutorTest, AllocateAndFreeHostMemory) {
- se::Expected<int *> MaybeMemory = Executor.allocateHostMemory<int>(10);
- EXPECT_TRUE(static_cast<bool>(MaybeMemory));
- EXPECT_NO_ERROR(Executor.freeHostMemory(*MaybeMemory));
-}
-
-TEST_F(ExecutorTest, RegisterAndUnregisterHostMemory) {
- std::vector<int> Data(10);
- EXPECT_NO_ERROR(Executor.registerHostMemory(Data.data(), 10));
- EXPECT_NO_ERROR(Executor.unregisterHostMemory(Data.data()));
-}
-
-// D2H tests
-
-TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRefByCount) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostB5[I], Host5[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HToMutableArrayRef) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HToPointer) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host5, 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5, Host7, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2H(
- DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
- for (int I = 1; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
- MutableArrayRef<int>(Host5), 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostB5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice(),
- MutableArrayRef<int>(Host5), 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
- MutableArrayRef<int>(Host7), 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
- MutableArrayRef<int>(Host5), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HSliceToMutableArrayRef) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
- MutableArrayRef<int>(Host5)));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA7[I + 1], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
- MutableArrayRef<int>(Host5)));
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(),
- MutableArrayRef<int>(Host7)));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2HSliceToPointer) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
- Host5 + 1, 4));
- for (int I = 1; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
-}
-
-// H2D tests
-
-TEST_F(ExecutorTest, SyncCopyH2DToArrayRefByCount) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_NO_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostB5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DToArrayRef) {
- EXPECT_NO_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DToPointer) {
- EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5, 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRefByCount) {
- EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
- ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
- for (int I = 1; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_NO_ERROR(Executor.synchronousCopyH2D(
- ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostB5[I], Host5[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));
-
- EXPECT_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));
-
- EXPECT_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DSliceToArrayRef) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));
-
- EXPECT_ERROR(
- Executor.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
-}
-
-TEST_F(ExecutorTest, SyncCopyH2DSliceToPointer) {
- EXPECT_NO_ERROR(Executor.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], Host5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
-}
-
-// D2D tests
-
-TEST_F(ExecutorTest, SyncCopyD2DByCount) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], HostB5[I]);
- }
-
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostA7[I], HostB7[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2D) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], HostB5[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2DByCount) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1),
- DeviceB5, 4));
- for (int I = 0; I < 4; ++I) {
- EXPECT_EQ(HostA5[I + 1], HostB5[I]);
- }
-
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1),
- DeviceB7, 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostA7[I], HostB7[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2D) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA7[I], HostB5[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2DSliceByCount) {
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
- DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], HostB7[I + 2]);
- }
-
- EXPECT_NO_ERROR(Executor.synchronousCopyD2D(
- DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostA7[I], HostB7[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopyD2DSlice) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], HostB7[I]);
- }
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
-
- EXPECT_ERROR(Executor.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2DSliceByCount) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], HostB5[I]);
- }
-
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
- for (int I = 0; I < 2; ++I) {
- EXPECT_EQ(HostA7[I], HostB7[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
-}
-
-TEST_F(ExecutorTest, SyncCopySliceD2DSlice) {
- EXPECT_NO_ERROR(
- Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
- for (int I = 0; I < 5; ++I) {
- EXPECT_EQ(HostA5[I], HostB5[I]);
- }
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
-
- EXPECT_ERROR(
- Executor.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
-}
-
-} // namespace
diff --git a/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp b/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp
index b5ee8a0cbfc8..a19ebfb96bdd 100644
--- a/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp
+++ b/parallel-libs/streamexecutor/lib/unittests/KernelTest.cpp
@@ -14,7 +14,7 @@
#include <cassert>
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
#include "streamexecutor/Kernel.h"
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
@@ -27,7 +27,7 @@ namespace {
namespace se = ::streamexecutor;
-// An Executor that returns a dummy KernelInterface.
+// A Device that returns a dummy KernelInterface.
//
// During construction it creates a unique_ptr to a dummy KernelInterface and it
// also stores a separate copy of the raw pointer that is stored by that
@@ -39,10 +39,10 @@ namespace se = ::streamexecutor;
// object. The raw pointer copy can then be used to identify the unique_ptr in
// its new location (by comparing the raw pointer with unique_ptr::get), to
// verify that the unique_ptr ended up where it was supposed to be.
-class MockExecutor : public se::Executor {
+class MockDevice : public se::Device {
public:
- MockExecutor()
- : se::Executor(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
+ MockDevice()
+ : se::Device(nullptr), Unique(llvm::make_unique<se::KernelInterface>()),
Raw(Unique.get()) {}
// Moves the unique pointer into the returned se::Expected instance.
@@ -51,7 +51,7 @@ public:
// out.
se::Expected<std::unique_ptr<se::KernelInterface>>
getKernelImplementation(const se::MultiKernelLoaderSpec &) override {
- assert(Unique && "MockExecutor getKernelImplementation should not be "
+ assert(Unique && "MockDevice getKernelImplementation should not be "
"called more than once");
return std::move(Unique);
}
@@ -79,15 +79,15 @@ TYPED_TEST_CASE(GetImplementationTest, GetImplementationTypes);
// Tests that the kernel create functions properly fetch the implementation
// pointers for the kernel objects they construct from the passed-in
-// Executor objects.
+// Device objects.
TYPED_TEST(GetImplementationTest, SetImplementationDuringCreate) {
se::MultiKernelLoaderSpec Spec;
- MockExecutor MockExecutor;
+ MockDevice Dev;
- auto MaybeKernel = TypeParam::create(&MockExecutor, Spec);
+ auto MaybeKernel = TypeParam::create(&Dev, Spec);
EXPECT_TRUE(static_cast<bool>(MaybeKernel));
se::KernelInterface *Implementation = MaybeKernel->getImplementation();
- EXPECT_EQ(MockExecutor.getRaw(), Implementation);
+ EXPECT_EQ(Dev.getRaw(), Implementation);
}
} // namespace
diff --git a/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp b/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp
index 756467057ac7..d05c928dcb35 100644
--- a/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp
+++ b/parallel-libs/streamexecutor/lib/unittests/StreamTest.cpp
@@ -14,7 +14,7 @@
#include <cstring>
-#include "streamexecutor/Executor.h"
+#include "streamexecutor/Device.h"
#include "streamexecutor/Kernel.h"
#include "streamexecutor/KernelSpec.h"
#include "streamexecutor/PlatformInterfaces.h"
@@ -26,14 +26,14 @@ namespace {
namespace se = ::streamexecutor;
-/// Mock PlatformExecutor that performs asynchronous memcpy operations by
+/// Mock PlatformDevice that performs asynchronous memcpy operations by
/// ignoring the stream argument and calling std::memcpy on device memory
/// handles.
-class MockPlatformExecutor : public se::PlatformExecutor {
+class MockPlatformDevice : public se::PlatformDevice {
public:
- ~MockPlatformExecutor() override {}
+ ~MockPlatformDevice() override {}
- std::string getName() const override { return "MockPlatformExecutor"; }
+ std::string getName() const override { return "MockPlatformDevice"; }
se::Expected<std::unique_ptr<se::PlatformStreamHandle>>
createStream() override {
@@ -83,7 +83,7 @@ public:
DeviceA7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostA7, 7)),
DeviceB7(se::GlobalDeviceMemory<int>::makeFromElementCount(HostB7, 7)),
Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35},
- Stream(llvm::make_unique<se::PlatformStreamHandle>(&PExecutor)) {}
+ Stream(llvm::make_unique<se::PlatformStreamHandle>(&PDevice)) {}
protected:
// Device memory is backed by host arrays.
@@ -100,7 +100,7 @@ protected:
int Host5[5];
int Host7[7];
- MockPlatformExecutor PExecutor;
+ MockPlatformDevice PDevice;
se::Stream Stream;
};