summary | refs | log | tree | commit | diff
path: root/parallel-libs
diff options
context:
space:
mode:
author: Jason Henline <jhen@google.com>, 2016-08-03 18:04:13 +0000
committer: Jason Henline <jhen@google.com>, 2016-08-03 18:04:13 +0000
commit: 074ded3ad45ca70527917586519ea8251db8eb97 (patch)
tree: a7af8fd53bc399dc534523480da7a9fe2edabf35 /parallel-libs
parent: aee419d1a14ce35304e2e166213192241b7b505c (diff)
[StreamExecutor] Add KernelLoaderSpec
Summary:
Add definitions for the KernelLoaderSpec and MultiKernelLoaderSpec classes to StreamExecutor. Instances of these classes are generated by the compiler in order to provide host code with a handle to device code.

Reviewers: jlebar, tra

Subscribers: parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23038
Diffstat (limited to 'parallel-libs')
-rw-r--r-- parallel-libs/streamexecutor/CMakeLists.txt | 1
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h | 263
-rw-r--r-- parallel-libs/streamexecutor/lib/CMakeLists.txt | 9
-rw-r--r-- parallel-libs/streamexecutor/lib/KernelSpec.cpp | 94
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt | 9
-rw-r--r-- parallel-libs/streamexecutor/lib/unittests/KernelSpecTest.cpp | 132
6 files changed, 508 insertions, 0 deletions
diff --git a/parallel-libs/streamexecutor/CMakeLists.txt b/parallel-libs/streamexecutor/CMakeLists.txt
index 2cf7cee34d9..18c28d62fef 100644
--- a/parallel-libs/streamexecutor/CMakeLists.txt
+++ b/parallel-libs/streamexecutor/CMakeLists.txt
@@ -32,6 +32,7 @@ if(STREAM_EXECUTOR_STANDALONE)
enable_testing()
find_package(GTest REQUIRED)
include_directories(${GTEST_INCLUDE_DIRS})
+ find_package(Threads REQUIRED)
endif()
else(NOT STREAM_EXECUTOR_STANDALONE)
if(STREAM_EXECUTOR_UNIT_TESTS)
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h b/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
new file mode 100644
index 00000000000..a6943143086
--- /dev/null
+++ b/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
@@ -0,0 +1,263 @@
+//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// KernelLoaderSpec is the base class for types that know where to find the
+/// code for a data-parallel kernel in a particular format on a particular
+/// platform. So, for example, there will be one subclass that deals with CUDA
+/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
+/// subclass that deals with OpenCL text code.
+///
+/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
+/// instances. This is useful when code is available for the same kernel in
+/// several different formats or targeted for several different platforms. All
+/// the various KernelLoaderSpec instances for this kernel can be combined
+/// together in one MultiKernelLoaderSpec and the specific platform consumer can
+/// decide which instance of the code it wants to use.
+///
+/// MultiKernelLoaderSpec provides several helper functions to build and
+/// register KernelLoaderSpec instances all in a single operation. For example,
+/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
+/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
+///
+/// The loader spec classes declared here are designed primarily to be
+/// instantiated by the compiler, but they can also be instantiated directly by
+/// the user. A simplified example workflow which a compiler might follow in the
+/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows:
+///
+/// 1. The user defines a kernel function called UserKernel.
+/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds
+/// that data into the host code at address __UserKernelFatbinAddress.
+/// 3. The compiler adds code at the beginning of the host code to instantiate a
+/// MultiKernelLoaderSpec:
+/// \code
+/// namespace compiler_cuda_namespace {
+/// MultiKernelLoaderSpec UserKernelLoaderSpec;
+/// } // namespace compiler_cuda_namespace
+/// \endcode
+/// 4. The compiler then adds code to the host code to add the fatbin data to
+/// the new MultiKernelLoaderSpec, and to associate that data with the kernel
+/// name "UserKernel":
+/// \code
+/// namespace compiler_cuda_namespace {
+/// UserKernelLoaderSpec.addCUDAFatbinInMemory(
+/// "UserKernel", __UserKernelFatbinAddress);
+/// } // namespace compiler_cuda_namespace
+/// \endcode
+/// 5. The host code, having known beforehand that the compiler would initialize
+/// a MultiKernelLoaderSpec based on the name of the CUDA kernel, makes use
+/// of the symbol compiler_cuda_namespace::UserKernelLoaderSpec without defining it.
+///
+/// In the example above, the MultiKernelLoaderSpec instance created by the
+/// compiler can be used by the host code to create StreamExecutor kernel
+/// objects. In turn, those StreamExecutor kernel objects can be used by the
+/// host code to launch the kernel on the device as desired.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef STREAMEXECUTOR_KERNELSPEC_H
+#define STREAMEXECUTOR_KERNELSPEC_H
+
+#include <cassert>
+#include <map>
+#include <memory>
+#include <string>
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace streamexecutor {
+
+/// An object that knows how to find the code for a device kernel.
+///
+/// This is the base class for the hierarchy of loader specs. The different
+/// subclasses know how to find code in different formats (e.g. CUDA PTX, OpenCL
+/// binary).
+///
+/// This base class has functionality for storing and getting the name of the
+/// kernel as a string.
+class KernelLoaderSpec {
+public:
+ /// Returns the name of the kernel this spec loads.
+ const std::string &getKernelName() const { return KernelName; }
+
+protected:
+ explicit KernelLoaderSpec(llvm::StringRef KernelName);
+
+private:
+ std::string KernelName;
+
+ KernelLoaderSpec(const KernelLoaderSpec &) = delete;
+ KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete;
+};
+
+/// A KernelLoaderSpec for CUDA PTX code that resides in memory as a
+/// null-terminated string.
+class CUDAPTXInMemorySpec : public KernelLoaderSpec {
+public:
+ /// First component is major version, second component is minor version.
+ using ComputeCapability = std::pair<int, int>;
+
+ /// PTX code combined with its compute capability.
+ struct PTXSpec {
+ ComputeCapability TheComputeCapability;
+ const char *PTXCode;
+ };
+
+ /// Creates a CUDAPTXInMemorySpec from an array of PTXSpec objects.
+ ///
+ /// Adds each item in SpecList to this object.
+ ///
+ /// Does not take ownership of the PTXCode pointers in the SpecList elements.
+ CUDAPTXInMemorySpec(llvm::StringRef KernelName,
+ const llvm::ArrayRef<PTXSpec> SpecList);
+
+ /// Returns a pointer to the PTX code for the requested compute capability.
+ ///
+ /// Returns nullptr on failed lookup (if the requested compute capability is
+ /// not available). Matches exactly the specified compute capability. Doesn't
+ /// try to do anything smart like finding the next best compute capability if
+ /// the specified capability cannot be found.
+ const char *getCode(int ComputeCapabilityMajor,
+ int ComputeCapabilityMinor) const;
+
+private:
+ /// PTX code contents in memory.
+ ///
+ /// The key is a pair (cc_major, cc_minor), e.g., (2, 0), (3, 0), (3, 5).
+ std::map<ComputeCapability, const char *> PTXByComputeCapability;
+
+ CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete;
+ CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete;
+};
+
+/// A KernelLoaderSpec for CUDA fatbin code that resides in memory.
+class CUDAFatbinInMemorySpec : public KernelLoaderSpec {
+public:
+ /// Creates a CUDAFatbinInMemorySpec with a reference to the given fatbin
+ /// bytes.
+ ///
+ /// Does not take ownership of the Bytes pointer.
+ CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes);
+
+ /// Gets the fatbin data bytes.
+ const void *getBytes() const { return Bytes; }
+
+private:
+ const void *Bytes;
+
+ CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete;
+ CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete;
+};
+
+/// A KernelLoaderSpec for OpenCL text that resides in memory as a
+/// null-terminated string.
+class OpenCLTextInMemorySpec : public KernelLoaderSpec {
+public:
+ /// Creates an OpenCLTextInMemorySpec with a reference to the given OpenCL
+ /// text code bytes.
+ ///
+ /// Does not take ownership of the Text pointer.
+ OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text);
+
+ /// Returns the OpenCL text contents.
+ const char *getText() const { return Text; }
+
+private:
+ const char *Text;
+
+ OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete;
+ OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete;
+};
+
+/// An object to store several different KernelLoaderSpecs for the same kernel.
+///
+/// This allows code in different formats and for different platforms to be
+/// stored all together for a single kernel.
+///
+/// Various methods are available to add a new KernelLoaderSpec to a
+/// MultiKernelLoaderSpec. There are also methods to query which formats and
+/// platforms are supported by the currently added KernelLoaderSpec objects, and
+/// methods to get the KernelLoaderSpec objects for each format and platform.
+///
+/// Since all stored KernelLoaderSpecs are supposed to reference the same
+/// kernel, they are all assumed to take the same number and type of parameters,
+/// but no checking is done to enforce this. In debug mode, all
+/// KernelLoaderSpecs are checked to make sure they have the same kernel name,
+/// so passing in specs with different kernel names can cause the program to
+/// abort.
+///
+/// This interface is prone to errors, so it is better to leave
+/// MultiKernelLoaderSpec creation and initialization to the compiler rather
+/// than doing it by hand.
+class MultiKernelLoaderSpec {
+public:
+ // Convenience getters for testing whether these platform variants have
+ // kernel loader specifications available.
+
+ bool hasCUDAPTXInMemory() const { return TheCUDAPTXInMemorySpec != nullptr; }
+ bool hasCUDAFatbinInMemory() const {
+ return TheCUDAFatbinInMemorySpec != nullptr;
+ }
+ bool hasOpenCLTextInMemory() const {
+ return TheOpenCLTextInMemorySpec != nullptr;
+ }
+
+ // Accessors for platform variant kernel load specifications.
+ //
+ // Precondition: corresponding has* method returns true.
+
+ const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const {
+ assert(hasCUDAPTXInMemory() && "getting spec that is not present");
+ return *TheCUDAPTXInMemorySpec;
+ }
+ const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const {
+ assert(hasCUDAFatbinInMemory() && "getting spec that is not present");
+ return *TheCUDAFatbinInMemorySpec;
+ }
+ const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const {
+ assert(hasOpenCLTextInMemory() && "getting spec that is not present");
+ return *TheOpenCLTextInMemorySpec;
+ }
+
+ // Builder-pattern-like methods for use in initializing a
+ // MultiKernelLoaderSpec.
+ //
+ // Each of these should be used at most once for a single
+ // MultiKernelLoaderSpec object. See file comment for example usage.
+ //
+ // Note that the KernelName parameter must be consistent with the kernel in
+ // the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel
+ // name may be mangled by the compiler if it is not declared extern "C".
+
+ /// Does not take ownership of the PTXCode pointers in the SpecList elements.
+ MultiKernelLoaderSpec &
+ addCUDAPTXInMemory(llvm::StringRef KernelName,
+ llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
+
+ /// Does not take ownership of the FatbinBytes pointer.
+ MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName,
+ const void *FatbinBytes);
+
+ /// Does not take ownership of the OpenCLText pointer.
+ MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
+ const char *OpenCLText);
+
+private:
+ void setKernelName(llvm::StringRef KernelName);
+
+ std::unique_ptr<std::string> TheKernelName;
+ std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
+ std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
+ std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
+};
+
+} // namespace streamexecutor
+
+#endif // STREAMEXECUTOR_KERNELSPEC_H
diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt
index 5b3fe92d4b4..f8f0c163988 100644
--- a/parallel-libs/streamexecutor/lib/CMakeLists.txt
+++ b/parallel-libs/streamexecutor/lib/CMakeLists.txt
@@ -2,3 +2,12 @@ add_library(
utils
OBJECT
Utils/Error.cpp)
+
+add_library(
+ streamexecutor
+ $<TARGET_OBJECTS:utils>
+ KernelSpec.cpp)
+
+if(STREAM_EXECUTOR_UNIT_TESTS)
+ add_subdirectory(unittests)
+endif()
diff --git a/parallel-libs/streamexecutor/lib/KernelSpec.cpp b/parallel-libs/streamexecutor/lib/KernelSpec.cpp
new file mode 100644
index 00000000000..d2715aa88a5
--- /dev/null
+++ b/parallel-libs/streamexecutor/lib/KernelSpec.cpp
@@ -0,0 +1,94 @@
+//===-- KernelSpec.cpp - General kernel spec implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation details for kernel loader specs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "streamexecutor/KernelSpec.h"
+
+#include "llvm/ADT/STLExtras.h"
+
+namespace streamexecutor {
+
+KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName)
+ : KernelName(KernelName) {}
+
+CUDAPTXInMemorySpec::CUDAPTXInMemorySpec(
+ llvm::StringRef KernelName,
+ const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList)
+ : KernelLoaderSpec(KernelName) {
+ for (const auto &Spec : SpecList) {
+ PTXByComputeCapability.emplace(Spec.TheComputeCapability, Spec.PTXCode);
+ }
+}
+
+const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
+ int ComputeCapabilityMinor) const {
+ auto PTXIter =
+ PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+ ComputeCapabilityMajor, ComputeCapabilityMinor});
+ if (PTXIter == PTXByComputeCapability.end()) {
+ return nullptr;
+ }
+ return PTXIter->second;
+}
+
+CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
+ const void *Bytes)
+ : KernelLoaderSpec(KernelName), Bytes(Bytes) {}
+
+OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName,
+ const char *Text)
+ : KernelLoaderSpec(KernelName), Text(Text) {}
+
+void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) {
+ if (TheKernelName) {
+ assert(KernelName.equals(*TheKernelName) &&
+ "different kernel names in one MultiKernelLoaderSpec");
+ } else {
+ TheKernelName = llvm::make_unique<std::string>(KernelName);
+ }
+}
+
+MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory(
+ llvm::StringRef KernelName,
+ llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) {
+ assert((TheCUDAPTXInMemorySpec == nullptr) &&
+ "illegal loader spec overwrite");
+ setKernelName(KernelName);
+ TheCUDAPTXInMemorySpec =
+ llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList);
+ return *this;
+}
+
+MultiKernelLoaderSpec &
+MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName,
+ const void *Bytes) {
+ assert((TheCUDAFatbinInMemorySpec == nullptr) &&
+ "illegal loader spec overwrite");
+ setKernelName(KernelName);
+ TheCUDAFatbinInMemorySpec =
+ llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes);
+ return *this;
+}
+
+MultiKernelLoaderSpec &
+MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName,
+ const char *OpenCLText) {
+ assert((TheOpenCLTextInMemorySpec == nullptr) &&
+ "illegal loader spec overwrite");
+ setKernelName(KernelName);
+ TheOpenCLTextInMemorySpec =
+ llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText);
+ return *this;
+}
+
+} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt b/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt
new file mode 100644
index 00000000000..55717aa33c7
--- /dev/null
+++ b/parallel-libs/streamexecutor/lib/unittests/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_executable(
+ kernel_spec_test
+ KernelSpecTest.cpp)
+target_link_libraries(
+ kernel_spec_test
+ streamexecutor
+ ${GTEST_BOTH_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+add_test(KernelSpecTest kernel_spec_test)
diff --git a/parallel-libs/streamexecutor/lib/unittests/KernelSpecTest.cpp b/parallel-libs/streamexecutor/lib/unittests/KernelSpecTest.cpp
new file mode 100644
index 00000000000..fc9eb549968
--- /dev/null
+++ b/parallel-libs/streamexecutor/lib/unittests/KernelSpecTest.cpp
@@ -0,0 +1,132 @@
+//===-- KernelSpecTest.cpp - Tests for KernelSpec -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the unit tests for the code in KernelSpec.
+///
+//===----------------------------------------------------------------------===//
+
+#include "streamexecutor/KernelSpec.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+TEST(CUDAPTXInMemorySpec, NoCode) {
+ se::CUDAPTXInMemorySpec Spec("KernelName", {});
+ EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(nullptr, Spec.getCode(1, 0));
+}
+
+TEST(CUDAPTXInMemorySpec, SingleComputeCapability) {
+ const char *PTXCodeString = "Dummy PTX code";
+ se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
+ EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
+ EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+}
+
+TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
+ const char *PTXCodeString10 = "Dummy PTX code 10";
+ const char *PTXCodeString30 = "Dummy PTX code 30";
+ se::CUDAPTXInMemorySpec Spec(
+ "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
+ EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
+ EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
+ EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+}
+
+TEST(CUDAFatbinInMemorySpec, BasicUsage) {
+ const char *FatbinBytes = "Dummy fatbin bytes";
+ se::CUDAFatbinInMemorySpec Spec("KernelName", FatbinBytes);
+ EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(FatbinBytes, Spec.getBytes());
+}
+
+TEST(OpenCLTextInMemorySpec, BasicUsage) {
+ const char *OpenCLText = "Dummy OpenCL text";
+ se::OpenCLTextInMemorySpec Spec("KernelName", OpenCLText);
+ EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(OpenCLText, Spec.getText());
+}
+
+TEST(MultiKernelLoaderSpec, NoCode) {
+ se::MultiKernelLoaderSpec MultiSpec;
+ EXPECT_FALSE(MultiSpec.hasCUDAPTXInMemory());
+ EXPECT_FALSE(MultiSpec.hasCUDAFatbinInMemory());
+ EXPECT_FALSE(MultiSpec.hasOpenCLTextInMemory());
+
+ EXPECT_DEBUG_DEATH(MultiSpec.getCUDAPTXInMemory(),
+ "getting spec that is not present");
+ EXPECT_DEBUG_DEATH(MultiSpec.getCUDAFatbinInMemory(),
+ "getting spec that is not present");
+ EXPECT_DEBUG_DEATH(MultiSpec.getOpenCLTextInMemory(),
+ "getting spec that is not present");
+}
+
+TEST(MultiKernelLoaderSpec, Registration) {
+ se::MultiKernelLoaderSpec MultiSpec;
+ const char *KernelName = "KernelName";
+ const char *PTXCodeString = "Dummy PTX code";
+ const char *FatbinBytes = "Dummy fatbin bytes";
+ const char *OpenCLText = "Dummy OpenCL text";
+
+ MultiSpec.addCUDAPTXInMemory(KernelName, {{{1, 0}, PTXCodeString}})
+ .addCUDAFatbinInMemory(KernelName, FatbinBytes)
+ .addOpenCLTextInMemory(KernelName, OpenCLText);
+
+ EXPECT_TRUE(MultiSpec.hasCUDAPTXInMemory());
+ EXPECT_TRUE(MultiSpec.hasCUDAFatbinInMemory());
+ EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
+
+ EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+ EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
+ EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+
+ EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
+ EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());
+
+ EXPECT_EQ(KernelName, MultiSpec.getOpenCLTextInMemory().getKernelName());
+ EXPECT_EQ(OpenCLText, MultiSpec.getOpenCLTextInMemory().getText());
+}
+
+TEST(MultiKernelLoaderSpec, RegisterTwice) {
+ se::MultiKernelLoaderSpec MultiSpec;
+ const char *KernelName = "KernelName";
+ const char *FatbinBytes = "Dummy fatbin bytes";
+
+ MultiSpec.addCUDAFatbinInMemory(KernelName, FatbinBytes);
+
+ EXPECT_DEBUG_DEATH(MultiSpec.addCUDAFatbinInMemory(KernelName, FatbinBytes),
+ "illegal loader spec overwrite");
+}
+
+TEST(MultiKernelLoaderSpec, ConflictingKernelNames) {
+ se::MultiKernelLoaderSpec MultiSpec;
+ const char *KernelNameA = "KernelName";
+ std::string KernelNameB = KernelNameA;
+ const char *PTXCodeString = "Dummy PTX code";
+ const char *FatbinBytes = "Dummy fatbin bytes";
+
+ // Check that names don't conflict if they are equivalent strings in different
+ // locations.
+ MultiSpec.addCUDAPTXInMemory(KernelNameA, {{{1, 0}, PTXCodeString}})
+ .addCUDAFatbinInMemory(KernelNameB, FatbinBytes);
+
+ const char *OtherKernelName = "OtherKernelName";
+ const char *OpenCLText = "Dummy OpenCL text";
+ EXPECT_DEBUG_DEATH(
+ MultiSpec.addOpenCLTextInMemory(OtherKernelName, OpenCLText),
+ "different kernel names in one MultiKernelLoaderSpec");
+}
+
+} // namespace