diff options
Diffstat (limited to 'platform')
169 files changed, 6727 insertions, 884 deletions
diff --git a/platform/linux-dpdk/Makefile.am b/platform/linux-dpdk/Makefile.am index 886fe8f48..0bfe35092 100644 --- a/platform/linux-dpdk/Makefile.am +++ b/platform/linux-dpdk/Makefile.am @@ -13,6 +13,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/platform/$(with_platform)/arch/@ARCH_DIR@ AM_CPPFLAGS += -I$(top_srcdir)/platform/$(with_platform)/arch/default AM_CPPFLAGS += $(OPENSSL_CPPFLAGS) +AM_CPPFLAGS += $(ORT_CPPFLAGS) AM_CFLAGS += $(DPDK_CFLAGS) AM_CFLAGS += $(LIBCONFIG_CFLAGS) @@ -88,6 +89,7 @@ odpapiabiarchinclude_HEADERS += \ include-abi/odp/api/abi/init.h \ include-abi/odp/api/abi/ipsec.h \ include-abi/odp/api/abi/ipsec_types.h \ + include-abi/odp/api/abi/ml_types.h \ include-abi/odp/api/abi/packet.h \ include-abi/odp/api/abi/packet_types.h \ include-abi/odp/api/abi/packet_flags.h \ @@ -136,6 +138,7 @@ noinst_HEADERS = \ ${top_srcdir}/platform/linux-generic/include/odp_classification_internal.h \ include/odp_eventdev_internal.h \ ${top_srcdir}/platform/linux-generic/include/odp_forward_typedefs_internal.h \ + ${top_srcdir}/platform/linux-generic/include/odp_ml_fp16.h \ ${top_srcdir}/platform/linux-generic/include/odp_global_data.h \ ${top_srcdir}/platform/linux-generic/include/odp_init_internal.h \ ${top_srcdir}/platform/linux-generic/include/odp_ipsec_internal.h \ @@ -217,6 +220,8 @@ __LIB__libodp_dpdk_la_SOURCES = \ ../linux-generic/odp_ipsec_sad.c \ ../linux-generic/odp_name_table.c \ ../linux-generic/odp_libconfig.c \ + ../linux-generic/odp_ml_fp16.c \ + ../linux-generic/odp_ml_quantize.c \ odp_packet.c \ odp_packet_dpdk.c \ ../linux-generic/odp_packet_vector.c \ @@ -255,6 +260,14 @@ __LIB__libodp_dpdk_la_SOURCES = \ ../linux-generic/odp_version.c \ ../linux-generic/odp_weak.c +if WITH_ML +__LIB__libodp_dpdk_la_SOURCES += \ + ../linux-generic/odp_ml.c +else +__LIB__libodp_dpdk_la_SOURCES += \ + ../linux-generic/odp_ml_null.c +endif + if ODP_ABI_COMPAT __LIB__libodp_dpdk_la_SOURCES += \ ../linux-generic/odp_atomic_api.c \ @@ -301,15 +314,11 @@ 
odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \ arch/default/odp/api/abi/wait_until_generic.h \ arch/default/odp/api/abi/wait_until.h endif -noinst_HEADERS += arch/arm/odp_atomic.h \ - arch/arm/odp_cpu.h \ - arch/arm/odp_cpu_idling.h \ - arch/arm/odp_llsc.h \ +noinst_HEADERS += arch/arm/odp_cpu.h \ arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h \ - arch/default/odp_random.h - + arch/default/odp_random.h \ + arch/default/odp_wait_until.h endif if ARCH_IS_AARCH64 __LIB__libodp_dpdk_la_SOURCES += arch/aarch64/odp_atomic.c \ @@ -334,9 +343,8 @@ endif noinst_HEADERS += arch/aarch64/odp_atomic.h \ arch/aarch64/odp_cpu.h \ arch/aarch64/cpu_flags.h \ - arch/aarch64/odp_cpu_idling.h \ - arch/aarch64/odp_llsc.h \ - arch/aarch64/odp_random.h + arch/aarch64/odp_random.h \ + arch/aarch64/odp_wait_until.h endif if ARCH_IS_DEFAULT __LIB__libodp_dpdk_la_SOURCES += arch/default/odp_atomic.c \ @@ -357,8 +365,8 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \ endif noinst_HEADERS += arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h \ - arch/default/odp_random.h + arch/default/odp_random.h \ + arch/default/odp_wait_until.h endif if ARCH_IS_POWERPC __LIB__libodp_dpdk_la_SOURCES += arch/default/odp_atomic.c \ @@ -379,8 +387,8 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \ endif noinst_HEADERS += arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h \ - arch/default/odp_random.h + arch/default/odp_random.h \ + arch/default/odp_wait_until.h endif if ARCH_IS_X86 __LIB__libodp_dpdk_la_SOURCES += arch/default/odp_atomic.c \ @@ -406,7 +414,7 @@ noinst_HEADERS += arch/x86/cpu_flags.h \ arch/x86/odp_random.h \ arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h + arch/default/odp_wait_until.h endif __LIB__libodp_dpdk_la_LIBADD = $(ATOMIC_LIBS) @@ -415,6 +423,7 @@ 
__LIB__libodp_dpdk_la_LIBADD += $(LIBCONFIG_LIBS) __LIB__libodp_dpdk_la_LIBADD += $(DPDK_LIBS_LIBODP) __LIB__libodp_dpdk_la_LIBADD += $(PTHREAD_LIBS) __LIB__libodp_dpdk_la_LIBADD += $(TIMER_LIBS) +__LIB__libodp_dpdk_la_LIBADD += $(ORT_LIBS) CHECK_GLOBALS_REGEX = " (odp_|_odp_|_deprecated_odp_|miniz_|mz_|tdefl_|tinfl_|mp_hdlr_init_odp_pool_ops)" diff --git a/platform/linux-dpdk/README b/platform/linux-dpdk/README index 421d3f958..c0298ab34 100644 --- a/platform/linux-dpdk/README +++ b/platform/linux-dpdk/README @@ -36,7 +36,8 @@ cmds below for Ubuntu, where it has been compiled and tested. On Ubuntu install pcap development library: sudo apt-get install libpcap-dev -Right now ODP-DPDK supports DPDK v21.11 and v22.11 (recommended version). +Right now ODP-DPDK supports DPDK v21.11, v22.11 (recommended version), and +v23.11. Compile DPDK ------------ diff --git a/platform/linux-dpdk/arch/aarch64/odp_cpu_idling.h b/platform/linux-dpdk/arch/aarch64/odp_cpu_idling.h deleted file mode 120000 index c8230bb63..000000000 --- a/platform/linux-dpdk/arch/aarch64/odp_cpu_idling.h +++ /dev/null @@ -1 +0,0 @@ -../../../linux-generic/arch/aarch64/odp_cpu_idling.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/aarch64/odp_llsc.h b/platform/linux-dpdk/arch/aarch64/odp_llsc.h deleted file mode 120000 index eb8d1200b..000000000 --- a/platform/linux-dpdk/arch/aarch64/odp_llsc.h +++ /dev/null @@ -1 +0,0 @@ -../../../linux-generic/arch/aarch64/odp_llsc.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/aarch64/odp_wait_until.h b/platform/linux-dpdk/arch/aarch64/odp_wait_until.h new file mode 120000 index 000000000..f7d35f0ca --- /dev/null +++ b/platform/linux-dpdk/arch/aarch64/odp_wait_until.h @@ -0,0 +1 @@ +../../../linux-generic/arch/aarch64/odp_wait_until.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/arm/odp_atomic.h b/platform/linux-dpdk/arch/arm/odp_atomic.h deleted file mode 120000 index 61a8c536b..000000000 --- a/platform/linux-dpdk/arch/arm/odp_atomic.h +++ /dev/null @@ -1 +0,0 @@ -../../../linux-generic/arch/arm/odp_atomic.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/arm/odp_cpu_idling.h b/platform/linux-dpdk/arch/arm/odp_cpu_idling.h deleted file mode 120000 index 56afe9027..000000000 --- a/platform/linux-dpdk/arch/arm/odp_cpu_idling.h +++ /dev/null @@ -1 +0,0 @@ -../../../linux-generic/arch/arm/odp_cpu_idling.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/arm/odp_llsc.h b/platform/linux-dpdk/arch/arm/odp_llsc.h deleted file mode 120000 index b3f3f371f..000000000 --- a/platform/linux-dpdk/arch/arm/odp_llsc.h +++ /dev/null @@ -1 +0,0 @@ -../../../linux-generic/arch/arm/odp_llsc.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/default/odp_cpu_idling.h b/platform/linux-dpdk/arch/default/odp_cpu_idling.h deleted file mode 120000 index eb2d21c4b..000000000 --- a/platform/linux-dpdk/arch/default/odp_cpu_idling.h +++ /dev/null @@ -1 +0,0 @@ -../../../linux-generic/arch/default/odp_cpu_idling.h
\ No newline at end of file diff --git a/platform/linux-dpdk/arch/default/odp_wait_until.h b/platform/linux-dpdk/arch/default/odp_wait_until.h new file mode 120000 index 000000000..d2e7b5316 --- /dev/null +++ b/platform/linux-dpdk/arch/default/odp_wait_until.h @@ -0,0 +1 @@ +../../../linux-generic/arch/default/odp_wait_until.h
\ No newline at end of file diff --git a/platform/linux-dpdk/example/Makefile.am b/platform/linux-dpdk/example/Makefile.am new file mode 100644 index 000000000..84f337387 --- /dev/null +++ b/platform/linux-dpdk/example/Makefile.am @@ -0,0 +1,5 @@ +SUBDIRS = + +if WITH_ML +SUBDIRS += ml +endif diff --git a/platform/linux-dpdk/example/ml/.gitignore b/platform/linux-dpdk/example/ml/.gitignore new file mode 100644 index 000000000..d845f6bb5 --- /dev/null +++ b/platform/linux-dpdk/example/ml/.gitignore @@ -0,0 +1,5 @@ +model_explorer +simple_linear +mnist +*.log +*.trs diff --git a/platform/linux-dpdk/example/ml/Makefile.am b/platform/linux-dpdk/example/ml/Makefile.am new file mode 100644 index 000000000..7abbc3828 --- /dev/null +++ b/platform/linux-dpdk/example/ml/Makefile.am @@ -0,0 +1,54 @@ +include $(top_srcdir)/example/Makefile.inc + +LDADD += -lm + +bin_PROGRAMS = model_explorer simple_linear mnist + +simple_linear_SOURCES = \ + ../../../linux-generic/example/ml/simple_linear.c \ + ../../../linux-generic/example/ml/model_read.c \ + ../../../linux-generic/example/ml/model_read.h +model_explorer_SOURCES = \ + ../../../linux-generic/example/ml/model_explorer.c \ + ../../../linux-generic/example/ml/model_read.c \ + ../../../linux-generic/example/ml/model_read.h +mnist_SOURCES = \ + ../../../linux-generic/example/ml/mnist.c \ + ../../../linux-generic/example/ml/model_read.c \ + ../../../linux-generic/example/ml/model_read.h + +EXTRA_DIST = \ + odp_ml_run_mnist.sh \ + example_digit.csv \ + mnist-12.onnx \ + odp_ml_run_model_explorer.sh \ + odp_ml_run_simple_linear.sh \ + simple_linear.onnx + +if test_example +TESTS = \ + odp_ml_run_mnist.sh \ + odp_ml_run_model_explorer.sh \ + odp_ml_run_simple_linear.sh +endif + +# If building out-of-tree, make check will not copy the scripts and data to the +# $(builddir) assuming that all commands are run locally. However this prevents +# running tests on a remote target using LOG_COMPILER. 
+# So copy all script and data files explicitly here. +all-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + if [ -e $(srcdir)/$$f ]; then \ + mkdir -p $(builddir)/$$(dirname $$f); \ + cp -f $(srcdir)/$$f $(builddir)/$$f; \ + fi \ + done \ + fi + +clean-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + rm -f $(builddir)/$$f; \ + done \ + fi diff --git a/platform/linux-dpdk/example/ml/README.md b/platform/linux-dpdk/example/ml/README.md new file mode 120000 index 000000000..ddeec649f --- /dev/null +++ b/platform/linux-dpdk/example/ml/README.md @@ -0,0 +1 @@ +../../../linux-generic/example/ml/README.md
\ No newline at end of file diff --git a/platform/linux-dpdk/example/ml/example_digit.csv b/platform/linux-dpdk/example/ml/example_digit.csv new file mode 120000 index 000000000..5e5514aaf --- /dev/null +++ b/platform/linux-dpdk/example/ml/example_digit.csv @@ -0,0 +1 @@ +../../../linux-generic/example/ml/example_digit.csv
\ No newline at end of file diff --git a/platform/linux-dpdk/example/ml/mnist-12.onnx b/platform/linux-dpdk/example/ml/mnist-12.onnx new file mode 120000 index 000000000..94d4515b8 --- /dev/null +++ b/platform/linux-dpdk/example/ml/mnist-12.onnx @@ -0,0 +1 @@ +../../../linux-generic/example/ml/mnist-12.onnx
\ No newline at end of file diff --git a/platform/linux-dpdk/example/ml/odp_ml_run_mnist.sh b/platform/linux-dpdk/example/ml/odp_ml_run_mnist.sh new file mode 120000 index 000000000..7d9c6f84c --- /dev/null +++ b/platform/linux-dpdk/example/ml/odp_ml_run_mnist.sh @@ -0,0 +1 @@ +../../../linux-generic/example/ml/odp_ml_run_mnist.sh
\ No newline at end of file diff --git a/platform/linux-dpdk/example/ml/odp_ml_run_model_explorer.sh b/platform/linux-dpdk/example/ml/odp_ml_run_model_explorer.sh new file mode 120000 index 000000000..f28535b64 --- /dev/null +++ b/platform/linux-dpdk/example/ml/odp_ml_run_model_explorer.sh @@ -0,0 +1 @@ +../../../linux-generic/example/ml/odp_ml_run_model_explorer.sh
\ No newline at end of file diff --git a/platform/linux-dpdk/example/ml/odp_ml_run_simple_linear.sh b/platform/linux-dpdk/example/ml/odp_ml_run_simple_linear.sh new file mode 120000 index 000000000..2691d9282 --- /dev/null +++ b/platform/linux-dpdk/example/ml/odp_ml_run_simple_linear.sh @@ -0,0 +1 @@ +../../../linux-generic/example/ml/odp_ml_run_simple_linear.sh
\ No newline at end of file diff --git a/platform/linux-dpdk/example/ml/simple_linear.onnx b/platform/linux-dpdk/example/ml/simple_linear.onnx new file mode 120000 index 000000000..5893a9176 --- /dev/null +++ b/platform/linux-dpdk/example/ml/simple_linear.onnx @@ -0,0 +1 @@ +../../../linux-generic/example/ml/simple_linear.onnx
\ No newline at end of file diff --git a/platform/linux-dpdk/include-abi/odp/api/abi/dma_types.h b/platform/linux-dpdk/include-abi/odp/api/abi/dma_types.h index 3c009f4c4..318c2c385 100644 --- a/platform/linux-dpdk/include-abi/odp/api/abi/dma_types.h +++ b/platform/linux-dpdk/include-abi/odp/api/abi/dma_types.h @@ -11,7 +11,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_dma +/** @addtogroup odp_dma * @{ */ diff --git a/platform/linux-dpdk/include-abi/odp/api/abi/ml_types.h b/platform/linux-dpdk/include-abi/odp/api/abi/ml_types.h new file mode 120000 index 000000000..18b483da1 --- /dev/null +++ b/platform/linux-dpdk/include-abi/odp/api/abi/ml_types.h @@ -0,0 +1 @@ +../../../../../linux-generic/include-abi/odp/api/abi/ml_types.h
\ No newline at end of file diff --git a/platform/linux-dpdk/include-abi/odp/api/abi/packet_types.h b/platform/linux-dpdk/include-abi/odp/api/abi/packet_types.h index 1f5f9e6f7..9ca66db54 100644 --- a/platform/linux-dpdk/include-abi/odp/api/abi/packet_types.h +++ b/platform/linux-dpdk/include-abi/odp/api/abi/packet_types.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_packet +/** @addtogroup odp_packet * @{ */ diff --git a/platform/linux-dpdk/include/odp/api/plat/event_inline_types.h b/platform/linux-dpdk/include/odp/api/plat/event_inline_types.h index ee5490ff1..94a95a889 100644 --- a/platform/linux-dpdk/include/odp/api/plat/event_inline_types.h +++ b/platform/linux-dpdk/include/odp/api/plat/event_inline_types.h @@ -28,6 +28,7 @@ extern "C" { typedef struct _odp_event_inline_offset_t { uint16_t event_type; uint16_t base_data; + uint16_t subtype; uint16_t flow_id; uint16_t pool; uint16_t buf_len; diff --git a/platform/linux-dpdk/include/odp/api/plat/packet_inline_types.h b/platform/linux-dpdk/include/odp/api/plat/packet_inline_types.h index 5bbcadd32..255db9d78 100644 --- a/platform/linux-dpdk/include/odp/api/plat/packet_inline_types.h +++ b/platform/linux-dpdk/include/odp/api/plat/packet_inline_types.h @@ -41,7 +41,6 @@ typedef struct _odp_packet_inline_offset_t { uint16_t timestamp; uint16_t input_flags; uint16_t flags; - uint16_t subtype; uint16_t cls_mark; uint16_t ipsec_ctx; uint16_t crypto_op; diff --git a/platform/linux-dpdk/include/odp/api/plat/packet_inlines.h b/platform/linux-dpdk/include/odp/api/plat/packet_inlines.h index 7b2764a05..b41a272ef 100644 --- a/platform/linux-dpdk/include/odp/api/plat/packet_inlines.h +++ b/platform/linux-dpdk/include/odp/api/plat/packet_inlines.h @@ -25,6 +25,7 @@ extern "C" { #include <odp/api/time_types.h> #include <odp/api/plat/debug_inlines.h> +#include <odp/api/plat/event_inline_types.h> #include <odp/api/plat/event_validation_external.h> #include 
<odp/api/plat/packet_io_inlines.h> #include <odp/api/plat/packet_inline_types.h> @@ -647,7 +648,8 @@ _ODP_INLINE void odp_packet_to_event_multi(const odp_packet_t pkt[], _ODP_INLINE odp_event_subtype_t odp_packet_subtype(odp_packet_t pkt) { - return (odp_event_subtype_t)_odp_pkt_get(pkt, int8_t, subtype); + return (odp_event_subtype_t)_odp_event_hdr_field((odp_event_t)(uintptr_t)pkt, + int8_t, subtype); } _ODP_INLINE odp_packet_tx_compl_t odp_packet_tx_compl_from_event(odp_event_t ev) diff --git a/platform/linux-dpdk/include/odp_buffer_internal.h b/platform/linux-dpdk/include/odp_buffer_internal.h index dc65fd17d..cb7f50073 100644 --- a/platform/linux-dpdk/include/odp_buffer_internal.h +++ b/platform/linux-dpdk/include/odp_buffer_internal.h @@ -73,6 +73,13 @@ static inline odp_buffer_hdr_t *_odp_buf_hdr(odp_buffer_t buf) return (odp_buffer_hdr_t *)(uintptr_t)buf; } +static inline void _odp_buffer_subtype_set(odp_buffer_t buf, int subtype) +{ + odp_buffer_hdr_t *buf_hdr = _odp_buf_hdr(buf); + + buf_hdr->event_hdr.subtype = subtype; +} + #ifdef __cplusplus } #endif diff --git a/platform/linux-dpdk/include/odp_config_internal.h b/platform/linux-dpdk/include/odp_config_internal.h index bc69610ca..c423ec14b 100644 --- a/platform/linux-dpdk/include/odp_config_internal.h +++ b/platform/linux-dpdk/include/odp_config_internal.h @@ -177,6 +177,15 @@ extern "C" { */ #define CONFIG_IPSEC_MAX_NUM_SA 4000 +/* Maximum number of ML models that can be created or loaded. */ +#define CONFIG_ML_MAX_MODELS 4 + +/* Maximum number of inputs for a ML model. */ +#define CONFIG_ML_MAX_INPUTS 4 + +/* Maximum number of outputs for a ML model. 
*/ +#define CONFIG_ML_MAX_OUTPUTS 4 + #ifdef __cplusplus } #endif diff --git a/platform/linux-dpdk/include/odp_packet_internal.h b/platform/linux-dpdk/include/odp_packet_internal.h index c86f0646a..cae77245a 100644 --- a/platform/linux-dpdk/include/odp_packet_internal.h +++ b/platform/linux-dpdk/include/odp_packet_internal.h @@ -218,9 +218,9 @@ static inline struct rte_mbuf *pkt_to_mbuf(odp_packet_t pkt) return (struct rte_mbuf *)(uintptr_t)pkt; } -static inline void packet_subtype_set(odp_packet_t pkt, int ev) +static inline void packet_subtype_set(odp_packet_t pkt, int subtype) { - packet_hdr(pkt)->event_hdr.subtype = ev; + packet_hdr(pkt)->event_hdr.subtype = subtype; } /** @@ -236,7 +236,9 @@ static inline void packet_init(odp_packet_hdr_t *pkt_hdr, odp_pktio_t input) pkt_hdr->p.l3_offset = ODP_PACKET_OFFSET_INVALID; pkt_hdr->p.l4_offset = ODP_PACKET_OFFSET_INVALID; - pkt_hdr->event_hdr.subtype = ODP_EVENT_PACKET_BASIC; + if (odp_unlikely(pkt_hdr->event_hdr.subtype != ODP_EVENT_PACKET_BASIC)) + pkt_hdr->event_hdr.subtype = ODP_EVENT_PACKET_BASIC; + pkt_hdr->input = input; } diff --git a/platform/linux-dpdk/include/odp_timer_internal.h b/platform/linux-dpdk/include/odp_timer_internal.h index 3cc8ca469..35a4911af 100644 --- a/platform/linux-dpdk/include/odp_timer_internal.h +++ b/platform/linux-dpdk/include/odp_timer_internal.h @@ -24,6 +24,14 @@ #include <rte_config.h> +#include <stdint.h> + +/* + * Use as the argument to timer_run() to force a scan and to ignore rate + * limit. + */ +#define TIMER_SCAN_FORCE INT32_MAX + /** * Internal Timeout header */ @@ -56,10 +64,13 @@ ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) <= 3 * RTE_CACHE_LINE_SIZE, void _odp_timer_run_inline(int dec); /* Static inline wrapper to minimize modification of schedulers. 
*/ -static inline void timer_run(int dec) +static inline uint64_t timer_run(int dec) { if (odp_global_rw->inline_timers) _odp_timer_run_inline(dec); + + /* Time to the next timeout not available with DPDK timers */ + return UINT64_MAX; } #endif diff --git a/platform/linux-dpdk/libodp-dpdk.pc.in b/platform/linux-dpdk/libodp-dpdk.pc.in index c3ee4f7b9..8fcc4ac0a 100644 --- a/platform/linux-dpdk/libodp-dpdk.pc.in +++ b/platform/linux-dpdk/libodp-dpdk.pc.in @@ -8,5 +8,5 @@ Description: The ODP packet processing engine Version: @PKGCONFIG_VERSION@ Requires.private: libconfig Libs: -L${libdir} -l@ODP_LIB_NAME@ @DPDK_LIBS_NON_ABI_COMPAT@ @ATOMIC_LIBS_NON_ABI_COMPAT@ -Libs.private: @DPDK_LIBS_ABI_COMPAT@ @OPENSSL_STATIC_LIBS@ @PTHREAD_LIBS@ @TIMER_LIBS@ -lpthread @ATOMIC_LIBS_ABI_COMPAT@ +Libs.private: @DPDK_LIBS_ABI_COMPAT@ @OPENSSL_STATIC_LIBS@ @PTHREAD_LIBS@ @TIMER_LIBS@ -lpthread @ATOMIC_LIBS_ABI_COMPAT@ @ORT_LIBS@ Cflags: -I${includedir} @DPDK_CFLAGS@ diff --git a/platform/linux-dpdk/m4/configure.m4 b/platform/linux-dpdk/m4/configure.m4 index 0fcd4a5b3..535cfc5fa 100644 --- a/platform/linux-dpdk/m4/configure.m4 +++ b/platform/linux-dpdk/m4/configure.m4 @@ -11,6 +11,7 @@ m4_include([platform/linux-dpdk/m4/odp_openssl.m4]) m4_include([platform/linux-dpdk/m4/odp_pcapng.m4]) m4_include([platform/linux-dpdk/m4/odp_scheduler.m4]) m4_include([platform/linux-dpdk/m4/odp_wfe.m4]) +m4_include([platform/linux-dpdk/m4/odp_ml.m4]) ODP_EVENT_VALIDATION ODP_PTHREAD @@ -66,7 +67,7 @@ esac # Required for experimental rte_event_port_unlinks_in_progress() API DPDK_CFLAGS="${DPDK_CFLAGS} -DALLOW_EXPERIMENTAL_API" -AS_VAR_APPEND([PLAT_DEP_LIBS], ["${ATOMIC_LIBS} ${LIBCONFIG_LIBS} ${OPENSSL_LIBS} ${DPDK_LIBS_LT} ${LIBCLI_LIBS}"]) +AS_VAR_APPEND([PLAT_DEP_LIBS], ["${ATOMIC_LIBS} ${LIBCONFIG_LIBS} ${OPENSSL_LIBS} ${DPDK_LIBS_LT} ${LIBCLI_LIBS} ${ORT_LIBS}"]) # Add text to the end of configure with platform specific settings. # Make sure it's aligned same as other lines in configure.ac. 
@@ -77,6 +78,7 @@ AS_VAR_APPEND([PLAT_CFG_TEXT], [" pcap: ${have_pmd_pcap} pcapng: ${have_pcapng} wfe_locks: ${use_wfe_locks} + ml_support: ${ml_support} default_config_path: ${default_config_path}"]) ODP_CHECK_CFLAG([-Wno-error=cast-align]) @@ -94,6 +96,8 @@ AM_CONDITIONAL([PLATFORM_IS_LINUX_DPDK], AC_CONFIG_FILES([platform/linux-dpdk/Makefile platform/linux-dpdk/libodp-dpdk.pc platform/linux-dpdk/dumpconfig/Makefile + platform/linux-dpdk/example/Makefile + platform/linux-dpdk/example/ml/Makefile platform/linux-dpdk/test/Makefile platform/linux-dpdk/test/example/Makefile platform/linux-dpdk/test/example/classifier/Makefile @@ -108,5 +112,6 @@ AC_CONFIG_FILES([platform/linux-dpdk/Makefile platform/linux-dpdk/test/example/switch/Makefile platform/linux-dpdk/test/performance/Makefile platform/linux-dpdk/test/performance/dmafwd/Makefile + platform/linux-dpdk/test/validation/api/ml/Makefile platform/linux-dpdk/test/validation/api/pktio/Makefile]) ]) diff --git a/platform/linux-dpdk/m4/odp_libconfig.m4 b/platform/linux-dpdk/m4/odp_libconfig.m4 index 7bcfb4505..2bf89ac2e 100644 --- a/platform/linux-dpdk/m4/odp_libconfig.m4 +++ b/platform/linux-dpdk/m4/odp_libconfig.m4 @@ -3,7 +3,7 @@ ########################################################################## m4_define([_odp_config_version_generation], [0]) m4_define([_odp_config_version_major], [1]) -m4_define([_odp_config_version_minor], [25]) +m4_define([_odp_config_version_minor], [26]) m4_define([_odp_config_version], [_odp_config_version_generation._odp_config_version_major._odp_config_version_minor]) diff --git a/platform/linux-dpdk/m4/odp_ml.m4 b/platform/linux-dpdk/m4/odp_ml.m4 new file mode 120000 index 000000000..6e76047e5 --- /dev/null +++ b/platform/linux-dpdk/m4/odp_ml.m4 @@ -0,0 +1 @@ +../../linux-generic/m4/odp_ml.m4
\ No newline at end of file diff --git a/platform/linux-dpdk/odp_crypto.c b/platform/linux-dpdk/odp_crypto.c index aeb9c11e3..6170dd8df 100644 --- a/platform/linux-dpdk/odp_crypto.c +++ b/platform/linux-dpdk/odp_crypto.c @@ -707,12 +707,11 @@ static int cipher_gen_capability(const struct rte_crypto_param_range *key_size, for (uint32_t key_len = key_size_min; key_len <= key_size_max; key_len += key_inc) { - for (uint32_t iv_size = iv_size_min; - iv_size <= iv_size_max; iv_size += iv_inc) { + for (uint32_t iv_len = iv_size_min; iv_len <= iv_size_max; iv_len += iv_inc) { odp_crypto_cipher_capability_t capa; capa.key_len = key_len; - capa.iv_len = iv_size; + capa.iv_len = iv_len; capa.bit_mode = false; idx = cipher_capa_insert(src, &capa, idx, num_copy); @@ -918,14 +917,14 @@ static int auth_gen_capability(const struct rte_crypto_param_range *key_size, for (uint16_t key_len = key_size_min; key_len <= key_size_max; key_len += key_inc) { - for (uint16_t iv_size = iv_size_min; - iv_size <= iv_size_max; - iv_size += iv_inc) { + for (uint16_t iv_len = iv_size_min; + iv_len <= iv_size_max; + iv_len += iv_inc) { odp_crypto_auth_capability_t capa; capa.digest_len = digest_len; capa.key_len = key_len; - capa.iv_len = iv_size; + capa.iv_len = iv_len; capa.bit_mode = false; capa.aad_len.min = aad_size->min; capa.aad_len.max = aad_size->max; diff --git a/platform/linux-dpdk/odp_event.c b/platform/linux-dpdk/odp_event.c index dff3e2ed2..0c2f3d22e 100644 --- a/platform/linux-dpdk/odp_event.c +++ b/platform/linux-dpdk/odp_event.c @@ -12,6 +12,7 @@ #include <odp/api/packet.h> #include <odp/api/timer.h> #include <odp/api/pool.h> +#include <odp/api/ml.h> #include <odp_buffer_internal.h> #include <odp_ipsec_internal.h> @@ -36,6 +37,7 @@ const _odp_event_inline_offset_t _odp_event_inline_offset ODP_ALIGNED_CACHE = { .event_type = offsetof(_odp_event_hdr_t, hdr.event_type), .base_data = offsetof(_odp_event_hdr_t, mb.buf_addr), + .subtype = offsetof(_odp_event_hdr_t, hdr.subtype), .flow_id 
= offsetof(_odp_event_hdr_t, hdr.flow_id), .pool = offsetof(_odp_event_hdr_t, hdr.pool), .buf_len = offsetof(_odp_event_hdr_t, mb.buf_len) @@ -69,6 +71,9 @@ static inline void event_free(odp_event_t event, _odp_ev_id_t id) case ODP_EVENT_DMA_COMPL: odp_dma_compl_free(odp_dma_compl_from_event(event)); break; + case ODP_EVENT_ML_COMPL: + odp_ml_compl_free(odp_ml_compl_from_event(event)); + break; default: _ODP_ABORT("Invalid event type: %d\n", odp_event_type(event)); } @@ -117,6 +122,8 @@ int odp_event_is_valid(odp_event_t event) /* Fall through */ case ODP_EVENT_DMA_COMPL: /* Fall through */ + case ODP_EVENT_ML_COMPL: + /* Fall through */ case ODP_EVENT_PACKET_TX_COMPL: break; default: diff --git a/platform/linux-dpdk/odp_init.c b/platform/linux-dpdk/odp_init.c index 4d6f395c5..79c449f1d 100644 --- a/platform/linux-dpdk/odp_init.c +++ b/platform/linux-dpdk/odp_init.c @@ -59,6 +59,7 @@ enum init_stage { IPSEC_SAD_INIT, IPSEC_INIT, DMA_INIT, + ML_INIT, ALL_INIT /* All init stages completed */ }; @@ -103,6 +104,7 @@ static void disable_features(odp_global_data_ro_t *global_ro, global_ro->disable.traffic_mngr = init_param->not_used.feat.tm; global_ro->disable.compress = init_param->not_used.feat.compress; + global_ro->disable.ml = init_param->not_used.feat.ml; } static int read_pci_config(char **pci_cmd) @@ -331,6 +333,13 @@ static int term_global(enum init_stage stage) switch (stage) { case ALL_INIT: + case ML_INIT: + if (_odp_ml_term_global()) { + _ODP_ERR("ODP ML term failed.\n"); + rc = -1; + } + /* Fall through */ + case DMA_INIT: if (_odp_dma_term_global()) { _ODP_ERR("ODP DMA term failed.\n"); @@ -689,6 +698,12 @@ int odp_init_global(odp_instance_t *instance, } stage = DMA_INIT; + if (_odp_ml_init_global()) { + _ODP_ERR("ODP ML init failed.\n"); + goto init_failed; + } + stage = ML_INIT; + /* Dummy support for single instance */ *instance = (odp_instance_t)odp_global_ro.main_pid; diff --git a/platform/linux-dpdk/odp_packet.c b/platform/linux-dpdk/odp_packet.c 
index a61c4c34a..bcd2c2fb3 100644 --- a/platform/linux-dpdk/odp_packet.c +++ b/platform/linux-dpdk/odp_packet.c @@ -64,7 +64,6 @@ const _odp_packet_inline_offset_t _odp_packet_inline ODP_ALIGNED_CACHE = { .timestamp = offsetof(odp_packet_hdr_t, timestamp), .input_flags = offsetof(odp_packet_hdr_t, p.input_flags), .flags = offsetof(odp_packet_hdr_t, p.flags), - .subtype = offsetof(odp_packet_hdr_t, event_hdr.subtype), .cls_mark = offsetof(odp_packet_hdr_t, cls_mark), .ipsec_ctx = offsetof(odp_packet_hdr_t, ipsec_ctx), .crypto_op = offsetof(odp_packet_hdr_t, crypto_op_result), diff --git a/platform/linux-dpdk/odp_pool.c b/platform/linux-dpdk/odp_pool.c index f7726f97b..dfd14a978 100644 --- a/platform/linux-dpdk/odp_pool.c +++ b/platform/linux-dpdk/odp_pool.c @@ -626,6 +626,7 @@ static void init_obj_priv_data(struct rte_mempool *mp ODP_UNUSED, void *arg, voi event_hdr->hdr.pool = _odp_pool_handle(pool); event_hdr->hdr.type = priv_data->type; event_hdr->hdr.event_type = priv_data->event_type; + event_hdr->hdr.subtype = ODP_EVENT_NO_SUBTYPE; switch (priv_data->type) { case ODP_POOL_BUFFER: @@ -852,6 +853,8 @@ static const char *get_short_type_str(odp_pool_type_t type) return "V"; case ODP_POOL_DMA_COMPL: return "D"; + case ODP_POOL_ML_COMPL: + return "M"; default: return "-"; } @@ -946,6 +949,11 @@ int odp_pool_info(odp_pool_t pool_hdl, odp_pool_info_t *info) info->dma_pool_param.uarea_size = pool->params.buf.uarea_size; info->dma_pool_param.cache_size = pool->params.buf.cache_size; + } else if (pool->type_2 == ODP_POOL_ML_COMPL) { + info->ml_pool_param.num = pool->params.buf.num; + info->ml_pool_param.uarea_size = pool->params.buf.uarea_size; + info->ml_pool_param.cache_size = pool->params.buf.cache_size; + } else { info->params = pool->params; } @@ -1091,6 +1099,7 @@ int odp_pool_ext_capability(odp_pool_type_t type, case ODP_POOL_TIMEOUT: case ODP_POOL_VECTOR: case ODP_POOL_DMA_COMPL: + case ODP_POOL_ML_COMPL: memset(capa, 0, sizeof(odp_pool_ext_capability_t)); return 
0; default: @@ -1361,6 +1370,7 @@ static void init_ext_obj(struct rte_mempool *mp, void *arg, void *mbuf, unsigned event_hdr->hdr.pool = _odp_pool_handle(pool); event_hdr->hdr.type = mb_ctor_arg->type; event_hdr->hdr.event_type = mb_ctor_arg->event_type; + event_hdr->hdr.subtype = ODP_EVENT_NO_SUBTYPE; switch (mb_ctor_arg->type) { case ODP_POOL_BUFFER: diff --git a/platform/linux-dpdk/odp_schedule_eventdev.c b/platform/linux-dpdk/odp_schedule_eventdev.c index d6f3ba2f7..4ef8a51b7 100644 --- a/platform/linux-dpdk/odp_schedule_eventdev.c +++ b/platform/linux-dpdk/odp_schedule_eventdev.c @@ -604,8 +604,8 @@ static inline uint16_t input_cached(odp_event_t out_ev[], unsigned int max_num, uint8_t first_queue = _odp_eventdev_local.cache.event[idx].queue_id; for (i = 0; i < max_num && _odp_eventdev_local.cache.count; i++) { - uint16_t idx = _odp_eventdev_local.cache.idx; - struct rte_event *event = &_odp_eventdev_local.cache.event[idx]; + uint16_t cache_idx = _odp_eventdev_local.cache.idx; + struct rte_event *event = &_odp_eventdev_local.cache.event[cache_idx]; if (odp_unlikely(event->queue_id != first_queue)) break; diff --git a/platform/linux-dpdk/odp_system_info.c b/platform/linux-dpdk/odp_system_info.c index 0ebb3e09f..d9cddcb40 100644 --- a/platform/linux-dpdk/odp_system_info.c +++ b/platform/linux-dpdk/odp_system_info.c @@ -27,7 +27,6 @@ #include <odp_packet_internal.h> #include <errno.h> -#include <pthread.h> #include <string.h> #include <stdio.h> #include <inttypes.h> @@ -312,8 +311,9 @@ int _odp_system_info_init(void) num_cpus); /* Read and save all CPU frequencies for static mode */ - for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) - odp_global_ro.system_info.cpu_hz[i] = cpu_hz_current(i); + if (odp_global_ro.system_info.cpu_hz_static) + for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) + odp_global_ro.system_info.cpu_hz[i] = cpu_hz_current(i); /* By default, read max frequency from a cpufreq file */ for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) { @@ -552,5 +552,8 @@ void 
odp_sys_config_print(void) _ODP_PRINT("CONFIG_POOLS: %i\n", CONFIG_POOLS); _ODP_PRINT("CONFIG_POOL_MAX_NUM: %i\n", CONFIG_POOL_MAX_NUM); _ODP_PRINT("CONFIG_IPSEC_MAX_NUM_SA: %i\n", CONFIG_IPSEC_MAX_NUM_SA); + _ODP_PRINT("CONFIG_ML_MAX_MODELS: %i\n", CONFIG_ML_MAX_MODELS); + _ODP_PRINT("CONFIG_ML_MAX_INPUTS: %i\n", CONFIG_ML_MAX_INPUTS); + _ODP_PRINT("CONFIG_ML_MAX_OUTPUTS: %i\n", CONFIG_ML_MAX_OUTPUTS); _ODP_PRINT("\n"); } diff --git a/platform/linux-dpdk/odp_timer.c b/platform/linux-dpdk/odp_timer.c index e01a541ad..f4c190aad 100644 --- a/platform/linux-dpdk/odp_timer.c +++ b/platform/linux-dpdk/odp_timer.c @@ -438,7 +438,7 @@ int _odp_timer_term_local(void) void _odp_timer_run_inline(int dec) { - int poll_interval = timer_global->poll_interval; + int poll_interval = (dec == TIMER_SCAN_FORCE) ? 0 : timer_global->poll_interval; odp_time_t now; int ret; diff --git a/platform/linux-dpdk/test/Makefile.am b/platform/linux-dpdk/test/Makefile.am index c210edbdc..2a33bfbcd 100644 --- a/platform/linux-dpdk/test/Makefile.am +++ b/platform/linux-dpdk/test/Makefile.am @@ -18,6 +18,12 @@ test_SCRIPTS = $(dist_check_SCRIPTS) SUBDIRS += validation/api/pktio \ example \ performance + +if WITH_ML +TESTS += validation/api/ml/ml_linux$(EXEEXT) +SUBDIRS += validation/api/ml +endif + else #performance tests refer to pktio_env if test_perf diff --git a/platform/linux-dpdk/test/crypto.conf b/platform/linux-dpdk/test/crypto.conf index 9b10b9a76..97fdea6f5 100644 --- a/platform/linux-dpdk/test/crypto.conf +++ b/platform/linux-dpdk/test/crypto.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-dpdk" -config_file_version = "0.1.25" +config_file_version = "0.1.26" system: { # One crypto queue pair is required per thread for lockless operation diff --git a/platform/linux-dpdk/test/default-timer.conf b/platform/linux-dpdk/test/default-timer.conf index 495972c5e..3219854de 100644 --- a/platform/linux-dpdk/test/default-timer.conf +++ b/platform/linux-dpdk/test/default-timer.conf 
@@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-dpdk" -config_file_version = "0.1.25" +config_file_version = "0.1.26" timer: { # Use DPDK default timer API based implementation diff --git a/platform/linux-dpdk/test/example/ipsec_api/Makefile.am b/platform/linux-dpdk/test/example/ipsec_api/Makefile.am index 101c97cdf..2535ad466 100644 --- a/platform/linux-dpdk/test/example/ipsec_api/Makefile.am +++ b/platform/linux-dpdk/test/example/ipsec_api/Makefile.am @@ -19,5 +19,3 @@ clean-local: rm -f $(builddir)/$$f; \ done \ fi - -.NOTPARALLEL: diff --git a/platform/linux-dpdk/test/example/ipsec_crypto/Makefile.am b/platform/linux-dpdk/test/example/ipsec_crypto/Makefile.am index 101c97cdf..2535ad466 100644 --- a/platform/linux-dpdk/test/example/ipsec_crypto/Makefile.am +++ b/platform/linux-dpdk/test/example/ipsec_crypto/Makefile.am @@ -19,5 +19,3 @@ clean-local: rm -f $(builddir)/$$f; \ done \ fi - -.NOTPARALLEL: diff --git a/platform/linux-dpdk/test/process-mode.conf b/platform/linux-dpdk/test/process-mode.conf index a40fd7aa0..827eb6074 100644 --- a/platform/linux-dpdk/test/process-mode.conf +++ b/platform/linux-dpdk/test/process-mode.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-dpdk" -config_file_version = "0.1.25" +config_file_version = "0.1.26" dpdk: { process_mode_memory_mb = 1024 diff --git a/platform/linux-dpdk/test/sched-basic.conf b/platform/linux-dpdk/test/sched-basic.conf index a11d35706..2c11cb419 100644 --- a/platform/linux-dpdk/test/sched-basic.conf +++ b/platform/linux-dpdk/test/sched-basic.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-dpdk" -config_file_version = "0.1.25" +config_file_version = "0.1.26" # Test scheduler with an odd spread value and without dynamic load balance sched_basic: { diff --git a/platform/linux-dpdk/test/stash-custom.conf b/platform/linux-dpdk/test/stash-custom.conf index bb9c37fda..62f314c4e 100644 --- a/platform/linux-dpdk/test/stash-custom.conf +++ 
b/platform/linux-dpdk/test/stash-custom.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-dpdk" -config_file_version = "0.1.25" +config_file_version = "0.1.26" # Test overflow safe stash variant stash: { diff --git a/platform/linux-dpdk/test/validation/api/ml/.gitignore b/platform/linux-dpdk/test/validation/api/ml/.gitignore new file mode 100644 index 000000000..e31f902c4 --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/.gitignore @@ -0,0 +1 @@ +ml_linux diff --git a/platform/linux-dpdk/test/validation/api/ml/Makefile.am b/platform/linux-dpdk/test/validation/api/ml/Makefile.am new file mode 100644 index 000000000..40910d5c6 --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/Makefile.am @@ -0,0 +1,29 @@ +include ../Makefile.inc + +test_PROGRAMS = ml_linux +ml_linux_SOURCES = ../../../../../linux-generic/test/validation/api/ml/ml_linux.c + +EXTRA_DIST = \ + batch_add.onnx \ + simple_linear.onnx + +# If building out-of-tree, make check will not copy the scripts and data to the +# $(builddir) assuming that all commands are run locally. However this prevents +# running tests on a remote target using LOG_COMPILER. +# So copy all script and data files explicitly here. +all-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + if [ -e $(srcdir)/$$f ]; then \ + mkdir -p $(builddir)/$$(dirname $$f); \ + cp -f $(srcdir)/$$f $(builddir)/$$f; \ + fi \ + done \ + fi + +clean-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + rm -f $(builddir)/$$f; \ + done \ + fi diff --git a/platform/linux-dpdk/test/validation/api/ml/README.md b/platform/linux-dpdk/test/validation/api/ml/README.md new file mode 120000 index 000000000..d121a477d --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/README.md @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/README.md
\ No newline at end of file diff --git a/platform/linux-dpdk/test/validation/api/ml/batch_add.onnx b/platform/linux-dpdk/test/validation/api/ml/batch_add.onnx new file mode 120000 index 000000000..b827c0e58 --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/batch_add.onnx @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/batch_add.onnx
\ No newline at end of file diff --git a/platform/linux-dpdk/test/validation/api/ml/batch_add_gen.py b/platform/linux-dpdk/test/validation/api/ml/batch_add_gen.py new file mode 120000 index 000000000..695b6d399 --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/batch_add_gen.py @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/batch_add_gen.py
\ No newline at end of file diff --git a/platform/linux-dpdk/test/validation/api/ml/gen_models.sh b/platform/linux-dpdk/test/validation/api/ml/gen_models.sh new file mode 120000 index 000000000..e9b25d58f --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/gen_models.sh @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/gen_models.sh
\ No newline at end of file diff --git a/platform/linux-dpdk/test/validation/api/ml/requirements.txt b/platform/linux-dpdk/test/validation/api/ml/requirements.txt new file mode 120000 index 000000000..b94d5d389 --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/requirements.txt @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/requirements.txt
\ No newline at end of file diff --git a/platform/linux-dpdk/test/validation/api/ml/simple_linear.onnx b/platform/linux-dpdk/test/validation/api/ml/simple_linear.onnx new file mode 120000 index 000000000..f471922d1 --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/simple_linear.onnx @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/simple_linear.onnx
\ No newline at end of file diff --git a/platform/linux-dpdk/test/validation/api/ml/simple_linear_gen.py b/platform/linux-dpdk/test/validation/api/ml/simple_linear_gen.py new file mode 120000 index 000000000..53fb4f6ed --- /dev/null +++ b/platform/linux-dpdk/test/validation/api/ml/simple_linear_gen.py @@ -0,0 +1 @@ +../../../../../linux-generic/test/validation/api/ml/simple_linear_gen.py
\ No newline at end of file diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am index f3707ab3a..11cdb4c64 100644 --- a/platform/linux-generic/Makefile.am +++ b/platform/linux-generic/Makefile.am @@ -13,6 +13,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/platform/$(with_platform)/arch/default AM_CPPFLAGS += -I$(top_srcdir)/platform/$(with_platform)/arch/common AM_CPPFLAGS += $(OPENSSL_CPPFLAGS) +AM_CPPFLAGS += $(ORT_CPPFLAGS) AM_CFLAGS += $(AARCH64CRYPTO_CFLAGS) AM_CFLAGS += $(DPDK_CFLAGS) @@ -90,6 +91,7 @@ odpapiabiarchinclude_HEADERS += \ include-abi/odp/api/abi/init.h \ include-abi/odp/api/abi/ipsec.h \ include-abi/odp/api/abi/ipsec_types.h \ + include-abi/odp/api/abi/ml_types.h \ include-abi/odp/api/abi/packet.h \ include-abi/odp/api/abi/packet_types.h \ include-abi/odp/api/abi/packet_flags.h \ @@ -140,6 +142,7 @@ noinst_HEADERS = \ include/odp_event_validation_internal.h \ include/odp_fdserver_internal.h \ include/odp_forward_typedefs_internal.h \ + include/odp_ml_fp16.h \ include/odp_global_data.h \ include/odp_init_internal.h \ include/odp_ipsec_internal.h \ @@ -228,6 +231,8 @@ __LIB__libodp_linux_la_SOURCES = \ odp_ishmphy.c \ odp_ishmpool.c \ odp_libconfig.c \ + odp_ml_fp16.c \ + odp_ml_quantize.c \ odp_name_table.c \ odp_packet.c \ odp_packet_vector.c \ @@ -297,6 +302,15 @@ __LIB__libodp_linux_la_SOURCES += \ endif endif endif + +if WITH_ML +__LIB__libodp_linux_la_SOURCES += \ + odp_ml.c +else +__LIB__libodp_linux_la_SOURCES += \ + odp_ml_null.c +endif + if ODP_ABI_COMPAT __LIB__libodp_linux_la_SOURCES += \ odp_atomic_api.c \ @@ -345,14 +359,11 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \ arch/default/odp/api/abi/wait_until_generic.h \ arch/default/odp/api/abi/wait_until.h endif -noinst_HEADERS += arch/arm/odp_atomic.h \ - arch/arm/odp_cpu.h \ - arch/arm/odp_cpu_idling.h \ - arch/arm/odp_llsc.h \ +noinst_HEADERS += arch/arm/odp_cpu.h \ arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - 
arch/default/odp_cpu_idling.h \ - arch/default/odp_random.h + arch/default/odp_random.h \ + arch/default/odp_wait_until.h endif if ARCH_IS_AARCH64 __LIB__libodp_linux_la_SOURCES += arch/aarch64/odp_atomic.c \ @@ -380,9 +391,8 @@ endif noinst_HEADERS += arch/aarch64/odp_atomic.h \ arch/aarch64/odp_cpu.h \ arch/aarch64/cpu_flags.h \ - arch/aarch64/odp_cpu_idling.h \ - arch/aarch64/odp_llsc.h \ - arch/aarch64/odp_random.h + arch/aarch64/odp_random.h \ + arch/aarch64/odp_wait_until.h endif if ARCH_IS_DEFAULT __LIB__libodp_linux_la_SOURCES += arch/default/odp_atomic.c \ @@ -405,8 +415,8 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \ endif noinst_HEADERS += arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h \ - arch/default/odp_random.h + arch/default/odp_random.h \ + arch/default/odp_wait_until.h endif if ARCH_IS_POWERPC __LIB__libodp_linux_la_SOURCES += arch/default/odp_atomic.c \ @@ -429,8 +439,8 @@ odpapiabiarchinclude_HEADERS += arch/default/odp/api/abi/atomic_generic.h \ endif noinst_HEADERS += arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h \ - arch/default/odp_random.h + arch/default/odp_random.h \ + arch/default/odp_wait_until.h endif if ARCH_IS_X86 __LIB__libodp_linux_la_SOURCES += arch/default/odp_atomic.c \ @@ -460,7 +470,7 @@ noinst_HEADERS += arch/x86/cpu_flags.h \ arch/x86/odp_random.h \ arch/default/odp_atomic.h \ arch/default/odp_cpu.h \ - arch/default/odp_cpu_idling.h + arch/default/odp_wait_until.h endif if ODP_PKTIO_PCAP @@ -476,6 +486,7 @@ __LIB__libodp_linux_la_LIBADD += $(PTHREAD_LIBS) __LIB__libodp_linux_la_LIBADD += $(TIMER_LIBS) __LIB__libodp_linux_la_LIBADD += $(LIBXDP_LIBS) __LIB__libodp_linux_la_LIBADD += $(IPSEC_MB_LIBS) +__LIB__libodp_linux_la_LIBADD += $(ORT_LIBS) if ODP_PKTIO_PCAP __LIB__libodp_linux_la_LIBADD += $(PCAP_LIBS) diff --git a/platform/linux-generic/arch/aarch64/odp_cpu.h b/platform/linux-generic/arch/aarch64/odp_cpu.h index 
84bc4dffd..ad8b36d87 100644 --- a/platform/linux-generic/arch/aarch64/odp_cpu.h +++ b/platform/linux-generic/arch/aarch64/odp_cpu.h @@ -14,6 +14,7 @@ #endif #include <odp_debug_internal.h> +#include <odp_types_internal.h> /* * Use LLD/SCD atomic primitives instead of lock-based code path in llqueue @@ -31,20 +32,6 @@ */ #define CONFIG_DMBSTR -/* - * Use ARM event signalling mechanism - * Event signalling minimises spinning (busy waiting) which decreases - * cache coherency traffic when spinning on shared locations (thus faster and - * more scalable) and enables the CPU to enter a sleep state (lower power - * consumption). - */ -#define CONFIG_WFE - -static inline void _odp_dmb(void) -{ - __asm__ volatile("dmb" : : : "memory"); -} - /* Only ARMv8 supports DMB ISHLD */ /* A load only barrier is much cheaper than full barrier */ #define _odp_release_barrier(ro) \ @@ -55,9 +42,156 @@ do { \ __asm__ volatile("dmb ish" ::: "memory"); \ } while (0) -#include "odp_llsc.h" +static inline uint16_t ll8(uint8_t *var, int mm) +{ + uint16_t old; + + _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED); + + if (mm == __ATOMIC_ACQUIRE) + __asm__ volatile("ldaxrb %w0, [%1]" + : "=&r" (old) + : "r" (var) + : "memory"); + else + __asm__ volatile("ldxrb %w0, [%1]" + : "=&r" (old) + : "r" (var) + : ); + return old; +} + +static inline uint32_t ll32(uint32_t *var, int mm) +{ + uint32_t old; + + _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED); + + if (mm == __ATOMIC_ACQUIRE) + __asm__ volatile("ldaxr %w0, [%1]" + : "=&r" (old) + : "r" (var) + : "memory"); + else + __asm__ volatile("ldxr %w0, [%1]" + : "=&r" (old) + : "r" (var) + : ); + return old; +} + +/* Return 0 on success, 1 on failure */ +static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm) +{ + uint32_t ret; + + _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED); + + if (mm == __ATOMIC_RELEASE) + __asm__ volatile("stlxr %w0, %w1, [%2]" + : "=&r" (ret) + : "r" (neu), "r" (var) + : 
"memory"); + else + __asm__ volatile("stxr %w0, %w1, [%2]" + : "=&r" (ret) + : "r" (neu), "r" (var) + : ); + return ret; +} + +static inline uint64_t ll64(uint64_t *var, int mm) +{ + uint64_t old; + + _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED); + + if (mm == __ATOMIC_ACQUIRE) + __asm__ volatile("ldaxr %0, [%1]" + : "=&r" (old) + : "r" (var) + : "memory"); + else + __asm__ volatile("ldxr %0, [%1]" + : "=&r" (old) + : "r" (var) + : ); + return old; +} + +/* Return 0 on success, 1 on failure */ +static inline uint32_t sc64(uint64_t *var, uint64_t neu, int mm) +{ + uint32_t ret; + + _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED); + + if (mm == __ATOMIC_RELEASE) + __asm__ volatile("stlxr %w0, %1, [%2]" + : "=&r" (ret) + : "r" (neu), "r" (var) + : "memory"); + else + __asm__ volatile("stxr %w0, %1, [%2]" + : "=&r" (ret) + : "r" (neu), "r" (var) + : ); + return ret; +} + +union i128 { + _odp_u128_t i128; + int64_t i64[2]; +}; + +static inline _odp_u128_t lld(_odp_u128_t *var, int mm) +{ + union i128 old; + + _ODP_ASSERT(mm == __ATOMIC_ACQUIRE || mm == __ATOMIC_RELAXED); + + if (mm == __ATOMIC_ACQUIRE) + __asm__ volatile("ldaxp %0, %1, [%2]" + : "=&r" (old.i64[0]), "=&r" (old.i64[1]) + : "r" (var) + : "memory"); + else + __asm__ volatile("ldxp %0, %1, [%2]" + : "=&r" (old.i64[0]), "=&r" (old.i64[1]) + : "r" (var) + : ); + return old.i128; +} + +/* Return 0 on success, 1 on failure */ +static inline uint32_t scd(_odp_u128_t *var, _odp_u128_t neu, int mm) +{ + uint32_t ret; + + _ODP_ASSERT(mm == __ATOMIC_RELEASE || mm == __ATOMIC_RELAXED); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" + if (mm == __ATOMIC_RELEASE) + __asm__ volatile("stlxp %w0, %1, %2, [%3]" + : "=&r" (ret) + : "r" (((*(union i128 *)&neu)).i64[0]), + "r" (((*(union i128 *)&neu)).i64[1]), + "r" (var) + : "memory"); + else + __asm__ volatile("stxp %w0, %1, %2, [%3]" + : "=&r" (ret) + : "r" (((*(union i128 *)&neu)).i64[0]), + "r" (((*(union i128 
*)&neu)).i64[1]), + "r" (var) + : ); +#pragma GCC diagnostic pop + return ret; +} + #include "odp_atomic.h" -#include "odp_cpu_idling.h" +#include "odp_wait_until.h" #ifdef __ARM_FEATURE_UNALIGNED #define _ODP_UNALIGNED 1 diff --git a/platform/linux-generic/arch/aarch64/odp_cpu_idling.h b/platform/linux-generic/arch/aarch64/odp_cpu_idling.h deleted file mode 100644 index a6cea8c63..000000000 --- a/platform/linux-generic/arch/aarch64/odp_cpu_idling.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2017, ARM Limited. All rights reserved. - * - * Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H -#define PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H -#error This file should not be included directly, please include odp_cpu.h -#endif - -#ifndef CONFIG_WFE - -#include "../default/odp_cpu_idling.h" - -#else /* CONFIG_WFE */ - -static inline void sevl(void) -{ - __asm__ volatile("sevl" : : : ); -} - -static inline int wfe(void) -{ - __asm__ volatile("wfe" : : : "memory"); - return 1; -} - -#define monitor128(addr, mo) lld((addr), (mo)) -#define monitor64(addr, mo) ll64((addr), (mo)) -#define monitor32(addr, mo) ll32((addr), (mo)) -#define monitor8(addr, mo) ll8((addr), (mo)) -#endif /* CONFIG_WFE */ - -#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */ diff --git a/platform/linux-generic/arch/aarch64/odp_llsc.h b/platform/linux-generic/arch/aarch64/odp_llsc.h deleted file mode 100644 index 498785bd4..000000000 --- a/platform/linux-generic/arch/aarch64/odp_llsc.h +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright (c) 2017, ARM Limited. All rights reserved. - * - * Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H -#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H -#error This file should not be included directly, please include odp_cpu.h -#endif - -#include <odp_types_internal.h> - -static inline uint16_t ll8(uint8_t *var, int mm) -{ - uint16_t old; - - if (mm == __ATOMIC_ACQUIRE) - __asm__ volatile("ldaxrb %w0, [%1]" - : "=&r" (old) - : "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("ldxrb %w0, [%1]" - : "=&r" (old) - : "r" (var) - : ); - else - _ODP_ABORT(); - return old; -} - -static inline uint32_t ll32(uint32_t *var, int mm) -{ - uint32_t old; - - if (mm == __ATOMIC_ACQUIRE) - __asm__ volatile("ldaxr %w0, [%1]" - : "=&r" (old) - : "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("ldxr %w0, [%1]" - : "=&r" (old) - : "r" (var) - : ); - else - _ODP_ABORT(); - return old; -} - -/* Return 0 on success, 1 on failure */ -static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm) -{ - uint32_t ret; - - if (mm == __ATOMIC_RELEASE) - __asm__ volatile("stlxr %w0, %w1, [%2]" - : "=&r" (ret) - : "r" (neu), "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("stxr %w0, %w1, [%2]" - : "=&r" (ret) - : "r" (neu), "r" (var) - : ); - else - _ODP_ABORT(); - return ret; -} - -static inline uint64_t ll(uint64_t *var, int mm) -{ - uint64_t old; - - if (mm == __ATOMIC_ACQUIRE) - __asm__ volatile("ldaxr %0, [%1]" - : "=&r" (old) - : "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("ldxr %0, [%1]" - : "=&r" (old) - : "r" (var) - : ); - else - _ODP_ABORT(); - return old; -} - -#define ll64(a, b) ll((a), (b)) - -/* Return 0 on success, 1 on failure */ -static inline uint32_t sc(uint64_t *var, uint64_t neu, int mm) -{ - uint32_t ret; - - if (mm == __ATOMIC_RELEASE) - __asm__ volatile("stlxr %w0, %1, [%2]" - : "=&r" (ret) - : "r" 
(neu), "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("stxr %w0, %1, [%2]" - : "=&r" (ret) - : "r" (neu), "r" (var) - : ); - else - _ODP_ABORT(); - return ret; -} - -#define sc64(a, b, c) sc((a), (b), (c)) - -union i128 { - _odp_u128_t i128; - int64_t i64[2]; -}; - -static inline _odp_u128_t lld(_odp_u128_t *var, int mm) -{ - union i128 old; - - if (mm == __ATOMIC_ACQUIRE) - __asm__ volatile("ldaxp %0, %1, [%2]" - : "=&r" (old.i64[0]), "=&r" (old.i64[1]) - : "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("ldxp %0, %1, [%2]" - : "=&r" (old.i64[0]), "=&r" (old.i64[1]) - : "r" (var) - : ); - else - _ODP_ABORT(); - return old.i128; -} - -/* Return 0 on success, 1 on failure */ -static inline uint32_t scd(_odp_u128_t *var, _odp_u128_t neu, int mm) -{ - uint32_t ret; - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpedantic" - if (mm == __ATOMIC_RELEASE) - __asm__ volatile("stlxp %w0, %1, %2, [%3]" - : "=&r" (ret) - : "r" (((*(union i128 *)&neu)).i64[0]), - "r" (((*(union i128 *)&neu)).i64[1]), - "r" (var) - : "memory"); - else if (mm == __ATOMIC_RELAXED) - __asm__ volatile("stxp %w0, %1, %2, [%3]" - : "=&r" (ret) - : "r" (((*(union i128 *)&neu)).i64[0]), - "r" (((*(union i128 *)&neu)).i64[1]), - "r" (var) - : ); - else - _ODP_ABORT(); -#pragma GCC diagnostic pop - return ret; -} - -#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H */ diff --git a/platform/linux-generic/arch/aarch64/odp_wait_until.h b/platform/linux-generic/arch/aarch64/odp_wait_until.h new file mode 100644 index 000000000..eca3f9ce5 --- /dev/null +++ b/platform/linux-generic/arch/aarch64/odp_wait_until.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2017 ARM Limited + * Copyright (c) 2017-2018 Linaro Limited + * Copyright (c) 2024 Nokia + */ + +#ifndef ODP_AARCH64_WAIT_UNTIL_H_ +#define ODP_AARCH64_WAIT_UNTIL_H_ + +#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H +#error This file should not be 
included directly, please include odp_cpu.h +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include <odp/api/cpu.h> + +#include <odp_cpu.h> + +#include <stdint.h> + +static inline void _odp_sevl(void) +{ + __asm__ volatile("sevl" : : : ); +} + +static inline int _odp_wfe(void) +{ + __asm__ volatile("wfe" : : : "memory"); + return 1; +} + +#define _odp_monitor_u8(addr, mo) ll8((addr), (mo)) +#define _odp_monitor_u32(addr, mo) ll32((addr), (mo)) +#define _odp_monitor_u64(addr, mo) ll64((addr), (mo)) +#define _odp_monitor_u128(addr, mo) lld((addr), (mo)) + +#if ATOM_BITSET_SIZE <= 32 +static inline bitset_t _odp_bitset_monitor(bitset_t *bs, int mo) +{ + return _odp_monitor_u32(bs, mo); +} +#elif ATOM_BITSET_SIZE <= 64 +static inline bitset_t _odp_bitset_monitor(bitset_t *bs, int mo) +{ + return _odp_monitor_u64(bs, mo); +} +#elif ATOM_BITSET_SIZE <= 128 +static inline bitset_t _odp_bitset_monitor(bitset_t *bs, int mo) +{ + return _odp_monitor_u128(bs, mo); +} +#else +#error Unsupported size of bit sets (ATOM_BITSET_SIZE) +#endif + +/** + * The _odp_wait_until_eq_*() functions defined in this header are intended to + * be used only with the scalable scheduler and queue implementations. Even + * though these functions use standard non-atomic parameter types, the + * parameters must only be operated using atomic operations. If new functions + * are added to this file, they should use _odp_wait_until_equal_*() prefix and + * atomic parameter types. 
+ */ + +static inline void _odp_wait_until_eq_u32(uint32_t *val, uint32_t expected) +{ + _odp_sevl(); + while (_odp_wfe() && _odp_monitor_u32(val, __ATOMIC_RELAXED) != expected) + odp_cpu_pause(); +} + +static inline void _odp_wait_until_eq_bitset(bitset_t *val, bitset_t expected) +{ + _odp_sevl(); + while (_odp_wfe() && _odp_bitset_monitor(val, __ATOMIC_RELAXED) != expected) + odp_cpu_pause(); +} + +static inline void _odp_wait_until_eq_acq_u8(uint8_t *val, uint8_t expected) +{ + _odp_sevl(); + while (_odp_wfe() && _odp_monitor_u8(val, __ATOMIC_ACQUIRE) != expected) + odp_cpu_pause(); +} + +static inline void _odp_wait_until_eq_acq_u32(uint32_t *val, uint32_t expected) +{ + _odp_sevl(); + while (_odp_wfe() && _odp_monitor_u32(val, __ATOMIC_ACQUIRE) != expected) + odp_cpu_pause(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/arch/arm/odp_atomic.h b/platform/linux-generic/arch/arm/odp_atomic.h deleted file mode 100644 index e400f52d4..000000000 --- a/platform/linux-generic/arch/arm/odp_atomic.h +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright (c) 2017-2021, ARM Limited. All rights reserved. - * Copyright (c) 2017-2018, Linaro Limited - * All rights reserved.
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H -#define PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H -#error This file should not be included directly, please include odp_cpu.h -#endif - -#include <odp_types_internal.h> -#include <limits.h> - -#ifdef CONFIG_DMBSTR - -#define atomic_store_release(loc, val, ro) \ -do { \ - _odp_release_barrier(ro); \ - __atomic_store_n(loc, val, __ATOMIC_RELAXED); \ -} while (0) - -#else - -#define atomic_store_release(loc, val, ro) \ - __atomic_store_n(loc, val, __ATOMIC_RELEASE) - -#endif /* CONFIG_DMBSTR */ - -/** Atomic bit set operations with memory ordering */ -#if __GCC_ATOMIC_LLONG_LOCK_FREE == 2 && \ - __SIZEOF_LONG_LONG__ != __SIZEOF_LONG__ -typedef unsigned long long bitset_t; -#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_LONG_LONG__) - -#elif __GCC_ATOMIC_LONG_LOCK_FREE == 2 && __SIZEOF_LONG__ != __SIZEOF_INT__ -typedef unsigned long bitset_t; -#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_LONG__) - -#elif __GCC_ATOMIC_INT_LOCK_FREE == 2 -typedef unsigned int bitset_t; -#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT__) - -#else -/* Target does not support lock-free atomic operations */ -typedef unsigned int bitset_t; -#define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT__) -#endif - -#if ATOM_BITSET_SIZE <= 32 - -static inline bitset_t bitset_mask(uint32_t bit) -{ - return 1UL << bit; -} - -#elif ATOM_BITSET_SIZE <= 64 - -static inline bitset_t bitset_mask(uint32_t bit) -{ - return 1ULL << bit; -} - -#elif ATOM_BITSET_SIZE <= 128 - -static inline bitset_t bitset_mask(uint32_t bit) -{ - if (bit < 64) - return 1ULL << bit; - else - return (_odp_u128_t)(1ULL << (bit - 64)) << 64; -} - -#else -#error Unsupported size of bit sets (ATOM_BITSET_SIZE) -#endif - -static inline bitset_t atom_bitset_load(bitset_t *bs, int mo) -{ - return __atomic_load_n(bs, mo); -} - -static inline void atom_bitset_set(bitset_t *bs, 
uint32_t bit, int mo) -{ - (void)__atomic_fetch_or(bs, bitset_mask(bit), mo); -} - -static inline void atom_bitset_clr(bitset_t *bs, uint32_t bit, int mo) -{ - (void)__atomic_fetch_and(bs, ~bitset_mask(bit), mo); -} - -static inline bitset_t atom_bitset_xchg(bitset_t *bs, bitset_t neu, int mo) -{ - return __atomic_exchange_n(bs, neu, mo); -} - -static inline bitset_t atom_bitset_cmpxchg(bitset_t *bs, bitset_t *old, - bitset_t neu, bool weak, - int mo_success, int mo_failure) -{ - return __atomic_compare_exchange_n(bs, old, neu, weak, mo_success, - mo_failure); -} - -#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H */ diff --git a/platform/linux-generic/arch/arm/odp_cpu.h b/platform/linux-generic/arch/arm/odp_cpu.h index 82d47325f..6b2674736 100644 --- a/platform/linux-generic/arch/arm/odp_cpu.h +++ b/platform/linux-generic/arch/arm/odp_cpu.h @@ -31,26 +31,52 @@ */ #define CONFIG_DMBSTR -/* - * Use ARM event signalling mechanism - * Event signalling minimises spinning (busy waiting) which decreases - * cache coherency traffic when spinning on shared locations (thus faster and - * more scalable) and enables the CPU to enter a sleep state (lower power - * consumption). 
- */ -/* #define CONFIG_WFE */ +static inline uint64_t lld(uint64_t *var, int mm) +{ + uint64_t old; -static inline void _odp_dmb(void) + __asm__ volatile("ldrexd %0, %H0, [%1]" + : "=&r" (old) + : "r" (var) + : ); + /* Barrier after an acquiring load */ + if (mm == __ATOMIC_ACQUIRE) + __asm__ volatile("dmb" : : : "memory"); + return old; +} + +/* Return 0 on success, 1 on failure */ +static inline uint32_t scd(uint64_t *var, uint64_t neu, int mm) { - __asm__ volatile("dmb" : : : "memory"); + uint32_t ret; + + /* Barrier before a releasing store */ + if (mm == __ATOMIC_RELEASE) + __asm__ volatile("dmb" : : : "memory"); + __asm__ volatile("strexd %0, %1, %H1, [%2]" + : "=&r" (ret) + : "r" (neu), "r" (var) + : ); + return ret; } -#define _odp_release_barrier(ro) \ - __atomic_thread_fence(__ATOMIC_RELEASE) +#ifdef CONFIG_DMBSTR + +#define atomic_store_release(loc, val, ro) \ +do { \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ + __atomic_store_n(loc, val, __ATOMIC_RELAXED); \ +} while (0) + +#else + +#define atomic_store_release(loc, val, ro) \ + __atomic_store_n(loc, val, __ATOMIC_RELEASE) + +#endif /* CONFIG_DMBSTR */ -#include "odp_llsc.h" -#include "odp_atomic.h" -#include "odp_cpu_idling.h" +#include "../default/odp_atomic.h" +#include "../default/odp_wait_until.h" #ifdef __ARM_FEATURE_UNALIGNED #define _ODP_UNALIGNED 1 diff --git a/platform/linux-generic/arch/arm/odp_cpu_idling.h b/platform/linux-generic/arch/arm/odp_cpu_idling.h deleted file mode 100644 index a6cea8c63..000000000 --- a/platform/linux-generic/arch/arm/odp_cpu_idling.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2017, ARM Limited. All rights reserved. - * - * Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H -#define PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H -#error This file should not be included directly, please include odp_cpu.h -#endif - -#ifndef CONFIG_WFE - -#include "../default/odp_cpu_idling.h" - -#else /* CONFIG_WFE */ - -static inline void sevl(void) -{ - __asm__ volatile("sevl" : : : ); -} - -static inline int wfe(void) -{ - __asm__ volatile("wfe" : : : "memory"); - return 1; -} - -#define monitor128(addr, mo) lld((addr), (mo)) -#define monitor64(addr, mo) ll64((addr), (mo)) -#define monitor32(addr, mo) ll32((addr), (mo)) -#define monitor8(addr, mo) ll8((addr), (mo)) -#endif /* CONFIG_WFE */ - -#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */ diff --git a/platform/linux-generic/arch/arm/odp_llsc.h b/platform/linux-generic/arch/arm/odp_llsc.h deleted file mode 100644 index 2fea6a0dc..000000000 --- a/platform/linux-generic/arch/arm/odp_llsc.h +++ /dev/null @@ -1,96 +0,0 @@ -/* Copyright (c) 2017, ARM Limited. All rights reserved. - * - * Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H -#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H - -#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H -#error This file should not be included directly, please include odp_cpu.h -#endif - -static inline uint32_t ll8(uint8_t *var, int mm) -{ - uint8_t old; - - __asm__ volatile("ldrexb %0, [%1]" - : "=&r" (old) - : "r" (var) - : ); - /* Barrier after an acquiring load */ - if (mm == __ATOMIC_ACQUIRE) - _odp_dmb(); - return old; -} - -static inline uint32_t ll(uint32_t *var, int mm) -{ - uint32_t old; - - __asm__ volatile("ldrex %0, [%1]" - : "=&r" (old) - : "r" (var) - : ); - /* Barrier after an acquiring load */ - if (mm == __ATOMIC_ACQUIRE) - _odp_dmb(); - return old; -} - -#define ll32(a, b) ll((a), (b)) - -/* Return 0 on success, 1 on failure */ -static inline uint32_t sc(uint32_t *var, uint32_t neu, int mm) -{ - uint32_t ret; - - /* Barrier before a releasing store */ - if (mm == __ATOMIC_RELEASE) - _odp_dmb(); - __asm__ volatile("strex %0, %1, [%2]" - : "=&r" (ret) - : "r" (neu), "r" (var) - : ); - return ret; -} - -#define sc32(a, b, c) sc((a), (b), (c)) - -static inline uint64_t lld(uint64_t *var, int mm) -{ - uint64_t old; - - __asm__ volatile("ldrexd %0, %H0, [%1]" - : "=&r" (old) - : "r" (var) - : ); - /* Barrier after an acquiring load */ - if (mm == __ATOMIC_ACQUIRE) - _odp_dmb(); - return old; -} - -#define ll64(a, b) lld((a), (b)) - -/* Return 0 on success, 1 on failure */ -static inline uint32_t scd(uint64_t *var, uint64_t neu, int mm) -{ - uint32_t ret; - - /* Barrier before a releasing store */ - if (mm == __ATOMIC_RELEASE) - _odp_dmb(); - __asm__ volatile("strexd %0, %1, %H1, [%2]" - : "=&r" (ret) - : "r" (neu), "r" (var) - : ); - return ret; -} - -#define sc64(a, b, c) scd((a), (b), (c)) - -#endif /* PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H */ diff --git a/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h 
b/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h index af435e495..c6ed86363 100644 --- a/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h +++ b/platform/linux-generic/arch/default/odp/api/abi/atomic_generic.h @@ -197,11 +197,11 @@ static inline int _odp_atomic_cas_acq_rel_u128(odp_atomic_u128_t *atom, odp_u128 #define ATOMIC_CAS_OP_128(ret_ptr, old_val, new_val) \ __extension__ ({ \ int *_ret_ptr = ret_ptr; \ - odp_u128_t *_old_val = old_val; \ - odp_u128_t _new_val = new_val; \ - if (((_atom)->v.u64[0] == (_old_val)->u64[0]) && \ - ((_atom)->v.u64[1] == (_old_val)->u64[1])) { \ - (_atom)->v = (_new_val); \ + odp_u128_t *_cas_old = old_val; \ + odp_u128_t _cas_new = new_val; \ + if (((_atom)->v.u64[0] == (_cas_old)->u64[0]) && \ + ((_atom)->v.u64[1] == (_cas_old)->u64[1])) { \ + (_atom)->v = (_cas_new); \ *(_ret_ptr) = 1; \ } else { \ *(_ret_ptr) = 0; \ diff --git a/platform/linux-generic/arch/default/odp_cpu.h b/platform/linux-generic/arch/default/odp_cpu.h index 821956819..6b10966c6 100644 --- a/platform/linux-generic/arch/default/odp_cpu.h +++ b/platform/linux-generic/arch/default/odp_cpu.h @@ -21,6 +21,6 @@ __atomic_store_n(loc, val, __ATOMIC_RELEASE) #include "odp_atomic.h" -#include "odp_cpu_idling.h" +#include "odp_wait_until.h" #endif diff --git a/platform/linux-generic/arch/default/odp_cpu_idling.h b/platform/linux-generic/arch/default/odp_cpu_idling.h deleted file mode 100644 index 9d23ad20d..000000000 --- a/platform/linux-generic/arch/default/odp_cpu_idling.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2017, ARM Limited. All rights reserved. - * - * Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef ODP_DEFAULT_CPU_IDLING_H_ -#define ODP_DEFAULT_CPU_IDLING_H_ - -/****************************************************************************** - * Idle mgmt - *****************************************************************************/ - -static inline void sevl(void) -{ - /* empty */ -} - -static inline int wfe(void) -{ - return 1; -} - -#define monitor128(addr, mo) __atomic_load_n((addr), (mo)) -#define monitor64(addr, mo) __atomic_load_n((addr), (mo)) -#define monitor32(addr, mo) __atomic_load_n((addr), (mo)) -#define monitor8(addr, mo) __atomic_load_n((addr), (mo)) - -#endif diff --git a/platform/linux-generic/arch/default/odp_wait_until.h b/platform/linux-generic/arch/default/odp_wait_until.h new file mode 100644 index 000000000..c51f4355e --- /dev/null +++ b/platform/linux-generic/arch/default/odp_wait_until.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2024 Nokia + */ + +#ifndef ODP_DEFAULT_WAIT_UNTIL_H_ +#define ODP_DEFAULT_WAIT_UNTIL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <odp/api/plat/cpu_inlines.h> + +#include <stdint.h> + +/** + * The _odp_wait_until_eq_*() functions defined in this header are intended to + * be used only with the scalable scheduler and queue implementations. Even + * though these functions use standard non-atomic parameter types, the + * parameters must only be operated using atomic operations. If new functions + * are added to this file, they should use _odp_wait_until_equal_*() prefix and + * atomic parameter types. 
+ */ + +static inline void _odp_wait_until_eq_u32(uint32_t *val, uint32_t expected) +{ + while (__atomic_load_n(val, __ATOMIC_RELAXED) != expected) + odp_cpu_pause(); +} + +static inline void _odp_wait_until_eq_bitset(bitset_t *val, bitset_t expected) +{ + while (__atomic_load_n(val, __ATOMIC_RELAXED) != expected) + odp_cpu_pause(); +} + +static inline void _odp_wait_until_eq_acq_u8(uint8_t *val, uint8_t expected) +{ + while (__atomic_load_n(val, __ATOMIC_ACQUIRE) != expected) + odp_cpu_pause(); +} + +static inline void _odp_wait_until_eq_acq_u32(uint32_t *val, uint32_t expected) +{ + while (__atomic_load_n(val, __ATOMIC_ACQUIRE) != expected) + odp_cpu_pause(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/arch/x86/odp_time_cpu.c b/platform/linux-generic/arch/x86/odp_time_cpu.c index aa00ac04e..ab897296d 100644 --- a/platform/linux-generic/arch/x86/odp_time_cpu.c +++ b/platform/linux-generic/arch/x86/odp_time_cpu.c @@ -1,7 +1,6 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited + * Copyright (c) 2024 Nokia */ #include <odp_posix_extensions.h> @@ -14,42 +13,77 @@ #include <odp_debug_internal.h> #include <time.h> +#include <errno.h> +#include <string.h> -/* Measure TSC frequency. Frequency information registers are defined for x86, - * but those are often not enumerated. 
*/ +static int nwait(uint64_t nsec) +{ + struct timespec ts1, ts2; + uint64_t diff; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1)) + return 1; + + do { + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2)) + return 1; + + diff = (ts2.tv_sec - ts1.tv_sec) * ODP_TIME_SEC_IN_NS + + ts2.tv_nsec - ts1.tv_nsec; + } while (diff < nsec); + + return 0; +} + +static void sort(uint64_t values[], int num) +{ + for (int n = 0; n < num; n++) { + for (int i = n + 1; i < num; i++) { + if (values[i] < values[n]) { + uint64_t tmp = values[i]; + + values[i] = values[n]; + values[n] = tmp; + } + } + } +} + +static uint64_t median(uint64_t values[], int num) +{ + sort(values, num); + if (num % 2 == 0) + return (values[num / 2 - 1] + values[num / 2]) / 2; + else + return values[num / 2]; +} + +/* Measure TSC frequency. */ uint64_t _odp_time_cpu_global_freq(void) { - struct timespec sleep, ts1, ts2; - uint64_t t1, t2, ts_nsec, cycles, hz; + struct timespec ts1, ts2; + uint64_t t1, t2, ts_nsec, cycles; int i; - uint64_t avg = 0; - int rounds = 3; + const int rounds = 6; /* first round is warmup */ int warm_up = 1; + uint64_t hz[rounds]; for (i = 0; i < rounds; i++) { - sleep.tv_sec = 0; + uint64_t wait_nsec = ODP_TIME_SEC_IN_NS / 50; if (warm_up) - sleep.tv_nsec = ODP_TIME_SEC_IN_NS / 1000; - else - sleep.tv_nsec = ODP_TIME_SEC_IN_NS / 4; + wait_nsec = ODP_TIME_SEC_IN_NS / 1000; - if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1)) { - _ODP_ERR("clock_gettime() failed\n"); - return 0; - } + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1)) + goto err_out; t1 = _odp_time_cpu_global(); - if (nanosleep(&sleep, NULL) < 0) { - _ODP_ERR("nanosleep() failed\n"); - return 0; - } + if (nwait(wait_nsec)) + goto err_out; - if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2)) { - _ODP_ERR("clock_gettime() failed\n"); - return 0; - } + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2)) + goto err_out; t2 = _odp_time_cpu_global(); @@ -58,13 +92,15 @@ uint64_t _odp_time_cpu_global_freq(void) cycles = t2 - t1; - hz = (cycles * 
ODP_TIME_SEC_IN_NS) / ts_nsec; + hz[i] = (cycles * ODP_TIME_SEC_IN_NS) / ts_nsec; if (warm_up) warm_up = 0; - else - avg += hz; } - return avg / (rounds - 1); + return median(&hz[1], rounds - 1); + +err_out: + _ODP_ERR("clock_gettime() failed (%s)\n", strerror(errno)); + return 0; } diff --git a/platform/linux-generic/example/Makefile.am b/platform/linux-generic/example/Makefile.am new file mode 100644 index 000000000..84f337387 --- /dev/null +++ b/platform/linux-generic/example/Makefile.am @@ -0,0 +1,5 @@ +SUBDIRS = + +if WITH_ML +SUBDIRS += ml +endif diff --git a/platform/linux-generic/example/ml/.gitignore b/platform/linux-generic/example/ml/.gitignore new file mode 100644 index 000000000..d845f6bb5 --- /dev/null +++ b/platform/linux-generic/example/ml/.gitignore @@ -0,0 +1,5 @@ +model_explorer +simple_linear +mnist +*.log +*.trs diff --git a/platform/linux-generic/example/ml/Makefile.am b/platform/linux-generic/example/ml/Makefile.am new file mode 100644 index 000000000..3692b704e --- /dev/null +++ b/platform/linux-generic/example/ml/Makefile.am @@ -0,0 +1,46 @@ +include $(top_srcdir)/example/Makefile.inc + +LDADD += -lm + +bin_PROGRAMS = model_explorer simple_linear mnist + +simple_linear_SOURCES = simple_linear.c model_read.c model_read.h +model_explorer_SOURCES = model_explorer.c model_read.c model_read.h +mnist_SOURCES = mnist.c model_read.c model_read.h + +EXTRA_DIST = \ + odp_ml_run_mnist.sh \ + example_digit.csv \ + mnist-12.onnx \ + odp_ml_run_model_explorer.sh \ + odp_ml_run_simple_linear.sh \ + simple_linear.onnx \ + README.md + +if test_example +TESTS = \ + odp_ml_run_mnist.sh \ + odp_ml_run_model_explorer.sh \ + odp_ml_run_simple_linear.sh +endif + +# If building out-of-tree, make check will not copy the scripts and data to the +# $(builddir) assuming that all commands are run locally. However this prevents +# running tests on a remote target using LOG_COMPILER. +# So copy all script and data files explicitly here. 
+all-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + if [ -e $(srcdir)/$$f ]; then \ + mkdir -p $(builddir)/$$(dirname $$f); \ + cp -f $(srcdir)/$$f $(builddir)/$$f; \ + fi \ + done \ + fi + +clean-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + rm -f $(builddir)/$$f; \ + done \ + fi diff --git a/platform/linux-generic/example/ml/README.md b/platform/linux-generic/example/ml/README.md new file mode 100644 index 000000000..fc6a57c0a --- /dev/null +++ b/platform/linux-generic/example/ml/README.md @@ -0,0 +1,94 @@ +# ML examples + +Machine Learning API examples demonstrate how to use ODP ML API in different tasks: +for example simple linear computation and predicting a handwritten digit in +a given image. + +## Simple Linear + +This example runs on a very simple model of form y = 3 * x + 4 where x is given +as the second argument. + +### Generate model + +```bash +python3 <odp_directory>/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py +``` + +### Run simple linear + +```bash +$ ./simple_linear 3 +. +. +. +y = 3 * 3 + 4: 13 +. +``` + +Or run the program with multiple threads, each thread inferences on one x given in +the input. Thus, the number of threads is the number of numbers in the second argument. + +```bash +$ ./simple_linear [2,4,5] +. +. +. +y = 3 * 2 + 4: 10 +y = 3 * 5 + 4: 19 +y = 3 * 4 + 4: 16 +. +``` + +## MNIST + +This example predicts a handwritten digit in a given image. Refer to +https://github.com/onnx/models/tree/main/validated/vision/classification/mnist +for more information. The model file is from +https://github.com/onnx/models/raw/main/validated/vision/classification/mnist/model/mnist-12.onnx +(SPDX-License-Identifier: MIT). + +### Prepare input data + +The input image is stored in a csv file which contains, comma separated, the +digit label (a number from 0 to 9) and the 784 pixel values (a number from 0 to +255). 
Pixel order is left to right and then top down. The MNIST dataset is +available in this format at https://www.kaggle.com/oddrationale/mnist-in-csv. + +### Run mnist + +```bash +$ ./mnist mnist-12.onnx example_digit.csv +. +. +. +predicted_digit: 4, expected_digit: 4 +. +``` + +## Model Explorer + +The example prints basic model information. + +### Run model_explorer + +```bash +$ ./model_explorer simple_linear.onnx +. +. +. +Model info +---------- + Model handle: 0x7fe8426ce1d8 + Name: model-explorer + Model version: 1 + Model interface version: 0 + Index: 0 + Number of inputs: 1 + Input[0]: Name: x, Data_type: int32, Shape: static [1], Size: 4 + Number of outputs: 1 + Output[0]: Name: y, Data_type: int32, Shape: static [1], Size: 4 +. +. +. +``` diff --git a/platform/linux-generic/example/ml/example_digit.csv b/platform/linux-generic/example/ml/example_digit.csv new file mode 100644 index 000000000..2ab0f4a0c --- /dev/null +++ b/platform/linux-generic/example/ml/example_digit.csv @@ -0,0 +1 @@ 
+4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,55,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,215,98,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,249,144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34,246,148,0,0,0,0,0,0,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39,255,139,0,0,0,0,0,0,2,95,117,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,255,97,0,0,0,0,0,0,8,203,211,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,255,58,0,0,0,0,0,0,13,238,167,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,111,255,23,0,0,0,0,0,0,24,255,110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,209,222,1,0,0,0,0,0,0,62,255,51,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55,255,125,0,0,0,0,0,0,0,117,255,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,164,255,60,0,0,0,0,0,0,0,171,230,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,235,255,178,120,89,74,72,72,72,74,246,241,121,141,153,148,83,1,0,0,0,0,0,0,0,0,0,6,121,231,255,255,255,255,255,255,255,255,255,255,255,255,255,253,173,14,0,0,0,0,0,0,0,0,0,0,1,19,44,63,76,83,83,83,83,100,255,192,66,52,45,46,34,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39,255,138,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,255,113,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,104,255,84,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,147,255,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,190,255,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,229,210,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50,255,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,91,255,34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,120,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/platform/linux-generic/example/ml/mnist-12.onnx b/platform/linux-generic/example/ml/mnist-12.onnx Binary files differnew file mode 100644 index 000000000..6661bfe3c --- /dev/null +++ 
b/platform/linux-generic/example/ml/mnist-12.onnx diff --git a/platform/linux-generic/example/ml/mnist.c b/platform/linux-generic/example/ml/mnist.c new file mode 100644 index 000000000..4c1066302 --- /dev/null +++ b/platform/linux-generic/example/ml/mnist.c @@ -0,0 +1,300 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <odp_api.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <inttypes.h> + +#include "model_read.h" + +/** + * About MNIST model used in this example. + * + * The model predicts handwritten digits. It has one input and one output whose + * detailed information is as follows: + * + * Input: + * Name: Input3, type: float32, shape: [1, 1, 28, 28] + * + * Output: + * Name: Plus214_Output_0, type: float32, shape: [1, 10] + * + * Refer https://github.com/onnx/models/tree/main/validated/vision/classification/mnist + * for more information about the model. + * + * The model outputs the likelihood of each number before softmax, so we need to + * map the output to probabilities across the 10 classes with softmax function. + * + * In this example, the input image is stored in example_digit.csv file, which + * contains, comma separated, the digit label (a number from 0 to 9) and the 784 + * pixel values (a number from 0 to 255). Pixel order is first left to right and + * then top down. The MNIST dataset is available in this format at + * https://www.kaggle.com/oddrationale/mnist-in-csv. 
+ */ + +#define MAX_MODEL_SIZE 30000 +#define INPUT_NUM_ELEMS 784 /* Total shape for input: 1 * 1 * 28 * 28 */ +#define OUTPUT_NUM_ELEMS 10 /* Total shape for output: 1 * 10 */ + +static int read_digit_csv(const char *file_name, uint8_t *expected_digit, float *pixels) +{ + char *tmp; + char *token; + char *end; + FILE *digit_file; + size_t size, num_elem; + const char *delim = ","; /* Delimiter */ + size_t num_pixel = 0; + + /* Get the model file size in bytes */ + digit_file = fopen(file_name, "rb"); + fseek(digit_file, 0, SEEK_END); + size = ftell(digit_file); + rewind(digit_file); + + tmp = malloc(size); + memset(tmp, 0, size); + num_elem = fread(tmp, size, 1, digit_file); + + fclose(digit_file); + if (num_elem != 1) { + printf("Read digit file failed\n"); + free(tmp); + return -1; + } + + /* Get the first token which is the expected digit */ + token = strtok(tmp, delim); + *expected_digit = (uint8_t)strtol(token, &end, 10); + if ((*expected_digit > 9) || (end == token)/*No numeric character*/) { + printf("Invalid digit %u or no numeric character available\n", + *expected_digit); + free(tmp); + return -1; + } + + /* The rest 784 numbers are pixel values */ + token = strtok(NULL, delim); + while (token != NULL) { + pixels[num_pixel] = strtof(token, NULL); + num_pixel++; + token = strtok(NULL, delim); + } + + if (num_pixel != INPUT_NUM_ELEMS) { + printf("Wrong number of pixels: %zu (expected:784)\n", num_pixel); + free(tmp); + return -1; + } + + free(tmp); + return 0; +} + +static int prepare_run_params(const char *file_name, uint8_t *expected_digit, + odp_ml_data_seg_t *input, odp_ml_data_seg_t *output) +{ + input->size = INPUT_NUM_ELEMS * sizeof(float); + input->addr = malloc(input->size); + memset(input->addr, 0, input->size); + + if (read_digit_csv(file_name, expected_digit, input->addr)) { + free(input->addr); + return -1; + } + + output->size = OUTPUT_NUM_ELEMS * sizeof(float); + output->addr = malloc(output->size); + memset(output->addr, 0, output->size); + 
+ return 0; +} + +static float array_max(float *arr, uint8_t arr_len) +{ + float max = arr[0]; + + for (size_t i = 1; i < arr_len; i++) { + if (arr[i] > max) + max = arr[i]; + } + + return max; +} + +static void softmax(float *input, uint8_t input_len) +{ + float rowmax = array_max(input, input_len); + + float input_exp[input_len]; + float sum = 0.0f; + + for (size_t i = 0; i != input_len; ++i) { + input_exp[i] = exp(input[i] - rowmax); + sum += input_exp[i]; + } + + for (size_t i = 0; i != input_len; ++i) + input[i] = input_exp[i] / sum; +} + +static uint8_t index_of_max(float *arr, uint8_t arr_len) +{ + uint8_t i = 0; + uint8_t max_index = 0; + float max = arr[0]; + + for (i = 1; i < arr_len; i++) { + if (arr[i] > max) { + max = arr[i]; + max_index = i; + } + } + + return max_index; +} + +int main(int argc, char *argv[]) +{ + const char *model_file; + const char *input_file; + float *probabilities; + uint8_t expected_digit; + uint8_t predicted_digit; + odp_instance_t inst; + odp_ml_data_t data; + odp_ml_model_t ml_model; + odp_ml_data_seg_t input; + odp_ml_data_seg_t output; + odp_ml_capability_t capa; + odp_ml_config_t ml_config; + odp_ml_model_param_t model_param; + int ret = 0; + + if (argc != 3) { + printf("Please provide an input image file for classification.\n" + "\nUsage:\n" + " %s model_file input_image\n" + "\nThis example classifies digit written on the input image.\n\n", + argv[0]); + return -1; + } + + model_file = argv[1]; + input_file = argv[2]; + + if (odp_init_global(&inst, NULL, NULL)) { + printf("Global init failed.\n"); + return -1; + } + + if (odp_init_local(inst, ODP_THREAD_CONTROL)) { + printf("Local init failed.\n"); + return -1; + } + + if (odp_ml_capability(&capa)) { + printf("odp_ml_capability() failed\n"); + ret = -1; + goto odp_term; + } + + if (MAX_MODEL_SIZE > capa.max_model_size) { + printf("Configured max model size %d exceeds max mode size %" PRIu64 " in capa\n", + MAX_MODEL_SIZE, capa.max_model_size); + ret = -1; + goto 
odp_term; + } + + odp_ml_config_init(&ml_config); + ml_config.max_model_size = MAX_MODEL_SIZE; + ml_config.load_mode_mask = ODP_ML_COMPL_MODE_SYNC; + ml_config.run_mode_mask = ODP_ML_COMPL_MODE_SYNC; + + if (odp_ml_config(&ml_config)) { + printf("odp_ml_config() failed\n"); + ret = -1; + goto odp_term; + } + + odp_ml_model_param_init(&model_param); + if (read_model_from_file(model_file, &model_param)) { + printf("Read model file failed\n"); + ret = -1; + goto odp_term; + } + + ml_model = odp_ml_model_create("mnist", &model_param); + free(model_param.model); + if (ml_model == ODP_ML_MODEL_INVALID) { + printf("odp_ml_model_create() failed\n"); + ret = -1; + goto odp_term; + } + + odp_ml_model_print(ml_model); + + if (odp_ml_model_load(ml_model, NULL)) { + printf("odp_ml_model_load() failed\n"); + ret = -1; + goto destroy_model; + } + + data.num_input_seg = 1; + data.num_output_seg = 1; + data.input_seg = &input; + data.output_seg = &output; + if (prepare_run_params(input_file, &expected_digit, &input, &output)) { + printf("prepare_run_params() failed\n"); + ret = -1; + goto unload; + } + + if (odp_ml_run(ml_model, &data, NULL) != 1) { + printf("odp_ml_model_run() failed\n"); + ret = -1; + goto free_model_io; + } + + probabilities = output.addr; + + /* Post-process the model output */ + softmax(probabilities, OUTPUT_NUM_ELEMS); + predicted_digit = index_of_max(probabilities, OUTPUT_NUM_ELEMS); + printf("predicted_digit: %u, expected_digit: %u\n", predicted_digit, expected_digit); + +free_model_io: + free(input.addr); + free(output.addr); + +unload: + if (odp_ml_model_unload(ml_model, NULL)) { + printf("odp_ml_model_unload() failed\n"); + ret = -1; + goto odp_term; + } + +destroy_model: + /* Destroy the model */ + if (odp_ml_model_destroy(ml_model)) { + printf("odp_ml_model_destroy() failed\n"); + ret = -1; + } + +odp_term: + if (odp_term_local()) { + printf("Local term failed.\n"); + return -1; + } + + if (odp_term_global(inst)) { + printf("Global term failed.\n"); + 
return -1; + } + + return ret; +} diff --git a/platform/linux-generic/example/ml/model_explorer.c b/platform/linux-generic/example/ml/model_explorer.c new file mode 100644 index 000000000..bd449b032 --- /dev/null +++ b/platform/linux-generic/example/ml/model_explorer.c @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <odp_api.h> +#include <stdio.h> +#include <stdlib.h> + +#include "model_read.h" + +/** + * Read basic model information, e.g. inputs/outputs. + */ + +int main(int argc, char *argv[]) +{ + odp_instance_t inst; + odp_ml_model_t ml_model; + odp_ml_capability_t capa; + odp_ml_config_t ml_config; + odp_ml_model_param_t model_param; + int ret = 0; + + if (argc != 2) { + printf("Please specify model path\n" + "\nUsage:\n" + " %s model_path\n" + "\nThis example prints model information\n\n", + argv[0]); + return -1; + } + + if (odp_init_global(&inst, NULL, NULL)) { + printf("Global init failed.\n"); + return -1; + } + + if (odp_init_local(inst, ODP_THREAD_CONTROL)) { + printf("Local init failed.\n"); + return -1; + } + + if (odp_ml_capability(&capa)) { + printf("odp_ml_capability() failed\n"); + ret = -1; + goto odp_term; + } + + odp_ml_config_init(&ml_config); + ml_config.max_model_size = capa.max_model_size; + ml_config.load_mode_mask = ODP_ML_COMPL_MODE_SYNC; + ml_config.run_mode_mask = ODP_ML_COMPL_MODE_SYNC; + + if (odp_ml_config(&ml_config)) { + printf("odp_ml_config() failed\n"); + ret = -1; + goto odp_term; + } + + odp_ml_model_param_init(&model_param); + if (read_model_from_file(argv[1], &model_param)) { + ret = -1; + goto odp_term; + } + + ml_model = odp_ml_model_create("model-explorer", &model_param); + free(model_param.model); + if (ml_model == ODP_ML_MODEL_INVALID) { + printf("odp_ml_model_create failed.\n"); + ret = -1; + goto odp_term; + } + + odp_ml_model_print(ml_model); + +odp_term: + if (odp_term_local()) { + printf("Local term failed.\n"); + return -1; + } + + if 
(odp_term_global(inst)) { + printf("Global term failed.\n"); + return -1; + } + + return ret; +} diff --git a/platform/linux-generic/example/ml/model_read.c b/platform/linux-generic/example/ml/model_read.c new file mode 100644 index 000000000..7aa20bf35 --- /dev/null +++ b/platform/linux-generic/example/ml/model_read.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <odp_api.h> + +#include "model_read.h" + +int read_model_from_file(const char *file_name, odp_ml_model_param_t *model_param) +{ + FILE *model_file; + /* Number of elements successfully read */ + size_t num_elem; + + /* Get the model file size in bytes */ + model_file = fopen(file_name, "rb"); + if (model_file == NULL) { + perror("Failed to open model file"); + return -1; + } + + fseek(model_file, 0, SEEK_END); + model_param->size = ftell(model_file); + rewind(model_file); + + /* Allocate memory for model buffer */ + model_param->model = malloc(model_param->size); + memset(model_param->model, 0, model_param->size); + if (!model_param->model) { + printf("Allocating memory for model buffer failed\n"); + return -1; + } + + /* Read the model file */ + num_elem = fread(model_param->model, model_param->size, 1, model_file); + fclose(model_file); + if (num_elem != 1) { + printf("Read model file failed\n"); + free(model_param->model); + return -1; + } + + return 0; +} diff --git a/platform/linux-generic/example/ml/model_read.h b/platform/linux-generic/example/ml/model_read.h new file mode 100644 index 000000000..df2062d5f --- /dev/null +++ b/platform/linux-generic/example/ml/model_read.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#ifndef ODP_MODEL_READ_H_ +#define ODP_MODEL_READ_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <odp_api.h> + +/** + * Read model binaries from model file + * + * @param file_name The name of model file + 
* @param model_param Model parameter where model content and size are read to + * + * @retval 0 on success + * @retval < 0 on failure + */ +int read_model_from_file(const char *file_name, odp_ml_model_param_t *model_param); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/example/ml/odp_ml_run_mnist.sh b/platform/linux-generic/example/ml/odp_ml_run_mnist.sh new file mode 100755 index 000000000..f83d6f60d --- /dev/null +++ b/platform/linux-generic/example/ml/odp_ml_run_mnist.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Nokia +# +set -e + +# wget https://github.com/onnx/models/raw/main/validated/vision/classification/mnist/model/mnist-12.onnx +./mnist${EXEEXT} mnist-12.onnx example_digit.csv diff --git a/platform/linux-generic/example/ml/odp_ml_run_model_explorer.sh b/platform/linux-generic/example/ml/odp_ml_run_model_explorer.sh new file mode 100755 index 000000000..7f9fed5a6 --- /dev/null +++ b/platform/linux-generic/example/ml/odp_ml_run_model_explorer.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Nokia +# +set -e + +./model_explorer${EXEEXT} simple_linear.onnx diff --git a/platform/linux-generic/example/ml/odp_ml_run_simple_linear.sh b/platform/linux-generic/example/ml/odp_ml_run_simple_linear.sh new file mode 100755 index 000000000..b394b61a8 --- /dev/null +++ b/platform/linux-generic/example/ml/odp_ml_run_simple_linear.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Nokia +# +set -e + +./simple_linear${EXEEXT} [2,4,5] diff --git a/platform/linux-generic/example/ml/simple_linear.c b/platform/linux-generic/example/ml/simple_linear.c new file mode 100644 index 000000000..3417219c7 --- /dev/null +++ b/platform/linux-generic/example/ml/simple_linear.c @@ -0,0 +1,281 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <odp_api.h> +#include 
<odp/helper/odph_api.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +#include "model_read.h" + +/** + * About model simple_linear.onnx used in this example. + * + * Model info: + * Inputs: name: x, type: int32, shape: [1] + * Outputs: name: y, type: int32, shape: [1] + * + * The model is of form y = 3 * x + 4 where x is given as the second argument. + * Thus when x = 5, the output y should be 19. + */ + +#define NUM_INPUTS 1 +#define NUM_OUTPUTS 1 +#define MAX_NUM_WORKERS 10 +#define MAX_MODEL_SIZE 500 + +typedef struct infer_param_t { + int32_t x; + odp_ml_model_t ml_model; +} infer_param_t; + +typedef struct { + odp_shm_t shm; + /* Thread specific arguments */ + infer_param_t infer_param[MAX_NUM_WORKERS]; +} thread_args_t; + +/* Global pointer to thread_args */ +static thread_args_t *thread_args; + +static int run_inference(void *infer_param) +{ + int32_t y; + odp_ml_data_t data; + odp_ml_data_seg_t input; + odp_ml_data_seg_t output; + infer_param_t *param = (infer_param_t *)infer_param; + + data.num_input_seg = NUM_INPUTS; + data.input_seg = &input; + input.addr = ¶m->x; + input.size = sizeof(int32_t); + + data.num_output_seg = NUM_OUTPUTS; + data.output_seg = &output; + output.addr = &y; + output.size = sizeof(int32_t); + + while (1) { + int ret = odp_ml_run(param->ml_model, &data, NULL); + + if (ret == 1) + break; + + if (ret < 0) { + ODPH_ERR("odp_ml_model_run() failed: %d\n", ret); + return -1; + } + } + + printf("y = 3 * %d + 4: %d\n", param->x, y); + + return 0; +} + +static int parse_argv1(char *argv1, uint32_t *num, int32_t *x) +{ + char *token; + int i; + + if (!strstr(argv1, "[")) { + *num = 1; + *x = strtol(argv1, NULL, 10); + return 0; + } + + token = strtok(argv1, "[,]"); + if (token == NULL) { + ODPH_ERR("Invalid argv[1]\n"); + return -1; + } + x[0] = strtol(token, NULL, 10); + + for (i = 0; i < MAX_NUM_WORKERS; i++) { + token = strtok(NULL, "[,]"); + if (token == NULL) + break; + + x[i + 1] = 
strtol(token, NULL, 10); + } + + if (i == MAX_NUM_WORKERS) { + ODPH_ERR("Too much xs, maximum number is: %d\n", MAX_NUM_WORKERS); + return -1; + } + + *num = i + 1; + return 0; +} + +int main(int argc, char *argv[]) +{ + odp_shm_t shm; + int num_workers; + odp_instance_t inst; + odp_cpumask_t cpumask; + odp_ml_model_t ml_model; + odp_ml_capability_t capa; + odp_ml_config_t ml_config; + int32_t x[MAX_NUM_WORKERS]; + odp_ml_model_param_t model_param; + odph_thread_t thread_tbl[MAX_NUM_WORKERS]; + odph_thread_common_param_t thr_common; + odph_thread_param_t thr_param[MAX_NUM_WORKERS]; + char cpumaskstr[ODP_CPUMASK_STR_SIZE]; + int ret = 0; + uint32_t num = 0; + + if (argc != 2) { + ODPH_ERR("Please specify x\n" + "\nUsage:\n" + " %s x\n" + "\nThis example runs inference on model y = 3x + 4\n\n", + argv[0]); + return -1; + } + + if (parse_argv1(argv[1], &num, x)) + return -1; + + if (odp_init_global(&inst, NULL, NULL)) { + ODPH_ERR("Global init failed.\n"); + return -1; + } + + if (odp_init_local(inst, ODP_THREAD_CONTROL)) { + ODPH_ERR("Local init failed.\n"); + return -1; + } + + if (odp_ml_capability(&capa)) { + ODPH_ERR("odp_ml_capability() failed\n"); + ret = -1; + goto odp_term; + } + + if (MAX_MODEL_SIZE > capa.max_model_size) { + ODPH_ERR("Configured max model size %d exceeds max mode size %" PRIu64 " in capa\n", + MAX_MODEL_SIZE, capa.max_model_size); + ret = -1; + goto odp_term; + } + + /* Set ML configuration parameter */ + odp_ml_config_init(&ml_config); + ml_config.max_model_size = MAX_MODEL_SIZE; + ml_config.load_mode_mask = ODP_ML_COMPL_MODE_SYNC; + ml_config.run_mode_mask = ODP_ML_COMPL_MODE_SYNC; + + if (odp_ml_config(&ml_config)) { + ODPH_ERR("odp_ml_config() failed\n"); + ret = -1; + goto odp_term; + } + + odp_ml_model_param_init(&model_param); + if (read_model_from_file("simple_linear.onnx", &model_param)) { + ret = -1; + goto odp_term; + } + + ml_model = odp_ml_model_create("simple linear", &model_param); + free(model_param.model); + if (ml_model == 
ODP_ML_MODEL_INVALID) { + ODPH_ERR("odp_ml_model_create() failed\n"); + ret = -1; + goto odp_term; + } + + odp_ml_model_print(ml_model); + odp_ml_print(); + + if (odp_ml_model_load(ml_model, NULL)) { + ODPH_ERR("odp_ml_model_load() failed\n"); + ret = -1; + goto destroy_model; + } + + /* Reserve memory for args from shared mem */ + shm = odp_shm_reserve("_thread_args", sizeof(thread_args_t), + ODP_CACHE_LINE_SIZE, 0); + if (shm == ODP_SHM_INVALID) { + ODPH_ERR("Error: shared mem reserve failed.\n"); + ret = -1; + goto unload; + } + + thread_args = odp_shm_addr(shm); + if (thread_args == NULL) { + ODPH_ERR("Error: shared mem alloc failed.\n"); + ret = -1; + goto free_shm; + } + thread_args->shm = shm; + memset(thread_args, 0, sizeof(thread_args_t)); + + /* Prepare inference parameter */ + for (uint32_t i = 0; i < num; i++) { + thread_args->infer_param[i].x = x[i]; + thread_args->infer_param[i].ml_model = ml_model; + } + + num_workers = odp_cpumask_default_worker(&cpumask, num); + (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); + + printf("num worker threads: %i\n", num_workers); + printf("first CPU: %i\n", odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", cpumaskstr); + + /* Create and init worker threads */ + memset(thread_tbl, 0, sizeof(thread_tbl)); + odph_thread_common_param_init(&thr_common); + thr_common.instance = inst; + thr_common.cpumask = &cpumask; + + for (int i = 0; i < num_workers; ++i) { + odph_thread_param_init(&thr_param[i]); + thr_param[i].start = run_inference; + thr_param[i].arg = &thread_args->infer_param[i]; + thr_param[i].thr_type = ODP_THREAD_WORKER; + } + + odph_thread_create(thread_tbl, &thr_common, thr_param, num_workers); + + odph_thread_join(thread_tbl, num_workers); + +free_shm: + if (odp_shm_free(shm)) { + ODPH_ERR("Error: shm free global data\n"); + return -1; + } + +unload: + /* Unload a model */ + if (odp_ml_model_unload(ml_model, NULL)) { + ODPH_ERR("odp_ml_model_load() failed\n"); + ret = -1; + } + 
+destroy_model: + if (odp_ml_model_destroy(ml_model)) { + ODPH_ERR("odp_ml_model_destroy() failed\n"); + ret = -1; + } + +odp_term: + if (odp_term_local()) { + ODPH_ERR("Local term failed.\n"); + return -1; + } + + if (odp_term_global(inst)) { + ODPH_ERR("Global term failed.\n"); + return -1; + } + + return ret; +} diff --git a/platform/linux-generic/example/ml/simple_linear.onnx b/platform/linux-generic/example/ml/simple_linear.onnx Binary files differnew file mode 100644 index 000000000..45c4b95b9 --- /dev/null +++ b/platform/linux-generic/example/ml/simple_linear.onnx diff --git a/platform/linux-generic/include-abi/odp/api/abi/atomic.h b/platform/linux-generic/include-abi/odp/api/abi/atomic.h index 9c87f9cb8..4f481f913 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/atomic.h +++ b/platform/linux-generic/include-abi/odp/api/abi/atomic.h @@ -80,7 +80,7 @@ typedef struct ODP_ALIGNED(sizeof(odp_u128_t)) odp_atomic_u128_s { #endif -/** @ingroup odp_atomic +/** @addtogroup odp_atomic * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h b/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h index 1d54bab07..63067268c 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/buffer_types.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_buffer +/** @addtogroup odp_buffer * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/classification.h b/platform/linux-generic/include-abi/odp/api/abi/classification.h index 342f4124c..d63763dbd 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/classification.h +++ b/platform/linux-generic/include-abi/odp/api/abi/classification.h @@ -19,7 +19,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_classification +/** @addtogroup odp_classification * @{ */ diff --git 
a/platform/linux-generic/include-abi/odp/api/abi/comp.h b/platform/linux-generic/include-abi/odp/api/abi/comp.h index ac3d3a4a9..45681e961 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/comp.h +++ b/platform/linux-generic/include-abi/odp/api/abi/comp.h @@ -15,7 +15,7 @@ extern "C" { #include <stdint.h> -/** @ingroup odp_compression +/** @addtogroup odp_compression * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h b/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h index d49caf89a..b1e4aa5ae 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/crypto_types.h @@ -22,7 +22,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_crypto +/** @addtogroup odp_crypto * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/dma_types.h b/platform/linux-generic/include-abi/odp/api/abi/dma_types.h index 768591b10..d5bee0374 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/dma_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/dma_types.h @@ -13,7 +13,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_dma +/** @addtogroup odp_dma * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/event_types.h b/platform/linux-generic/include-abi/odp/api/abi/event_types.h index 8ff5acd6b..01ee66cd3 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/event_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/event_types.h @@ -1,5 +1,5 @@ /* Copyright (c) 2015-2018, Linaro Limited - * Copyright (c) 2022, Nokia + * Copyright (c) 2022-2023, Nokia * All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause @@ -20,7 +20,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_event +/** @addtogroup odp_event * @{ */ @@ -36,6 +36,7 @@ typedef enum odp_event_type_t { ODP_EVENT_PACKET_VECTOR = 6, ODP_EVENT_PACKET_TX_COMPL = 7, ODP_EVENT_DMA_COMPL = 8, + ODP_EVENT_ML_COMPL = 9 } odp_event_type_t; typedef enum odp_event_subtype_t { @@ -43,7 +44,9 @@ typedef enum odp_event_subtype_t { ODP_EVENT_PACKET_BASIC = 1, ODP_EVENT_PACKET_CRYPTO = 2, ODP_EVENT_PACKET_IPSEC = 3, - ODP_EVENT_PACKET_COMP = 4 + ODP_EVENT_PACKET_COMP = 4, + ODP_EVENT_ML_COMPL_LOAD = 5, + ODP_EVENT_ML_COMPL_RUN = 6 } odp_event_subtype_t; /** diff --git a/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h b/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h index 376666ded..1c5501997 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/ipsec_types.h @@ -22,7 +22,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_ipsec +/** @addtogroup odp_ipsec * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/ml_types.h b/platform/linux-generic/include-abi/odp/api/abi/ml_types.h new file mode 100644 index 000000000..0fdb7a8dc --- /dev/null +++ b/platform/linux-generic/include-abi/odp/api/abi/ml_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2021-2023 Nokia + */ + +#ifndef ODP_API_ABI_ML_TYPES_H_ +#define ODP_API_ABI_ML_TYPES_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <odp/api/std_types.h> +#include <odp/api/plat/strong_types.h> + +/** @internal Implementation specific ML parameters */ +struct _odp_ml_model_extra_param_t { + /** @internal Dummy field to avoid empty struct */ + char dummy; +}; + +/** @addtogroup odp_ml + * @{ + */ + +typedef ODP_HANDLE_T(odp_ml_model_t); +typedef ODP_HANDLE_T(odp_ml_compl_t); +typedef struct _odp_ml_model_extra_param_t odp_ml_model_extra_param_t; 
+ +#define ODP_ML_MODEL_INVALID _odp_cast_scalar(odp_ml_model_t, 0) +#define ODP_ML_COMPL_INVALID _odp_cast_scalar(odp_ml_compl_t, 0) + +#define ODP_ML_MODEL_NAME_LEN 64 +#define ODP_ML_MODEL_IO_NAME_LEN 64 +#define ODP_ML_SHAPE_NAME_LEN 16 +#define ODP_ML_EXTRA_STAT_NAME_LEN 64 + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h b/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h index 87e297f1d..76b162020 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/packet_io_types.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_packet_io +/** @addtogroup odp_packet_io * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/packet_types.h b/platform/linux-generic/include-abi/odp/api/abi/packet_types.h index 4da9332ba..90b2af107 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/packet_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/packet_types.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_packet +/** @addtogroup odp_packet * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/pool_types.h b/platform/linux-generic/include-abi/odp/api/abi/pool_types.h index 0c0dbc97f..77b0ff638 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/pool_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/pool_types.h @@ -19,7 +19,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_pool +/** @addtogroup odp_pool * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h b/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h index 2ebddce62..d9db29188 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h +++ 
b/platform/linux-generic/include-abi/odp/api/abi/proto_stats_types.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_proto_stats +/** @addtogroup odp_proto_stats * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/queue_types.h b/platform/linux-generic/include-abi/odp/api/abi/queue_types.h index 1a56c7682..4eff762bd 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/queue_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/queue_types.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_queue +/** @addtogroup odp_queue * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h b/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h index 551d49e30..bfcb9ebe5 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h +++ b/platform/linux-generic/include-abi/odp/api/abi/shared_memory.h @@ -21,7 +21,7 @@ extern "C" { #include <odp/api/std_types.h> #include <odp/api/plat/strong_types.h> -/** @ingroup odp_shared_memory +/** @addtogroup odp_shared_memory * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/stash_types.h b/platform/linux-generic/include-abi/odp/api/abi/stash_types.h index 960f3ef17..2a4115886 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/stash_types.h +++ b/platform/linux-generic/include-abi/odp/api/abi/stash_types.h @@ -17,7 +17,7 @@ extern "C" { #include <odp/api/plat/strong_types.h> -/** @ingroup odp_stash +/** @addtogroup odp_stash * @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/sync.h b/platform/linux-generic/include-abi/odp/api/abi/sync.h index 236e92c8c..276514b58 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/sync.h +++ b/platform/linux-generic/include-abi/odp/api/abi/sync.h @@ -17,7 +17,7 @@ extern "C" { #endif -/** @ingroup odp_barrier +/** @addtogroup odp_barrier 
* @{ */ diff --git a/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h b/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h index 6543a1cf7..b621bea7e 100644 --- a/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h +++ b/platform/linux-generic/include-abi/odp/api/abi/ticketlock.h @@ -19,7 +19,7 @@ extern "C" { #include <odp/api/atomic.h> -/** @ingroup odp_locks +/** @addtogroup odp_locks * @{ */ diff --git a/platform/linux-generic/include/odp/api/plat/event_inline_types.h b/platform/linux-generic/include/odp/api/plat/event_inline_types.h index caa075871..cbf01588f 100644 --- a/platform/linux-generic/include/odp/api/plat/event_inline_types.h +++ b/platform/linux-generic/include/odp/api/plat/event_inline_types.h @@ -28,6 +28,7 @@ extern "C" { typedef struct _odp_event_inline_offset_t { uint16_t event_type; uint16_t base_data; + uint16_t subtype; uint16_t flow_id; uint16_t pool; diff --git a/platform/linux-generic/include/odp/api/plat/event_inlines.h b/platform/linux-generic/include/odp/api/plat/event_inlines.h index b68ced244..990575166 100644 --- a/platform/linux-generic/include/odp/api/plat/event_inlines.h +++ b/platform/linux-generic/include/odp/api/plat/event_inlines.h @@ -49,6 +49,15 @@ static inline odp_event_type_t __odp_event_type_get(odp_event_t event) return (odp_event_type_t)type; } +static inline odp_event_subtype_t __odp_event_subtype_get(odp_event_t event) +{ + int8_t type; + + type = _odp_event_hdr_field(event, int8_t, subtype); + + return (odp_event_subtype_t)type; +} + _ODP_INLINE odp_event_type_t odp_event_type(odp_event_t event) { return __odp_event_type_get(event); @@ -90,6 +99,7 @@ _ODP_INLINE void *odp_event_user_area(odp_event_t event) switch (type) { case ODP_EVENT_BUFFER: + case ODP_EVENT_ML_COMPL: case ODP_EVENT_DMA_COMPL: return _odp_buffer_get((odp_buffer_t)event, void *, uarea_addr); case ODP_EVENT_PACKET: @@ -112,6 +122,7 @@ _ODP_INLINE void *odp_event_user_area_and_flag(odp_event_t event, int *flag) switch 
(type) { case ODP_EVENT_BUFFER: case ODP_EVENT_DMA_COMPL: + case ODP_EVENT_ML_COMPL: *flag = -1; return _odp_buffer_get((odp_buffer_t)event, void *, uarea_addr); case ODP_EVENT_PACKET: @@ -145,10 +156,7 @@ _ODP_INLINE void *odp_event_user_area_and_flag(odp_event_t event, int *flag) _ODP_INLINE odp_event_subtype_t odp_event_subtype(odp_event_t event) { - if (__odp_event_type_get(event) != ODP_EVENT_PACKET) - return ODP_EVENT_NO_SUBTYPE; - - return (odp_event_subtype_t)_odp_pkt_get((odp_packet_t)event, int8_t, subtype); + return __odp_event_subtype_get(event); } _ODP_INLINE odp_event_type_t odp_event_types(odp_event_t event, @@ -156,9 +164,7 @@ _ODP_INLINE odp_event_type_t odp_event_types(odp_event_t event, { odp_event_type_t event_type = __odp_event_type_get(event); - *subtype = event_type == ODP_EVENT_PACKET ? - (odp_event_subtype_t)_odp_pkt_get((odp_packet_t)event, int8_t, subtype) : - ODP_EVENT_NO_SUBTYPE; + *subtype = __odp_event_subtype_get(event); return event_type; } @@ -172,11 +178,8 @@ _ODP_INLINE void odp_event_types_multi(const odp_event_t event[], odp_event_type if (subtype == NULL) return; - for (int i = 0; i < num; i++) { - subtype[i] = (type[i] == ODP_EVENT_PACKET) ? 
- (odp_event_subtype_t)_odp_pkt_get((odp_packet_t)event[i], int8_t, - subtype) : ODP_EVENT_NO_SUBTYPE; - } + for (int i = 0; i < num; i++) + subtype[i] = __odp_event_subtype_get(event[i]); } _ODP_INLINE uint32_t odp_event_flow_id(odp_event_t event) diff --git a/platform/linux-generic/include/odp/api/plat/packet_inline_types.h b/platform/linux-generic/include/odp/api/plat/packet_inline_types.h index eb20ca7d7..691965624 100644 --- a/platform/linux-generic/include/odp/api/plat/packet_inline_types.h +++ b/platform/linux-generic/include/odp/api/plat/packet_inline_types.h @@ -50,7 +50,6 @@ typedef struct _odp_packet_inline_offset_t { uint16_t timestamp; uint16_t input_flags; uint16_t flags; - uint16_t subtype; uint16_t cls_mark; uint16_t ipsec_ctx; uint16_t crypto_op; diff --git a/platform/linux-generic/include/odp/api/plat/packet_inlines.h b/platform/linux-generic/include/odp/api/plat/packet_inlines.h index 960dbc5fc..2dd74fa29 100644 --- a/platform/linux-generic/include/odp/api/plat/packet_inlines.h +++ b/platform/linux-generic/include/odp/api/plat/packet_inlines.h @@ -24,6 +24,7 @@ #include <odp/api/plat/packet_io_inlines.h> #include <odp/api/plat/packet_inline_types.h> #include <odp/api/plat/pool_inline_types.h> +#include <odp/api/plat/event_inline_types.h> #include <stdint.h> #include <string.h> @@ -571,7 +572,8 @@ _ODP_INLINE void odp_packet_to_event_multi(const odp_packet_t pkt[], _ODP_INLINE odp_event_subtype_t odp_packet_subtype(odp_packet_t pkt) { - return (odp_event_subtype_t)_odp_pkt_get(pkt, int8_t, subtype); + return (odp_event_subtype_t)_odp_event_hdr_field((odp_event_t)(uintptr_t)pkt, + int8_t, subtype); } _ODP_INLINE odp_packet_tx_compl_t odp_packet_tx_compl_from_event(odp_event_t ev) diff --git a/platform/linux-generic/include/odp_bitset.h b/platform/linux-generic/include/odp_bitset.h index 0931fb337..e55b9ef1a 100644 --- a/platform/linux-generic/include/odp_bitset.h +++ b/platform/linux-generic/include/odp_bitset.h @@ -32,12 +32,6 @@ static inline 
uint32_t bitset_ffs(bitset_t b) return __builtin_ffsl(b); } -/* Load-exclusive with memory ordering */ -static inline bitset_t bitset_monitor(bitset_t *bs, int mo) -{ - return monitor32(bs, mo); -} - #elif ATOM_BITSET_SIZE <= 64 /* Return first-bit-set with StdC ffs() semantics */ @@ -46,12 +40,6 @@ static inline uint32_t bitset_ffs(bitset_t b) return __builtin_ffsll(b); } -/* Load-exclusive with memory ordering */ -static inline bitset_t bitset_monitor(bitset_t *bs, int mo) -{ - return monitor64(bs, mo); -} - #elif ATOM_BITSET_SIZE <= 128 /* Return first-bit-set with StdC ffs() semantics */ @@ -65,12 +53,6 @@ static inline uint32_t bitset_ffs(bitset_t b) return 0; } -/* Load-exclusive with memory ordering */ -static inline bitset_t bitset_monitor(bitset_t *bs, int mo) -{ - return monitor128(bs, mo); -} - #else #error Unsupported size of bit sets (ATOM_BITSET_SIZE) #endif diff --git a/platform/linux-generic/include/odp_buffer_internal.h b/platform/linux-generic/include/odp_buffer_internal.h index 1cececb99..676b9f116 100644 --- a/platform/linux-generic/include/odp_buffer_internal.h +++ b/platform/linux-generic/include/odp_buffer_internal.h @@ -53,6 +53,13 @@ static inline odp_buffer_hdr_t *_odp_buf_hdr(odp_buffer_t buf) return (odp_buffer_hdr_t *)(uintptr_t)buf; } +static inline void _odp_buffer_subtype_set(odp_buffer_t buffer, int subtype) +{ + odp_buffer_hdr_t *buf_hdr = _odp_buf_hdr(buffer); + + buf_hdr->event_hdr.subtype = subtype; +} + #ifdef __cplusplus } #endif diff --git a/platform/linux-generic/include/odp_config_internal.h b/platform/linux-generic/include/odp_config_internal.h index 8fd8c4be7..89d89936c 100644 --- a/platform/linux-generic/include/odp_config_internal.h +++ b/platform/linux-generic/include/odp_config_internal.h @@ -199,6 +199,15 @@ extern "C" { /* Enable timer scan performance benchmark. This works with inline enabled. */ #define CONFIG_TIMER_PROFILE_INLINE 0 +/* Maximum number of ML models that can be created or loaded. 
*/ +#define CONFIG_ML_MAX_MODELS 4 + +/* Maximum number of inputs for a ML model. */ +#define CONFIG_ML_MAX_INPUTS 4 + +/* Maximum number of outputs for a ML model. */ +#define CONFIG_ML_MAX_OUTPUTS 4 + #ifdef __cplusplus } #endif diff --git a/platform/linux-generic/include/odp_event_internal.h b/platform/linux-generic/include/odp_event_internal.h index d9957e530..1b85d64fc 100644 --- a/platform/linux-generic/include/odp_event_internal.h +++ b/platform/linux-generic/include/odp_event_internal.h @@ -65,6 +65,9 @@ typedef struct _odp_event_hdr_t { /* Event type. Maybe different than pool type (crypto compl event) */ int8_t event_type; + /* Event subtype */ + int8_t subtype; + /* Event flow id */ uint8_t flow_id; diff --git a/platform/linux-generic/include/odp_global_data.h b/platform/linux-generic/include/odp_global_data.h index f883cefd9..2a87192df 100644 --- a/platform/linux-generic/include/odp_global_data.h +++ b/platform/linux-generic/include/odp_global_data.h @@ -21,7 +21,6 @@ extern "C" { #include <odp_config_internal.h> #include <libconfig.h> -#include <pthread.h> #include <stdint.h> #include <sys/types.h> @@ -81,6 +80,7 @@ typedef struct odp_global_data_ro_t { uint8_t ipsec; uint8_t stash; uint8_t traffic_mngr; + uint8_t ml; } disable; diff --git a/platform/linux-generic/include/odp_init_internal.h b/platform/linux-generic/include/odp_init_internal.h index 24e8346ad..ca5d68c87 100644 --- a/platform/linux-generic/include/odp_init_internal.h +++ b/platform/linux-generic/include/odp_init_internal.h @@ -105,6 +105,9 @@ int _odp_stash_term_global(void); int _odp_dma_init_global(void); int _odp_dma_term_global(void); +int _odp_ml_init_global(void); +int _odp_ml_term_global(void); + #ifdef __cplusplus } #endif diff --git a/platform/linux-generic/include/odp_ipsec_internal.h b/platform/linux-generic/include/odp_ipsec_internal.h index 571796691..b97aa7031 100644 --- a/platform/linux-generic/include/odp_ipsec_internal.h +++ 
b/platform/linux-generic/include/odp_ipsec_internal.h @@ -30,7 +30,7 @@ extern "C" { #include <protocols/ip.h> #include <stdint.h> -/** @ingroup odp_ipsec +/** @addtogroup odp_ipsec * @{ */ diff --git a/platform/linux-generic/include/odp_macros_internal.h b/platform/linux-generic/include/odp_macros_internal.h index abf017aec..047e550f9 100644 --- a/platform/linux-generic/include/odp_macros_internal.h +++ b/platform/linux-generic/include/odp_macros_internal.h @@ -1,5 +1,5 @@ /* Copyright (c) 2014-2018, Linaro Limited - * Copyright (c) 2022, Nokia + * Copyright (c) 2022-2024, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause @@ -26,19 +26,35 @@ extern "C" { #define _ODP_MIN(a, b) \ __extension__ ({ \ - __typeof__(a) tmp_a = (a); \ - __typeof__(b) tmp_b = (b); \ - tmp_a < tmp_b ? tmp_a : tmp_b; \ + __typeof__(a) min_a = (a); \ + __typeof__(b) min_b = (b); \ + min_a < min_b ? min_a : min_b; \ }) #define _ODP_MAX(a, b) \ __extension__ ({ \ - __typeof__(a) tmp_a = (a); \ - __typeof__(b) tmp_b = (b); \ - tmp_a > tmp_b ? tmp_a : tmp_b; \ + __typeof__(a) max_a = (a); \ + __typeof__(b) max_b = (b); \ + max_a > max_b ? max_a : max_b; \ }) -#define _ODP_MAX3(a, b, c) (_ODP_MAX(_ODP_MAX((a), (b)), (c))) +#define _ODP_MIN3(a, b, c) \ +__extension__ ({ \ + __typeof__(a) min3_a = (a); \ + __typeof__(b) min3_b = (b); \ + __typeof__(c) min3_c = (c); \ + (min3_a < min3_b ? (min3_a < min3_c ? min3_a : min3_c) : \ + (min3_b < min3_c ? min3_b : min3_c)); \ +}) + +#define _ODP_MAX3(a, b, c) \ +__extension__ ({ \ + __typeof__(a) max3_a = (a); \ + __typeof__(b) max3_b = (b); \ + __typeof__(c) max3_c = (c); \ + (max3_a > max3_b ? (max3_a > max3_c ? max3_a : max3_c) : \ + (max3_b > max3_c ? max3_b : max3_c)); \ +}) /* Macros to calculate ODP_ROUNDUP_POWER2_U32() in five rounds of shift * and OR operations. 
*/ diff --git a/platform/linux-generic/include/odp_ml_fp16.h b/platform/linux-generic/include/odp_ml_fp16.h new file mode 100644 index 000000000..476028cb4 --- /dev/null +++ b/platform/linux-generic/include/odp_ml_fp16.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#ifndef ODP_ML_FP16_H_ +#define ODP_ML_FP16_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +uint16_t _odp_float32_to_float16(float x); +float _odp_float16_to_float32(uint16_t f16); +uint16_t _odp_float32_to_bfloat16(float x); +float _odp_bfloat16_to_float32(uint16_t f16); + +#ifdef __cplusplus +} +#endif + +#endif /* ODP_ML_FP16_H_ */ diff --git a/platform/linux-generic/include/odp_packet_internal.h b/platform/linux-generic/include/odp_packet_internal.h index 41a44b83c..0b03aa211 100644 --- a/platform/linux-generic/include/odp_packet_internal.h +++ b/platform/linux-generic/include/odp_packet_internal.h @@ -107,8 +107,8 @@ typedef struct ODP_ALIGNED_CACHE odp_packet_hdr_t { uint16_t tailroom; - /* Event subtype */ - int8_t subtype; + /* Classifier handle index */ + uint16_t cos; /* Used as classifier destination queue, in IPsec inline input processing and as Tx * completion event queue. 
*/ @@ -134,9 +134,6 @@ typedef struct ODP_ALIGNED_CACHE odp_packet_hdr_t { /* Classifier mark */ uint16_t cls_mark; - /* Classifier handle index */ - uint16_t cos; - /* Offset to payload start */ uint16_t payload_offset; @@ -214,9 +211,11 @@ static inline odp_packet_hdr_t *packet_last_seg(odp_packet_hdr_t *hdr) return hdr; } -static inline void packet_subtype_set(odp_packet_t pkt, int ev) +static inline void packet_subtype_set(odp_packet_t pkt, int subtype) { - packet_hdr(pkt)->subtype = ev; + odp_packet_hdr_t *pkt_hdr = packet_hdr(pkt); + + pkt_hdr->event_hdr.subtype = subtype; } /** @@ -258,8 +257,8 @@ static inline void packet_init(odp_packet_hdr_t *pkt_hdr, uint32_t len) pkt_hdr->headroom = pool->headroom; pkt_hdr->tailroom = pool->seg_len - seg_len + pool->tailroom; - if (odp_unlikely(pkt_hdr->subtype != ODP_EVENT_PACKET_BASIC)) - pkt_hdr->subtype = ODP_EVENT_PACKET_BASIC; + if (odp_unlikely(pkt_hdr->event_hdr.subtype != ODP_EVENT_PACKET_BASIC)) + pkt_hdr->event_hdr.subtype = ODP_EVENT_PACKET_BASIC; pkt_hdr->input = ODP_PKTIO_INVALID; } @@ -304,7 +303,7 @@ static inline void _odp_packet_copy_md(odp_packet_hdr_t *dst_hdr, odp_packet_hdr_t *src_hdr, odp_bool_t uarea_copy) { - int8_t subtype = src_hdr->subtype; + int8_t subtype = src_hdr->event_hdr.subtype; /* Lengths and segmentation data are not copied: * .frame_len @@ -316,7 +315,7 @@ static inline void _odp_packet_copy_md(odp_packet_hdr_t *dst_hdr, * .seg_count */ dst_hdr->input = src_hdr->input; - dst_hdr->subtype = subtype; + dst_hdr->event_hdr.subtype = subtype; dst_hdr->dst_queue = src_hdr->dst_queue; dst_hdr->cos = src_hdr->cos; dst_hdr->cls_mark = src_hdr->cls_mark; diff --git a/platform/linux-generic/include/odp_timer_internal.h b/platform/linux-generic/include/odp_timer_internal.h index 01ee4a0f3..38192d917 100644 --- a/platform/linux-generic/include/odp_timer_internal.h +++ b/platform/linux-generic/include/odp_timer_internal.h @@ -22,6 +22,12 @@ #include <odp_global_data.h> #include 
<odp_pool_internal.h> +/* + * Use as the argument to timer_run() to force a scan and to ignore rate + * limit. + */ +#define TIMER_SCAN_FORCE INT32_MAX + /** * Internal Timeout header */ @@ -48,13 +54,15 @@ ODP_STATIC_ASSERT(sizeof(odp_timeout_hdr_t) <= ODP_CACHE_LINE_SIZE, /* A larger decrement value should be used after receiving events compared to * an 'empty' call. */ -void _odp_timer_run_inline(int dec); +uint64_t _odp_timer_run_inline(int dec); /* Static inline wrapper to minimize modification of schedulers. */ -static inline void timer_run(int dec) +static inline uint64_t timer_run(int dec) { if (odp_global_rw->inline_timers) - _odp_timer_run_inline(dec); + return _odp_timer_run_inline(dec); + + return UINT64_MAX; } #endif diff --git a/platform/linux-generic/libodp-linux.pc.in b/platform/linux-generic/libodp-linux.pc.in index 05ba5b9d6..62589c1a3 100644 --- a/platform/linux-generic/libodp-linux.pc.in +++ b/platform/linux-generic/libodp-linux.pc.in @@ -8,5 +8,5 @@ Description: The ODP packet processing engine Version: @PKGCONFIG_VERSION@ Requires.private: libconfig@AARCH64CRYPTO_PKG@ Libs: -L${libdir} -l@ODP_LIB_NAME@ @ATOMIC_LIBS_NON_ABI_COMPAT@ -Libs.private: @OPENSSL_STATIC_LIBS@ @DPDK_LIBS@ @PCAP_LIBS@ @PTHREAD_LIBS@ @TIMER_LIBS@ @LIBXDP_LIBS@ -lpthread @ATOMIC_LIBS_ABI_COMPAT@ @IPSEC_MB_LIBS@ +Libs.private: @OPENSSL_STATIC_LIBS@ @DPDK_LIBS@ @PCAP_LIBS@ @PTHREAD_LIBS@ @TIMER_LIBS@ @LIBXDP_LIBS@ -lpthread @ATOMIC_LIBS_ABI_COMPAT@ @IPSEC_MB_LIBS@ @ORT_LIBS@ Cflags: -I${includedir} diff --git a/platform/linux-generic/m4/configure.m4 b/platform/linux-generic/m4/configure.m4 index 61b65540f..3306849d2 100644 --- a/platform/linux-generic/m4/configure.m4 +++ b/platform/linux-generic/m4/configure.m4 @@ -31,10 +31,11 @@ m4_include([platform/linux-generic/m4/odp_pcapng.m4]) m4_include([platform/linux-generic/m4/odp_dpdk.m4]) m4_include([platform/linux-generic/m4/odp_wfe.m4]) m4_include([platform/linux-generic/m4/odp_xdp.m4]) 
+m4_include([platform/linux-generic/m4/odp_ml.m4]) ODP_EVENT_VALIDATION ODP_SCHEDULER -AS_VAR_APPEND([PLAT_DEP_LIBS], ["${ATOMIC_LIBS} ${AARCH64CRYPTO_LIBS} ${LIBCONFIG_LIBS} ${OPENSSL_LIBS} ${IPSEC_MB_LIBS} ${DPDK_LIBS_LT} ${LIBCLI_LIBS} ${LIBXDP_LIBS}"]) +AS_VAR_APPEND([PLAT_DEP_LIBS], ["${ATOMIC_LIBS} ${AARCH64CRYPTO_LIBS} ${LIBCONFIG_LIBS} ${OPENSSL_LIBS} ${IPSEC_MB_LIBS} ${DPDK_LIBS_LT} ${LIBCLI_LIBS} ${LIBXDP_LIBS} ${ORT_LIBS}"]) # Add text to the end of configure with platform specific settings. # Make sure it's aligned same as other lines in configure.ac. @@ -46,6 +47,7 @@ AS_VAR_APPEND([PLAT_CFG_TEXT], [" pcap: ${have_pcap} pcapng: ${have_pcapng} wfe_locks: ${use_wfe_locks} + ml_support: ${ml_support} default_config_path: ${default_config_path}"]) # Ignore Clang specific errors about fields with variable sized type not at the @@ -59,6 +61,8 @@ AM_CONDITIONAL([PLATFORM_IS_LINUX_GENERIC], AC_CONFIG_FILES([platform/linux-generic/Makefile platform/linux-generic/libodp-linux.pc platform/linux-generic/dumpconfig/Makefile + platform/linux-generic/example/Makefile + platform/linux-generic/example/ml/Makefile platform/linux-generic/test/Makefile platform/linux-generic/test/example/Makefile platform/linux-generic/test/example/classifier/Makefile @@ -73,6 +77,7 @@ AC_CONFIG_FILES([platform/linux-generic/Makefile platform/linux-generic/test/example/switch/Makefile platform/linux-generic/test/validation/api/shmem/Makefile platform/linux-generic/test/validation/api/pktio/Makefile + platform/linux-generic/test/validation/api/ml/Makefile platform/linux-generic/test/performance/Makefile platform/linux-generic/test/performance/dmafwd/Makefile platform/linux-generic/test/pktio_ipc/Makefile]) diff --git a/platform/linux-generic/m4/odp_libconfig.m4 b/platform/linux-generic/m4/odp_libconfig.m4 index a6d19f661..77095e0fe 100644 --- a/platform/linux-generic/m4/odp_libconfig.m4 +++ b/platform/linux-generic/m4/odp_libconfig.m4 @@ -3,7 +3,7 @@ 
########################################################################## m4_define([_odp_config_version_generation], [0]) m4_define([_odp_config_version_major], [1]) -m4_define([_odp_config_version_minor], [27]) +m4_define([_odp_config_version_minor], [28]) m4_define([_odp_config_version], [_odp_config_version_generation._odp_config_version_major._odp_config_version_minor]) diff --git a/platform/linux-generic/m4/odp_ml.m4 b/platform/linux-generic/m4/odp_ml.m4 new file mode 100644 index 000000000..a7b9a4fd6 --- /dev/null +++ b/platform/linux-generic/m4/odp_ml.m4 @@ -0,0 +1,46 @@ +########################################################################## +# Onnxruntime library path and name +########################################################################## +# Optional configure parameter for a non-standard install prefix of onnxruntime +AC_ARG_WITH([ort-path], + [AS_HELP_STRING([--with-ort-path=DIR], + [path to onnxruntime libs and headers [default=system]])], + [ort_path_given=yes + ORT_CPPFLAGS="-I$withval/include" + ORT_LIBS="-L$withval/lib" + ORT_RPATH="-R$withval/lib"], + []) + +########################################################################## +# Save and set temporary compilation flags +########################################################################## +OLD_CPPFLAGS=$CPPFLAGS +OLD_LIBS=$LIBS +CPPFLAGS="$ORT_CPPFLAGS $CPPFLAGS" +LIBS="$ORT_LIBS $LIBS" + +######################################################################### +# If ort is available, enable ML API +######################################################################### +ml_support=no +AC_CHECK_HEADERS([onnxruntime_c_api.h], + [AC_CHECK_LIB(onnxruntime, OrtGetApiBase, [ml_support=yes], [], [])], + [AS_IF([test "x$ort_path_given" = "xyes"], + [AC_MSG_ERROR([ort not found at the specified path (--with-ort-path)])])]) + +AS_IF([test "x$ml_support" != "xno"], + [ORT_LIBS="$ORT_RPATH $ORT_LIBS -lonnxruntime -lm"], + [ORT_CPPFLAGS="" ORT_LIBS="-lm"]) + 
+AC_CONFIG_COMMANDS_PRE([dnl +AM_CONDITIONAL([WITH_ML], [test x$ml_support = xyes ]) +]) + +########################################################################## +# Restore old saved variables +########################################################################## +LIBS=$OLD_LIBS +CPPFLAGS=$OLD_CPPFLAGS + +AC_SUBST([ORT_CPPFLAGS]) +AC_SUBST([ORT_LIBS]) diff --git a/platform/linux-generic/odp_classification.c b/platform/linux-generic/odp_classification.c index 0e6eea3ae..016a8f0c5 100644 --- a/platform/linux-generic/odp_classification.c +++ b/platform/linux-generic/odp_classification.c @@ -299,10 +299,11 @@ odp_cos_t odp_cls_cos_create(const char *name, const odp_cls_cos_param_t *param_ param.hash_proto); tbl_index = i * CLS_COS_QUEUE_MAX; for (j = 0; j < param.num_queue; j++) { - char name[ODP_QUEUE_NAME_LEN]; + char hq_name[ODP_QUEUE_NAME_LEN]; - snprintf(name, sizeof(name), "_odp_cos_hq_%u_%u", i, j); - queue = odp_queue_create(name, &cos->queue_param); + snprintf(hq_name, sizeof(hq_name), "_odp_cos_hq_%u_%u", + i, j); + queue = odp_queue_create(hq_name, &cos->queue_param); if (queue == ODP_QUEUE_INVALID) { /* unwind the queues */ _cls_queue_unwind(tbl_index, j); diff --git a/platform/linux-generic/odp_cpumask.c b/platform/linux-generic/odp_cpumask.c index d0a9953f7..7d7575f51 100644 --- a/platform/linux-generic/odp_cpumask.c +++ b/platform/linux-generic/odp_cpumask.c @@ -7,7 +7,6 @@ #include <odp_posix_extensions.h> #include <sched.h> -#include <pthread.h> #include <odp/api/cpumask.h> #include <odp/api/init.h> diff --git a/platform/linux-generic/odp_cpumask_task.c b/platform/linux-generic/odp_cpumask_task.c index 0807e231e..a579b2e7e 100644 --- a/platform/linux-generic/odp_cpumask_task.c +++ b/platform/linux-generic/odp_cpumask_task.c @@ -12,7 +12,6 @@ #include <odp_debug_internal.h> #include <odp_global_data.h> -#include <pthread.h> #include <sched.h> int odp_cpumask_default_worker(odp_cpumask_t *mask, int max_num) diff --git 
a/platform/linux-generic/odp_event.c b/platform/linux-generic/odp_event.c index e15cb1c50..f3644f02b 100644 --- a/platform/linux-generic/odp_event.c +++ b/platform/linux-generic/odp_event.c @@ -12,6 +12,7 @@ #include <odp/api/packet.h> #include <odp/api/timer.h> #include <odp/api/pool.h> +#include <odp/api/ml.h> #include <odp_buffer_internal.h> #include <odp_ipsec_internal.h> @@ -36,6 +37,7 @@ const _odp_event_inline_offset_t _odp_event_inline_offset ODP_ALIGNED_CACHE = { .event_type = offsetof(_odp_event_hdr_t, event_type), .base_data = offsetof(_odp_event_hdr_t, base_data), + .subtype = offsetof(_odp_event_hdr_t, subtype), .flow_id = offsetof(_odp_event_hdr_t, flow_id), .pool = offsetof(_odp_event_hdr_t, pool), }; @@ -68,6 +70,9 @@ static inline void event_free(odp_event_t event, _odp_ev_id_t id) case ODP_EVENT_DMA_COMPL: odp_dma_compl_free(odp_dma_compl_from_event(event)); break; + case ODP_EVENT_ML_COMPL: + odp_ml_compl_free(odp_ml_compl_from_event(event)); + break; default: _ODP_ABORT("Invalid event type: %d\n", odp_event_type(event)); } @@ -116,6 +121,8 @@ int odp_event_is_valid(odp_event_t event) /* Fall through */ case ODP_EVENT_DMA_COMPL: /* Fall through */ + case ODP_EVENT_ML_COMPL: + /* Fall through */ case ODP_EVENT_PACKET_TX_COMPL: break; default: diff --git a/platform/linux-generic/odp_init.c b/platform/linux-generic/odp_init.c index 05b693c94..795252df1 100644 --- a/platform/linux-generic/odp_init.c +++ b/platform/linux-generic/odp_init.c @@ -51,6 +51,7 @@ enum init_stage { IPSEC_SAD_INIT, IPSEC_INIT, DMA_INIT, + ML_INIT, ALL_INIT /* All init stages completed */ }; @@ -95,6 +96,7 @@ static void disable_features(odp_global_data_ro_t *global_ro, global_ro->disable.traffic_mngr = init_param->not_used.feat.tm; global_ro->disable.compress = init_param->not_used.feat.compress; + global_ro->disable.ml = init_param->not_used.feat.ml; } void odp_init_param_init(odp_init_t *param) @@ -145,6 +147,13 @@ static int term_global(enum init_stage stage) switch 
(stage) { case ALL_INIT: + case ML_INIT: + if (_odp_ml_term_global()) { + _ODP_ERR("ODP ML term failed.\n"); + rc = -1; + } + /* Fall through */ + case DMA_INIT: if (_odp_dma_term_global()) { _ODP_ERR("ODP DMA term failed.\n"); @@ -509,6 +518,12 @@ int odp_init_global(odp_instance_t *instance, } stage = DMA_INIT; + if (_odp_ml_init_global()) { + _ODP_ERR("ODP ML init failed.\n"); + goto init_failed; + } + stage = ML_INIT; + *instance = (odp_instance_t)odp_global_ro.main_pid; return 0; diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index 8c97a0f55..ee402b935 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -2180,7 +2180,7 @@ finish: int odp_ipsec_in(const odp_packet_t pkt_in[], int num_in, odp_packet_t pkt_out[], int *num_out, const odp_ipsec_in_param_t *param) { - int max_out = _ODP_MIN(_ODP_MIN(num_in, *num_out), MAX_BURST), num_crypto; + int max_out = _ODP_MIN3(num_in, *num_out, MAX_BURST), num_crypto; odp_packet_t crypto_pkts[MAX_BURST]; odp_crypto_packet_op_param_t crypto_param[MAX_BURST]; ipsec_op_t ops[MAX_BURST], *crypto_ops[MAX_BURST]; @@ -2288,7 +2288,7 @@ finish: int odp_ipsec_out(const odp_packet_t pkt_in[], int num_in, odp_packet_t pkt_out[], int *num_out, const odp_ipsec_out_param_t *param) { - int max_out = _ODP_MIN(_ODP_MIN(num_in, *num_out), MAX_BURST), num_crypto; + int max_out = _ODP_MIN3(num_in, *num_out, MAX_BURST), num_crypto; odp_packet_t crypto_pkts[MAX_BURST]; odp_crypto_packet_op_param_t crypto_param[MAX_BURST]; ipsec_op_t ops[MAX_BURST], *crypto_ops[MAX_BURST]; diff --git a/platform/linux-generic/odp_ishmpool.c b/platform/linux-generic/odp_ishmpool.c index 9b6340d7e..89ec10695 100644 --- a/platform/linux-generic/odp_ishmpool.c +++ b/platform/linux-generic/odp_ishmpool.c @@ -135,8 +135,8 @@ static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr) static inline void remove_from_list(pool_t *bpool, uint8_t order, bblock_t *bblock) { - bblock_t *curr; /* 
current bblock (when parsing list) */ - bblock_t *prev; /* previous bblock (when parsing list) */ + bblock_t *curr; + bblock_t *prev = NULL; curr = bpool->ctrl.free_heads[order]; if (!curr) diff --git a/platform/linux-generic/odp_ml.c b/platform/linux-generic/odp_ml.c new file mode 100644 index 000000000..6ab9e7177 --- /dev/null +++ b/platform/linux-generic/odp_ml.c @@ -0,0 +1,2646 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <odp/autoheader_external.h> + +#include <odp/api/atomic.h> +#include <odp/api/buffer.h> +#include <odp/api/event.h> +#include <odp/api/hints.h> +#include <odp/api/ml.h> +#include <odp/api/pool.h> +#include <odp/api/queue.h> +#include <odp/api/shared_memory.h> +#include <odp/api/std_types.h> +#include <odp/api/ticketlock.h> + +#include <odp/api/plat/event_inline_types.h> +#include <odp/api/plat/strong_types.h> + +#include <odp_buffer_internal.h> +#include <odp_config_internal.h> +#include <odp_debug_internal.h> +#include <odp_global_data.h> +#include <odp_init_internal.h> +#include <odp_libconfig_internal.h> +#include <odp_macros_internal.h> +#include <odp_pool_internal.h> + +#include <onnxruntime_c_api.h> + +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#define ML_MAX_IO_SEGS UINT32_MAX +#define ML_MAX_COMPL_ID 32 +#define ML_MAX_CONFIG_STR_LEN 65 +#define ML_MAX_MODEL_SIZE (1024 * 1024 * 1024) +#define ML_MAX_MODELS_CREATED CONFIG_ML_MAX_MODELS +#define ML_MAX_MODELS_LOADED CONFIG_ML_MAX_MODELS + +/* Error codes */ +enum { + /* Feature not supported */ + ML_FEATURE_NOT_SUPPORTED = 1, + + /* Model is not created */ + ML_NOT_CREATED, + + /* Model was not loaded */ + ML_NOT_LOADED, + + /* Model has already loaded */ + ML_LOADED, + + /* Bad input */ + ML_BAD_INPUT, + + /* Fail from underlying library onnxruntime */ + ML_LIB_FAILED, + + /* Bad output */ + ML_BAD_OUTPUT, + + /* Bad handle */ + ML_BAD_HDL +}; + +typedef struct ort_run_opts_t { + int 
enable_profiling; + + ExecutionMode execution_mode; + + int inter_op_num_threads; + + int intra_op_num_threads; + + GraphOptimizationLevel graph_opt_level; + + char opt_model_filepath[ML_MAX_CONFIG_STR_LEN]; +} ort_run_opts_t; + +typedef struct ml_input_t { + /* Combined input start address */ + void *addr; + /* Data size in bytes */ + uint64_t size; +} ml_input_t; + +/* Onnxruntime model info */ +typedef struct ml_model_t { + /* Guards state, which must be accessed atomically */ + odp_ticketlock_t lock; + + enum { + ML_STATE_FREE = 0, /* Not allocated */ + ML_STATE_CREATED, /* Model is created */ + ML_STATE_LOADED, /* Model is loaded */ + ML_STATE_INFERENCING, /* Model is inferencing */ + } state; + + OrtSession *session; + OrtSessionOptions *session_opts; + uint32_t max_compl_id; + odp_atomic_u32_t compl_status[ML_MAX_COMPL_ID]; + + odp_ml_model_info_t info; + odp_ml_input_info_t input_info[CONFIG_ML_MAX_INPUTS]; + uint64_t input_sizes[CONFIG_ML_MAX_INPUTS]; + odp_ml_output_info_t output_info[CONFIG_ML_MAX_OUTPUTS]; + uint64_t output_sizes[CONFIG_ML_MAX_OUTPUTS]; + + struct { + void *user_ptr; + } result[ML_MAX_COMPL_ID]; +} ml_model_t; + +typedef struct ml_global_t { + odp_shm_t shm; + + odp_ml_capability_t capa; + odp_ml_config_t ml_config; + + odp_pool_param_t pool_param; + + const OrtApi *ort_api; + OrtEnv *env; + ort_run_opts_t ort_run_opts; + + ml_model_t models[ML_MAX_MODELS_CREATED]; + +} ml_global_t; + +static ml_global_t *_odp_ml_glb; + +static inline ml_model_t *ml_model_from_handle(odp_ml_model_t model) +{ + return (ml_model_t *)(uintptr_t)model; +} + +int odp_ml_capability(odp_ml_capability_t *capa) +{ + odp_pool_capability_t pool_capa; + + memset(capa, 0, sizeof(odp_ml_capability_t)); + + if (odp_global_ro.disable.ml) { + _ODP_PRINT("ML is disabled\n"); + return 0; + } + + capa->max_model_size = ML_MAX_MODEL_SIZE; + capa->max_models = ML_MAX_MODELS_CREATED; + capa->max_models_loaded = ML_MAX_MODELS_LOADED; + capa->max_compl_id = ML_MAX_COMPL_ID; + 
capa->max_inputs = CONFIG_ML_MAX_INPUTS; + capa->max_outputs = CONFIG_ML_MAX_OUTPUTS; + capa->max_segs_per_input = ML_MAX_IO_SEGS; + capa->max_segs_per_output = ML_MAX_IO_SEGS; + capa->min_input_align = 1; + capa->min_output_align = 1; + + capa->load.compl_mode_mask = ODP_ML_COMPL_MODE_SYNC | + ODP_ML_COMPL_MODE_POLL | + ODP_ML_COMPL_MODE_EVENT; + capa->load.compl_queue_plain = 1; + capa->load.compl_queue_sched = 1; + + capa->run.compl_mode_mask = ODP_ML_COMPL_MODE_SYNC | + ODP_ML_COMPL_MODE_POLL | + ODP_ML_COMPL_MODE_EVENT; + capa->run.compl_queue_plain = 1; + capa->run.compl_queue_sched = 1; + + if (odp_pool_capability(&pool_capa)) { + _ODP_ERR("Pool capability failed\n"); + return -1; + } + + capa->pool.max_pools = pool_capa.buf.max_pools; + capa->pool.max_num = pool_capa.buf.max_num; + capa->pool.max_uarea_size = pool_capa.buf.max_uarea_size; + capa->pool.uarea_persistence = pool_capa.buf.uarea_persistence; + capa->pool.max_cache_size = pool_capa.buf.max_cache_size; + capa->pool.min_cache_size = pool_capa.buf.min_cache_size; + + return 0; +} + +void odp_ml_config_init(odp_ml_config_t *config) +{ + memset(config, 0, sizeof(odp_ml_config_t)); + config->max_models_created = 1; + config->max_models_loaded = 1; +} + +int odp_ml_config(const odp_ml_config_t *config) +{ + if (!config) { + _ODP_ERR("Error: config must not be NULL\n"); + return -1; + } + + if (config->max_model_size == 0 || config->max_models_created == 0 || + config->max_models_loaded == 0) { + _ODP_ERR("Error: max_model_size, max_models_created and max_models_loaded" + " must be bigger than 0\n"); + return -1; + } + + if (config->max_models_loaded > config->max_models_created) { + _ODP_ERR("Error: max_models_loaded %d exceeds max_models_created %d\n", + config->max_models_loaded, config->max_models_created); + return -1; + } + + if (config->max_models_created > ML_MAX_MODELS_CREATED) { + _ODP_ERR("Error: max_models_created %d exceeds maximum number" + " of models that can be created in this driver 
%d\n", + config->max_models_created, ML_MAX_MODELS_CREATED); + return -1; + } + + if (config->max_models_loaded > ML_MAX_MODELS_LOADED) { + _ODP_ERR("Error: max_models_loaded %d exceeds maximum number" + " of models that can be loaded in this driver %d\n", + config->max_models_loaded, ML_MAX_MODELS_LOADED); + return -1; + } + + if (config->max_model_size > ML_MAX_MODEL_SIZE) { + _ODP_ERR("max_model_size %" PRIu64 " exceeds supported maximum model size %d\n", + config->max_model_size, ML_MAX_MODEL_SIZE); + return -1; + } + + _odp_ml_glb->ml_config = *config; + return 0; +} + +void odp_ml_model_param_init(odp_ml_model_param_t *param) +{ + memset(param, 0, sizeof(odp_ml_model_param_t)); +} + +static int check_ortstatus(OrtStatus * const status) +{ + if (status != NULL) { + const char *msg = _odp_ml_glb->ort_api->GetErrorMessage(status); + + _ODP_ERR("%s\n", msg); + _odp_ml_glb->ort_api->ReleaseStatus(status); + return -1; + } + + return 0; +} + +/* Get model input and output count */ +static int get_model_io_count(OrtSession *model, uint32_t *num_inputs, uint32_t *num_outputs) +{ + size_t num = 0; + OrtStatus *status = NULL; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + status = ort_api->SessionGetInputCount(model, &num); + if (check_ortstatus(status)) { + _ODP_ERR("Get model input count failed\n"); + return -1; + } + + *num_inputs = num; + _ODP_DBG("num_inputs: %u\n", *num_inputs); + + status = ort_api->SessionGetOutputCount(model, &num); + if (check_ortstatus(status)) { + _ODP_ERR("Get model output count failed\n"); + return -1; + } + + *num_outputs = num; + _ODP_DBG("num_outputs: %u\n", *num_outputs); + + return 0; +} + +static odp_ml_data_type_t onnx_dtype_to_odp_dtype(ONNXTensorElementDataType onnx_dtype) +{ + switch (onnx_dtype) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: + return ODP_ML_DATA_TYPE_FP32; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: + return ODP_ML_DATA_TYPE_UINT8; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: + return ODP_ML_DATA_TYPE_INT8; + 
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: + return ODP_ML_DATA_TYPE_UINT16; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: + return ODP_ML_DATA_TYPE_INT16; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: + return ODP_ML_DATA_TYPE_INT32; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: + return ODP_ML_DATA_TYPE_UINT32; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: + return ODP_ML_DATA_TYPE_INT64; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: + return ODP_ML_DATA_TYPE_UINT64; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: + return ODP_ML_DATA_TYPE_FP16; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: + return ODP_ML_DATA_TYPE_BFP16; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: + return ODP_ML_DATA_TYPE_FP64; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: + /* Fall through */ + case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: + /* Fall through */ + case ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: + /* Fall through */ + default: + _ODP_ERR("onnx_dtype %d not supported by odp_ml\n", onnx_dtype); + return ODP_ML_DATA_TYPE_NONE; + } +} + +/* Get the size of given odp_ml_data_type_t in bytes */ +static uint32_t size_of_odp_ml_data_type(odp_ml_data_type_t data_type) +{ + switch (data_type) { + case ODP_ML_DATA_TYPE_NONE: + return 0; + + case ODP_ML_DATA_TYPE_INT8: + /* Fall through */ + case ODP_ML_DATA_TYPE_UINT8: + return 1; + + case ODP_ML_DATA_TYPE_INT16: + /* Fall through */ + case ODP_ML_DATA_TYPE_UINT16: + /* Fall through */ + case ODP_ML_DATA_TYPE_FP16: + /* Fall through */ + case ODP_ML_DATA_TYPE_BFP16: + return 2; + + case ODP_ML_DATA_TYPE_INT24: + /* Fall through */ + case ODP_ML_DATA_TYPE_UINT24: + return 3; + + case ODP_ML_DATA_TYPE_INT32: + /* Fall through */ + case ODP_ML_DATA_TYPE_UINT32: + /* Fall through */ + case ODP_ML_DATA_TYPE_FP32: + return 4; + + case ODP_ML_DATA_TYPE_INT64: + /* Fall through */ + case ODP_ML_DATA_TYPE_UINT64: + /* Fall through */ + case ODP_ML_DATA_TYPE_FP64: + return 8; + + default: + return 0; + } +} + +static int get_shape(int64_t dims[], 
odp_ml_shape_info_t *shape) +{ + uint32_t dyn_cnt = 0; + + for (uint32_t i = 0; i < shape->num_dim; i++) { + if (dims[i] == 0) { + _ODP_ERR("Dimension value: %" PRId64 " must be at least 1\n", dims[i]); + return -1; + } else if (dims[i] == -1) { /* Symbolic dimension */ + dyn_cnt++; + shape->dim[i] = ODP_ML_DIM_DYNAMIC; + shape->dim_min[i] = 0; /*unknown*/ + shape->dim_max[i] = 0; /*unknown*/ + } else if (dims[i] > 0 && dims[i] < UINT32_MAX) { + shape->dim[i] = dims[i]; + shape->dim_min[i] = dims[i]; + shape->dim_max[i] = dims[i]; + } else { + _ODP_ERR("Dimension value: %" PRId64 " invalid\n", dims[i]); + return -1; + } + } + + if (dyn_cnt == 0) { + shape->type = ODP_ML_SHAPE_STATIC; + } else if (dyn_cnt == 1) { + shape->type = ODP_ML_SHAPE_BATCH; + } else { + _ODP_ERR("Data shape type not supported by ODP\n"); + return -1; + } + + return 0; +} + +static inline void calculate_model_io_size(const odp_ml_shape_info_t *shape, uint64_t *size) +{ + /* Calculate the data size in bytes of this tensor, 0 for tensors with + * dynamic batch sizes */ + for (size_t i = 0; i < shape->num_dim; i++) { + /* Skip dynamic dimension size */ + if (shape->dim[i] == ODP_ML_DIM_DYNAMIC) { + *size = 0; + break; + } + (*size) *= shape->dim[i]; + } +} + +static int get_model_io_type_shape_size(OrtTypeInfo *type_info, odp_ml_shape_info_t *shape, + odp_ml_data_type_t *data_type, uint32_t *data_type_size, + uint64_t *size) +{ + ONNXTensorElementDataType tensor_type; + const OrtTensorTypeAndShapeInfo *tensor_info; + size_t num_dim = 0; + OrtStatus *status = NULL; + int64_t dims[ODP_ML_MAX_DIMS] = {0}; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + status = ort_api->CastTypeInfoToTensorInfo(type_info, &tensor_info); + if (check_ortstatus(status)) { + _ODP_ERR("CastTypeInfoToTensorInfo failed\n"); + return -1; + } + + status = ort_api->GetTensorElementType(tensor_info, &tensor_type); + if (check_ortstatus(status)) { + _ODP_ERR("GetTensorElementType failed\n"); + return -1; + } + + *data_type 
= onnx_dtype_to_odp_dtype(tensor_type); + if (*data_type == ODP_ML_DATA_TYPE_NONE) /* Type not supported by odp */ + return -1; + + status = ort_api->GetDimensionsCount(tensor_info, &num_dim); + if (check_ortstatus(status)) { + _ODP_ERR("GetDimensionsCount failed\n"); + return -1; + } + + if (num_dim > ODP_ML_MAX_DIMS) { + _ODP_ERR("Number of dimensions: %zu exceeds supported maximum number" + " of dimensions: %d\n", num_dim, ODP_ML_MAX_DIMS); + return -1; + } + shape->num_dim = num_dim; + + status = ort_api->GetDimensions(tensor_info, dims, num_dim); + if (check_ortstatus(status)) { + _ODP_ERR("GetDimensions failed\n"); + return -1; + } + + if (get_shape(dims, shape)) + return -1; + + *data_type_size = size_of_odp_ml_data_type(*data_type); + + *size = *data_type_size; + calculate_model_io_size(shape, size); + + return 0; +} + +/* Get model input and output info */ +static int get_model_io_info(OrtSession *session, ml_model_t *mdl, + const odp_ml_model_param_t *param) +{ + char *name; + OrtTypeInfo *type_info; + const odp_ml_data_format_t *data_format; + OrtStatus *status = NULL; + OrtAllocator *allocator = NULL; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + odp_ml_input_info_t *input_info = mdl->input_info; + odp_ml_output_info_t *output_info = mdl->output_info; + + status = ort_api->GetAllocatorWithDefaultOptions(&allocator); + if (check_ortstatus(status)) { + _ODP_ERR("GetAllocatorWithDefaultOptions failed\n"); + return -1; + } + + /* Retrieve info about input array. 
*/ + memset(input_info, 0, sizeof(mdl->input_info)); + for (uint32_t i = 0; i < mdl->info.num_inputs; i++) { + name = NULL; + status = ort_api->SessionGetInputName(session, i, allocator, &name); + if (check_ortstatus(status)) { + _ODP_ERR("Get %uth input name failed\n", i); + return -1; + } + + strncpy(input_info[i].name, name, ODP_ML_MODEL_IO_NAME_LEN - 1); + input_info[i].name[ODP_ML_MODEL_IO_NAME_LEN - 1] = 0; + + /* Free memory allocated by SessionGetInputName */ + status = ort_api->AllocatorFree(allocator, name); + if (check_ortstatus(status)) { + _ODP_ERR("AllocatorFree %uth input_name failed\n", i); + return -1; + } + + if (param->extra_info.num_inputs) { + data_format = ¶m->extra_info.input_format[i]; + + input_info[i].shape = data_format->shape; + input_info[i].data_type = data_format->data_type; + input_info[i].data_type_size = data_format->data_type_size; + + mdl->input_sizes[i] = input_info[i].data_type_size; + calculate_model_io_size(&data_format->shape, &mdl->input_sizes[i]); + continue; + } + + type_info = NULL; + status = ort_api->SessionGetInputTypeInfo(session, i, &type_info); + if (check_ortstatus(status)) { + _ODP_ERR("SessionGetInputTypeInfo failed\n"); + return -1; + } + + if (get_model_io_type_shape_size(type_info, &input_info[i].shape, + &input_info[i].data_type, + &input_info[i].data_type_size, + &mdl->input_sizes[i])) { + _ODP_ERR("get_model_io_type_shape_size() for input failed\n"); + ort_api->ReleaseTypeInfo(type_info); + return -1; + } + + ort_api->ReleaseTypeInfo(type_info); + } + + /* Retrieve info about output array. 
*/ + memset(output_info, 0, sizeof(mdl->output_info)); + for (uint32_t i = 0; i < mdl->info.num_outputs; i++) { + name = NULL; + status = ort_api->SessionGetOutputName(session, i, allocator, &name); + if (check_ortstatus(status)) { + _ODP_ERR("Get %uth output name failed\n", i); + return -1; + } + + strncpy(output_info[i].name, name, ODP_ML_MODEL_IO_NAME_LEN - 1); + output_info[i].name[ODP_ML_MODEL_IO_NAME_LEN - 1] = 0; + + /* Free memory allocated by SessionGetOutputName */ + status = ort_api->AllocatorFree(allocator, name); + if (check_ortstatus(status)) { + _ODP_ERR("AllocatorFree %uth output_name failed\n", i); + return -1; + } + + if (param->extra_info.num_outputs) { + data_format = ¶m->extra_info.output_format[i]; + + output_info[i].shape = data_format->shape; + output_info[i].data_type = data_format->data_type; + output_info[i].data_type_size = data_format->data_type_size; + + mdl->output_sizes[i] = output_info[i].data_type_size; + calculate_model_io_size(&data_format->shape, &mdl->output_sizes[i]); + continue; + } + + type_info = NULL; + status = ort_api->SessionGetOutputTypeInfo(session, i, &type_info); + if (check_ortstatus(status)) { + _ODP_ERR("SessionGetOutputTypeInfo failed\n"); + return -1; + } + + if (get_model_io_type_shape_size(type_info, &output_info[i].shape, + &output_info[i].data_type, + &output_info[i].data_type_size, + &mdl->output_sizes[i])) { + _ODP_ERR("get_model_io_type_shape_size() for output failed\n"); + ort_api->ReleaseTypeInfo(type_info); + return -1; + } + + ort_api->ReleaseTypeInfo(type_info); + } + + return 0; +} + +static inline int check_model_io_num(const odp_ml_model_param_t *param, + uint32_t num_inputs, uint32_t num_outputs) +{ + /* Make sure the number of inputs/outputs not exceeding the supported + * model max inputs/outputs */ + if (num_inputs > CONFIG_ML_MAX_INPUTS) { + _ODP_ERR("The model's number of inputs %u exceeds the maximum " + "number of inputs supported in a model %u\n", + num_inputs, CONFIG_ML_MAX_INPUTS); + 
return -1; + } + + if (num_outputs > CONFIG_ML_MAX_OUTPUTS) { + _ODP_ERR("The model's number of outputs %u exceeds the maximum " + "number of outputs supported in a model %u\n", + num_outputs, CONFIG_ML_MAX_OUTPUTS); + + return -1; + } + + /* Make sure the numbers of inputs/outputs provided in the extra_info of + * param match the numbers defined in model metadata. */ + if (param->extra_info.num_inputs && + param->extra_info.num_inputs != num_inputs) { + _ODP_ERR("Provided param->extra_info.num_inputs %u does not match the" + " number of inputs defined in model metadata: %u\n", + param->extra_info.num_inputs, num_inputs); + return -1; + } + + if (param->extra_info.num_outputs && param->extra_info.num_outputs != num_outputs) { + _ODP_ERR("Provided param->extra_info.num_outputs %u does not match the" + " number of outputs defined in model metadata: %u\n", + param->extra_info.num_outputs, num_outputs); + return -1; + } + + if (param->extra_info.num_inputs && !param->extra_info.input_format) { + _ODP_ERR("num_inputs is provided but not input_format in param->extra_info\n"); + return -1; + } + + if (param->extra_info.num_outputs && !param->extra_info.output_format) { + _ODP_ERR("num_outputs is provided but not output_format in param->extra_info\n"); + return -1; + } + + return 0; +} + +static int create_ort_model(const odp_ml_model_param_t *param, OrtSession **session, + ml_model_t *mdl, OrtSessionOptions *session_opts) +{ + OrtStatus *status; + int64_t model_version; + uint32_t num_inputs = 0; + uint32_t num_outputs = 0; + OrtModelMetadata *metadata = {0}; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + status = ort_api->CreateSessionFromArray(_odp_ml_glb->env, + param->model, + param->size, + session_opts, + session); + if (check_ortstatus(status) || !(*session)) { + _ODP_ERR("CreateSessionFromArray failed\n"); + return -1; + } + + if (get_model_io_count(*session, &num_inputs, &num_outputs)) { + _ODP_ERR("get_model_io_count() failed\n"); + 
ort_api->ReleaseSession(*session); + return -1; + } + + if (check_model_io_num(param, num_inputs, num_outputs)) { + ort_api->ReleaseSession(*session); + return -1; + } + + mdl->max_compl_id = param->max_compl_id; + mdl->info.num_inputs = num_inputs; + mdl->info.num_outputs = num_outputs; + + /* Get metadata */ + status = ort_api->SessionGetModelMetadata(*session, &metadata); + if (check_ortstatus(status) || !metadata) { + _ODP_ERR("SessionGetModelMetadata failed\n"); + ort_api->ReleaseSession(*session); + return -1; + } + + /* Get model version */ + status = ort_api->ModelMetadataGetVersion(metadata, &model_version); + if (check_ortstatus(status)) { + _ODP_ERR("ModelMetadataGetVersion failed\n"); + ort_api->ReleaseModelMetadata(metadata); + ort_api->ReleaseSession(*session); + return -1; + } + mdl->info.model_version = model_version; + mdl->info.interface_version = 0; + + if (get_model_io_info(*session, mdl, param)) { + _ODP_ERR("get_model_io_info() failed\n"); + ort_api->ReleaseModelMetadata(metadata); + ort_api->ReleaseSession(*session); + return -1; + } + + ort_api->ReleaseModelMetadata(metadata); + return 0; +} + +static int set_ort_run_opts(const char *name, OrtSessionOptions *se_opts) +{ + OrtStatus *status; + ort_run_opts_t *opts = &_odp_ml_glb->ort_run_opts; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + if (opts->enable_profiling) { + status = ort_api->EnableProfiling(se_opts, name); + if (check_ortstatus(status)) { + _ODP_ERR("Enable profiling failed\n"); + return -1; + } + } + + status = ort_api->SetSessionExecutionMode(se_opts, opts->execution_mode); + if (check_ortstatus(status)) { + _ODP_ERR("SetSessionExecutionMode failed\n"); + return -1; + } + + if (opts->intra_op_num_threads) { + status = ort_api->SetIntraOpNumThreads(se_opts, opts->intra_op_num_threads); + if (check_ortstatus(status)) { + _ODP_ERR("SetIntraOpNumThreads failed\n"); + return -1; + } + } + + if (opts->inter_op_num_threads) { + status = ort_api->SetInterOpNumThreads(se_opts, 
opts->inter_op_num_threads); + if (check_ortstatus(status)) { + _ODP_ERR("SetInterOpNumThreads failed\n"); + return -1; + } + } + + status = ort_api->SetSessionGraphOptimizationLevel(se_opts, opts->graph_opt_level); + if (check_ortstatus(status)) { + _ODP_ERR("SetSessionGraphOptimizationLevel failed\n"); + return -1; + } + + /* Optimized model file path is not provided */ + if (opts->opt_model_filepath[0] == '\0') + return 0; + + status = ort_api->SetOptimizedModelFilePath(se_opts, opts->opt_model_filepath); + if (check_ortstatus(status)) { + _ODP_ERR("SetOptimizedModelFilePath failed\n"); + return -1; + } + + return 0; +} + +static inline void reset_mdl_info_sizes(ml_model_t *mdl) +{ + memset(&mdl->info, 0, sizeof(odp_ml_model_info_t)); + memset(mdl->input_info, 0, sizeof(mdl->input_info)); + memset(mdl->output_info, 0, sizeof(mdl->output_info)); + memset(mdl->input_sizes, 0, sizeof(mdl->input_sizes)); + memset(mdl->output_sizes, 0, sizeof(mdl->output_sizes)); +} + +static int check_io_shape(ml_model_t *mdl) +{ + odp_ml_shape_info_t *shape; + + for (uint32_t i = 0; i < mdl->info.num_inputs; i++) { + shape = &mdl->input_info[i].shape; + + if (shape->type == ODP_ML_SHAPE_NONE) { + _ODP_ERR("Undefined shape type for model input[%u]\n", i); + return -1; + } + + if (shape->type == ODP_ML_SHAPE_STATIC) + continue; + + /* shape->type == ODP_ML_SHAPE_BATCH */ + for (uint32_t j = 0; j < shape->num_dim; j++) { + if (shape->dim[j] == ODP_ML_DIM_DYNAMIC && !shape->dim_max[j]) { + _ODP_ERR("Missing dim_max[%u] for dynamic sized input[%u], please" + " provide via the extra_info of model param\n", j, i); + return -1; + } + } + } + + for (uint32_t i = 0; i < mdl->info.num_outputs; i++) { + if (mdl->output_info[i].shape.type == ODP_ML_SHAPE_NONE) { + _ODP_ERR("Undefined shape type for model output[%u]\n", i); + return -1; + } + } + + return 0; +} + +odp_ml_model_t odp_ml_model_create(const char *name, const odp_ml_model_param_t *param) +{ + OrtStatus *status; + odp_ml_model_info_t 
*info; + OrtSessionOptions *session_opts; + uint32_t i = 0; + ml_model_t *mdl = NULL; + OrtSession *session = NULL; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + if (odp_unlikely(odp_global_ro.disable.ml)) { + _ODP_ERR("ML is disabled\n"); + return ODP_ML_MODEL_INVALID; + } + + if (odp_unlikely(param->size > _odp_ml_glb->ml_config.max_model_size)) { + _ODP_ERR("Model size %" PRIu64 " exceeds maximum model size configured %" PRIu64 "\n", + param->size, _odp_ml_glb->ml_config.max_model_size); + return ODP_ML_MODEL_INVALID; + } + + if (odp_unlikely(!param->size || !param->model)) { + _ODP_ERR("Invalid model param: param->model: %p, param->size: %" PRIu64 "\n", + param->model, param->size); + return ODP_ML_MODEL_INVALID; + } + + if (odp_unlikely(param->max_compl_id > ML_MAX_COMPL_ID)) { + _ODP_ERR("param->max_compl_id: %u exceeds maximum completion id supported: %d\n", + param->max_compl_id, ML_MAX_COMPL_ID); + return ODP_ML_MODEL_INVALID; + } + + /* Find an emtpy slot to store the new model */ + for (i = 0; i < ML_MAX_MODELS_CREATED; i++) { + if (_odp_ml_glb->models[i].state) + continue; + + odp_ticketlock_lock(&_odp_ml_glb->models[i].lock); + + if (_odp_ml_glb->models[i].state) { + odp_ticketlock_unlock(&_odp_ml_glb->models[i].lock); + continue; + } + + mdl = &_odp_ml_glb->models[i]; + break; + } + + if (i == ML_MAX_MODELS_CREATED) { + _ODP_ERR("Maximum number of models has already been created!\n"); + return ODP_ML_MODEL_INVALID; + } + + /* Free model entry was found and is now locked */ + mdl->state = ML_STATE_CREATED; + + status = ort_api->CreateSessionOptions(&session_opts); + if (check_ortstatus(status) || !session_opts) { + _ODP_ERR("Error: CreateSessionOptions failed.\n"); + mdl->state = ML_STATE_FREE; + odp_ticketlock_unlock(&mdl->lock); + return ODP_ML_MODEL_INVALID; + } + + if (set_ort_run_opts(name, session_opts)) { + _odp_ml_glb->ort_api->ReleaseSessionOptions(session_opts); + mdl->state = ML_STATE_FREE; + odp_ticketlock_unlock(&mdl->lock); + return 
ODP_ML_MODEL_INVALID; + } + + /* Store model info */ + info = &mdl->info; + memset(info, 0, sizeof(odp_ml_model_info_t)); + + if (create_ort_model(param, &session, mdl, session_opts)) { + mdl->state = ML_STATE_FREE; + + /* Initialize info back to 0 when some fields have been filled + * while later failed */ + reset_mdl_info_sizes(mdl); + odp_ticketlock_unlock(&mdl->lock); + + _odp_ml_glb->ort_api->ReleaseSessionOptions(session_opts); + _ODP_ERR("create_ort_model() failed\n"); + return ODP_ML_MODEL_INVALID; + } + + if (check_io_shape(mdl)) { + mdl->state = ML_STATE_FREE; + reset_mdl_info_sizes(mdl); + odp_ticketlock_unlock(&mdl->lock); + + ort_api->ReleaseSession(session); + _odp_ml_glb->ort_api->ReleaseSessionOptions(session_opts); + return ODP_ML_MODEL_INVALID; + } + + mdl->session = session; + mdl->session_opts = session_opts; + info->index = i; + + if (name) { + strncpy(info->name, name, ODP_ML_MODEL_NAME_LEN - 1); + info->name[ODP_ML_MODEL_NAME_LEN - 1] = 0; + } + + mdl->max_compl_id = param->max_compl_id; + for (uint32_t j = 0; j < ML_MAX_COMPL_ID; j++) + odp_atomic_init_u32(&mdl->compl_status[j], 1); + + odp_ticketlock_unlock(&mdl->lock); + return (odp_ml_model_t)mdl; +} + +int odp_ml_model_destroy(odp_ml_model_t model) +{ + ml_model_t *mdl = ml_model_from_handle(model); + + if (model == ODP_ML_MODEL_INVALID) { + _ODP_ERR("Bad ML model handle\n"); + return -1; + } + + odp_ticketlock_lock(&mdl->lock); + + if (mdl->state != ML_STATE_CREATED) { + _ODP_ERR("Model not created\n"); + odp_ticketlock_unlock(&mdl->lock); + return -1; + } + + _odp_ml_glb->ort_api->ReleaseSessionOptions(mdl->session_opts); + _odp_ml_glb->ort_api->ReleaseSession(mdl->session); + mdl->state = ML_STATE_FREE; + mdl->session = NULL; + odp_ticketlock_unlock(&mdl->lock); + + return 0; +} + +int odp_ml_model_info(odp_ml_model_t model, odp_ml_model_info_t *info) +{ + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model 
handle\n"); + return -1; + } + + if (odp_unlikely(!info)) { + _ODP_ERR("info must not be NULL\n"); + return -1; + } + + odp_ticketlock_lock(&mdl->lock); + if (odp_unlikely(mdl->state == ML_STATE_FREE)) { + _ODP_ERR("Model not created\n"); + odp_ticketlock_unlock(&mdl->lock); + return -1; + } + + *info = mdl->info; + + odp_ticketlock_unlock(&mdl->lock); + return 0; +} + +uint32_t odp_ml_model_input_info(odp_ml_model_t model, odp_ml_input_info_t info[], uint32_t num) +{ + uint32_t num_model_inputs; + uint32_t num_written; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + return 0; + } + + odp_ticketlock_lock(&mdl->lock); + num_model_inputs = mdl->info.num_inputs; + num_written = num_model_inputs >= num ? num : num_model_inputs; + + if (num == 0) { + odp_ticketlock_unlock(&mdl->lock); + return num_model_inputs; + } + + for (uint32_t i = 0; i < num_written; i++) + info[i] = mdl->input_info[i]; + + odp_ticketlock_unlock(&mdl->lock); + return num_model_inputs; +} + +uint32_t odp_ml_model_output_info(odp_ml_model_t model, odp_ml_output_info_t info[], uint32_t num) +{ + uint32_t num_model_outputs; + uint32_t num_written; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + return 0; + } + + odp_ticketlock_lock(&mdl->lock); + num_model_outputs = mdl->info.num_outputs; + num_written = num_model_outputs >= num ? 
num : num_model_outputs; + + if (num == 0) { + odp_ticketlock_unlock(&mdl->lock); + return num_model_outputs; + } + + for (uint32_t i = 0; i < num_written; i++) + info[i] = mdl->output_info[i]; + + odp_ticketlock_unlock(&mdl->lock); + return num_model_outputs; +} + +odp_ml_model_t odp_ml_model_lookup(const char *name) +{ + uint32_t i; + ml_model_t *mdl; + + for (i = 0; i < ML_MAX_MODELS_CREATED; i++) { + mdl = &_odp_ml_glb->models[i]; + + odp_ticketlock_lock(&mdl->lock); + + if (mdl->state == ML_STATE_FREE) { + odp_ticketlock_unlock(&mdl->lock); + continue; + } + + if (!strcmp(mdl->info.name, name)) { + /* found it */ + odp_ticketlock_unlock(&mdl->lock); + return (odp_ml_model_t)mdl; + } + odp_ticketlock_unlock(&mdl->lock); + } + + return ODP_ML_MODEL_INVALID; +} + +uint64_t odp_ml_model_to_u64(odp_ml_model_t model) +{ + return _odp_pri(model); +} + +static const char *data_type_str(odp_ml_data_type_t data_type) +{ + switch (data_type) { + case ODP_ML_DATA_TYPE_INT8: + return "int8"; + case ODP_ML_DATA_TYPE_UINT8: + return "uint8"; + case ODP_ML_DATA_TYPE_UINT16: + return "uint16"; + case ODP_ML_DATA_TYPE_INT16: + return "int16"; + case ODP_ML_DATA_TYPE_INT32: + return "int32"; + case ODP_ML_DATA_TYPE_UINT32: + return "uint32"; + case ODP_ML_DATA_TYPE_INT64: + return "int64"; + case ODP_ML_DATA_TYPE_UINT64: + return "uint64"; + case ODP_ML_DATA_TYPE_FP16: + return "fp16"; + case ODP_ML_DATA_TYPE_FP32: + return "fp32"; + case ODP_ML_DATA_TYPE_BFP16: + return "bfp16"; + default: + return "unknown"; + } +} + +static const char *shape_type_str(odp_ml_shape_type_t shape_type) +{ + switch (shape_type) { + case ODP_ML_SHAPE_NONE: + return "none"; + case ODP_ML_SHAPE_STATIC: + return "static"; + case ODP_ML_SHAPE_BATCH: + return "batch"; + default: + return "Unknown"; + } +} + +static void print_shape(const odp_ml_shape_info_t *shape) +{ + /* Print shape */ + _ODP_PRINT("Shape: %s [", shape_type_str(shape->type)); + + for (uint32_t i = 0; i < shape->num_dim; i++) { + if 
(shape->dim[i] == ODP_ML_DIM_DYNAMIC) + _ODP_PRINT("Dyn"); + else + _ODP_PRINT("%" PRIu32, shape->dim[i]); + + if (i == (shape->num_dim - 1)) + _ODP_PRINT("]\n"); + else + _ODP_PRINT(", "); + } + + /* The number of dimensions for a scalar input is 0, in which case did not + * go into above for loop */ + if (shape->num_dim == 0) + _ODP_PRINT("]\n"); +} + +void odp_ml_model_print(odp_ml_model_t model) +{ + ml_model_t *mdl = ml_model_from_handle(model); + const odp_ml_model_info_t * const info = &mdl->info; + const odp_ml_input_info_t * const input_info = mdl->input_info; + const odp_ml_output_info_t * const output_info = mdl->output_info; + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + return; + } + + odp_ticketlock_lock(&mdl->lock); + if (odp_unlikely(mdl->state == ML_STATE_FREE)) { + odp_ticketlock_unlock(&mdl->lock); + _ODP_ERR("Model not created\n"); + return; + } + + _ODP_PRINT("\nModel info\n"); + _ODP_PRINT("----------\n"); + _ODP_PRINT(" Model handle: 0x%" PRIx64 "\n", odp_ml_model_to_u64(model)); + _ODP_PRINT(" Name: %s\n", info->name); + _ODP_PRINT(" Model version: %" PRIu64 "\n", info->model_version); + _ODP_PRINT(" Model interface version: %" PRIu64 "\n", info->interface_version); + _ODP_PRINT(" Index: %u\n", info->index); + _ODP_PRINT(" Number of inputs: %u\n", info->num_inputs); + + for (uint32_t i = 0; i < info->num_inputs; i++) { + _ODP_PRINT(" Input[%u]: ", i); + _ODP_PRINT("Name: %s, ", input_info[i].name); + _ODP_PRINT("Data_type: %s, ", data_type_str(input_info[i].data_type)); + print_shape(&input_info[i].shape); + } + + _ODP_PRINT(" Number of outputs: %u\n", info->num_outputs); + for (uint32_t i = 0; i < info->num_outputs; i++) { + _ODP_PRINT(" Output[%u]: ", i); + _ODP_PRINT("Name: %s, ", output_info[i].name); + _ODP_PRINT("Data_type: %s, ", data_type_str(output_info[i].data_type)); + print_shape(&output_info[i].shape); + } + + odp_ticketlock_unlock(&mdl->lock); + + _ODP_PRINT("\n"); +} + +static 
inline void mode_print(odp_ml_compl_mode_t compl_mode_mask) +{ + if (compl_mode_mask & ODP_ML_COMPL_MODE_SYNC) + _ODP_PRINT(" syn"); + + if (compl_mode_mask & ODP_ML_COMPL_MODE_POLL) + _ODP_PRINT(" poll"); + + if (compl_mode_mask & ODP_ML_COMPL_MODE_EVENT) + _ODP_PRINT(" event"); +} + +void odp_ml_print(void) +{ + _ODP_PRINT("\nML info\n"); + _ODP_PRINT("-----------\n"); + _ODP_PRINT(" max_model_size: %u\n", ML_MAX_MODEL_SIZE); + _ODP_PRINT(" max_compl_id: %u\n", ML_MAX_COMPL_ID); + _ODP_PRINT(" max_models_created: %u\n", ML_MAX_MODELS_CREATED); + _ODP_PRINT(" max_models_loaded: %u\n", ML_MAX_MODELS_LOADED); + _ODP_PRINT(" model_max_inputs: %u\n", CONFIG_ML_MAX_INPUTS); + _ODP_PRINT(" model_max_outputs: %u\n", CONFIG_ML_MAX_OUTPUTS); + + _ODP_PRINT(" load:\n"); + _ODP_PRINT(" completion mode: "); + mode_print(_odp_ml_glb->capa.load.compl_mode_mask); + _ODP_PRINT(", plain queue: %c, schedule queue: %c\n", + _odp_ml_glb->capa.load.compl_queue_plain ? 'Y' : 'N', + _odp_ml_glb->capa.load.compl_queue_sched ? 'Y' : 'N'); + + _ODP_PRINT(" run:\n"); + _ODP_PRINT(" completion mode:"); + mode_print(_odp_ml_glb->capa.run.compl_mode_mask); + _ODP_PRINT(", plain queue: %c, schedule queue: %c\n", + _odp_ml_glb->capa.run.compl_queue_plain ? 'Y' : 'N', + _odp_ml_glb->capa.run.compl_queue_sched ? 
'Y' : 'N'); + _ODP_PRINT("\n"); +} + +int odp_ml_model_extra_stat_info(odp_ml_model_t model, + odp_ml_extra_stat_info_t info[] ODP_UNUSED, + int num ODP_UNUSED) +{ + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + return -1; + } + + return 0; +} + +int odp_ml_model_extra_stats(odp_ml_model_t model, uint64_t stats[] ODP_UNUSED, int num ODP_UNUSED) +{ + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + return -1; + } + + return 0; +} + +void odp_ml_compl_pool_param_init(odp_ml_compl_pool_param_t *pool_param) +{ + if (odp_unlikely(!pool_param)) { + _ODP_ERR("Param 'pool_param' must not NULL\n"); + return; + } + + memset(pool_param, 0, sizeof(odp_ml_compl_pool_param_t)); + + pool_param->cache_size = _odp_ml_glb->pool_param.buf.cache_size; +} + +odp_pool_t odp_ml_compl_pool_create(const char *name, const odp_ml_compl_pool_param_t *pool_param) +{ + odp_pool_t pool; + odp_pool_param_t ml_pool_param; + uint32_t num = pool_param->num; + uint32_t uarea_size = pool_param->uarea_size; + uint32_t cache_size = pool_param->cache_size; + uint32_t buf_size = _ODP_MAX(sizeof(odp_ml_run_result_t), + sizeof(odp_ml_load_result_t)); + + if (num > _odp_ml_glb->capa.pool.max_num) { + _ODP_ERR("Too many ML completion events: %u\n", num); + return ODP_POOL_INVALID; + } + + if (uarea_size > _odp_ml_glb->capa.pool.max_uarea_size) { + _ODP_ERR("Bad uarea size: %u\n", uarea_size); + return ODP_POOL_INVALID; + } + + if (cache_size < _odp_ml_glb->capa.pool.min_cache_size || + cache_size > _odp_ml_glb->capa.pool.max_cache_size) { + _ODP_ERR("Bad cache size: %u\n", cache_size); + return ODP_POOL_INVALID; + } + + odp_pool_param_init(&ml_pool_param); + ml_pool_param.type = ODP_POOL_BUFFER; + ml_pool_param.uarea_init.init_fn = pool_param->uarea_init.init_fn; + ml_pool_param.uarea_init.args = pool_param->uarea_init.args; + ml_pool_param.buf.num = num; + ml_pool_param.buf.cache_size = cache_size; + ml_pool_param.buf.size = 
buf_size; + ml_pool_param.buf.uarea_size = uarea_size; + + pool = _odp_pool_create(name, &ml_pool_param, ODP_POOL_ML_COMPL); + + return pool; +} + +odp_ml_compl_t odp_ml_compl_alloc(odp_pool_t pool) +{ + odp_buffer_t buf; + odp_event_t ev; + odp_ml_run_result_t *result; + uint32_t buf_size = _ODP_MAX(sizeof(odp_ml_run_result_t), + sizeof(odp_ml_load_result_t)); + + buf = odp_buffer_alloc(pool); + + if (odp_unlikely(buf == ODP_BUFFER_INVALID)) + return ODP_ML_COMPL_INVALID; + + result = odp_buffer_addr(buf); + memset(result, 0, buf_size); + + ev = odp_buffer_to_event(buf); + _odp_event_type_set(ev, ODP_EVENT_ML_COMPL); + + return (odp_ml_compl_t)(uintptr_t)buf; +} + +void odp_ml_compl_free(odp_ml_compl_t ml_compl) +{ + odp_event_t ev; + odp_buffer_t buf = (odp_buffer_t)(uintptr_t)ml_compl; + + if (odp_unlikely(ml_compl == ODP_ML_COMPL_INVALID)) { + _ODP_ERR("Bad ML job completion handle\n"); + return; + } + + ev = odp_buffer_to_event(buf); + _odp_event_type_set(ev, ODP_EVENT_BUFFER); + + odp_buffer_free(buf); +} + +int odp_ml_compl_run_result(odp_ml_compl_t ml_compl, odp_ml_run_result_t *result) +{ + odp_event_subtype_t subtype; + odp_ml_run_result_t *run_result; + odp_buffer_t buf = (odp_buffer_t)(uintptr_t)ml_compl; + odp_event_t ev = odp_buffer_to_event(buf); + + if (odp_unlikely(ml_compl == ODP_ML_COMPL_INVALID)) { + _ODP_ERR("Given ML completion event is invalid\n"); + return -2; + } + + if (odp_event_types(ev, &subtype) != ODP_EVENT_ML_COMPL || + subtype != ODP_EVENT_ML_COMPL_RUN) { + _ODP_ERR("Given completion event has wrong event type or subtype\n"); + return -2; + } + + run_result = odp_buffer_addr(buf); + if (result) + *result = *run_result; + + return run_result->error_code ? 
-1 : 0; +} + +int odp_ml_compl_load_result(odp_ml_compl_t ml_compl, odp_ml_load_result_t *result) +{ + odp_event_subtype_t subtype; + odp_ml_load_result_t *load_result; + odp_buffer_t buf = (odp_buffer_t)(uintptr_t)ml_compl; + odp_event_t ev = odp_buffer_to_event(buf); + + if (odp_unlikely(ml_compl == ODP_ML_COMPL_INVALID)) { + _ODP_ERR("Given ML completion event is invalid\n"); + return -2; + } + + if (odp_event_types(ev, &subtype) != ODP_EVENT_ML_COMPL || + subtype != ODP_EVENT_ML_COMPL_LOAD) { + _ODP_ERR("Given completion event has wrong event type or subtype\n"); + return -2; + } + + load_result = odp_buffer_addr(buf); + if (result) + *result = *load_result; + + return load_result->error_code ? -1 : 0; +} + +void *odp_ml_compl_user_area(odp_ml_compl_t ml_compl) +{ + return odp_buffer_user_area((odp_buffer_t)(uintptr_t)ml_compl); +} + +odp_ml_compl_t odp_ml_compl_from_event(odp_event_t event) +{ + _ODP_ASSERT(_odp_event_hdr_field(event, int8_t, event_type) == ODP_EVENT_ML_COMPL); + + return (odp_ml_compl_t)(uintptr_t)event; +} + +odp_event_t odp_ml_compl_to_event(odp_ml_compl_t ml_compl) +{ + return (odp_event_t)(uintptr_t)ml_compl; +} + +uint64_t odp_ml_compl_to_u64(odp_ml_compl_t ml_compl) +{ + return (uint64_t)(uintptr_t)ml_compl; +} + +void odp_ml_compl_param_init(odp_ml_compl_param_t *compl_param) +{ + memset(compl_param, 0, sizeof(odp_ml_compl_param_t)); + + compl_param->queue = ODP_QUEUE_INVALID; + compl_param->event = ODP_EVENT_INVALID; +} + +int odp_ml_model_load(odp_ml_model_t model, odp_ml_load_result_t *result) +{ + odp_ml_load_result_t result_local; + int ret = -1; + ml_model_t *mdl = ml_model_from_handle(model); + + memset(&result_local, 0, sizeof(result_local)); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + result_local.error_code = ML_BAD_HDL; + goto load_fail; + } + + odp_ticketlock_lock(&mdl->lock); + if (odp_unlikely(mdl->state != ML_STATE_CREATED)) { + _ODP_ERR("Model has not been created yet or 
is already loaded\n"); + odp_ticketlock_unlock(&mdl->lock); + result_local.error_code = ML_NOT_CREATED; + goto load_fail; + } + + mdl->state = ML_STATE_LOADED; + odp_ticketlock_unlock(&mdl->lock); + ret = 0; + +load_fail: + if (result) + *result = result_local; + + return ret; +} + +static inline int check_compl_param(const odp_ml_compl_param_t *compl_param, + uint32_t max_compl_id, odp_bool_t is_load) +{ + odp_ml_config_t *config = &_odp_ml_glb->ml_config; + + switch (compl_param->mode) { + case ODP_ML_COMPL_MODE_POLL: + if (is_load && !(config->load_mode_mask & ODP_ML_COMPL_MODE_POLL)) { + _ODP_ERR("Poll mode loading/unloading is not configured\n"); + return -1; + } + + if (!is_load && !(config->run_mode_mask & ODP_ML_COMPL_MODE_POLL)) { + _ODP_ERR("Poll mode run is not configured\n"); + return -1; + } + + if (compl_param->compl_id > max_compl_id) { + _ODP_ERR("Bad compl_id: %u, exceeding model max completion id %u\n", + compl_param->compl_id, max_compl_id); + return -1; + } + break; + case ODP_ML_COMPL_MODE_EVENT: + if (is_load && !(config->load_mode_mask & ODP_ML_COMPL_MODE_EVENT)) { + _ODP_ERR("Event mode loading/unloading is not configured\n"); + return -1; + } + + if (!is_load && !(config->run_mode_mask & ODP_ML_COMPL_MODE_EVENT)) { + _ODP_ERR("Event mode run is not configured\n"); + return -1; + } + + if (compl_param->event == ODP_EVENT_INVALID || + compl_param->queue == ODP_QUEUE_INVALID) { + _ODP_ERR("Bad event or queue\n"); + return -1; + } + + if (odp_event_type(compl_param->event) != ODP_EVENT_ML_COMPL) { + _ODP_ERR("Bad completion event type\n"); + return -1; + } + break; + default: + /* Including ODP_ML_COMPL_MODE_SYNC, which is not supported by + * asynchrous functions (e.g. *_start()) either. 
+ */ + _ODP_ERR("Invalid completion mode %u\n", compl_param->mode); + return -1; + } + + return 0; +} + +int odp_ml_model_load_start(odp_ml_model_t model, const odp_ml_compl_param_t *compl_param) +{ + int ret; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad model handle\n"); + return -1; + } + + if (odp_unlikely(check_compl_param(compl_param, mdl->max_compl_id, true))) + return -1; + + if (compl_param->mode == ODP_ML_COMPL_MODE_POLL) + odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 0); + + ret = odp_ml_model_load(model, NULL); + + if (odp_unlikely(ret)) + return -1; + + /* Send a completion event to the given queue */ + if (compl_param->mode == ODP_ML_COMPL_MODE_EVENT) { + odp_ml_load_result_t *result; + odp_buffer_t buf = (odp_buffer_t)(uintptr_t)compl_param->event; + + _odp_buffer_subtype_set(buf, ODP_EVENT_ML_COMPL_LOAD); + + result = odp_buffer_addr(buf); + result->error_code = 0; + result->user_ptr = compl_param->user_ptr; + + if (odp_unlikely(odp_queue_enq(compl_param->queue, compl_param->event))) { + _ODP_ERR("Completion event enqueue failed %" PRIu64 "\n", + odp_queue_to_u64(compl_param->queue)); + if (odp_ml_model_unload(model, NULL)) + _ODP_ERR("Failed to unload model\n"); + return -1; + } + + return 0; + } + + mdl->result[compl_param->compl_id].user_ptr = compl_param->user_ptr; + odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 1); + return 0; +} + +int odp_ml_model_load_status(odp_ml_model_t model, uint32_t compl_id, odp_ml_load_result_t *result) +{ + int ret; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID || compl_id > mdl->max_compl_id)) { + _ODP_ERR("Invalid model or compl_id: %u\n", compl_id); + return -2; + } + + ret = odp_atomic_load_acq_u32(&mdl->compl_status[compl_id]); + + if (ret && result) { + result->error_code = 0; + result->user_ptr = mdl->result[compl_id].user_ptr; + } + + 
return ret; +} + +int odp_ml_model_unload(odp_ml_model_t model, odp_ml_load_result_t *result) +{ + odp_ml_load_result_t result_local; + int ret = -1; + ml_model_t *mdl = ml_model_from_handle(model); + + memset(&result_local, 0, sizeof(result_local)); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + result_local.error_code = ML_BAD_HDL; + _ODP_ERR("Bad ML model handle\n"); + goto unload_fail; + } + + odp_ticketlock_lock(&mdl->lock); + /* mdl->state == ML_STATE_FREE, ML_STATE_CREATED, ML_STATE_INFERENCING */ + if (odp_unlikely(mdl->state != ML_STATE_LOADED)) { + _ODP_ERR("Model has not been created/loaded or inferencing has not finished yet\n"); + odp_ticketlock_unlock(&mdl->lock); + result_local.error_code = ML_NOT_LOADED; + goto unload_fail; + } + + mdl->state = ML_STATE_CREATED; + odp_ticketlock_unlock(&mdl->lock); + + ret = 0; + +unload_fail: + if (result) + *result = result_local; + + return ret; +} + +int odp_ml_model_unload_start(odp_ml_model_t model, const odp_ml_compl_param_t *compl_param) +{ + int ret; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad model handle\n"); + return -1; + } + + if (odp_unlikely(check_compl_param(compl_param, mdl->max_compl_id, true))) + return -1; + + if (compl_param->mode == ODP_ML_COMPL_MODE_POLL) + odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 0); + + ret = odp_ml_model_unload(model, NULL); + + if (odp_unlikely(ret)) + return -1; + + /* Upon successful unloading, send a completion event to the given queue */ + if (compl_param->mode == ODP_ML_COMPL_MODE_EVENT) { + odp_ml_load_result_t *result; + odp_buffer_t buf = (odp_buffer_t)(uintptr_t)compl_param->event; + + _odp_buffer_subtype_set(buf, ODP_EVENT_ML_COMPL_LOAD); + + result = odp_buffer_addr(buf); + result->error_code = 0; + result->user_ptr = compl_param->user_ptr; + + if (odp_unlikely(odp_queue_enq(compl_param->queue, compl_param->event))) { + _ODP_ERR("Completion event 
enqueue failed %" PRIu64 "\n", + odp_queue_to_u64(compl_param->queue)); + return -1; + } + + return 0; + } + + mdl->result[compl_param->compl_id].user_ptr = compl_param->user_ptr; + odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 1); + return 0; +} + +int odp_ml_model_unload_status(odp_ml_model_t model, uint32_t compl_id, + odp_ml_load_result_t *result) +{ + return odp_ml_model_load_status(model, compl_id, result); +} + +void odp_ml_run_param_init(odp_ml_run_param_t *param) +{ + memset(param, 0, sizeof(odp_ml_run_param_t)); +} + +static void ml_shape_to_int64(const odp_ml_shape_info_t *shape, uint32_t batch_size, int64_t *array) +{ + for (uint32_t i = 0; i < shape->num_dim; i++) { + /* Replace dynamic dimension size with provided batch_size */ + if (shape->dim[i] == ODP_ML_DIM_DYNAMIC) + array[i] = batch_size; + else + array[i] = shape->dim[i]; + } +} + +/* Get the number of elements in given shape */ +static inline uint64_t get_num_elem(uint32_t batch_size, const odp_ml_shape_info_t *shape) +{ + uint64_t num_elements = 1; + int64_t dim[ODP_ML_MAX_DIMS] = {0}; + + ml_shape_to_int64(shape, batch_size, dim); + + for (uint32_t i = 0; i < shape->num_dim; i++) + num_elements *= (uint64_t)dim[i]; + + return num_elements; +} + +static inline uint32_t dyn_io_size(const odp_ml_shape_info_t *shape, uint32_t data_type_size, + const odp_ml_run_param_t *param) +{ + uint32_t size; + + if (!param || !param->batch_size) { + _ODP_ERR("Parameter 'param' must not be NULL and batch_size must be " + "provided when a input/output has dynamic dimension size\n"); + return 0; + } + + size = get_num_elem(param->batch_size, shape); + size *= data_type_size; + + return size; +} + +static int verify_run_params(odp_ml_model_t model, const odp_ml_data_t *data, + const odp_ml_run_param_t *param) +{ + const ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad ML model handle\n"); + return -1; + } + + if 
(odp_unlikely(!data)) { + _ODP_ERR("Parameter 'data' must not be NULL\n"); + return -1; + } + + /* Make sure that the number of input data segments equals or bigger than + * the number of model inputs. */ + if (mdl->info.num_inputs > data->num_input_seg) { + _ODP_ERR("The num of input data segments %u must not less than " + "the number of model inputs %u\n", data->num_input_seg, + mdl->info.num_inputs); + return -1; + } + + if (mdl->info.num_outputs > data->num_output_seg) { + _ODP_ERR("The num of output data segments %u must not less than " + "the number of model outputs %u\n", data->num_output_seg, + mdl->info.num_outputs); + return -1; + } + + if (data->num_input_seg > mdl->info.num_inputs && + (_odp_ml_glb->capa.max_segs_per_input == 1)) { + _ODP_ERR("Segmented input data is not supported\n"); + return -1; + } + + if (data->num_output_seg > mdl->info.num_outputs && + (_odp_ml_glb->capa.max_segs_per_output == 1)) { + _ODP_ERR("Segmented output data is not supported"); + return -1; + } + + uint32_t size = 0; + uint32_t input_index = 0; + uint32_t seg_size_sum = 0; + odp_bool_t index_new = true; + uint32_t segs_per_input = 1; + + for (uint32_t i = 0; i < data->num_input_seg; i++) { + if (data->input_seg[i].addr == NULL) { + _ODP_ERR("data->input_seg[%u].addr must not NULL\n", i); + return -1; + }; + + if (index_new) { + if (input_index > mdl->info.num_inputs - 1) { + _ODP_ERR("Too much number of input segments given\n"); + return -1; + } + + /* Input with dynamic batch size */ + if (mdl->input_info[input_index].shape.type == ODP_ML_SHAPE_BATCH) + size = dyn_io_size(&mdl->input_info[input_index].shape, + mdl->input_info[input_index].data_type_size, + param); + else + size = mdl->input_sizes[input_index]; + + if (!size) { + _ODP_ERR("Size for %uth input is 0\n", input_index); + return -1; + } + } + + seg_size_sum += data->input_seg[i].size; + + if (seg_size_sum > size) { + _ODP_ERR("Sum of segment sizes %u exceeds %uth input data size %u\n", + seg_size_sum, 
input_index, size); + return -1; + } + + if (seg_size_sum == size) { + if (segs_per_input > _odp_ml_glb->capa.max_segs_per_input) { + _ODP_ERR("Number of segments %u for input[%u] exceeds maximum" + " number of data segments per model input %u\n", + segs_per_input, input_index, + _odp_ml_glb->capa.max_segs_per_input); + return -1; + } + input_index++; + index_new = true; + seg_size_sum = 0; + segs_per_input = 1; + } else { + segs_per_input++; + index_new = false; + } + } + + if (input_index != mdl->info.num_inputs) { + _ODP_ERR("Data is not provided for all model inputs\n"); + return -1; + } + + seg_size_sum = 0; + index_new = true; + uint32_t output_index = 0; + uint32_t segs_per_output = 1; + + for (uint32_t i = 0; i < data->num_output_seg; i++) { + if (data->output_seg[i].addr == NULL) { + _ODP_ERR("data->output_seg[%u].addr must not NULL\n", i); + return -1; + } + + if (index_new) { + if (output_index > mdl->info.num_outputs - 1) { + _ODP_ERR("Too much number of output segments given\n"); + return -1; + } + + /* Output with dynamic batch size */ + if (mdl->output_info[output_index].shape.type == ODP_ML_SHAPE_BATCH) + size = dyn_io_size(&mdl->output_info[output_index].shape, + mdl->output_info[output_index].data_type_size, + param); + else + size = mdl->output_sizes[output_index]; + + if (!size) { + _ODP_ERR("Size for %uth output is 0\n", output_index); + return -1; + } + } + + seg_size_sum += data->output_seg[i].size; + + if (seg_size_sum > size) { + _ODP_ERR("Sum of segment sizes %u exceeds %uth output data size %u\n", + seg_size_sum, output_index, size); + return -1; + } + + if (seg_size_sum >= size) { + if (segs_per_output > _odp_ml_glb->capa.max_segs_per_output) { + _ODP_ERR("Number of segments %u for output[%u] exceeds maximum" + " number of data segments per model output %u\n", + segs_per_output, output_index, + _odp_ml_glb->capa.max_segs_per_output); + return -1; + } + output_index++; + index_new = true; + seg_size_sum = 0; + segs_per_output = 1; + } 
else { + segs_per_output++; + index_new = false; + } + } + + if (output_index != mdl->info.num_outputs) { + _ODP_ERR("Not enough output_segs to hold all output data\n"); + return -1; + } + + return 0; +} + +static ONNXTensorElementDataType onnx_dtype_from_odp_dtype(odp_ml_data_type_t data_type) +{ + switch (data_type) { + case ODP_ML_DATA_TYPE_NONE: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + case ODP_ML_DATA_TYPE_INT8: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; + case ODP_ML_DATA_TYPE_UINT8: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; + case ODP_ML_DATA_TYPE_INT16: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16; + case ODP_ML_DATA_TYPE_UINT16: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16; + case ODP_ML_DATA_TYPE_INT24: + /* Fall through*/ + case ODP_ML_DATA_TYPE_UINT24: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + case ODP_ML_DATA_TYPE_FP64: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; + case ODP_ML_DATA_TYPE_INT32: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; + case ODP_ML_DATA_TYPE_UINT32: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32; + case ODP_ML_DATA_TYPE_INT64: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; + case ODP_ML_DATA_TYPE_UINT64: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64; + case ODP_ML_DATA_TYPE_FP16: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; + case ODP_ML_DATA_TYPE_FP32: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + case ODP_ML_DATA_TYPE_BFP16: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16; + default: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + } +} + +static int verify_tensor(const OrtValue *tensor, odp_ml_data_type_t expected_type, + const odp_ml_shape_info_t *expected_shape, uint32_t batch_size) +{ + OrtTensorTypeAndShapeInfo *tensor_info; + ONNXTensorElementDataType tensor_type; + size_t dim_count; + OrtStatus *status = NULL; + int64_t dims[ODP_ML_MAX_DIMS] = {0}; + int64_t shape_arr[ODP_ML_MAX_DIMS] = {0}; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + status = 
ort_api->GetTensorTypeAndShape(tensor, &tensor_info); + if (check_ortstatus(status)) { + _ODP_ERR("GetTensorTypeAndShape() failed\n"); + return -1; + } + + status = ort_api->GetTensorElementType(tensor_info, &tensor_type); + if (check_ortstatus(status)) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("GetTensorElementType() failed\n"); + return -1; + } + + if (onnx_dtype_to_odp_dtype(tensor_type) != expected_type) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("Tensor type does not match model type\n"); + return -1; + } + + status = ort_api->GetDimensionsCount(tensor_info, &dim_count); + if (check_ortstatus(status)) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("GetDimensionsCount() failed\n"); + return -1; + } + + if (dim_count != expected_shape->num_dim) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("Tensor dimension does not match shape_dim\n"); + return -1; + } + + status = ort_api->GetDimensions(tensor_info, dims, dim_count); + if (check_ortstatus(status)) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("GetDimensions() failed\n"); + return -1; + } + + ml_shape_to_int64(expected_shape, batch_size, shape_arr); + + for (uint32_t i = 0; i < dim_count; i++) { + if (dims[i] != shape_arr[i]) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("Shape[%u]: %" PRIu64 " does not match expected: %" PRIu64 "\n", + i, dims[i], shape_arr[i]); + return -1; + } + } + + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + return 0; +} + +static int input_data_to_tensor(const odp_ml_input_info_t *input_info, uint32_t num_seg, + const odp_ml_data_seg_t *input_seg, uint32_t *seg_idx, + uint32_t batch_size, OrtValue **input_tensor) +{ + int is_tensor; + uint64_t input_size; + OrtAllocator *allocator; + void *data = NULL; + OrtStatus *status = NULL; + int64_t shape[ODP_ML_MAX_DIMS] = {0}; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + 
ONNXTensorElementDataType onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + + ml_shape_to_int64(&input_info->shape, batch_size, shape); + + onnx_dtype = onnx_dtype_from_odp_dtype(input_info->data_type); + _ODP_ASSERT(onnx_dtype != ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED); + + status = ort_api->GetAllocatorWithDefaultOptions(&allocator); + if (check_ortstatus(status)) { + _ODP_ERR("GetAllocatorWithDefaultOptions() failed\n"); + return -1; + } + + status = ort_api->CreateTensorAsOrtValue(allocator, + shape, + input_info->shape.num_dim, + onnx_dtype, + input_tensor); + if (check_ortstatus(status) || !input_tensor[0]) { + _ODP_ERR("CreateTensorWithDataAsOrtValue() failed\n"); + return -1; + } + + input_size = input_info->data_type_size * get_num_elem(batch_size, &input_info->shape); + + status = ort_api->GetTensorMutableData(input_tensor[0], &data); + if (check_ortstatus(status) || !data) { + _ODP_ERR("GetTensorMutableData() failed\n"); + return -1; + } + + for (uint64_t i = 0; i < input_size; ) { + if (*seg_idx >= num_seg) { + _ODP_ERR("Insufficient input data\n"); + return -1; + } + + uint64_t seg_size = input_seg[*seg_idx].size; + + if (i + seg_size > input_size) { + _ODP_ERR("Excess input data in segment %" PRIu32 "\n", *seg_idx); + return -1; + } + + memcpy((uint8_t *)data + i, input_seg[(*seg_idx)++].addr, seg_size); + i += seg_size; + } + + if (!ODP_DEBUG) + return 0; + + status = ort_api->IsTensor(input_tensor[0], &is_tensor); + if (check_ortstatus(status) || !is_tensor) { + _ODP_ERR("input_tensor IsTensor failed\n"); + return -1; + } + + /* Make sure tensor shape matches input_shape */ + if (verify_tensor(input_tensor[0], input_info->data_type, + &input_info->shape, batch_size)) { + _ODP_ERR("Verify input_tensor failed\n"); + return -1; + } + + return 0; +} + +static int verify_output_tensor(OrtValue *output_tensor, odp_ml_data_type_t expected_type, + const odp_ml_shape_info_t *expected_shape, uint32_t batch_size) +{ + int is_tensor = 0; + const OrtApi 
*ort_api = _odp_ml_glb->ort_api; + OrtStatus *status = ort_api->IsTensor(output_tensor, &is_tensor); + + if (check_ortstatus(status) || !is_tensor) { + _ODP_ERR("output_tensor IsTensor failed\n"); + return -1; + } + + /* Make sure tensor shape matches output_shape */ + if (verify_tensor(output_tensor, expected_type, expected_shape, batch_size)) { + _ODP_ERR("Verify output_tensor failed\n"); + return -1; + } + + return 0; +} + +static int get_tensor_data_size(OrtValue *tensor, uint32_t *size, uint32_t data_type_size) +{ + size_t num_elem; + OrtStatus *status; + OrtTensorTypeAndShapeInfo *tensor_info; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + + status = ort_api->GetTensorTypeAndShape(tensor, &tensor_info); + if (check_ortstatus(status)) { + _ODP_ERR("GetTensorTypeAndShape() failed\n"); + return -1; + } + + status = ort_api->GetTensorShapeElementCount(tensor_info, &num_elem); + if (check_ortstatus(status)) { + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + _ODP_ERR("GetTensorShapeElementCount() failed\n"); + return -1; + } + *size = data_type_size * num_elem; + + ort_api->ReleaseTensorTypeAndShapeInfo(tensor_info); + return 0; +} + +static int check_output_size(odp_bool_t is_segmented, uint32_t output_idx, uint32_t seg_idx, + uint64_t out_tensor_data_size, const odp_ml_data_t data[]) +{ + uint64_t output_size = 0; + + /* Output is not segmented */ + if (!is_segmented) { + /* Make sure tensor data size does not exceed size allocated for + * data->output_seg[seg_idx].addr */ + if (out_tensor_data_size > data->output_seg[seg_idx].size) { + _ODP_ERR("Malloc at least %" PRIu64 " bytes for %dth output tensor\n", + out_tensor_data_size, output_idx); + return -1; + } + + return 0; + } + + /* Output is segmented, first calculate total size for one tensor */ + for (; seg_idx < data->num_output_seg; seg_idx++) { + output_size += data->output_seg[seg_idx].size; + if (output_size >= out_tensor_data_size) + break; + } + + if (0 == output_size) { + _ODP_ERR("No 
output data segments for %uth output tensor\n", output_idx); + return -1; + } + + if (out_tensor_data_size > output_size) { + _ODP_ERR("Output segments (%" PRIu64 " bytes in total) for %uth output" + " is expected to be at least %" PRIu64 " bytes\n", + output_size, output_idx, out_tensor_data_size); + return -1; + } + + return 0; +} + +static int output_tensors_to_data(OrtValue **output_tensors, + uint32_t model_num_outputs, + const odp_ml_run_param_t *param, + const odp_ml_output_info_t *output_info, + const odp_ml_data_t *data, + odp_ml_run_result_t *result_local) +{ + uint32_t seg_idx; + uint64_t seg_size; + uint64_t cpy_size; + uint64_t left_size; + uint64_t output_val_offset; + uint32_t out_tensor_data_size; + void *output_val = NULL; /* Pointer to store one raw output value */ + OrtStatus *status = NULL; + uint32_t batch_size = (param && param->batch_size) ? param->batch_size : 0; + const OrtApi *ort_api = _odp_ml_glb->ort_api; + odp_bool_t is_segmented = (data->num_output_seg != model_num_outputs); + + seg_idx = 0; + for (uint32_t i = 0; i < model_num_outputs; i++) { + if (ODP_DEBUG && + verify_output_tensor(output_tensors[i], output_info[i].data_type, + &output_info[i].shape, batch_size)){ + result_local->error_code = ML_BAD_OUTPUT; + return -1; + } + + /* Get tensor data size */ + if (get_tensor_data_size(output_tensors[i], &out_tensor_data_size, + output_info[i].data_type_size)) { + result_local->error_code = ML_LIB_FAILED; + return -1; + } + + /* When output_tensor is an empty tensor [], skip getting data */ + if (out_tensor_data_size == 0) + continue; + + if (ODP_DEBUG && check_output_size(is_segmented, i, seg_idx, + out_tensor_data_size, data)) { + result_local->error_code = ML_BAD_OUTPUT; + return -1; + } + + /* Following assumes param and data->output_seg are valid */ + /* Get tensor data */ + output_val = NULL; + status = ort_api->GetTensorMutableData(output_tensors[i], &output_val); + if (check_ortstatus(status) || !output_val) { + 
result_local->error_code = ML_LIB_FAILED; + return -1; + } + + /* Output is not segmented */ + if (!is_segmented) { + /* Store output data to data->output_seg[i].addr */ + memcpy(data->output_seg[i].addr, output_val, out_tensor_data_size); + seg_idx++; + continue; + } + + /* Output is segmented */ + output_val_offset = 0; + left_size = out_tensor_data_size; + for (; seg_idx < data->num_output_seg; seg_idx++) { + seg_size = data->output_seg[seg_idx].size; + cpy_size = left_size > seg_size ? seg_size : left_size; + memcpy(data->output_seg[seg_idx].addr, + ((char *)output_val) + output_val_offset, cpy_size); + + output_val_offset += cpy_size; + left_size = out_tensor_data_size - output_val_offset; + + if (!left_size) { + seg_idx++; + break; + } + } + } + + return 0; +} + +int odp_ml_run(odp_ml_model_t model, const odp_ml_data_t *data, const odp_ml_run_param_t *param) +{ + odp_ml_run_result_t result_local; + + int retval = -1; /* Return value of this function */ + int ret = 0; + OrtStatus *status = NULL; + uint32_t batch_size = 0; + + OrtValue *input_tensor[CONFIG_ML_MAX_INPUTS] = {0}; + OrtValue *output_tensors[CONFIG_ML_MAX_OUTPUTS] = {0}; + const char *input_names[CONFIG_ML_MAX_INPUTS] = {0}; + const char *output_names[CONFIG_ML_MAX_OUTPUTS] = {0}; + + const OrtApi *ort_api = _odp_ml_glb->ort_api; + ml_model_t *mdl = ml_model_from_handle(model); + const odp_ml_model_info_t *ml_info = &mdl->info; + const odp_ml_input_info_t *input_info = mdl->input_info; + const odp_ml_output_info_t *output_info = mdl->output_info; + OrtSession *session = mdl->session; + + odp_ticketlock_lock(&mdl->lock); + if (odp_unlikely(mdl->state == ML_STATE_INFERENCING)) { + odp_ticketlock_unlock(&mdl->lock); + return 0; + } + if (odp_unlikely(mdl->state != ML_STATE_LOADED)) { + _ODP_ERR("Wrong model state: not created or not loaded\n"); + odp_ticketlock_unlock(&mdl->lock); + return -1; + } + mdl->state = ML_STATE_INFERENCING; + odp_ticketlock_unlock(&mdl->lock); + + memset(&result_local, 0, 
sizeof(result_local)); + + if (ODP_DEBUG && verify_run_params(model, data, param)) { + result_local.error_code = ML_BAD_INPUT; + goto init_fail; + } + + if (param && param->batch_size) + batch_size = param->batch_size; + + uint32_t seg_idx = 0; + + /* Transfer input data to tensor */ + for (uint32_t i = 0; i < ml_info->num_inputs; i++) { + ret = input_data_to_tensor(&input_info[i], + data->num_input_seg, + data->input_seg, + &seg_idx, + batch_size, + &input_tensor[i]); + if (ret) { + _ODP_ERR("%uth input data to tensor failed\n", i); + result_local.error_code = ML_LIB_FAILED; + goto release_input_tensors; + } + + _ODP_DBG("input_tensor[%u]: %p\n", i, input_tensor[i]); + + /* Model input names */ + input_names[i] = input_info[i].name; + } + + if (seg_idx < data->num_input_seg) { + _ODP_ERR("Excess input segments\n"); + ret = -1; + } + + for (uint32_t i = 0; i < ml_info->num_outputs; i++) + output_names[i] = output_info[i].name; + + /* Run inference */ + status = ort_api->Run(session, + NULL, + (const char * const *)input_names, + (const OrtValue * const*)input_tensor, + ml_info->num_inputs, + (const char * const *)output_names, + ml_info->num_outputs, + output_tensors); + + if (check_ortstatus(status)) { + _ODP_ERR("Run inference failed\n"); + result_local.error_code = ML_LIB_FAILED; + goto release_all_tensors; + } + + /* Verify output tensors and store them to output */ + if (output_tensors_to_data(output_tensors, ml_info->num_outputs, param, + output_info, data, &result_local)) { + _ODP_ERR("Output tensors to data failed\n"); + goto release_all_tensors; + } + + retval = 1; + +release_all_tensors: + for (uint32_t i = 0; i < ml_info->num_outputs; i++) + ort_api->ReleaseValue(output_tensors[i]); + +release_input_tensors: + for (uint32_t i = 0; i < ml_info->num_inputs; i++) + ort_api->ReleaseValue(input_tensor[i]); + +init_fail: + if (param && param->result) + *param->result = result_local; + + odp_ticketlock_lock(&mdl->lock); + mdl->state = ML_STATE_LOADED; + 
odp_ticketlock_unlock(&mdl->lock); + + return retval; +} + +int odp_ml_run_multi(odp_ml_model_t model, const odp_ml_data_t data[], + const odp_ml_run_param_t param[], int num) +{ + int i; + int ret; + + if (odp_unlikely(num < 1)) { + _ODP_ERR("Bad number of runs\n"); + return -1; + } + + for (i = 0; i < num; i++) { + if (param) + ret = odp_ml_run(model, &data[i], ¶m[i]); + else + ret = odp_ml_run(model, &data[i], NULL); + + if (odp_unlikely(ret != 1)) + break; + } + + if (odp_unlikely(i == 0)) + return ret; + + return i; +} + +int odp_ml_run_start(odp_ml_model_t model, const odp_ml_data_t *data, + const odp_ml_compl_param_t *compl_param, + const odp_ml_run_param_t *run_param) +{ + int ret; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID)) { + _ODP_ERR("Bad model handle\n"); + return -1; + } + + if (odp_unlikely(!compl_param)) { + _ODP_ERR("Completion parameter is NULL\n"); + return -1; + } + + /* Check completion mode */ + if (odp_unlikely(check_compl_param(compl_param, mdl->max_compl_id, false))) { + _ODP_ERR("Bad ML job completion parameter\n"); + return -1; + } + + if (compl_param->mode == ODP_ML_COMPL_MODE_POLL) + odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 0); + + ret = odp_ml_run(model, data, run_param); + + if (odp_unlikely(ret < 1)) + return ret; + + /* Send a completion event to the given queue */ + if (compl_param->mode == ODP_ML_COMPL_MODE_EVENT) { + odp_ml_run_result_t *result; + odp_buffer_t buf = (odp_buffer_t)(uintptr_t)compl_param->event; + + _odp_buffer_subtype_set(buf, ODP_EVENT_ML_COMPL_RUN); + + result = odp_buffer_addr(buf); + result->error_code = 0; + result->user_ptr = compl_param->user_ptr; + + if (odp_unlikely(odp_queue_enq(compl_param->queue, compl_param->event))) { + _ODP_ERR("Completion event enqueue failed %" PRIu64 "\n", + odp_queue_to_u64(compl_param->queue)); + return -1; + } + + return 1; + } + + /* compl_param->mode == ODP_ML_COMPL_MODE_POLL */ + 
mdl->result[compl_param->compl_id].user_ptr = compl_param->user_ptr; + odp_atomic_store_rel_u32(&mdl->compl_status[compl_param->compl_id], 1); + + return 1; +} + +int odp_ml_run_start_multi(odp_ml_model_t model, const odp_ml_data_t data[], + const odp_ml_compl_param_t compl_param[], + const odp_ml_run_param_t run_param[], int num) +{ + int i; + int ret = 0; + + if (odp_unlikely(num < 1)) { + _ODP_ERR("Bad number of runs\n"); + return -1; + } + + for (i = 0; i < num; i++) { + if (run_param) + ret = odp_ml_run_start(model, &data[i], &compl_param[i], &run_param[i]); + else + ret = odp_ml_run_start(model, &data[i], &compl_param[i], NULL); + + if (odp_unlikely(ret != 1)) + break; + } + + if (odp_unlikely(i == 0)) + return ret; + + return i; +} + +int odp_ml_run_status(odp_ml_model_t model, uint32_t compl_id, odp_ml_run_result_t *result) +{ + int ret; + ml_model_t *mdl = ml_model_from_handle(model); + + if (odp_unlikely(model == ODP_ML_MODEL_INVALID || + compl_id > mdl->max_compl_id)) { + _ODP_ERR("Invalid model handle or completion id: %u\n", compl_id); + return -2; + } + + ret = odp_atomic_load_acq_u32(&mdl->compl_status[compl_id]); + + if (result) { + result->error_code = 0; + result->user_ptr = mdl->result[compl_id].user_ptr; + } + + return ret; +} + +static int opt_level_from_str(const char *level_str, GraphOptimizationLevel *level) +{ + if (strcmp(level_str, "DISABLE_ALL") == 0) + *level = ORT_DISABLE_ALL; + else if (strcmp(level_str, "ENABLE_BASIC") == 0) + *level = ORT_ENABLE_BASIC; + else if (strcmp(level_str, "ENABLE_EXTENDED") == 0) + *level = ORT_ENABLE_EXTENDED; + else if (strcmp(level_str, "ENABLE_ALL") == 0) + *level = ORT_ENABLE_ALL; + else + return -1; + + return 0; +} + +static int execution_mode_from_str(const char *mode_str, ExecutionMode *mode) +{ + if (strcmp(mode_str, "SEQUENTIAL") == 0) + *mode = ORT_SEQUENTIAL; + else if (strcmp(mode_str, "PARALLEL") == 0) + *mode = ORT_PARALLEL; + else + return -1; + + return 0; +} + +static int 
read_config_file(ort_run_opts_t *opts) +{ + const char *conf_str; + char mode_str[ML_MAX_CONFIG_STR_LEN]; + char opt_level_str[ML_MAX_CONFIG_STR_LEN]; + + _ODP_PRINT("ML config:\n"); + + conf_str = "ml.enable_profiling"; + if (!_odp_libconfig_lookup_int(conf_str, &opts->enable_profiling)) { + _ODP_ERR("Config option '%s' not found.\n", conf_str); + return -1; + } + _ODP_PRINT(" %s: %i\n", conf_str, opts->enable_profiling); + + conf_str = "ml.execution_mode"; + if (_odp_libconfig_lookup_str(conf_str, mode_str, ML_MAX_CONFIG_STR_LEN) < 0) { + _ODP_ERR("Config option '%s' not found.\n", conf_str); + return -1; + } + + if (execution_mode_from_str(mode_str, &opts->execution_mode)) { + _ODP_ERR("Unsupported execution mode: %s\n", mode_str); + return -1; + } + _ODP_PRINT(" %s: %s\n", conf_str, mode_str); + + conf_str = "ml.inter_op_num_threads"; + if (!_odp_libconfig_lookup_int(conf_str, &opts->inter_op_num_threads)) { + _ODP_ERR("Config option '%s' not found.\n", conf_str); + return -1; + } + _ODP_PRINT(" %s: %i\n", conf_str, opts->inter_op_num_threads); + + conf_str = "ml.intra_op_num_threads"; + if (!_odp_libconfig_lookup_int(conf_str, &opts->intra_op_num_threads)) { + _ODP_ERR("Config option '%s' not found.\n", conf_str); + return -1; + } + _ODP_PRINT(" %s: %i\n", conf_str, opts->intra_op_num_threads); + + conf_str = "ml.graph_optimization_level"; + if (_odp_libconfig_lookup_str(conf_str, opt_level_str, + ML_MAX_CONFIG_STR_LEN) < 0) { + _ODP_ERR("Config option '%s' not found.\n", conf_str); + return -1; + } + + if (opt_level_from_str(opt_level_str, &opts->graph_opt_level)) { + _ODP_ERR("Graph optimize level %s not supported\n", opt_level_str); + return -1; + } + _ODP_PRINT(" %s: %s\n", conf_str, opt_level_str); + + conf_str = "ml.optimized_model_filepath"; + if (_odp_libconfig_lookup_str(conf_str, opts->opt_model_filepath, + ML_MAX_CONFIG_STR_LEN) < 0) { + _ODP_ERR("Config option '%s' not found.\n", conf_str); + return -1; + } + _ODP_PRINT(" %s: %s\n", conf_str, 
opts->opt_model_filepath); + + return 0; +} + +int _odp_ml_init_global(void) +{ + int i; + OrtEnv *env; + odp_shm_t shm; + OrtStatus *status; + const OrtApi *ort_api; + + if (odp_global_ro.disable.ml) { + _ODP_ERR("ML is disabled\n"); + return 0; + } + + shm = odp_shm_reserve("_odp_ml_global", sizeof(ml_global_t), ODP_CACHE_LINE_SIZE, 0); + _odp_ml_glb = odp_shm_addr(shm); + + if (_odp_ml_glb == NULL) { + _ODP_ERR("SHM reserve failed for odp_ml\n"); + return -1; + } + + memset(_odp_ml_glb, 0, sizeof(ml_global_t)); + _odp_ml_glb->shm = shm; + + if (odp_ml_capability(&_odp_ml_glb->capa)) { + _ODP_ERR("ML capability failed\n"); + return -1; + } + + odp_pool_param_init(&_odp_ml_glb->pool_param); + + if (read_config_file(&_odp_ml_glb->ort_run_opts)) + return -1; + + ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION); + if (!ort_api) { + _ODP_ERR("Failed to init ONNX Runtime engine.\n"); + return -1; + } + _odp_ml_glb->ort_api = ort_api; + + status = ort_api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "Default", &env); + if (check_ortstatus(status) || !env) { + _ODP_ERR("ort_api->CreateEnv() failed.\n"); + return -1; + } + _odp_ml_glb->env = env; + + for (i = 0; i < ML_MAX_MODELS_CREATED; i++) + odp_ticketlock_init(&_odp_ml_glb->models[i].lock); + + return 0; +} + +int _odp_ml_term_global(void) +{ + if (odp_global_ro.disable.ml) + return 0; + + if (_odp_ml_glb == NULL) + return 0; + + if (_odp_ml_glb->env) + _odp_ml_glb->ort_api->ReleaseEnv(_odp_ml_glb->env); + + if (odp_shm_free(_odp_ml_glb->shm)) { + _ODP_ERR("Shm free failed for odp_ml\n"); + return -1; + } + + return 0; +} diff --git a/platform/linux-generic/odp_ml_fp16.c b/platform/linux-generic/odp_ml_fp16.c new file mode 100644 index 000000000..47b10f841 --- /dev/null +++ b/platform/linux-generic/odp_ml_fp16.c @@ -0,0 +1,425 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2022-2023 Marvell. 
+ * Copyright (c) 2023 Nokia + * + * Based on + * - dpdk/lib/mldev/mldev_utils_scalar.h + * - dpdk/lib/mldev/mldev_utils_scalar.c + * - dpdk/lib/mldev/mldev_utils_scalar_bfloat16.c + */ + +#include <odp_ml_fp16.h> + +#include <errno.h> +#include <stdint.h> + +#ifndef BIT +#define BIT(nr) (1UL << (nr)) +#endif + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) +#endif + +#ifndef GENMASK_U32 +#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) +#endif + +/* float32: bit index of MSB & LSB of sign, exponent and mantissa */ +#define FP32_LSB_M 0 +#define FP32_MSB_M 22 +#define FP32_LSB_E 23 +#define FP32_MSB_E 30 +#define FP32_LSB_S 31 +#define FP32_MSB_S 31 + +/* float32: bitmask for sign, exponent and mantissa */ +#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S) +#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E) +#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M) + +/* float16: bit index of MSB & LSB of sign, exponent and mantissa */ +#define FP16_LSB_M 0 +#define FP16_MSB_M 9 +#define FP16_LSB_E 10 +#define FP16_MSB_E 14 +#define FP16_LSB_S 15 +#define FP16_MSB_S 15 + +/* float16: bitmask for sign, exponent and mantissa */ +#define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S) +#define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E) +#define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M) + +/* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */ +#define BF16_LSB_M 0 +#define BF16_MSB_M 6 +#define BF16_LSB_E 7 +#define BF16_MSB_E 14 +#define BF16_LSB_S 15 +#define BF16_MSB_S 15 + +/* bfloat16: bitmask for sign, exponent and mantissa */ +#define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S) +#define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E) +#define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M) + +/* Exponent bias */ +#define FP32_BIAS_E 127 +#define FP16_BIAS_E 15 +#define BF16_BIAS_E 127 + +#define FP32_PACK(sign, exponent, mantissa) \ + (((sign) << FP32_LSB_S) | 
((exponent) << FP32_LSB_E) | (mantissa)) + +#define FP16_PACK(sign, exponent, mantissa) \ + (((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa)) + +#define BF16_PACK(sign, exponent, mantissa) \ + (((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa)) + +/* Represent float32 as float and uint32_t */ +union float32 { + float f; + uint32_t u; +}; + +/* Convert a single precision floating point number (float32) into a half precision + * floating point number (float16) using round to nearest rounding mode, with exact + * ties rounded to the even mantissa. The sign bit is always carried over. Float32 + * zeros and subnormals become zero, values too large for float16 become infinity, + * and NaN stays NaN. + */ +static uint16_t +__float32_to_float16_scalar_rtn(float x) +{ + union float32 f32; /* float32 input */ + uint32_t f32_s; /* float32 sign */ + uint32_t f32_e; /* float32 exponent */ + uint32_t f32_m; /* float32 mantissa */ + uint16_t f16_s; /* float16 sign */ + uint16_t f16_e; /* float16 exponent */ + uint16_t f16_m; /* float16 mantissa */ + uint32_t tbits; /* number of truncated bits */ + uint32_t tmsb; /* MSB position of truncated bits */ + uint32_t m_32; /* temporary float32 mantissa */ + uint16_t m_16; /* temporary float16 mantissa */ + uint16_t u16; /* float16 output */ + int be_16; /* float16 biased exponent, signed */ + + f32.f = x; + f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S; + f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E; + f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M; + + f16_s = f32_s; + f16_e = 0; + f16_m = 0; + + switch (f32_e) { + case (0): /* float32: zero or subnormal number */ + f16_e = 0; + f16_m = 0; /* convert to zero */ + break; + case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */ + f16_e = FP16_MASK_E >> FP16_LSB_E; + if (f32_m == 0) { /* infinity */ + f16_m = 0; + } else { /* nan, propagate mantissa and set MSB of mantissa to 1 */ + f16_m = f32_m >> (FP32_MSB_M - FP16_MSB_M); + f16_m |= BIT(FP16_MSB_M); + } + break; + default: /* float32: normal number */ + /* compute biased exponent for float16 */ + be_16 = (int)f32_e - FP32_BIAS_E + FP16_BIAS_E; + + /* overflow, be_16 = [31-INF], set to infinity */ + 
if (be_16 >= (int)(FP16_MASK_E >> FP16_LSB_E)) { + f16_e = FP16_MASK_E >> FP16_LSB_E; + f16_m = 0; + } else if ((be_16 >= 1) && (be_16 < (int)(FP16_MASK_E >> FP16_LSB_E))) { + /* normal float16, be_16 = [1:30]*/ + f16_e = be_16; + m_16 = f32_m >> (FP32_LSB_E - FP16_LSB_E); + tmsb = FP32_MSB_M - FP16_MSB_M - 1; + if ((f32_m & GENMASK_U32(tmsb, 0)) > BIT(tmsb)) { + /* round up: truncated bits are above half of the float16 LSB */ + m_16++; + + /* overflow into exponent */ + if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1) + f16_e++; + } else if ((f32_m & GENMASK_U32(tmsb, 0)) == BIT(tmsb)) { + /* exact tie: round up only when LSB of m_16 is set (ties to even) */ + if ((m_16 & 0x1) == 0x1) { + m_16++; + + /* overflow into exponent */ + if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1) + f16_e++; + } + } + f16_m = m_16 & FP16_MASK_M; + } else if ((be_16 >= -(int)(FP16_MSB_M)) && (be_16 < 1)) { + /* underflow: zero / subnormal, be_16 = [-9:0] */ + f16_e = 0; + + /* add the implicit leading one of the normal float32 mantissa */ + m_32 = f32_m | BIT(FP32_LSB_E); + tbits = FP32_LSB_E - FP16_LSB_E - be_16 + 1; + m_16 = m_32 >> tbits; + + /* round up: truncated bits are above half of the float16 LSB */ + if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) { + m_16++; + + /* overflow into exponent */ + if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1) + f16_e++; + } else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) { + /* exact tie: round up only when LSB of m_16 is set (ties to even) */ + if ((m_16 & 0x1) == 0x1) { + m_16++; + + /* overflow into exponent */ + if (((m_16 & FP16_MASK_E) >> FP16_LSB_E) == 0x1) + f16_e++; + } + } + f16_m = m_16 & FP16_MASK_M; + } else if (be_16 == -(int)(FP16_MSB_M + 1)) { + /* underflow, be_16 = [-10]: any set mantissa bit gives the smallest + * subnormal (mantissa 1), an all-zero mantissa gives zero */ + f16_e = 0; + if (f32_m != 0) + f16_m = 1; + else + f16_m = 0; + } else { + /* underflow: zero, be_16 = [-INF:-11] */ + f16_e = 0; + f16_m = 0; + } + + break; + } + + u16 = FP16_PACK(f16_s, f16_e, f16_m); + + return u16; +} + +/* Convert a half precision floating point number (float16) into a single precision + * floating point number 
(float32). + */ +static float +__float16_to_float32_scalar_rtx(uint16_t f16) +{ + union float32 f32; /* float32 output */ + uint16_t f16_s; /* float16 sign */ + uint16_t f16_e; /* float16 exponent */ + uint16_t f16_m; /* float16 mantissa */ + uint32_t f32_s; /* float32 sign */ + uint32_t f32_e; /* float32 exponent */ + uint32_t f32_m; /* float32 mantissa*/ + uint8_t shift; /* number of bits to be shifted */ + uint32_t clz; /* count of leading zeroes */ + int e_16; /* float16 exponent unbiased */ + + f16_s = (f16 & FP16_MASK_S) >> FP16_LSB_S; + f16_e = (f16 & FP16_MASK_E) >> FP16_LSB_E; + f16_m = (f16 & FP16_MASK_M) >> FP16_LSB_M; + + f32_s = f16_s; + switch (f16_e) { + case (FP16_MASK_E >> FP16_LSB_E): /* float16: infinity or nan */ + f32_e = FP32_MASK_E >> FP32_LSB_E; + if (f16_m == 0x0) { /* infinity */ + f32_m = f16_m; + } else { /* nan, propagate mantissa, set MSB of mantissa to 1 */ + f32_m = f16_m; + shift = FP32_MSB_M - FP16_MSB_M; + f32_m = (f32_m << shift) & FP32_MASK_M; + f32_m |= BIT(FP32_MSB_M); + } + break; + case 0: /* float16: zero or sub-normal */ + f32_m = f16_m; + if (f16_m == 0) { /* zero signed */ + f32_e = 0; + } else { /* subnormal numbers */ + clz = __builtin_clz((uint32_t)f16_m) - sizeof(uint32_t) * 8 + FP16_LSB_E; + e_16 = (int)f16_e - clz; + f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E; + + shift = clz + (FP32_MSB_M - FP16_MSB_M) + 1; + f32_m = (f32_m << shift) & FP32_MASK_M; + } + break; + default: /* normal numbers */ + f32_m = f16_m; + e_16 = (int)f16_e; + f32_e = FP32_BIAS_E + e_16 - FP16_BIAS_E; + + shift = (FP32_MSB_M - FP16_MSB_M); + f32_m = (f32_m << shift) & FP32_MASK_M; + } + + f32.u = FP32_PACK(f32_s, f32_e, f32_m); + + return f32.f; +} + +/* Convert a single precision floating point number (float32) into a + * brain float number (bfloat16) using round to nearest rounding mode. 
+ */ +static uint16_t +__float32_to_bfloat16_scalar_rtn(float x) +{ + union float32 f32; /* float32 input */ + uint32_t f32_s; /* float32 sign */ + uint32_t f32_e; /* float32 exponent */ + uint32_t f32_m; /* float32 mantissa */ + uint16_t b16_s; /* float16 sign */ + uint16_t b16_e; /* float16 exponent */ + uint16_t b16_m; /* float16 mantissa */ + uint32_t tbits; /* number of truncated bits */ + uint16_t u16; /* float16 output */ + + f32.f = x; + f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S; + f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E; + f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M; + + b16_s = f32_s; + b16_e = 0; + b16_m = 0; + + switch (f32_e) { + case (0): /* float32: zero or subnormal number */ + b16_e = 0; + if (f32_m == 0) /* zero */ + b16_m = 0; + else /* subnormal float32 number, normal bfloat16 */ + goto bf16_normal; + break; + case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */ + b16_e = BF16_MASK_E >> BF16_LSB_E; + if (f32_m == 0) { /* infinity */ + b16_m = 0; + } else { /* nan, propagate mantissa and set MSB of mantissa to 1 */ + b16_m = f32_m >> (FP32_MSB_M - BF16_MSB_M); + b16_m |= BIT(BF16_MSB_M); + } + break; + default: /* float32: normal number, normal bfloat16 */ + goto bf16_normal; + } + + goto bf16_pack; + +bf16_normal: + b16_e = f32_e; + tbits = FP32_MSB_M - BF16_MSB_M; + b16_m = f32_m >> tbits; + + /* if non-leading truncated bits are set */ + if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) { + b16_m++; + + /* if overflow into exponent */ + if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1) + b16_e++; + } else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) { + /* if only leading truncated bit is set */ + if ((b16_m & 0x1) == 0x1) { + b16_m++; + + /* if overflow into exponent */ + if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1) + b16_e++; + } + } + b16_m = b16_m & BF16_MASK_M; + +bf16_pack: + u16 = BF16_PACK(b16_s, b16_e, b16_m); + + return u16; +} + +/* Convert a brain float number (bfloat16) into a + * 
single precision floating point number (float32). + */ +static float +__bfloat16_to_float32_scalar_rtx(uint16_t f16) +{ + union float32 f32; /* float32 output */ + uint16_t b16_s; /* float16 sign */ + uint16_t b16_e; /* float16 exponent */ + uint16_t b16_m; /* float16 mantissa */ + uint32_t f32_s; /* float32 sign */ + uint32_t f32_e; /* float32 exponent */ + uint32_t f32_m; /* float32 mantissa*/ + uint8_t shift; /* number of bits to be shifted */ + + b16_s = (f16 & BF16_MASK_S) >> BF16_LSB_S; + b16_e = (f16 & BF16_MASK_E) >> BF16_LSB_E; + b16_m = (f16 & BF16_MASK_M) >> BF16_LSB_M; + + f32_s = b16_s; + switch (b16_e) { + case (BF16_MASK_E >> BF16_LSB_E): /* bfloat16: infinity or nan */ + f32_e = FP32_MASK_E >> FP32_LSB_E; + if (b16_m == 0x0) { /* infinity */ + f32_m = 0; + } else { /* nan, propagate mantissa, set MSB of mantissa to 1 */ + f32_m = b16_m; + shift = FP32_MSB_M - BF16_MSB_M; + f32_m = (f32_m << shift) & FP32_MASK_M; + f32_m |= BIT(FP32_MSB_M); + } + break; + case 0: /* bfloat16: zero or subnormal */ + f32_m = b16_m; + if (b16_m == 0) { /* zero signed */ + f32_e = 0; + } else { /* subnormal numbers */ + goto fp32_normal; + } + break; + default: /* bfloat16: normal number */ + goto fp32_normal; + } + + goto fp32_pack; + +fp32_normal: + f32_m = b16_m; + f32_e = FP32_BIAS_E + b16_e - BF16_BIAS_E; + + shift = (FP32_MSB_M - BF16_MSB_M); + f32_m = (f32_m << shift) & FP32_MASK_M; + +fp32_pack: + f32.u = FP32_PACK(f32_s, f32_e, f32_m); + + return f32.f; +} + +uint16_t _odp_float32_to_float16(float x) +{ + return __float32_to_float16_scalar_rtn(x); +} + +float _odp_float16_to_float32(uint16_t f16) +{ + return __float16_to_float32_scalar_rtx(f16); +} + +uint16_t _odp_float32_to_bfloat16(float x) +{ + return __float32_to_bfloat16_scalar_rtn(x); +} + +float _odp_bfloat16_to_float32(uint16_t f16) +{ + return __bfloat16_to_float32_scalar_rtx(f16); +} diff --git a/platform/linux-generic/odp_ml_null.c b/platform/linux-generic/odp_ml_null.c new file mode 100644 index 
000000000..718e80d76 --- /dev/null +++ b/platform/linux-generic/odp_ml_null.c @@ -0,0 +1,232 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <odp/api/hints.h> +#include <odp/api/ml.h> + +#include <odp_init_internal.h> + +#include <stdint.h> +#include <string.h> + +/* Dummy ML API implementation, no capability and just return error for + * other functions. + */ +int _odp_ml_init_global(void) +{ + return 0; +} + +int _odp_ml_term_global(void) +{ + return 0; +} + +int odp_ml_capability(odp_ml_capability_t *capa) +{ + memset(capa, 0, sizeof(odp_ml_capability_t)); + return 0; +} + +void odp_ml_config_init(odp_ml_config_t *config ODP_UNUSED) +{ +} + +int odp_ml_config(const odp_ml_config_t *config ODP_UNUSED) +{ + return -1; +} + +void odp_ml_model_param_init(odp_ml_model_param_t *param ODP_UNUSED) +{ +} + +odp_ml_model_t odp_ml_model_create(const char *name ODP_UNUSED, + const odp_ml_model_param_t *param ODP_UNUSED) +{ + return ODP_ML_MODEL_INVALID; +} + +int odp_ml_model_destroy(odp_ml_model_t model ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_info(odp_ml_model_t model ODP_UNUSED, odp_ml_model_info_t *info ODP_UNUSED) +{ + return -1; +} + +uint32_t odp_ml_model_input_info(odp_ml_model_t model ODP_UNUSED, + odp_ml_input_info_t info[] ODP_UNUSED, + uint32_t num ODP_UNUSED) +{ + return 0; +} + +uint32_t odp_ml_model_output_info(odp_ml_model_t model ODP_UNUSED, + odp_ml_output_info_t info[] ODP_UNUSED, + uint32_t num ODP_UNUSED) +{ + return 0; +} + +odp_ml_model_t odp_ml_model_lookup(const char *name ODP_UNUSED) +{ + return ODP_ML_MODEL_INVALID; +} + +uint64_t odp_ml_model_to_u64(odp_ml_model_t model ODP_UNUSED) +{ + return 0; +} + +void odp_ml_model_print(odp_ml_model_t model ODP_UNUSED) +{ +} + +void odp_ml_print(void) +{ +} + +void odp_ml_compl_pool_param_init(odp_ml_compl_pool_param_t *pool_param) +{ + memset(pool_param, 0, sizeof(odp_ml_compl_pool_param_t)); +} + +odp_pool_t odp_ml_compl_pool_create(const char *name 
ODP_UNUSED, + const odp_ml_compl_pool_param_t *pool_param ODP_UNUSED) +{ + return ODP_POOL_INVALID; +} + +odp_ml_compl_t odp_ml_compl_alloc(odp_pool_t pool ODP_UNUSED) +{ + return ODP_ML_COMPL_INVALID; +} + +void odp_ml_compl_free(odp_ml_compl_t ml_compl ODP_UNUSED) +{ +} + +int odp_ml_compl_run_result(odp_ml_compl_t ml_compl ODP_UNUSED, + odp_ml_run_result_t *result ODP_UNUSED) +{ + return -1; +} + +int odp_ml_compl_load_result(odp_ml_compl_t ml_compl ODP_UNUSED, + odp_ml_load_result_t *result ODP_UNUSED) +{ + return -1; +} + +void *odp_ml_compl_user_area(odp_ml_compl_t ml_compl ODP_UNUSED) +{ + return NULL; +} + +odp_ml_compl_t odp_ml_compl_from_event(odp_event_t event ODP_UNUSED) +{ + return ODP_ML_COMPL_INVALID; +} + +odp_event_t odp_ml_compl_to_event(odp_ml_compl_t ml_compl ODP_UNUSED) +{ + return ODP_EVENT_INVALID; +} + +uint64_t odp_ml_compl_to_u64(odp_ml_compl_t ml_compl ODP_UNUSED) +{ + return 0; +} + +void odp_ml_compl_param_init(odp_ml_compl_param_t *compl_param ODP_UNUSED) +{ +} + +int odp_ml_model_load(odp_ml_model_t model ODP_UNUSED, odp_ml_load_result_t *result ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_load_start(odp_ml_model_t model ODP_UNUSED, + const odp_ml_compl_param_t *compl_param ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_load_status(odp_ml_model_t model ODP_UNUSED, uint32_t compl_id ODP_UNUSED, + odp_ml_load_result_t *result ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_unload(odp_ml_model_t model ODP_UNUSED, odp_ml_load_result_t *result ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_unload_start(odp_ml_model_t model ODP_UNUSED, + const odp_ml_compl_param_t *compl_param ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_unload_status(odp_ml_model_t model ODP_UNUSED, uint32_t compl_id ODP_UNUSED, + odp_ml_load_result_t *result ODP_UNUSED) +{ + return -1; +} + +void odp_ml_run_param_init(odp_ml_run_param_t *param ODP_UNUSED) +{ +} + +int odp_ml_run(odp_ml_model_t model ODP_UNUSED, const odp_ml_data_t *data ODP_UNUSED, 
+ const odp_ml_run_param_t *param ODP_UNUSED) +{ + return -1; +} + +int odp_ml_run_multi(odp_ml_model_t model ODP_UNUSED, const odp_ml_data_t data[] ODP_UNUSED, + const odp_ml_run_param_t param[] ODP_UNUSED, int num ODP_UNUSED) +{ + return -1; +} + +int odp_ml_run_start(odp_ml_model_t model ODP_UNUSED, const odp_ml_data_t *data ODP_UNUSED, + const odp_ml_compl_param_t *compl_param ODP_UNUSED, + const odp_ml_run_param_t *run_param ODP_UNUSED) +{ + return -1; +} + +int odp_ml_run_start_multi(odp_ml_model_t model ODP_UNUSED, + const odp_ml_data_t data[] ODP_UNUSED, + const odp_ml_compl_param_t compl_param[] ODP_UNUSED, + const odp_ml_run_param_t run_param[] ODP_UNUSED, + int num ODP_UNUSED) +{ + return -1; +} + +int odp_ml_run_status(odp_ml_model_t model ODP_UNUSED, uint32_t compl_id ODP_UNUSED, + odp_ml_run_result_t *result ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_extra_stat_info(odp_ml_model_t model ODP_UNUSED, + odp_ml_extra_stat_info_t info[] ODP_UNUSED, + int num ODP_UNUSED) +{ + return -1; +} + +int odp_ml_model_extra_stats(odp_ml_model_t model ODP_UNUSED, + uint64_t stats[] ODP_UNUSED, int num ODP_UNUSED) +{ + return -1; +} diff --git a/platform/linux-generic/odp_ml_quantize.c b/platform/linux-generic/odp_ml_quantize.c new file mode 100644 index 000000000..d3f3601e3 --- /dev/null +++ b/platform/linux-generic/odp_ml_quantize.c @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include <odp/api/ml_quantize.h> + +#include <odp_debug_internal.h> +#include <odp_macros_internal.h> +#include <odp_ml_fp16.h> + +#include <math.h> +#include <stdint.h> + +void odp_ml_fp32_to_uint8(uint8_t *u8, const float *fp32, uint32_t num, float scale, + uint8_t zerop) +{ + float fval; + + _ODP_ASSERT(scale != 0); + + for (uint32_t i = 0; i < num; i++) { + /* Range mapping: map real values to signed integer */ + fval = nearbyintf(fp32[i] / scale) + (float)zerop; + + /* clip */ + fval = _ODP_MAX(fval, 0.f); + fval = _ODP_MIN(fval, 
255.f); + u8[i] = (uint8_t)(int32_t)fval; + } +} + +void odp_ml_fp32_from_uint8(float *fp32, const uint8_t *u8, uint32_t num, float scale, + uint8_t zerop) +{ + for (uint32_t i = 0; i < num; i++) + fp32[i] = (float)(u8[i] - zerop) * scale; +} + +void odp_ml_fp32_to_int8(int8_t *i8, const float *fp32, uint32_t num, float scale, int8_t zerop) +{ + float fval; + + _ODP_ASSERT(scale != 0); + + for (uint32_t i = 0; i < num; i++) { + /* Range mapping: map real values to signed integer */ + fval = nearbyintf(fp32[i] / scale) + (float)zerop; + + /* NOTE: Clamps signed quantization values to [-127,127] instead of [-128,127]. + * This is to ensure that symmetric quantization results in a zero + * point of exactly 0 for signed 8 bit ints. + */ + fval = _ODP_MAX(fval, -127.f); + fval = _ODP_MIN(fval, 127.f); + i8[i] = (int8_t)(int32_t)fval; + } +} + +void odp_ml_fp32_from_int8(float *fp32, const int8_t *i8, uint32_t num, float scale, int8_t zerop) +{ + for (uint32_t i = 0; i < num; i++) + fp32[i] = (float)(i8[i] - zerop) * scale; +} + +void odp_ml_fp32_to_fp16(uint16_t *fp16, const float *fp32, uint32_t num) +{ + uint32_t i; + + for (i = 0; i < num; i++) + fp16[i] = _odp_float32_to_float16(fp32[i]); +} + +void odp_ml_fp32_from_fp16(float *fp32, const uint16_t *fp16, uint32_t num) +{ + uint32_t i; + + for (i = 0; i < num; i++) + fp32[i] = _odp_float16_to_float32(fp16[i]); +} diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c index 96fcd928a..17a4a9298 100644 --- a/platform/linux-generic/odp_packet.c +++ b/platform/linux-generic/odp_packet.c @@ -66,7 +66,6 @@ const _odp_packet_inline_offset_t _odp_packet_inline ODP_ALIGNED_CACHE = { .timestamp = offsetof(odp_packet_hdr_t, timestamp), .input_flags = offsetof(odp_packet_hdr_t, p.input_flags), .flags = offsetof(odp_packet_hdr_t, p.flags), - .subtype = offsetof(odp_packet_hdr_t, subtype), .cls_mark = offsetof(odp_packet_hdr_t, cls_mark), .ipsec_ctx = offsetof(odp_packet_hdr_t, ipsec_ctx), 
.crypto_op = offsetof(odp_packet_hdr_t, crypto_op_result), @@ -1454,7 +1453,7 @@ void odp_packet_print(odp_packet_t pkt) len += _odp_snprint(&str[len], n - len, " pool index %u\n", hdr->event_hdr.index.pool); len += _odp_snprint(&str[len], n - len, " buf index %u\n", hdr->event_hdr.index.event); - len += _odp_snprint(&str[len], n - len, " ev subtype %i\n", hdr->subtype); + len += _odp_snprint(&str[len], n - len, " ev subtype %i\n", hdr->event_hdr.subtype); len += _odp_snprint(&str[len], n - len, " input_flags 0x%" PRIx64 "\n", hdr->p.input_flags.all); if (hdr->p.input_flags.all) { @@ -2401,7 +2400,7 @@ odp_packet_t odp_packet_reassemble(odp_pool_t pool_hdl, odp_packet_buf_t pkt_buf pkt_hdr->tailroom = tailroom; /* Reset metadata */ - pkt_hdr->subtype = ODP_EVENT_PACKET_BASIC; + pkt_hdr->event_hdr.subtype = ODP_EVENT_PACKET_BASIC; pkt_hdr->input = ODP_PKTIO_INVALID; packet_parse_reset(pkt_hdr, 1); diff --git a/platform/linux-generic/odp_packet_io.c b/platform/linux-generic/odp_packet_io.c index 236813e80..8283c41e6 100644 --- a/platform/linux-generic/odp_packet_io.c +++ b/platform/linux-generic/odp_packet_io.c @@ -3015,8 +3015,15 @@ static int lso_update_custom(lso_profile_t *lso_prof, odp_packet_t pkt, int segn ptr = &u32; else if (size == 2) ptr = &u16; - else + else { + /* + * odp_lso_profile_create() ensures that size is one of the allowed values. + * But compiler doesn't know that, so set it here to avoid possibility of + * out of bounds warnings. 
+ */ + size = 1; ptr = &u8; + } if (odp_packet_copy_to_mem(pkt, offset, size, ptr)) { _ODP_ERR("Read from packet failed at offset %u\n", offset); diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c index 94461e6b1..d3fde70f6 100644 --- a/platform/linux-generic/odp_pool.c +++ b/platform/linux-generic/odp_pool.c @@ -495,6 +495,7 @@ static void init_event_hdr(pool_t *pool, _odp_event_hdr_t *event_hdr, uint32_t e event_hdr->index.event = event_index; event_hdr->type = type; event_hdr->event_type = type; + event_hdr->subtype = ODP_EVENT_NO_SUBTYPE; event_hdr->pool = _odp_pool_handle(pool); /* Store base values for fast init */ @@ -542,7 +543,6 @@ static void init_event_hdr(pool_t *pool, _odp_event_hdr_t *event_hdr, uint32_t e static void init_buffers(pool_t *pool) { - uint64_t i; _odp_event_hdr_t *event_hdr; odp_buffer_hdr_t *buf_hdr; odp_packet_hdr_t *pkt_hdr; @@ -566,7 +566,7 @@ static void init_buffers(pool_t *pool) mask = pool->ring_mask; type = pool->type; - for (i = 0; i < pool->num + skipped_blocks ; i++) { + for (uint64_t i = 0; i < pool->num + skipped_blocks ; i++) { int skip = 0; addr = &pool->base_addr[i * pool->block_size]; @@ -1257,6 +1257,10 @@ int odp_pool_info(odp_pool_t pool_hdl, odp_pool_info_t *info) info->dma_pool_param.uarea_size = pool->params.buf.uarea_size; info->dma_pool_param.cache_size = pool->params.buf.cache_size; + } else if (pool->type_2 == ODP_POOL_ML_COMPL) { + info->ml_pool_param.num = pool->params.buf.num; + info->ml_pool_param.uarea_size = pool->params.buf.uarea_size; + info->ml_pool_param.cache_size = pool->params.buf.cache_size; } else { info->params = pool->params; } @@ -1371,11 +1375,11 @@ static inline void event_free_to_pool(pool_t *pool, if (odp_unlikely((uint32_t)num > cache_num)) burst = cache_num; - _odp_event_hdr_t *event_hdr[burst]; + _odp_event_hdr_t *ev_hdr[burst]; - cache_pop(cache, event_hdr, burst); + cache_pop(cache, ev_hdr, burst); - ring_ptr_enq_multi(ring, mask, (void **)event_hdr, 
burst); + ring_ptr_enq_multi(ring, mask, (void **)ev_hdr, burst); if (CONFIG_POOL_STATISTICS && pool->params.stats.bit.free_ops) odp_atomic_inc_u64(&pool->stats.free_ops); } @@ -1559,6 +1563,8 @@ static const char *get_long_type_str(odp_pool_type_t type) return "vector"; case ODP_POOL_DMA_COMPL: return "dma completion"; + case ODP_POOL_ML_COMPL: + return "ml completion"; default: return "unknown"; } @@ -1577,6 +1583,8 @@ static const char *get_short_type_str(odp_pool_type_t type) return "V"; case ODP_POOL_DMA_COMPL: return "D"; + case ODP_POOL_ML_COMPL: + return "M"; default: return "-"; } @@ -1875,6 +1883,7 @@ int odp_pool_ext_capability(odp_pool_type_t type, odp_pool_ext_capability_t *cap case ODP_POOL_TIMEOUT: case ODP_POOL_VECTOR: case ODP_POOL_DMA_COMPL: + case ODP_POOL_ML_COMPL: memset(capa, 0, sizeof(odp_pool_ext_capability_t)); return 0; default: diff --git a/platform/linux-generic/odp_queue_scalable.c b/platform/linux-generic/odp_queue_scalable.c index c7040dd3c..bddaa532d 100644 --- a/platform/linux-generic/odp_queue_scalable.c +++ b/platform/linux-generic/odp_queue_scalable.c @@ -18,7 +18,6 @@ #include <odp_config_internal.h> #include <odp_debug_internal.h> - #include <odp_event_internal.h> #include <odp_packet_io_internal.h> #include <odp_pool_internal.h> @@ -472,12 +471,8 @@ static int queue_destroy(odp_queue_t handle) */ while (__atomic_load_n(&q->qschst.numevts, __ATOMIC_RELAXED) != 0 || __atomic_load_n(&q->qschst.cur_ticket, __ATOMIC_RELAXED) != - __atomic_load_n(&q->qschst.nxt_ticket, __ATOMIC_RELAXED)) { - sevl(); - while (wfe() && monitor32((uint32_t *)&q->qschst.numevts, - __ATOMIC_RELAXED) != 0) - odp_cpu_pause(); - } + __atomic_load_n(&q->qschst.nxt_ticket, __ATOMIC_RELAXED)) + _odp_wait_until_eq_u32((uint32_t *)&q->qschst.numevts, 0); if (q->schedq != NULL) { _odp_sched_queue_rem(q->sched_grp, q->sched_prio); @@ -596,13 +591,8 @@ static inline int _odp_queue_enq(sched_elem_t *q, __builtin_prefetch(&q->node, 1, 0); #endif /* Wait for our turn 
to signal consumers */ - if (odp_unlikely(__atomic_load_n(&q->cons_write, - __ATOMIC_RELAXED) != old_write)) { - sevl(); - while (wfe() && monitor32(&q->cons_write, - __ATOMIC_RELAXED) != old_write) - odp_cpu_pause(); - } + if (odp_unlikely(__atomic_load_n(&q->cons_write, __ATOMIC_RELAXED) != old_write)) + _odp_wait_until_eq_u32(&q->cons_write, old_write); /* Signal consumers that events are available (release events) * Enable other producers to continue @@ -824,13 +814,8 @@ int _odp_queue_deq(sched_elem_t *q, _odp_event_hdr_t *event_hdr[], int num) __builtin_prefetch(&q->node, 1, 0); #endif /* Wait for our turn to signal producers */ - if (odp_unlikely(__atomic_load_n(&q->prod_read, __ATOMIC_RELAXED) != - old_read)) { - sevl(); - while (wfe() && monitor32(&q->prod_read, - __ATOMIC_RELAXED) != old_read) - odp_cpu_pause(); - } + if (odp_unlikely(__atomic_load_n(&q->prod_read, __ATOMIC_RELAXED) != old_read)) + _odp_wait_until_eq_u32(&q->prod_read, old_read); /* Signal producers that empty slots are available * (release ring slots) diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c index 7bd8cbfed..379f1f828 100644 --- a/platform/linux-generic/odp_schedule_basic.c +++ b/platform/linux-generic/odp_schedule_basic.c @@ -44,6 +44,7 @@ #include <string.h> #include <time.h> +#include <inttypes.h> /* No synchronization context */ #define NO_SYNC_CONTEXT ODP_SCHED_SYNC_PARALLEL @@ -297,7 +298,7 @@ typedef struct { struct { uint32_t poll_time; - struct timespec sleep_time; + uint64_t sleep_time; } powersave; /* Scheduler interface config options (not used in fast path) */ @@ -545,8 +546,8 @@ static int read_config_file(sched_global_t *sched) } val = _ODP_MAX(0, val); - sched->powersave.sleep_time.tv_sec = val / 1000000000; - sched->powersave.sleep_time.tv_nsec = val % 1000000000; + val = _ODP_MIN((int)ODP_TIME_SEC_IN_NS - 1, val); + sched->powersave.sleep_time = val; _ODP_PRINT(" %s: %i\n", str, val); _ODP_PRINT(" dynamic 
load balance: %s\n", sched->load_balance ? "ON" : "OFF"); @@ -1672,7 +1673,7 @@ static inline int schedule_loop_sleep(odp_queue_t *out_queue, uint64_t wait, timer_run(2); break; } - timer_run(1); + uint64_t next = timer_run(sleep ? TIMER_SCAN_FORCE : 1); if (first) { start = odp_time_local(); @@ -1683,19 +1684,27 @@ static inline int schedule_loop_sleep(odp_queue_t *out_queue, uint64_t wait, continue; } - if (sleep) - nanosleep(&sched->powersave.sleep_time, NULL); + if (sleep && next) { + uint64_t sleep_nsec = _ODP_MIN(sched->powersave.sleep_time, next); - if (wait != ODP_SCHED_WAIT || !sleep) { - current = odp_time_local(); - if (odp_time_cmp(start_sleep, current) < 0) - sleep = 1; + if (wait != ODP_SCHED_WAIT) { + uint64_t nsec_to_end = odp_time_diff_ns(end, current); + + sleep_nsec = _ODP_MIN(sleep_nsec, nsec_to_end); + } + + struct timespec ts = { 0, sleep_nsec }; + + nanosleep(&ts, NULL); } - if (wait == ODP_SCHED_WAIT) - continue; + if (!sleep || wait != ODP_SCHED_WAIT) + current = odp_time_local(); + + if (!sleep && odp_time_cmp(start_sleep, current) < 0) + sleep = 1; - if (odp_time_cmp(end, current) < 0) + if (wait != ODP_SCHED_WAIT && odp_time_cmp(end, current) < 0) break; } diff --git a/platform/linux-generic/odp_schedule_scalable.c b/platform/linux-generic/odp_schedule_scalable.c index 6d60c048f..5166fb6d0 100644 --- a/platform/linux-generic/odp_schedule_scalable.c +++ b/platform/linux-generic/odp_schedule_scalable.c @@ -223,13 +223,9 @@ void _odp_sched_update_enq(sched_elem_t *q, uint32_t actual) if (odp_unlikely(ticket != TICKET_INVALID)) { /* Wait for our turn to update schedq. 
*/ if (odp_unlikely(__atomic_load_n(&q->qschst.cur_ticket, - __ATOMIC_ACQUIRE) != ticket)) { - sevl(); - while (wfe() && - monitor8(&q->qschst.cur_ticket, - __ATOMIC_ACQUIRE) != ticket) - odp_cpu_pause(); - } + __ATOMIC_ACQUIRE) != ticket)) + _odp_wait_until_eq_acq_u8(&q->qschst.cur_ticket, ticket); + /* Enqueue at end of scheduler queue */ /* We are here because of empty-to-non-empty transition * This means queue must be pushed to schedq if possible @@ -366,13 +362,9 @@ sched_update_deq(sched_elem_t *q, _ODP_ASSERT(q->qschst_type != ODP_SCHED_SYNC_ATOMIC); /* Wait for our turn to update schedq. */ if (odp_unlikely(__atomic_load_n(&q->qschst.cur_ticket, - __ATOMIC_ACQUIRE) != ticket)) { - sevl(); - while (wfe() && - monitor8(&q->qschst.cur_ticket, - __ATOMIC_ACQUIRE) != ticket) - odp_cpu_pause(); - } + __ATOMIC_ACQUIRE) != ticket)) + _odp_wait_until_eq_acq_u8(&q->qschst.cur_ticket, ticket); + /* We are here because of non-empty-to-empty transition or * WRR budget exhausted * This means the queue must be popped from the schedq, now or @@ -494,12 +486,9 @@ static inline void sched_update_popd(sched_elem_t *elem) 1, __ATOMIC_RELAXED); if (odp_unlikely(__atomic_load_n(&elem->qschst.cur_ticket, - __ATOMIC_ACQUIRE) != ticket)) { - sevl(); - while (wfe() && monitor8(&elem->qschst.cur_ticket, - __ATOMIC_ACQUIRE) != ticket) - odp_cpu_pause(); - } + __ATOMIC_ACQUIRE) != ticket)) + _odp_wait_until_eq_acq_u8(&elem->qschst.cur_ticket, ticket); + sched_update_popd_sc(elem); atomic_store_release(&elem->qschst.cur_ticket, ticket + 1, /*readonly=*/false); @@ -1054,15 +1043,8 @@ restart_same: continue; } /* Wait for our turn to dequeue */ - if (odp_unlikely(__atomic_load_n(&rwin->turn, - __ATOMIC_ACQUIRE) - != sn)) { - sevl(); - while (wfe() && - monitor32(&rwin->turn, __ATOMIC_ACQUIRE) - != sn) - odp_cpu_pause(); - } + if (odp_unlikely(__atomic_load_n(&rwin->turn, __ATOMIC_ACQUIRE) != sn)) + _odp_wait_until_eq_acq_u32(&rwin->turn, sn); #ifdef CONFIG_QSCHST_LOCK 
LOCK(&elem->qschlock); #endif @@ -1143,13 +1125,8 @@ static void schedule_order_lock(uint32_t lock_index) return; } if (odp_unlikely(__atomic_load_n(&rctx->rwin->olock[lock_index], - __ATOMIC_ACQUIRE) != rctx->sn)) { - sevl(); - while (wfe() && - monitor32(&rctx->rwin->olock[lock_index], - __ATOMIC_ACQUIRE) != rctx->sn) - odp_cpu_pause(); - } + __ATOMIC_ACQUIRE) != rctx->sn)) + _odp_wait_until_eq_acq_u32(&rctx->rwin->olock[lock_index], rctx->sn); } static void schedule_order_unlock(uint32_t lock_index) @@ -1555,12 +1532,7 @@ static int schedule_group_destroy(odp_schedule_group_t group) if (sg->xcount[p] != 0) { bitset_t wanted = atom_bitset_load(&sg->thr_wanted, __ATOMIC_RELAXED); - sevl(); - while (wfe() && - !bitset_is_eql(wanted, - bitset_monitor(&sg->thr_actual[p], - __ATOMIC_RELAXED))) - odp_cpu_pause(); + _odp_wait_until_eq_bitset(&sg->thr_actual[p], wanted); } /* Else ignore because no ODP queues on this prio */ } @@ -2127,13 +2099,10 @@ static void order_lock(void) _ODP_ASSERT(ts->rctx != NULL); rwin = ts->rctx->rwin; sn = ts->rctx->sn; - sevl(); /* Use acquire ordering to be on the safe side even if * this isn't an acquire/release situation (aka lock). 
*/ - while (wfe() && - monitor32(&rwin->hc.head, __ATOMIC_ACQUIRE) != sn) - odp_cpu_pause(); + _odp_wait_until_eq_acq_u32(&rwin->hc.head, sn); } } diff --git a/platform/linux-generic/odp_schedule_scalable_ordered.c b/platform/linux-generic/odp_schedule_scalable_ordered.c index f6655d7fa..f8568ce53 100644 --- a/platform/linux-generic/odp_schedule_scalable_ordered.c +++ b/platform/linux-generic/odp_schedule_scalable_ordered.c @@ -123,8 +123,6 @@ static void rwin_insert(reorder_window_t *rwin, /*readonly=*/false); rctx = NULL; do { - hc_t new; - new.head = old.head; new.chgi = old.chgi + 1; /* Changed value */ /* Update head & chgi, fail if any has changed */ diff --git a/platform/linux-generic/odp_system_info.c b/platform/linux-generic/odp_system_info.c index 52f1000f1..a2593b531 100644 --- a/platform/linux-generic/odp_system_info.c +++ b/platform/linux-generic/odp_system_info.c @@ -26,7 +26,6 @@ #include <odp/api/cpu.h> #include <errno.h> -#include <pthread.h> #include <string.h> #include <stdio.h> #include <inttypes.h> @@ -386,8 +385,9 @@ int _odp_system_info_init(void) num_cpus); /* Read and save all CPU frequencies for static mode */ - for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) - odp_global_ro.system_info.cpu_hz[i] = cpu_hz_current(i); + if (odp_global_ro.system_info.cpu_hz_static) + for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) + odp_global_ro.system_info.cpu_hz[i] = cpu_hz_current(i); /* By default, read max frequency from a cpufreq file */ for (i = 0; i < CONFIG_NUM_CPU_IDS; i++) { @@ -627,5 +627,8 @@ void odp_sys_config_print(void) _ODP_PRINT("CONFIG_IPSEC_MAX_NUM_SA: %i\n", CONFIG_IPSEC_MAX_NUM_SA); _ODP_PRINT("CONFIG_TIMER_128BIT_ATOMICS: %i\n", CONFIG_TIMER_128BIT_ATOMICS); _ODP_PRINT("CONFIG_TIMER_PROFILE_INLINE: %i\n", CONFIG_TIMER_PROFILE_INLINE); + _ODP_PRINT("CONFIG_ML_MAX_MODELS: %i\n", CONFIG_ML_MAX_MODELS); + _ODP_PRINT("CONFIG_ML_MAX_INPUTS: %i\n", CONFIG_ML_MAX_INPUTS); + _ODP_PRINT("CONFIG_ML_MAX_OUTPUTS: %i\n", CONFIG_ML_MAX_OUTPUTS); 
_ODP_PRINT("\n"); } diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c index daf187390..c8ea31078 100644 --- a/platform/linux-generic/odp_timer.c +++ b/platform/linux-generic/odp_timer.c @@ -761,11 +761,12 @@ static inline void timer_expire(timer_pool_t *tp, uint32_t idx, uint64_t tick) } } -static inline void timer_pool_scan(timer_pool_t *tp, uint64_t tick) +static inline uint64_t timer_pool_scan(timer_pool_t *tp, uint64_t tick) { tick_buf_t *array = &tp->tick_buf[0]; uint32_t high_wm = odp_atomic_load_acq_u32(&tp->high_wm); uint32_t i; + uint64_t min = UINT64_MAX; _ODP_ASSERT(high_wm <= tp->param.num_timers); for (i = 0; i < high_wm; i++) { @@ -780,18 +781,23 @@ static inline void timer_pool_scan(timer_pool_t *tp, uint64_t tick) if (odp_unlikely(exp_tck <= tick)) { /* Attempt to expire timer */ timer_expire(tp, i, tick); + min = 0; + } else { + min = _ODP_MIN(min, exp_tck - tick); } } + + return min; } /****************************************************************************** * Inline timer processing *****************************************************************************/ -static inline void timer_pool_scan_inline(int num, odp_time_t now) +static inline uint64_t timer_pool_scan_inline(int num, odp_time_t now, int force) { timer_pool_t *tp; - uint64_t new_tick, old_tick, nsec; + uint64_t new_tick, old_tick, ticks_to_next_expire, nsec, min = UINT64_MAX; int64_t diff; int i; @@ -817,7 +823,7 @@ static inline void timer_pool_scan_inline(int num, odp_time_t now) old_tick = odp_atomic_load_u64(&tp->cur_tick); diff = new_tick - old_tick; - if (diff < 1) + if (diff < 1 && !force) continue; if (odp_atomic_cas_u64(&tp->cur_tick, &old_tick, new_tick)) { @@ -832,26 +838,30 @@ static inline void timer_pool_scan_inline(int num, odp_time_t now) odp_atomic_store_u32(&tp->notify_overrun, 2); } } - timer_pool_scan(tp, nsec); + ticks_to_next_expire = timer_pool_scan(tp, nsec); + min = _ODP_MIN(min, ticks_to_next_expire); } } + + return 
min; } -void _odp_timer_run_inline(int dec) +uint64_t _odp_timer_run_inline(int dec) { odp_time_t now; int num = timer_global->highest_tp_idx + 1; - int poll_interval = timer_global->poll_interval; + int force = (dec == TIMER_SCAN_FORCE); + int poll_interval = force ? 0 : timer_global->poll_interval; if (num == 0) - return; + return UINT64_MAX; /* Rate limit how often this thread checks the timer pools. */ if (poll_interval > 1) { timer_local.run_cnt -= dec; if (timer_local.run_cnt > 0) - return; + return UINT64_MAX; timer_local.run_cnt = poll_interval; } @@ -862,7 +872,12 @@ void _odp_timer_run_inline(int dec) if (odp_time_cmp(period, timer_global->poll_interval_time) < 0) - return; + return UINT64_MAX; + timer_local.last_run = now; + } + + if (force) { + timer_local.run_cnt = poll_interval; timer_local.last_run = now; } @@ -870,13 +885,14 @@ void _odp_timer_run_inline(int dec) if (CONFIG_TIMER_PROFILE_INLINE) { odp_time_t t1 = odp_time_local_strict(); - timer_pool_scan_inline(num, now); + uint64_t ret = timer_pool_scan_inline(num, now, force); odp_time_t t2 = odp_time_local_strict(); timer_local.prof_nsec += odp_time_diff_ns(t2, t1); timer_local.prof_rounds++; + return ret; } else { - timer_pool_scan_inline(num, now); + return timer_pool_scan_inline(num, now, force); } } diff --git a/platform/linux-generic/test/Makefile.am b/platform/linux-generic/test/Makefile.am index 30ef26078..7aca5fd3f 100644 --- a/platform/linux-generic/test/Makefile.am +++ b/platform/linux-generic/test/Makefile.am @@ -21,6 +21,11 @@ SUBDIRS += validation/api/pktio \ example \ performance +if WITH_ML +TESTS += validation/api/ml/ml_linux$(EXEEXT) +SUBDIRS += validation/api/ml +endif + if ODP_PKTIO_PCAP TESTS += validation/api/pktio/pktio_run_pcap.sh endif diff --git a/platform/linux-generic/test/example/ipsec_api/Makefile.am b/platform/linux-generic/test/example/ipsec_api/Makefile.am index 101c97cdf..2535ad466 100644 --- a/platform/linux-generic/test/example/ipsec_api/Makefile.am +++ 
b/platform/linux-generic/test/example/ipsec_api/Makefile.am @@ -19,5 +19,3 @@ clean-local: rm -f $(builddir)/$$f; \ done \ fi - -.NOTPARALLEL: diff --git a/platform/linux-generic/test/example/ipsec_crypto/Makefile.am b/platform/linux-generic/test/example/ipsec_crypto/Makefile.am index 101c97cdf..2535ad466 100644 --- a/platform/linux-generic/test/example/ipsec_crypto/Makefile.am +++ b/platform/linux-generic/test/example/ipsec_crypto/Makefile.am @@ -19,5 +19,3 @@ clean-local: rm -f $(builddir)/$$f; \ done \ fi - -.NOTPARALLEL: diff --git a/platform/linux-generic/test/inline-timer.conf b/platform/linux-generic/test/inline-timer.conf index d645bef3c..fa3b6982f 100644 --- a/platform/linux-generic/test/inline-timer.conf +++ b/platform/linux-generic/test/inline-timer.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-generic" -config_file_version = "0.1.27" +config_file_version = "0.1.28" timer: { # Enable inline timer implementation diff --git a/platform/linux-generic/test/packet_align.conf b/platform/linux-generic/test/packet_align.conf index 427674bb2..fb1418348 100644 --- a/platform/linux-generic/test/packet_align.conf +++ b/platform/linux-generic/test/packet_align.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-generic" -config_file_version = "0.1.27" +config_file_version = "0.1.28" pool: { pkt: { diff --git a/platform/linux-generic/test/pktio_ipc/ipc_common.c b/platform/linux-generic/test/pktio_ipc/ipc_common.c index f693feeb2..128a7c6e1 100644 --- a/platform/linux-generic/test/pktio_ipc/ipc_common.c +++ b/platform/linux-generic/test/pktio_ipc/ipc_common.c @@ -1,11 +1,12 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited + * Copyright (c) 2023 Nokia */ #include "ipc_common.h" +/** Start time in seconds */ +int start_time_sec; /** Run time in seconds */ int run_time_sec; /** Pid of the master process */ @@ -97,23 +98,28 @@ void parse_args(int argc, char *argv[]) int opt; int long_index; static struct option longopts[] = { - {"time", required_argument, NULL, 't'}, + {"start-timeout", required_argument, NULL, 's'}, + {"run-time", required_argument, NULL, 't'}, {"pid", required_argument, NULL, 'p'}, /* master process pid */ {"help", no_argument, NULL, 'h'}, /* return 'h' */ {NULL, 0, NULL, 0} }; + start_time_sec = 0; /* wait forever if time is 0 */ run_time_sec = 0; /* loop forever if time to run is 0 */ master_pid = 0; while (1) { - opt = getopt_long(argc, argv, "+t:p:h", + opt = getopt_long(argc, argv, "+s:t:p:h", longopts, &long_index); if (opt == -1) break; /* No more options */ switch (opt) { + case 's': + start_time_sec = atoi(optarg); + break; case 't': run_time_sec = atoi(optarg); break; @@ -151,15 +157,14 @@ void usage(char *progname) { printf("\n" "Usage: %s OPTIONS\n" - " E.g. 
-n ipc_name_space %s -t seconds\n" "\n" "OpenDataPlane odp-linux ipc test application.\n" "\n" - "Mandatory OPTIONS:\n" - " -n, --ns IPC name space ID /dev/shm/odp-<ns>-objname.\n" "Optional OPTIONS\n" " -h, --help Display help and exit.\n" - " -t, --time Time to run in seconds.\n" - "\n", NO_PATH(progname), NO_PATH(progname) + " -p, --pid PID of the master process.\n" + " -t, --run-time Time to run in seconds.\n" + " -s, --start-timeout Maximum time for pktio startup.\n" + "\n", NO_PATH(progname) ); } diff --git a/platform/linux-generic/test/pktio_ipc/ipc_common.h b/platform/linux-generic/test/pktio_ipc/ipc_common.h index b2b469553..94ec21460 100644 --- a/platform/linux-generic/test/pktio_ipc/ipc_common.h +++ b/platform/linux-generic/test/pktio_ipc/ipc_common.h @@ -1,7 +1,6 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited + * Copyright (c) 2023 Nokia */ #define _POSIX_C_SOURCE 200809L @@ -64,6 +63,9 @@ typedef struct ODP_PACKED { odp_u32be_t magic; } pkt_tail_t; +/** Start time in seconds */ +extern int start_time_sec; + /** Run time in seconds */ extern int run_time_sec; diff --git a/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c b/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c index 6c71e18da..df7a5ca3f 100644 --- a/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c +++ b/platform/linux-generic/test/pktio_ipc/pktio_ipc1.c @@ -1,7 +1,6 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited + * Copyright (c) 2023 Nokia */ #include "ipc_common.h" @@ -49,17 +48,17 @@ static int pktio_run_loop(odp_pool_t pool) else sprintf(name, TEST_IPC_PKTIO_NAME); - wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS); + wait = odp_time_local_from_ns(start_time_sec * ODP_TIME_SEC_IN_NS); start_cycle = odp_time_local(); current_cycle = start_cycle; for (;;) { - if (run_time_sec) { + if (start_time_sec) { cycle = odp_time_local(); diff = odp_time_diff(cycle, start_cycle); if (odp_time_cmp(wait, diff) < 0) { - printf("timeout exit, run_time_sec %d\n", - run_time_sec); + printf("timeout exit 1, start_time_sec %d\n", + start_time_sec); return -1; } } @@ -83,12 +82,12 @@ static int pktio_run_loop(odp_pool_t pool) /* start ipc pktio, i.e. wait until other process connects */ for (;;) { - if (run_time_sec) { + if (start_time_sec) { cycle = odp_time_local(); diff = odp_time_diff(cycle, start_cycle); if (odp_time_cmp(wait, diff) < 0) { - printf("timeout exit, run_time_sec %d\n", - run_time_sec); + printf("timeout exit 2, start_time_sec %d\n", + start_time_sec); goto exit; } } @@ -102,6 +101,8 @@ static int pktio_run_loop(odp_pool_t pool) } /* packets loop */ + wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS); + start_cycle = odp_time_local(); for (;;) { int i; diff --git a/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c b/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c index e6ca5f5e5..fc3b6833a 100644 --- a/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c +++ b/platform/linux-generic/test/pktio_ipc/pktio_ipc2.c @@ -1,7 +1,6 @@ -/* Copyright (c) 2015-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2015-2018 Linaro Limited + * Copyright (c) 2023 Nokia */ /** @@ -46,17 +45,17 @@ static int ipc_second_process(int master_pid) exit(EXIT_FAILURE); } - wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS); + wait = odp_time_local_from_ns(start_time_sec * ODP_TIME_SEC_IN_NS); start_cycle = odp_time_local(); for (;;) { /* exit loop if time specified */ - if (run_time_sec) { + if (start_time_sec) { cycle = odp_time_local(); diff = odp_time_diff(cycle, start_cycle); if (odp_time_cmp(wait, diff) < 0) { - printf("timeout exit, run_time_sec %d\n", - run_time_sec); + printf("timeout exit 1, start_time_sec %d\n", + start_time_sec); goto not_started; } } @@ -85,12 +84,12 @@ static int ipc_second_process(int master_pid) /* start ipc pktio, i.e. wait until other process connects */ for (;;) { /* 1. exit loop if time specified */ - if (run_time_sec) { + if (start_time_sec) { cycle = odp_time_local(); diff = odp_time_diff(cycle, start_cycle); if (odp_time_cmp(wait, diff) < 0) { - printf("timeout exit, run_time_sec %d\n", - run_time_sec); + printf("timeout exit 2, start_time_sec %d\n", + start_time_sec); goto not_started; } } @@ -103,6 +102,8 @@ static int ipc_second_process(int master_pid) odp_time_wait_ns(50 * ODP_TIME_MSEC_IN_NS); } + wait = odp_time_local_from_ns(run_time_sec * ODP_TIME_SEC_IN_NS); + start_cycle = odp_time_local(); for (;;) { /* exit loop if time specified */ if (run_time_sec) { diff --git a/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh b/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh index bad2626bd..b181668e8 100755 --- a/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh +++ b/platform/linux-generic/test/pktio_ipc/pktio_ipc_run.sh @@ -1,9 +1,8 @@ #!/bin/sh # -# Copyright (c) 2015-2018, Linaro Limited -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2015-2018 Linaro Limited +# Copyright (c) 2023 Nokia # # directories where test binary can be found: @@ -17,31 +16,23 @@ PATH=$(dirname $0):$PATH PATH=$(dirname $0)/../../../../platform/linux-generic/test/pktio_ipc:$PATH PATH=.:$PATH -RUNTIME1=3 -RUNTIME2=1 -TIMEOUT=3 -if [ "${TEST}" = "coverage" ]; then - RUNTIME1=30 - RUNTIME2=15 - TIMEOUT=20 -fi +STARTTIME=30 +RUNTIME=1 run() { local ret=0 echo "==== run pktio_ipc1 then pktio_ipc2 ====" - pktio_ipc1${EXEEXT} -t ${RUNTIME1} & + pktio_ipc1${EXEEXT} -s ${STARTTIME} -t ${RUNTIME} & IPC_PID=$! - pktio_ipc2${EXEEXT} -p ${IPC_PID} -t ${RUNTIME2} + pktio_ipc2${EXEEXT} -p ${IPC_PID} -s ${STARTTIME} -t ${RUNTIME} ret=$? - # pktio_ipc1 should do clean up and exit just - # after pktio_ipc2 exited. If it does not happen - # kill him in test. - sleep ${TIMEOUT} + (kill ${IPC_PID} 2>&1 > /dev/null ) > /dev/null if [ $? -eq 0 ]; then + wait $IPC_PID echo "pktio_ipc1${EXEEXT} was killed" ls -l /dev/shm/${UID}/odp* 2> /dev/null rm -rf /dev/shm/${UID}/odp-${IPC_PID}* 2>&1 > /dev/null @@ -58,16 +49,15 @@ run() fi echo "==== run pktio_ipc2 then pktio_ipc1 ====" - pktio_ipc2${EXEEXT} -t ${RUNTIME1} & + pktio_ipc2${EXEEXT} -s ${STARTTIME} -t ${RUNTIME} & IPC_PID=$! - pktio_ipc1${EXEEXT} -p ${IPC_PID} -t ${RUNTIME2} + pktio_ipc1${EXEEXT} -p ${IPC_PID} -s ${STARTTIME} -t ${RUNTIME} ret=$? - # pktio_ipc2 do not exit on pktio_ipc1 disconnect - # wait until it exits cleanly - sleep ${TIMEOUT} + (kill ${IPC_PID} 2>&1 > /dev/null ) > /dev/null if [ $? 
-eq 0 ]; then + wait $IPC_PID echo "pktio_ipc2${EXEEXT} was killed" ls -l /dev/shm/${UID}/odp* 2> /dev/null rm -rf /dev/shm/${UID}/odp-${IPC_PID}* 2>&1 > /dev/null diff --git a/platform/linux-generic/test/process-mode.conf b/platform/linux-generic/test/process-mode.conf index 5bfcb9f2f..f4c6f7952 100644 --- a/platform/linux-generic/test/process-mode.conf +++ b/platform/linux-generic/test/process-mode.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-generic" -config_file_version = "0.1.27" +config_file_version = "0.1.28" # Shared memory options shm: { diff --git a/platform/linux-generic/test/sched-basic.conf b/platform/linux-generic/test/sched-basic.conf index 1a401298e..8a6d0ac98 100644 --- a/platform/linux-generic/test/sched-basic.conf +++ b/platform/linux-generic/test/sched-basic.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-generic" -config_file_version = "0.1.27" +config_file_version = "0.1.28" # Test scheduler with an odd spread value and without dynamic load balance sched_basic: { diff --git a/platform/linux-generic/test/stash-custom.conf b/platform/linux-generic/test/stash-custom.conf index b96c1cf45..6a2496303 100644 --- a/platform/linux-generic/test/stash-custom.conf +++ b/platform/linux-generic/test/stash-custom.conf @@ -1,6 +1,6 @@ # Mandatory fields odp_implementation = "linux-generic" -config_file_version = "0.1.27" +config_file_version = "0.1.28" # Test overflow safe stash variant stash: { diff --git a/platform/linux-generic/test/validation/api/ml/.gitignore b/platform/linux-generic/test/validation/api/ml/.gitignore new file mode 100644 index 000000000..e31f902c4 --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/.gitignore @@ -0,0 +1 @@ +ml_linux diff --git a/platform/linux-generic/test/validation/api/ml/Makefile.am b/platform/linux-generic/test/validation/api/ml/Makefile.am new file mode 100644 index 000000000..f4b9e9755 --- /dev/null +++ 
b/platform/linux-generic/test/validation/api/ml/Makefile.am @@ -0,0 +1,34 @@ +include ../Makefile.inc + +test_PROGRAMS = ml_linux +ml_linux_SOURCES = ml_linux.c + +EXTRA_DIST = \ + batch_add_gen.py \ + batch_add.onnx \ + gen_models.sh \ + README.md \ + requirements.txt \ + simple_linear_gen.py \ + simple_linear.onnx + +# If building out-of-tree, make check will not copy the scripts and data to the +# $(builddir) assuming that all commands are run locally. However this prevents +# running tests on a remote target using LOG_COMPILER. +# So copy all script and data files explicitly here. +all-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + if [ -e $(srcdir)/$$f ]; then \ + mkdir -p $(builddir)/$$(dirname $$f); \ + cp -f $(srcdir)/$$f $(builddir)/$$f; \ + fi \ + done \ + fi + +clean-local: + if [ "x$(srcdir)" != "x$(builddir)" ]; then \ + for f in $(EXTRA_DIST); do \ + rm -f $(builddir)/$$f; \ + done \ + fi diff --git a/platform/linux-generic/test/validation/api/ml/README.md b/platform/linux-generic/test/validation/api/ml/README.md new file mode 100644 index 000000000..80ad30e96 --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/README.md @@ -0,0 +1,23 @@ +# How to run ML validation test + +Simple onnx models are used to test ML API. 
+ +## Generate models + +### Install python requirements + +```bash +python3 -m pip install -r <this directory>/requirements.txt +``` + +### Generate models for validation tests + +```bash +<this directory>/gen_models.sh +``` + +## Run ML validation tests + +```bash +<this directory>/ml_linux +``` diff --git a/platform/linux-generic/test/validation/api/ml/batch_add.onnx b/platform/linux-generic/test/validation/api/ml/batch_add.onnx Binary files differnew file mode 100644 index 000000000..43485f463 --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/batch_add.onnx diff --git a/platform/linux-generic/test/validation/api/ml/batch_add_gen.py b/platform/linux-generic/test/validation/api/ml/batch_add_gen.py new file mode 100644 index 000000000..33515bd2f --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/batch_add_gen.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Nokia +# + +import onnx +from onnx import helper +from onnx import TensorProto + +graph = helper.make_graph( + [ # nodes + helper.make_node("Add", ["x1", "x2"], ["y"], "Batch Add"), + ], + "Batch Add", # name + [ # inputs + helper.make_tensor_value_info('x1', TensorProto.DOUBLE, ["c", 3]), + helper.make_tensor_value_info('x2', TensorProto.DOUBLE, ["c", 3]), + ], + [ # outputs + helper.make_tensor_value_info('y', TensorProto.DOUBLE, ["c", 3]), + ] +) + +model = helper.make_model( + graph, + opset_imports=[helper.make_opsetid("", 14)], + producer_name='ODP validation tests', + model_version=1, + doc_string="y = x1 + x2", + ir_version = 8 +) + +onnx.save(model, 'batch_add.onnx') diff --git a/platform/linux-generic/test/validation/api/ml/gen_models.sh b/platform/linux-generic/test/validation/api/ml/gen_models.sh new file mode 100755 index 000000000..d88f3c432 --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/gen_models.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Nokia +# + +set -e 
+ +# cd to the directory where this script is in +cd "$( dirname "${BASH_SOURCE[0]}" )" + +python3 simple_linear_gen.py + +python3 batch_add_gen.py diff --git a/platform/linux-generic/test/validation/api/ml/ml_linux.c b/platform/linux-generic/test/validation/api/ml/ml_linux.c new file mode 100644 index 000000000..28e18fbb5 --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/ml_linux.c @@ -0,0 +1,1167 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <unistd.h> +#include <string.h> +#include <libgen.h> +#include <odp_api.h> +#include <odp/helper/odph_api.h> +#include "odp_cunit_common.h" + +#define TIMEOUT 5 +#define MODEL_NAME "Test" +#define NUM_INPUTS 1 +#define NUM_OUTPUTS 1 +#define RUN_NUM 2 +#define BUF_LEN 256 +#define CONFIG_MAX_MODEL_SIZE 500 + +#define COMPL_POOL_NAME "ML compl pool" +#define NUM_COMPL 10 + +/** + * About model simple_linear.onnx being tested in this suite + * + * Model info: + * Version: 1 + * Inputs: name: x, type: int32, shape: [1] + * Outputs: name: y, type: int32, shape: [1] + * + * The model is of form y = 3 * x + 4 + * Thus when x = 5, the output y should be 19. 
+ */ +typedef struct global_t { + int disabled; + odp_ml_capability_t ml_capa; + odp_ml_config_t ml_config; + odp_ml_model_param_t model_param; + odp_ml_model_t ml_model; + odp_pool_t compl_pool; + odp_queue_t queue; + odp_ml_data_t data; + odp_ml_data_seg_t input_seg; + odp_ml_data_seg_t output_seg; + odp_ml_run_param_t run_param; + uint64_t wait_ns; + int32_t x; + int32_t y; + int32_t y_expected; + +} global_t; + +static global_t global; + +static int fill_model_param(const char *model_name, odp_ml_model_param_t *model_param) +{ + size_t size; + char *pos; + char *exe_dir; + size_t exe_dir_len; + FILE *model_file; + char exe_path[BUF_LEN]; + ssize_t exe_path_len; + char model_path[BUF_LEN]; + + /* Model file is placed in the same directory as the executable ml_linux */ + exe_path_len = readlink("/proc/self/exe", exe_path, BUF_LEN - 1); + if (exe_path_len != -1) { + exe_path[exe_path_len] = '\0'; + + pos = strstr(exe_path, ".libs"); + if (pos) + *(pos + 5) = '\0'; + + exe_dir = dirname(exe_path); + exe_dir_len = strlen(exe_dir); + + memcpy(model_path, exe_dir, exe_dir_len); + model_path[exe_dir_len] = '/'; + model_path[exe_dir_len + 1] = '\0'; + + strncat(model_path, model_name, BUF_LEN - strlen(model_path) - 1); + ODPH_DBG("model_path: %s\n", model_path); + model_file = fopen(model_path, "rb"); + } else { /* Can't get executable path, try to find model file at current dir*/ + model_file = fopen(model_name, "rb"); + } + + if (model_file == NULL) { + perror("Failed to open model file"); + return -1; + } + + /* Get the model file size in bytes */ + fseek(model_file, 0, SEEK_END); + model_param->size = ftell(model_file); + rewind(model_file); + + model_param->model = malloc(model_param->size); + if (!model_param->model) { + ODPH_ERR("\n\nMemory allocation failed\n"); + fclose(model_file); + return -1; + } + size = fread(model_param->model, model_param->size, 1, model_file); + + fclose(model_file); + if (size != 1) { + ODPH_ERR("\n\nRead model file failed\n"); + 
return -1; + } + + model_param->max_compl_id = 0; + + return 0; +} + +static int ml_suite_init(void) +{ + odp_ml_capability_t *ml_capa = &global.ml_capa; + odp_queue_param_t queue_param; + odp_ml_compl_pool_param_t ml_pool_param; + + memset(&global, 0, sizeof(global_t)); + global.queue = ODP_QUEUE_INVALID; + global.compl_pool = ODP_POOL_INVALID; + + if (odp_ml_capability(ml_capa)) { + ODPH_ERR("ML capability failed\n"); + return -1; + } + + if (ml_capa->max_models == 0) { + global.disabled = 1; + ODPH_DBG("ML test disabled\n"); + return 0; + } + + /* Configure ML */ + odp_ml_config_init(&global.ml_config); + global.ml_config.max_models_created = ml_capa->max_models; + global.ml_config.max_models_loaded = ml_capa->max_models_loaded; + global.ml_config.max_model_size = CONFIG_MAX_MODEL_SIZE; + + if (ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_SYNC) + global.ml_config.load_mode_mask |= ODP_ML_COMPL_MODE_SYNC; + + if (ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_POLL) + global.ml_config.load_mode_mask |= ODP_ML_COMPL_MODE_POLL; + + if (ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT) + global.ml_config.load_mode_mask |= ODP_ML_COMPL_MODE_EVENT; + + if (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_SYNC) + global.ml_config.run_mode_mask |= ODP_ML_COMPL_MODE_SYNC; + + if (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_POLL) + global.ml_config.run_mode_mask |= ODP_ML_COMPL_MODE_POLL; + + if (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT) + global.ml_config.run_mode_mask |= ODP_ML_COMPL_MODE_EVENT; + + if (odp_ml_config(&global.ml_config)) { + ODPH_ERR("\n\nConfiguring ML failed\n"); + return -1; + } + + global.x = 5; + global.wait_ns = 500 * ODP_TIME_MSEC_IN_NS; + global.y_expected = 19; /* y = 3 * x + 4 = 3 * 5 + 4 = 19 */ + + /* Prepare data for running model inference */ + odp_ml_run_param_init(&global.run_param); + + global.data.num_input_seg = NUM_INPUTS; + global.data.input_seg = &global.input_seg; + global.input_seg.size = 
sizeof(int32_t); + global.input_seg.addr = &global.x; + + global.data.num_output_seg = NUM_OUTPUTS; + global.data.output_seg = &global.output_seg; + global.output_seg.size = sizeof(int32_t); + global.output_seg.addr = &global.y; + + if (fill_model_param("simple_linear.onnx", &global.model_param)) + return -1; + + /* Create ML model */ + global.ml_model = odp_ml_model_create(MODEL_NAME, &global.model_param); + if (global.ml_model == ODP_ML_MODEL_INVALID) { + ODPH_ERR("Create ML model failed\n"); + goto error; + } + + /* Asynchronous mode with event completion is not supported */ + if (!((ml_capa->load.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT) || + (ml_capa->run.compl_mode_mask & ODP_ML_COMPL_MODE_EVENT))) + return 0; + + /* Create a queue for sending ML completion event to */ + odp_queue_param_init(&queue_param); + queue_param.type = ODP_QUEUE_TYPE_SCHED; + queue_param.sched.sync = ODP_SCHED_SYNC_PARALLEL; + queue_param.sched.prio = odp_schedule_default_prio(); + queue_param.sched.group = ODP_SCHED_GROUP_ALL; + + global.queue = odp_queue_create("ML compl queue", &queue_param); + if (global.queue == ODP_QUEUE_INVALID) { + ODPH_ERR("Queue create failed\n"); + goto error; + } + + /* Create an ML job completion pool */ + if (ml_capa->pool.max_num < NUM_COMPL) { + ODPH_ERR("Too small ML compl pool %u\n", ml_capa->pool.max_num); + goto error; + } + + odp_ml_compl_pool_param_init(&ml_pool_param); + ml_pool_param.num = NUM_COMPL; + + global.compl_pool = odp_ml_compl_pool_create(COMPL_POOL_NAME, &ml_pool_param); + if (global.compl_pool == ODP_POOL_INVALID) { + ODPH_ERR("Create ML completion pool failed\n"); + goto error; + } + + return 0; + +error: + free(global.model_param.model); + return -1; +} + +static int ml_suite_term(void) +{ + if (global.compl_pool != ODP_POOL_INVALID && + odp_pool_destroy(global.compl_pool)) { + ODPH_ERR("Completion pool destroy failed\n"); + return -1; + } + + if (global.ml_model && odp_ml_model_destroy(global.ml_model)) { + ODPH_ERR("Destroy ML 
model failed\n"); + return -1; + } + + if (global.queue != ODP_QUEUE_INVALID && + odp_queue_destroy(global.queue)) { + ODPH_ERR("Destroy ML queue failed\n"); + return -1; + } + + free(global.model_param.model); + + return 0; +} + +static int check_ml_support(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + return ODP_TEST_ACTIVE; +} + +static int check_load_sync(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + if (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + +static int check_load_poll(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + if (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_POLL) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + +static int check_load_event(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + if (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_EVENT) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + +static int check_run_sync(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + /* Model run test uses synchronous load */ + if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_SYNC) && + (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC)) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + +static int check_run_poll(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + /* Poll mode model run test uses synchronous load */ + if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_POLL) && + (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC)) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + +static int check_run_event(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + /* Event mode model run test uses synchronous load */ + if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_EVENT) && + (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC)) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + 
+static int check_run_poll_event(void) +{ + if (global.disabled) + return ODP_TEST_INACTIVE; + + /* test_ml_run_start_multi uses synchronous load, poll mode and event mode run */ + if ((global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_EVENT) && + (global.ml_config.run_mode_mask & ODP_ML_COMPL_MODE_POLL) && + (global.ml_config.load_mode_mask & ODP_ML_COMPL_MODE_SYNC)) + return ODP_TEST_ACTIVE; + + return ODP_TEST_INACTIVE; +} + +static void test_ml_debug(void) +{ + uint64_t u64; + + u64 = odp_ml_model_to_u64(global.ml_model); + CU_ASSERT(u64 != odp_ml_model_to_u64(ODP_ML_MODEL_INVALID)); + printf("\n ML model handle: 0x%" PRIx64 "\n", u64); + + odp_ml_model_print(global.ml_model); +} + +static void test_ml_model_create(void) +{ + uint32_t i; + /* One for global.ml_model */ + uint32_t max_models = global.ml_config.max_models_created - 1; + odp_ml_model_t models[max_models]; + + for (i = 0; i < max_models; i++) { + models[i] = odp_ml_model_create(NULL, &global.model_param); + + if (models[i] == ODP_ML_MODEL_INVALID) { + ODPH_ERR("ML model create failed: %u / %u\n", i, max_models); + break; + } + } + + CU_ASSERT(i == max_models); + max_models = i; + + /* Destroy valid models */ + for (i = 0; i < max_models; i++) + CU_ASSERT_FATAL(odp_ml_model_destroy(models[i]) == 0); +} + +static void test_ml_model_lookup(void) +{ + odp_ml_model_t model2; + odp_ml_model_t model_lookup; + + /* Look up model with the same name, should find one with equal handle */ + model_lookup = odp_ml_model_lookup(MODEL_NAME); + CU_ASSERT_FATAL(model_lookup != ODP_ML_MODEL_INVALID); + CU_ASSERT(odp_ml_model_to_u64(global.ml_model) == odp_ml_model_to_u64(model_lookup)); + + /* Look up model with a different name, should return invalid handle */ + model_lookup = odp_ml_model_lookup("diff"); + CU_ASSERT_FATAL(model_lookup == ODP_ML_MODEL_INVALID); + + model2 = odp_ml_model_create(MODEL_NAME, &global.model_param); + CU_ASSERT_FATAL(model2 != ODP_ML_MODEL_INVALID); + 
CU_ASSERT(odp_ml_model_to_u64(global.ml_model) != odp_ml_model_to_u64(model2)); + + model_lookup = odp_ml_model_lookup(MODEL_NAME); + CU_ASSERT(odp_ml_model_to_u64(model_lookup) == odp_ml_model_to_u64(global.ml_model) || + odp_ml_model_to_u64(model_lookup) == odp_ml_model_to_u64(model2)); + + CU_ASSERT(odp_ml_model_destroy(model2) == 0); +} + +static void test_ml_model_info(void) +{ + int ret; + uint32_t num_ret; + odp_ml_model_info_t ml_info; + odp_ml_input_info_t input_info[2]; + odp_ml_output_info_t output_info[2]; + + /* Verify model info about global.ml_model, namely, simple_linear.onnx */ + memset(&ml_info, 0x88, sizeof(odp_ml_model_info_t)); + ret = odp_ml_model_info(global.ml_model, &ml_info); + CU_ASSERT(ret == 0); + CU_ASSERT(!strcmp(ml_info.name, MODEL_NAME)); + CU_ASSERT(ml_info.model_version == 1); + CU_ASSERT(ml_info.num_inputs == NUM_INPUTS); + CU_ASSERT(ml_info.num_outputs == NUM_OUTPUTS); + + num_ret = odp_ml_model_input_info(global.ml_model, input_info, NUM_INPUTS); + CU_ASSERT(num_ret == NUM_INPUTS); + CU_ASSERT(!strcmp(input_info[0].name, "x")); + CU_ASSERT(input_info[0].shape.num_dim == 1); + CU_ASSERT(input_info[0].shape.dim[0] == 1); + CU_ASSERT((int)input_info[0].data_type == ODP_ML_DATA_TYPE_INT32); + + /* When num is 0, return normally, and input_info is ignored */ + num_ret = odp_ml_model_input_info(global.ml_model, input_info, 0); + CU_ASSERT(num_ret == NUM_INPUTS); + + /* When num is bigger than actual number of inputs, extra input_info is left untouched */ + input_info[1].data_type = (odp_ml_data_type_t)-1; + num_ret = odp_ml_model_input_info(global.ml_model, input_info, NUM_INPUTS + 1); + CU_ASSERT(num_ret == NUM_INPUTS); + CU_ASSERT(!strcmp(input_info[0].name, "x")); + CU_ASSERT(input_info[0].shape.num_dim == 1); + CU_ASSERT(input_info[0].shape.dim[0] == 1); + CU_ASSERT((int)input_info[0].data_type == ODP_ML_DATA_TYPE_INT32); + /* input_info[1] is left untouched */ + CU_ASSERT(input_info[1].data_type == (odp_ml_data_type_t)-1); + + 
num_ret = odp_ml_model_output_info(global.ml_model, output_info, NUM_OUTPUTS); + CU_ASSERT(num_ret == NUM_OUTPUTS); + CU_ASSERT(!strcmp(output_info[0].name, "y")); + CU_ASSERT(output_info[0].shape.num_dim == 1); + CU_ASSERT(output_info[0].shape.dim[0] == 1); + CU_ASSERT((int)output_info[0].data_type == ODP_ML_DATA_TYPE_INT32); + + /* When num is 0, return normally, and output_info is ignored */ + num_ret = odp_ml_model_output_info(global.ml_model, output_info, 0); + CU_ASSERT(num_ret == NUM_OUTPUTS); + + /* When num is bigger than actual number of outputs, extra output_info is left untouched */ + num_ret = odp_ml_model_output_info(global.ml_model, output_info, NUM_OUTPUTS + 1); + output_info[1].shape.num_dim = 98876; + CU_ASSERT(num_ret == NUM_OUTPUTS); + CU_ASSERT(!strcmp(output_info[0].name, "y")); + CU_ASSERT(output_info[0].shape.num_dim == 1); + CU_ASSERT(output_info[0].shape.dim[0] == 1); + CU_ASSERT((int)output_info[0].data_type == ODP_ML_DATA_TYPE_INT32); + /* output_info[1] is left untouched */ + CU_ASSERT(output_info[1].shape.num_dim == 98876); +} + +static void test_ml_model_load(void) +{ + int ret; + odp_ml_model_t test_model; + odp_ml_load_result_t result; + + test_model = odp_ml_model_create(NULL, &global.model_param); + CU_ASSERT_FATAL(test_model != ODP_ML_MODEL_INVALID); + + ret = odp_ml_model_load(test_model, &result); + CU_ASSERT(ret == 0); + CU_ASSERT(result.error_code == 0); + + ret = odp_ml_model_unload(test_model, NULL); + CU_ASSERT(ret == 0); + + CU_ASSERT(odp_ml_model_destroy(test_model) == 0); +} + +/* Test asynchronous model loading in ODP_ML_COMPL_MODE_POLL mode */ +static void test_ml_model_load_async_poll(void) +{ + int ret; + odp_ml_load_result_t result; + odp_ml_compl_param_t compl_param; + int dummy = 6; + void *user_ptr = &dummy; + uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS; + + memset(&result, 0, sizeof(result)); + odp_ml_compl_param_init(&compl_param); + compl_param.mode = ODP_ML_COMPL_MODE_POLL; + compl_param.compl_id = 0; + 
compl_param.user_ptr = user_ptr; + + ret = odp_ml_model_load_start(global.ml_model, &compl_param); + CU_ASSERT_FATAL(ret == 0); + + /* When odp_ml_model_load_start() succeeded, continue to check completion status */ + for (int i = 0; i < TIMEOUT; i++) { + ret = odp_ml_model_load_status(global.ml_model, 0, &result); + if (ret) + break; + + /* ret = 0 meaning run has not finished, continue to check status */ + odp_time_wait_ns(wait_ns); + } + + CU_ASSERT(ret > 0); + CU_ASSERT(result.error_code == 0); + CU_ASSERT(result.user_ptr == user_ptr); + /* odp_ml_model_load does not modify data in user_ptr */ + if (result.user_ptr) + CU_ASSERT(*(int *)result.user_ptr == dummy); + + ret = odp_ml_model_unload_start(global.ml_model, &compl_param); + CU_ASSERT_FATAL(ret == 0); + + /* When odp_ml_model_unload_start() succeeded, continue to check completion + * status */ + for (int i = 0; i < TIMEOUT; i++) { + ret = odp_ml_model_unload_status(global.ml_model, 0, &result); + if (ret) + break; + + /* ret = 0 meaning run has not finished, continue to check status */ + odp_time_wait_ns(wait_ns); + } + + CU_ASSERT_FATAL(ret > 0); + CU_ASSERT(result.error_code == 0); + CU_ASSERT(result.user_ptr == user_ptr); + + /* odp_ml_model_unload does not modify data in user_ptr */ + if (result.user_ptr) + CU_ASSERT(*(int *)result.user_ptr == dummy); +} + +static int +get_result_from_ml_compl_event(odp_ml_load_result_t *load_result, odp_ml_run_result_t *run_result) +{ + int ret; + odp_event_t ev; + odp_ml_compl_t compl; + odp_event_type_t ev_type; + odp_queue_t from_queue = ODP_QUEUE_INVALID; + uint64_t sched_wait = odp_schedule_wait_time(global.wait_ns); + + /* Run event scheduler to find the ml completion event */ + for (int i = 0; i < TIMEOUT; i++) { + ev = odp_schedule(&from_queue, sched_wait); + if (ev != ODP_EVENT_INVALID) + break; + } + + CU_ASSERT(ev != ODP_EVENT_INVALID); + if (ev == ODP_EVENT_INVALID) { + ODPH_ERR("Timeout while waiting for completion event\n"); + return -1; + } + + ev_type 
= odp_event_type(ev); + CU_ASSERT(from_queue == global.queue); + CU_ASSERT(ev_type == ODP_EVENT_ML_COMPL); + if (from_queue != global.queue || ev_type != ODP_EVENT_ML_COMPL) { + odp_event_free(ev); + ODPH_ERR("Received unexpected event while waiting for completion\n"); + return -1; + } + + compl = odp_ml_compl_from_event(ev); + CU_ASSERT(compl != ODP_ML_COMPL_INVALID); + + if (load_result) { + CU_ASSERT(odp_ml_compl_load_result(compl, NULL) == 0); + ret = odp_ml_compl_load_result(compl, load_result); + } else { + CU_ASSERT(odp_ml_compl_run_result(compl, NULL) == 0); + ret = odp_ml_compl_run_result(compl, run_result); + } + + CU_ASSERT(ret == 0); + odp_ml_compl_free(compl); + + return ret; +} + +/* Test asynchronous model loading in ODP_ML_COMPL_MODE_EVENT mode */ +static void test_ml_model_load_async_event(void) +{ + int ret; + odp_ml_compl_t compl; + odp_ml_load_result_t result; + odp_ml_compl_param_t compl_param; + int dummy = 6; + void *user_ptr = &dummy; + + compl = odp_ml_compl_alloc(global.compl_pool); + CU_ASSERT_FATAL(compl != ODP_ML_COMPL_INVALID); + + odp_ml_compl_param_init(&compl_param); + compl_param.mode = ODP_ML_COMPL_MODE_EVENT; + compl_param.event = odp_ml_compl_to_event(compl); + compl_param.queue = global.queue; + compl_param.user_ptr = user_ptr; + + ret = odp_ml_model_load_start(global.ml_model, &compl_param); + CU_ASSERT(ret == 0); + + /* Return when odp_ml_model_load_start() failed */ + if (ret) { + odp_ml_compl_free(compl); + ODPH_ERR("ML model odp_ml_model_load_start() failed\n"); + return; + } + + /* Run event scheduler to find the ml completion event and verify it */ + if (get_result_from_ml_compl_event(&result, NULL)) + return; + + CU_ASSERT(result.error_code == 0); + CU_ASSERT(result.user_ptr == user_ptr); + + /* Model load does not modify data in user_ptr */ + if (result.user_ptr) + CU_ASSERT(*(int *)result.user_ptr == dummy); + + compl = odp_ml_compl_alloc(global.compl_pool); + CU_ASSERT(compl != ODP_ML_COMPL_INVALID); + + if (compl == 
ODP_ML_COMPL_INVALID) + return; + + compl_param.event = odp_ml_compl_to_event(compl); + ret = odp_ml_model_unload_start(global.ml_model, &compl_param); + CU_ASSERT_FATAL(ret == 0); + + /* Run event scheduler to find the ml completion event and verify it */ + if (get_result_from_ml_compl_event(&result, NULL)) + return; + + CU_ASSERT(result.error_code == 0); + CU_ASSERT(result.user_ptr == user_ptr); + + /* odp_ml_model_unload does not modify data in user_ptr */ + if (result.user_ptr) + CU_ASSERT(*(int *)result.user_ptr == dummy); +} + +/* About model batch_add.onnx being tested in this function + * + * Model info: + * Version: 1 + * Inputs: + * inputs[0]: name: x1, type: double, shape: [c, 3] + * inputs[1]: name: x2, type: double, shape: [c, 3] + * Outputs: + * Outputs[0]: name: y, type: double, shape: [c, 3] + * + * The model computes element-wise sum of input tensors x1 and x2 and stores them + * in y. The first dimension of input and output tensors represent batch size, + * thus it must be the same for all tensors here. The dynamic dimension size + * in the output tensor here can be deduced from the given batch size, thus no + * need for the implementation to fill it. 
+ */ +#define NUM_COLUMN 3 +#define MAX_BATCH_SIZE 4 +#define SIZE (NUM_COLUMN * MAX_BATCH_SIZE * sizeof(double)) +static void run_model_batch_add(void) +{ + int ret; + odp_ml_data_t data; + odp_ml_model_t model; + odp_ml_data_seg_t input_segs[SIZE * 2]; + odp_ml_data_seg_t output_segs[SIZE]; + odp_ml_run_result_t result; + odp_ml_run_param_t run_param; + odp_ml_model_param_t model_param; + + double y[12]; + double y_expected[12]; + uint32_t batch_size = MAX_BATCH_SIZE; + double x1[12] = {97, 47, 62, 19, 93, 59, 67, 42, 28, 55, 46, 31}; + double x2[12] = {81, 56, 27, 4, 69, 12, 91, 98, 23, 90, 52, 64}; + + for (int i = 0; i < 12; i++) + y_expected[i] = x1[i] + x2[i]; + + odp_ml_model_param_init(&model_param); + + odp_ml_data_format_t input_format[2] = { + { + .data_type = ODP_ML_DATA_TYPE_FP64, + .data_type_size = 8, + .shape.type = ODP_ML_SHAPE_BATCH, + .shape.num_dim = 2, + .shape.dim = {ODP_ML_DIM_DYNAMIC, NUM_COLUMN}, + .shape.dim_max = {MAX_BATCH_SIZE, NUM_COLUMN} + }, + { + .data_type = ODP_ML_DATA_TYPE_FP64, + .data_type_size = 8, + .shape.type = ODP_ML_SHAPE_BATCH, + .shape.num_dim = 2, + .shape.dim = {ODP_ML_DIM_DYNAMIC, NUM_COLUMN}, + .shape.dim_max = {MAX_BATCH_SIZE, NUM_COLUMN} + } + }; + + model_param.extra_info.num_inputs = 2; + model_param.extra_info.input_format = input_format; + + /* Read model file batch_add.onnx into model_param */ + if (fill_model_param("batch_add.onnx", &model_param)) + return; + + model = odp_ml_model_create("batch_add", &model_param); + free(model_param.model); + CU_ASSERT(model != ODP_ML_MODEL_INVALID); + if (!model) + return; + + if (odp_ml_model_load(model, NULL)) { + CU_ASSERT(odp_ml_model_destroy(model) == 0); + return; + } + + odp_ml_model_print(model); + + /* Prepare parameters for running inference */ + odp_ml_run_param_init(&run_param); + run_param.result = &result; + + data.num_input_seg = 2; + data.input_seg = input_segs; + input_segs[0].addr = x1; + input_segs[1].addr = x2; + + data.num_output_seg = 1; + data.output_seg = 
output_segs; + output_segs[0].size = sizeof(y); + output_segs[0].addr = y; + + /* Test different batch sizes */ + for (int i = 0; i < MAX_BATCH_SIZE; i++) { + run_param.batch_size = batch_size; + input_segs[0].size = sizeof(double) * NUM_COLUMN * batch_size; + input_segs[1].size = sizeof(double) * NUM_COLUMN * batch_size; + ret = odp_ml_run(model, &data, &run_param); + CU_ASSERT(ret == 1); + if (ret != 1) + goto fail; + + for (uint32_t j = 0; j < batch_size * NUM_COLUMN; j++) + CU_ASSERT(y[j] == y_expected[j]); + + batch_size--; + } + + /* Test also without run results */ + run_param.result = NULL; + ret = odp_ml_run(model, &data, &run_param); + CU_ASSERT(ret == 1); + + /* Test different segment sizes */ + batch_size = MAX_BATCH_SIZE; + odp_ml_run_param_init(&run_param); + run_param.result = &result; + run_param.batch_size = batch_size; + data.input_seg = input_segs; + data.output_seg = output_segs; + + for (int seg_size = SIZE; seg_size > 0; seg_size--) { + int num_seg = (SIZE + seg_size - 1) / seg_size; + + if ((uint32_t)num_seg > global.ml_capa.max_segs_per_input || + (uint32_t)num_seg > global.ml_capa.max_segs_per_output) + break; + + data.num_input_seg = num_seg * 2; + data.num_output_seg = num_seg; + + for (int seg = 0; seg < num_seg; seg++) { + int size = seg_size; + + if (seg == num_seg - 1) + size = SIZE - seg * seg_size; + + input_segs[seg].addr = (char *)x1 + seg * seg_size; + input_segs[seg].size = size; + input_segs[seg + num_seg].addr = (char *)x2 + seg * seg_size; + input_segs[seg + num_seg].size = size; + output_segs[seg].addr = (char *)y + seg * seg_size; + output_segs[seg].size = size; + } + + memset(y, 0, sizeof(y)); + ret = odp_ml_run(model, &data, &run_param); + CU_ASSERT(ret == 1); + if (ret != 1) + goto fail; + + for (uint32_t j = 0; j < batch_size * NUM_COLUMN; j++) + CU_ASSERT(y[j] == y_expected[j]); + } + +fail: + CU_ASSERT_FATAL(odp_ml_model_unload(model, NULL) == 0); + CU_ASSERT(odp_ml_model_destroy(model) == 0); +} + +static void 
run_global_ml_model(void) +{ + int ret = 0; + odp_ml_run_result_t result; + + ret = odp_ml_model_load(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); + + global.run_param.result = &result; + + ret = odp_ml_run(global.ml_model, &global.data, &global.run_param); + CU_ASSERT(ret == 1); + CU_ASSERT(!result.error_code); + CU_ASSERT(*(int32_t *)global.output_seg.addr == global.y_expected); + + ret = odp_ml_model_unload(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); + global.run_param.result = NULL; +} + +static void test_ml_run(void) +{ + run_global_ml_model(); + run_model_batch_add(); +} + +static void test_ml_run_multi(void) +{ + int ret; + int32_t y; + int32_t x = 8; + int32_t y_expected = 28; + odp_ml_data_t data[RUN_NUM]; + odp_ml_data_seg_t input_seg; + odp_ml_data_seg_t output_seg; + odp_ml_run_param_t param[RUN_NUM]; + odp_ml_run_result_t result[RUN_NUM]; + uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS; + + ret = odp_ml_model_load(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); + + param[0] = global.run_param; + param[0].result = &result[0]; + odp_ml_run_param_init(&param[1]); + param[1].result = &result[1]; + + /* Prepare data for running model inference */ + data[0] = global.data; + data[1].num_input_seg = NUM_INPUTS; + data[1].input_seg = &input_seg; + input_seg.size = sizeof(int32_t); + input_seg.addr = &x; + + data[1].num_output_seg = NUM_OUTPUTS; + data[1].output_seg = &output_seg; + output_seg.size = sizeof(int32_t); + output_seg.addr = &y; + + int num_completed = 0; + + for (int i = 0; i < TIMEOUT; i++) { + ret = odp_ml_run_multi(global.ml_model, data + num_completed, param + num_completed, + RUN_NUM - num_completed); + CU_ASSERT(ret >= 0); + if (ret < 0) + break; + + num_completed += ret; + + if (num_completed >= RUN_NUM) + break; + + odp_time_wait_ns(wait_ns); + } + + CU_ASSERT(num_completed == RUN_NUM); + CU_ASSERT(!result[0].error_code); + CU_ASSERT(!result[1].error_code); + CU_ASSERT(*(int32_t *)global.output_seg.addr == 
global.y_expected); + CU_ASSERT(*(int32_t *)output_seg.addr == y_expected); + + ret = odp_ml_model_unload(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); +} + +/* Test asynchronous inference running in ODP_ML_COMPL_MODE_EVENT mode */ +static void test_ml_model_run_async_event(void) +{ + int ret; + void *user_ptr; + odp_ml_compl_t compl; + odp_ml_run_result_t result; + odp_ml_data_seg_t *outputs; + odp_ml_compl_param_t compl_param; + + /* Load model in order to run inference */ + ret = odp_ml_model_load(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); + + compl = odp_ml_compl_alloc(global.compl_pool); + CU_ASSERT_FATAL(compl != ODP_ML_COMPL_INVALID); + + odp_ml_compl_param_init(&compl_param); + compl_param.mode = ODP_ML_COMPL_MODE_EVENT; + compl_param.event = odp_ml_compl_to_event(compl); + compl_param.queue = global.queue; + + /* user_ptr structure maintains the output data pointer for output retrieval */ + user_ptr = &global.output_seg; + compl_param.user_ptr = user_ptr; + + memset(global.output_seg.addr, 0, global.output_seg.size); + ret = odp_ml_run_start(global.ml_model, &global.data, &compl_param, NULL); + CU_ASSERT_FATAL(ret == 1); + + /* Run event scheduler to find the ml completion event and verify it */ + if (get_result_from_ml_compl_event(NULL, &result)) + return; + + CU_ASSERT(!result.error_code); + CU_ASSERT(result.user_ptr == user_ptr); + + outputs = (odp_ml_data_seg_t *)result.user_ptr; + CU_ASSERT(*(int32_t *)outputs[0].addr == global.y_expected); + + /* Unload model */ + ret = odp_ml_model_unload(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); +} + +/* Test asynchronous inference running in ODP_ML_COMPL_MODE_POLL mode */ +static void test_ml_model_run_async_poll(void) +{ + int ret; + void *user_ptr; + odp_ml_run_result_t result; + odp_ml_data_seg_t *outputs; + odp_ml_compl_param_t compl_param; + uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS; + + memset(&result, 0, sizeof(result)); + /* Load model in order to run inference */ + ret = 
odp_ml_model_load(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); + + odp_ml_compl_param_init(&compl_param); + compl_param.mode = ODP_ML_COMPL_MODE_POLL; + compl_param.compl_id = 0; + + /* user_ptr structure maintains the output data pointer for output retrieval */ + user_ptr = &global.output_seg; + compl_param.user_ptr = user_ptr; + + memset(global.output_seg.addr, 0, global.output_seg.size); + ret = odp_ml_run_start(global.ml_model, &global.data, &compl_param, NULL); + CU_ASSERT_FATAL(ret == 1); + + /* When odp_ml_run_start() succeeded, continue to check completion status */ + for (int i = 0; i < TIMEOUT; i++) { + ret = odp_ml_run_status(global.ml_model, 0, &result); + if (ret) + break; + + /* ret = 0 meaning run has not finished, continue to check status */ + odp_time_wait_ns(wait_ns); + } + + outputs = (odp_ml_data_seg_t *)result.user_ptr; + + CU_ASSERT(ret > 0); + CU_ASSERT(!result.error_code); + CU_ASSERT(result.user_ptr == user_ptr); + CU_ASSERT(*(int32_t *)outputs[0].addr == global.y_expected); + + /* Unload model */ + ret = odp_ml_model_unload(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); +} + +static void test_ml_run_start_multi(void) +{ + int ret; + int32_t y; + odp_ml_compl_t compl; + odp_ml_data_t data[RUN_NUM]; + odp_ml_data_seg_t input_seg; + odp_ml_data_seg_t output_seg; + odp_ml_data_seg_t *outputs[RUN_NUM]; + odp_ml_compl_param_t compl_param[RUN_NUM]; + odp_ml_run_result_t run_result[RUN_NUM]; + int32_t x = 5; + int32_t y_expected = 19; + uint64_t wait_ns = 500 * ODP_TIME_MSEC_IN_NS; + + /* Load model in order to run inference */ + ret = odp_ml_model_load(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); + + compl = odp_ml_compl_alloc(global.compl_pool); + CU_ASSERT_FATAL(compl != ODP_ML_COMPL_INVALID); + + /* Prepare data for running model inference */ + data[0] = global.data; + + data[1].num_input_seg = NUM_INPUTS; + data[1].input_seg = &input_seg; + input_seg.size = sizeof(int32_t); + input_seg.addr = &x; + + 
data[1].num_output_seg = NUM_OUTPUTS; + data[1].output_seg = &output_seg; + output_seg.size = sizeof(int32_t); + output_seg.addr = &y; + + /* Two completion parameters: one use event mode, another poll mode */ + odp_ml_compl_param_init(&compl_param[0]); + compl_param[0].mode = ODP_ML_COMPL_MODE_EVENT; + compl_param[0].event = odp_ml_compl_to_event(compl); + compl_param[0].queue = global.queue; + /* user_ptr structure maintains the output data pointer for output retrieval */ + compl_param[0].user_ptr = &global.output_seg; + + odp_ml_compl_param_init(&compl_param[1]); + compl_param[1].mode = ODP_ML_COMPL_MODE_POLL; + compl_param[1].compl_id = 0; + /* user_ptr structure maintains the output data pointer for output retrieval */ + compl_param[1].user_ptr = &output_seg; + + memset(global.output_seg.addr, 0, sizeof(int32_t)); + + int num_completed = 0; + + for (int i = 0; i < TIMEOUT; i++) { + ret = odp_ml_run_start_multi(global.ml_model, data + num_completed, + compl_param + num_completed, NULL, + RUN_NUM - num_completed); + CU_ASSERT(ret >= 0); + if (ret < 0) + break; + + num_completed += ret; + + if (num_completed >= RUN_NUM) + break; + + odp_time_wait_ns(wait_ns); + } + + CU_ASSERT(num_completed == RUN_NUM); + + /* Run event scheduler to find the ml completion event and verify it */ + if (get_result_from_ml_compl_event(NULL, &run_result[0])) { + ret = odp_ml_model_unload(global.ml_model, NULL); + return; + } + + CU_ASSERT(!run_result[0].error_code); + CU_ASSERT(run_result[0].user_ptr == &global.output_seg); + outputs[0] = (odp_ml_data_seg_t *)run_result[0].user_ptr; + CU_ASSERT(*(int32_t *)outputs[0][0].addr == global.y_expected); + + /* Check completion status for the poll mode */ + for (int i = 0; i < TIMEOUT; i++) { + ret = odp_ml_run_status(global.ml_model, 0, &run_result[1]); + if (ret) + break; + + /* ret = 0 meaning run has not finished, continue to check status */ + odp_time_wait_ns(wait_ns); + } + + outputs[1] = (odp_ml_data_seg_t *)run_result[1].user_ptr; + 
CU_ASSERT(ret > 0); + CU_ASSERT(!run_result[1].error_code); + CU_ASSERT(run_result[1].user_ptr == &output_seg); + CU_ASSERT(*(int32_t *)outputs[1][0].addr == y_expected); + + /* Unload model */ + ret = odp_ml_model_unload(global.ml_model, NULL); + CU_ASSERT_FATAL(ret == 0); +} + +static void test_ml_model_extra_stat_info(void) +{ + int ret; + + ret = odp_ml_model_extra_stat_info(global.ml_model, NULL, 0); + CU_ASSERT(ret >= 0); +} + +static void test_ml_model_extra_stats(void) +{ + int ret; + + ret = odp_ml_model_extra_stats(global.ml_model, NULL, 0); + CU_ASSERT(ret >= 0); +} + +odp_testinfo_t ml_suite[] = { + ODP_TEST_INFO_CONDITIONAL(test_ml_debug, check_ml_support), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_create, check_ml_support), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_lookup, check_ml_support), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_info, check_ml_support), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_load, check_load_sync), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_load_async_poll, check_load_poll), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_load_async_event, check_load_event), + /* Synchronous load/unload is used load/unload model before/after model run */ + ODP_TEST_INFO_CONDITIONAL(test_ml_run, check_run_sync), + ODP_TEST_INFO_CONDITIONAL(test_ml_run_multi, check_run_sync), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_run_async_event, check_run_event), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_run_async_poll, check_run_poll), + ODP_TEST_INFO_CONDITIONAL(test_ml_run_start_multi, check_run_poll_event), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_extra_stat_info, check_ml_support), + ODP_TEST_INFO_CONDITIONAL(test_ml_model_extra_stats, check_ml_support), + ODP_TEST_INFO_NULL +}; + +odp_suiteinfo_t ml_suites[] = { + {"ML", ml_suite_init, ml_suite_term, ml_suite}, + ODP_SUITE_INFO_NULL +}; + +int main(int argc, char *argv[]) +{ + int ret; + + /* parse common options: */ + if (odp_cunit_parse_options(&argc, argv)) + return -1; + + ret = 
odp_cunit_register(ml_suites); + + if (ret == 0) + ret = odp_cunit_run(); + + return ret; +} diff --git a/platform/linux-generic/test/validation/api/ml/requirements.txt b/platform/linux-generic/test/validation/api/ml/requirements.txt new file mode 100644 index 000000000..2dcba7a3a --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/requirements.txt @@ -0,0 +1,2 @@ +onnx +numpy diff --git a/platform/linux-generic/test/validation/api/ml/simple_linear.onnx b/platform/linux-generic/test/validation/api/ml/simple_linear.onnx Binary files differ new file mode 100644 index 000000000..45c4b95b9 --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/simple_linear.onnx diff --git a/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py b/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py new file mode 100644 index 000000000..b3e6124cd --- /dev/null +++ b/platform/linux-generic/test/validation/api/ml/simple_linear_gen.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2023 Nokia +# + +import onnx +from onnx import helper +from onnx import TensorProto + +weight = helper.make_tensor(name='w', data_type=TensorProto.INT32, dims=[1], vals=[3]) +w = helper.make_node('Constant', inputs=[], outputs=['w'], name='weight', value=weight) + +bias = helper.make_tensor(name='b', data_type=TensorProto.INT32, dims=[1], vals=[4]) +b = helper.make_node('Constant', inputs=[], outputs=['b'], name='bias', value=bias) + +# The functional nodes: +mul = helper.make_node('Mul', inputs=['x', 'w'], outputs=['wx'], name='Mul') +add = helper.make_node('Add', inputs=['wx', 'b'], outputs=['y'], name='Add') + +# Create the graph +g = helper.make_graph([w, mul, b, add], 'linear', + [helper.make_tensor_value_info('x', TensorProto.INT32, [1])], + [helper.make_tensor_value_info('y', TensorProto.INT32, [1])] +) + +model = helper.make_model( + producer_name='ODP validation tests', + model_version=1, + doc_string="y = 3x + 4", + graph=g, 
+ opset_imports=[helper.make_opsetid("", 13)] +) + +# Save the model +onnx.save(model, 'simple_linear.onnx') diff --git a/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c b/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c index 3b4ba819c..98148d6c7 100644 --- a/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c +++ b/platform/linux-generic/test/validation/api/shmem/shmem_odp1.c @@ -79,7 +79,7 @@ int main(int argc, char *argv[]) int ret; /* parse common options: */ - if (odp_cunit_parse_options(argc, argv)) + if (odp_cunit_parse_options(&argc, argv)) return -1; ret = odp_cunit_register(shmem_suites); |