From 74f7b6d1a003175f1cddf979212631a0d7832399 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Tue, 20 Mar 2018 09:01:36 +0300 Subject: shippable: simplify test execution Signed-off-by: Maxim Uvarov Reviewed-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer --- .shippable.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.shippable.yml b/.shippable.yml index 82960a39e..86556441b 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -34,17 +34,19 @@ build: ci: - mkdir -p $HOME/odp-shmdir + - export CI=true ODP_SHM_DIR=$HOME/odp-shmdir ODP_TEST_OUT_XML=yes - ./bootstrap - if [ "${CC#clang}" != "${CC}" ] ; then export CXX="${CC/clang/clang++}"; fi + - echo ./configure $CONF - ./configure $CONF - make -j $(nproc) - - sudo env CI=true ODP_SHM_DIR=$HOME/odp-shmdir ODP_TEST_OUT_XML=yes ODP_SCHEDULER=basic make check + - ODP_SCHEDULER=basic make check - ./scripts/shippable-post.sh basic - - sudo env CI=true ODP_SHM_DIR=$HOME/odp-shmdir ODP_TEST_OUT_XML=yes ODP_SCHEDULER=sp make check + - ODP_SCHEDULER=sp make check - ./scripts/shippable-post.sh sp - - sudo env CI=true ODP_SHM_DIR=$HOME/odp-shmdir ODP_TEST_OUT_XML=yes ODP_SCHEDULER=iquery make check + - ODP_SCHEDULER=iquery make check - ./scripts/shippable-post.sh iquery - - sudo env CI=true ODP_SHM_DIR=$HOME/odp-shmdir ODP_TEST_OUT_XML=yes ODP_SCHEDULER=scalable make check + - ODP_SCHEDULER=scalable make check - ./scripts/shippable-post.sh scalable - rm -rf $HOME/odp-shmdir -- cgit v1.2.3 From 31d3492b50b057df853ad51c909ac21d6e95a4ca Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 20 Mar 2018 08:08:03 +0000 Subject: performance: fix sched_latency test with huge cpu count odp_sched_latency has an off-by-one error in accessing CPU stats: worker thread IDs do not start from 0. Instead of fixing just the off-by-one, use ODP_THREAD_COUNT_MAX directly to allocate the proper number of CPU stats structures. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/odp_sched_latency.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/performance/odp_sched_latency.c b/test/performance/odp_sched_latency.c index d49a212a0..ca7201193 100644 --- a/test/performance/odp_sched_latency.c +++ b/test/performance/odp_sched_latency.c @@ -27,7 +27,6 @@ /* GNU lib C */ #include -#define MAX_WORKERS 64 /**< Maximum number of worker threads */ #define MAX_QUEUES 4096 /**< Maximum number of queues */ #define EVENT_POOL_SIZE (1024 * 1024) /**< Event pool size */ #define TEST_ROUNDS (4 * 1024 * 1024) /**< Test rounds for each thread */ @@ -105,7 +104,8 @@ typedef union ODP_ALIGNED_CACHE { /** Test global variables */ typedef struct { - core_stat_t core_stat[MAX_WORKERS]; /**< Core specific stats */ + /** Core specific stats */ + core_stat_t core_stat[ODP_THREAD_COUNT_MAX]; odp_barrier_t barrier; /**< Barrier for thread synchronization */ odp_pool_t pool; /**< Pool for allocating test events */ test_args_t args; /**< Parsed command line arguments */ @@ -617,8 +617,9 @@ static void parse_args(int argc, char *argv[], test_args_t *args) } /* Make sure arguments are valid */ - if (args->cpu_count > MAX_WORKERS) - args->cpu_count = MAX_WORKERS; + /* -1 for main thread */ + if (args->cpu_count > ODP_THREAD_COUNT_MAX - 1) + args->cpu_count = ODP_THREAD_COUNT_MAX - 1; if (args->prio[LO_PRIO].queues > MAX_QUEUES) args->prio[LO_PRIO].queues = MAX_QUEUES; if (args->prio[HI_PRIO].queues > MAX_QUEUES) -- cgit v1.2.3 From edc9616a1bbc0fbc2d7f84ddafeb119d689f3f70 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sat, 17 Mar 2018 00:05:24 +0300 Subject: shippable: reenable non-ABI-compat build for GCC Non-ABI-compat build is broken only for Clang, so disable it only for that compiler, rather than disabling it completely. 
Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- .shippable.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.shippable.yml b/.shippable.yml index 86556441b..6ef5cab7c 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -6,7 +6,7 @@ compiler: env: - CONF="--disable-test-perf --disable-test-perf-proc" - # - CONF="--disable-abi-compat --disable-test-perf --disable-test-perf-proc" + - CONF="--disable-abi-compat --disable-test-perf --disable-test-perf-proc" # - CONF="--enable-schedule-sp" # - CONF="--enable-schedule-iquery" # - CONF="--enable-dpdk-zero-copy" @@ -16,7 +16,7 @@ env: # - CROSS_ARCH="i386" matrix: - allow_failures: + exclude: - compiler: clang env: CONF="--disable-abi-compat --disable-test-perf --disable-test-perf-proc" -- cgit v1.2.3 From d8dd54f2325cf859703721e7a17b6cbe734d857a Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sat, 10 Feb 2018 03:57:24 +0300 Subject: test: misc: use C++ I/O instead of C To verify that this test is really compiled using C++ compiler use cout instead of printf. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/miscellaneous/odp_api_from_cpp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/miscellaneous/odp_api_from_cpp.cpp b/test/miscellaneous/odp_api_from_cpp.cpp index 4578ae4be..7ed72ff17 100644 --- a/test/miscellaneous/odp_api_from_cpp.cpp +++ b/test/miscellaneous/odp_api_from_cpp.cpp @@ -1,11 +1,11 @@ -#include +#include #include #include int main(int argc ODP_UNUSED, const char *argv[] ODP_UNUSED) { - printf("\tODP API version: %s\n", odp_version_api_str()); - printf("\tODP implementation version: %s\n", odp_version_impl_str()); + std::cout << "\tODP API version: " << odp_version_api_str() << std::endl; + std::cout << "\tODP implementation version: " << odp_version_impl_str() << std::endl; return 0; } -- cgit v1.2.3 From 535ad269bfc6581a048d3e844576063be5d19501 Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Wed, 7 Mar 2018 15:33:55 +0200 Subject: linux-gen: queue: configurable default size Use configuration file to enable user to change default queue size. Queue size parameter from application is used for larger queues than the default size. Signed-off-by: Petri Savolainen Reviewed-by: Balasubramanian Manoharan Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- config/odp-linux-generic.conf | 5 ++ .../linux-generic/include/odp_queue_internal.h | 4 ++ platform/linux-generic/odp_queue_basic.c | 62 ++++++++++++++++++++-- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf index 15e65d00f..306ee1970 100644 --- a/config/odp-linux-generic.conf +++ b/config/odp-linux-generic.conf @@ -30,3 +30,8 @@ pktio_dpdk: { rx_drop_en = 1 } } + +queue_basic: { + # Default queue size. Value must be a power of two. 
+ default_queue_size = 4096 +} diff --git a/platform/linux-generic/include/odp_queue_internal.h b/platform/linux-generic/include/odp_queue_internal.h index 3aec3fe9d..386d804e0 100644 --- a/platform/linux-generic/include/odp_queue_internal.h +++ b/platform/linux-generic/include/odp_queue_internal.h @@ -75,6 +75,10 @@ typedef struct queue_global_t { uint32_t queue_lf_size; queue_lf_func_t queue_lf_func; + struct { + uint32_t default_queue_size; + } config; + } queue_global_t; extern queue_global_t *queue_glb; diff --git a/platform/linux-generic/odp_queue_basic.c b/platform/linux-generic/odp_queue_basic.c index 1218987b1..bbd12fe81 100644 --- a/platform/linux-generic/odp_queue_basic.c +++ b/platform/linux-generic/odp_queue_basic.c @@ -25,6 +25,7 @@ #include #include #include +#include #define NUM_INTERNAL_QUEUES 64 @@ -36,6 +37,9 @@ #include #include +#define MIN_QUEUE_SIZE 8 +#define MAX_QUEUE_SIZE CONFIG_QUEUE_SIZE + static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param); @@ -60,11 +64,11 @@ static int queue_capa(odp_queue_capability_t *capa, int sched) /* Reserve some queues for internal use */ capa->max_queues = ODP_CONFIG_QUEUES - NUM_INTERNAL_QUEUES; capa->plain.max_num = capa->max_queues; - capa->plain.max_size = CONFIG_QUEUE_SIZE; + capa->plain.max_size = MAX_QUEUE_SIZE; capa->plain.lockfree.max_num = queue_glb->queue_lf_num; capa->plain.lockfree.max_size = queue_glb->queue_lf_size; capa->sched.max_num = capa->max_queues; - capa->sched.max_size = CONFIG_QUEUE_SIZE; + capa->sched.max_size = MAX_QUEUE_SIZE; if (sched) { capa->max_ordered_locks = sched_fn->max_ordered_locks(); @@ -75,6 +79,34 @@ static int queue_capa(odp_queue_capability_t *capa, int sched) return 0; } +static int read_config_file(queue_global_t *queue_glb) +{ + const char *str; + uint32_t val_u32; + int val = 0; + + ODP_PRINT("Queue config:\n"); + + str = "queue_basic.default_queue_size"; + if (!_odp_libconfig_lookup_int(str, &val)) { + ODP_ERR("Config 
option '%s' not found.\n", str); + return -1; + } + + val_u32 = val; + + if (val_u32 > MAX_QUEUE_SIZE || val_u32 < MIN_QUEUE_SIZE || + !CHECK_IS_POWER2(val_u32)) { + ODP_ERR("Bad value %s = %u\n", str, val_u32); + return -1; + } + + queue_glb->config.default_queue_size = val_u32; + ODP_PRINT(" %s: %u\n\n", str, val_u32); + + return 0; +} + static int queue_init_global(void) { uint32_t i; @@ -104,6 +136,11 @@ static int queue_init_global(void) queue->s.handle = queue_from_index(i); } + if (read_config_file(queue_glb)) { + odp_shm_free(shm); + return -1; + } + lf_func = &queue_glb->queue_lf_func; queue_glb->queue_lf_num = queue_lf_init_global(&lf_size, lf_func); queue_glb->queue_lf_size = lf_size; @@ -207,7 +244,7 @@ static odp_queue_t queue_create(const char *name, } if (param->nonblocking == ODP_BLOCKING) { - if (param->size > CONFIG_QUEUE_SIZE) + if (param->size > MAX_QUEUE_SIZE) return ODP_QUEUE_INVALID; } else if (param->nonblocking == ODP_NONBLOCKING_LF) { /* Only plain type lock-free queues supported */ @@ -586,6 +623,8 @@ static odp_event_t queue_deq(odp_queue_t handle) static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param) { + uint32_t queue_size; + if (name == NULL) { queue->s.name[0] = 0; } else { @@ -609,9 +648,22 @@ static int queue_init(queue_entry_t *queue, const char *name, queue->s.pktin = PKTIN_INVALID; queue->s.pktout = PKTOUT_INVALID; + /* Use default size for all small queues to quarantee performance + * level. 
*/ + queue_size = queue_glb->config.default_queue_size; + if (param->size > queue_glb->config.default_queue_size) + queue_size = param->size; + + /* Round up if not already a power of two */ + queue_size = ROUNDUP_POWER2_U32(queue_size); + + if (queue_size > MAX_QUEUE_SIZE) { + ODP_ERR("Too large queue size %u\n", queue_size); + return -1; + } + ring_st_init(&queue->s.ring_st, - queue_glb->ring_data[queue->s.index].data, - CONFIG_QUEUE_SIZE); + queue_glb->ring_data[queue->s.index].data, queue_size); return 0; } -- cgit v1.2.3 From c57da8ddc80e0e314957c6d11db288ef11fc1805 Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Thu, 8 Mar 2018 14:55:14 +0200 Subject: linux-gen: queue: configurable max size Use configuration file to enable user to change the maximum queue size. Ring memory for all queues is reserved based on the max size. Signed-off-by: Petri Savolainen Reviewed-by: Balasubramanian Manoharan Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- config/odp-linux-generic.conf | 3 + .../linux-generic/include/odp_queue_internal.h | 18 +++--- platform/linux-generic/odp_queue_basic.c | 72 +++++++++++++++++----- 3 files changed, 67 insertions(+), 26 deletions(-) diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf index 306ee1970..916058866 100644 --- a/config/odp-linux-generic.conf +++ b/config/odp-linux-generic.conf @@ -32,6 +32,9 @@ pktio_dpdk: { } queue_basic: { + # Maximum queue size. Value must be a power of two. + max_queue_size = 8192 + # Default queue size. Value must be a power of two. 
default_queue_size = 4096 } diff --git a/platform/linux-generic/include/odp_queue_internal.h b/platform/linux-generic/include/odp_queue_internal.h index 386d804e0..98e86fa0e 100644 --- a/platform/linux-generic/include/odp_queue_internal.h +++ b/platform/linux-generic/include/odp_queue_internal.h @@ -63,19 +63,17 @@ union queue_entry_u { uint8_t pad[ROUNDUP_CACHE_LINE(sizeof(struct queue_entry_s))]; }; -typedef struct ODP_ALIGNED_CACHE { - /* Storage space for ring data */ - uint32_t data[CONFIG_QUEUE_SIZE]; -} queue_ring_data_t; - typedef struct queue_global_t { - queue_entry_t queue[ODP_CONFIG_QUEUES]; - queue_ring_data_t ring_data[ODP_CONFIG_QUEUES]; - uint32_t queue_lf_num; - uint32_t queue_lf_size; - queue_lf_func_t queue_lf_func; + queue_entry_t queue[ODP_CONFIG_QUEUES]; + uint32_t *ring_data; + uint32_t queue_lf_num; + uint32_t queue_lf_size; + queue_lf_func_t queue_lf_func; + odp_shm_t queue_gbl_shm; + odp_shm_t queue_ring_shm; struct { + uint32_t max_queue_size; uint32_t default_queue_size; } config; diff --git a/platform/linux-generic/odp_queue_basic.c b/platform/linux-generic/odp_queue_basic.c index bbd12fe81..89a0cd907 100644 --- a/platform/linux-generic/odp_queue_basic.c +++ b/platform/linux-generic/odp_queue_basic.c @@ -38,7 +38,7 @@ #include #define MIN_QUEUE_SIZE 8 -#define MAX_QUEUE_SIZE CONFIG_QUEUE_SIZE +#define MAX_QUEUE_SIZE (1 * 1024 * 1024) static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param); @@ -64,11 +64,11 @@ static int queue_capa(odp_queue_capability_t *capa, int sched) /* Reserve some queues for internal use */ capa->max_queues = ODP_CONFIG_QUEUES - NUM_INTERNAL_QUEUES; capa->plain.max_num = capa->max_queues; - capa->plain.max_size = MAX_QUEUE_SIZE; + capa->plain.max_size = queue_glb->config.max_queue_size; capa->plain.lockfree.max_num = queue_glb->queue_lf_num; capa->plain.lockfree.max_size = queue_glb->queue_lf_size; capa->sched.max_num = capa->max_queues; - capa->sched.max_size = 
MAX_QUEUE_SIZE; + capa->sched.max_size = queue_glb->config.max_queue_size; if (sched) { capa->max_ordered_locks = sched_fn->max_ordered_locks(); @@ -87,7 +87,7 @@ static int read_config_file(queue_global_t *queue_glb) ODP_PRINT("Queue config:\n"); - str = "queue_basic.default_queue_size"; + str = "queue_basic.max_queue_size"; if (!_odp_libconfig_lookup_int(str, &val)) { ODP_ERR("Config option '%s' not found.\n", str); return -1; @@ -101,6 +101,24 @@ static int read_config_file(queue_global_t *queue_glb) return -1; } + queue_glb->config.max_queue_size = val_u32; + ODP_PRINT(" %s: %u\n", str, val_u32); + + str = "queue_basic.default_queue_size"; + if (!_odp_libconfig_lookup_int(str, &val)) { + ODP_ERR("Config option '%s' not found.\n", str); + return -1; + } + + val_u32 = val; + + if (val_u32 > queue_glb->config.max_queue_size || + val_u32 < MIN_QUEUE_SIZE || + !CHECK_IS_POWER2(val_u32)) { + ODP_ERR("Bad value %s = %u\n", str, val_u32); + return -1; + } + queue_glb->config.default_queue_size = val_u32; ODP_PRINT(" %s: %u\n\n", str, val_u32); @@ -114,10 +132,11 @@ static int queue_init_global(void) uint32_t lf_size = 0; queue_lf_func_t *lf_func; odp_queue_capability_t capa; + uint64_t mem_size; ODP_DBG("Starts...\n"); - shm = odp_shm_reserve("odp_queues", + shm = odp_shm_reserve("_odp_queue_gbl", sizeof(queue_global_t), sizeof(queue_entry_t), 0); @@ -141,6 +160,21 @@ static int queue_init_global(void) return -1; } + queue_glb->queue_gbl_shm = shm; + mem_size = sizeof(uint32_t) * ODP_CONFIG_QUEUES * + (uint64_t)queue_glb->config.max_queue_size; + + shm = odp_shm_reserve("_odp_queue_rings", mem_size, + ODP_CACHE_LINE_SIZE, 0); + + if (shm == ODP_SHM_INVALID) { + odp_shm_free(queue_glb->queue_gbl_shm); + return -1; + } + + queue_glb->queue_ring_shm = shm; + queue_glb->ring_data = odp_shm_addr(shm); + lf_func = &queue_glb->queue_lf_func; queue_glb->queue_lf_num = queue_lf_init_global(&lf_size, lf_func); queue_glb->queue_lf_size = lf_size; @@ -170,7 +204,6 @@ static int 
queue_term_local(void) static int queue_term_global(void) { int ret = 0; - int rc = 0; queue_entry_t *queue; int i; @@ -179,20 +212,24 @@ static int queue_term_global(void) LOCK(queue); if (queue->s.status != QUEUE_STATUS_FREE) { ODP_ERR("Not destroyed queue: %s\n", queue->s.name); - rc = -1; + ret = -1; } UNLOCK(queue); } queue_lf_term_global(); - ret = odp_shm_free(odp_shm_lookup("odp_queues")); - if (ret < 0) { - ODP_ERR("shm free failed for odp_queues"); - rc = -1; + if (odp_shm_free(queue_glb->queue_ring_shm)) { + ODP_ERR("shm free failed"); + ret = -1; + } + + if (odp_shm_free(queue_glb->queue_gbl_shm)) { + ODP_ERR("shm free failed"); + ret = -1; } - return rc; + return ret; } static int queue_capability(odp_queue_capability_t *capa) @@ -244,7 +281,7 @@ static odp_queue_t queue_create(const char *name, } if (param->nonblocking == ODP_BLOCKING) { - if (param->size > MAX_QUEUE_SIZE) + if (param->size > queue_glb->config.max_queue_size) return ODP_QUEUE_INVALID; } else if (param->nonblocking == ODP_NONBLOCKING_LF) { /* Only plain type lock-free queues supported */ @@ -623,6 +660,7 @@ static odp_event_t queue_deq(odp_queue_t handle) static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param) { + uint64_t offset; uint32_t queue_size; if (name == NULL) { @@ -657,13 +695,15 @@ static int queue_init(queue_entry_t *queue, const char *name, /* Round up if not already a power of two */ queue_size = ROUNDUP_POWER2_U32(queue_size); - if (queue_size > MAX_QUEUE_SIZE) { + if (queue_size > queue_glb->config.max_queue_size) { ODP_ERR("Too large queue size %u\n", queue_size); return -1; } - ring_st_init(&queue->s.ring_st, - queue_glb->ring_data[queue->s.index].data, queue_size); + offset = queue->s.index * (uint64_t)queue_glb->config.max_queue_size; + + ring_st_init(&queue->s.ring_st, &queue_glb->ring_data[offset], + queue_size); return 0; } -- cgit v1.2.3 From bcd246e3a9948a631eb8eb77886ac1b369b13dba Mon Sep 17 00:00:00 2001 From: Petri 
Savolainen Date: Fri, 9 Mar 2018 16:49:48 +0200 Subject: linux-gen: sched: configurable priority spread Use configuration file to enable user to change priority queue spreading. Signed-off-by: Petri Savolainen Reviewed-by: Balasubramanian Manoharan Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- config/odp-linux-generic.conf | 9 ++ platform/linux-generic/odp_schedule_basic.c | 167 ++++++++++++++++++---------- 2 files changed, 115 insertions(+), 61 deletions(-) diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf index 916058866..0034c64ba 100644 --- a/config/odp-linux-generic.conf +++ b/config/odp-linux-generic.conf @@ -38,3 +38,12 @@ queue_basic: { # Default queue size. Value must be a power of two. default_queue_size = 4096 } + +sched_basic: { + # Priority level spread. Each priority level is spread into multiple + # scheduler internal queues. A higher spread value typically improves + # parallelism and thus is better for high thread counts, but causes + # uneven service level for low thread counts. Typically, optimal + # value is the number of threads using the scheduler. + prio_spread = 4 +} diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c index cd20b39dd..e6d28c6df 100644 --- a/platform/linux-generic/odp_schedule_basic.c +++ b/platform/linux-generic/odp_schedule_basic.c @@ -27,6 +27,7 @@ #include #include #include +#include /* Number of priority levels */ #define NUM_PRIO 8 @@ -41,15 +42,18 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) && /* Number of scheduling groups */ #define NUM_SCHED_GRPS 32 -/* Priority queues per priority */ -#define QUEUES_PER_PRIO 4 +/* Maximum priority queue spread */ +#define MAX_SPREAD 4 + +/* Minimum priority queue spread */ +#define MIN_SPREAD 1 /* A thread polls a non preferred sched queue every this many polls * of the prefer queue. 
*/ #define PREFER_RATIO 64 /* Size of poll weight table */ -#define WEIGHT_TBL_SIZE ((QUEUES_PER_PRIO - 1) * PREFER_RATIO) +#define WEIGHT_TBL_SIZE ((MAX_SPREAD - 1) * PREFER_RATIO) /* Maximum number of packet IO interfaces */ #define NUM_PKTIO ODP_CONFIG_PKTIO_ENTRIES @@ -60,14 +64,10 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) && /* Not a valid index */ #define NULL_INDEX ((uint32_t)-1) -/* Priority queue ring size. In worst case, all event queues are scheduled - * queues and have the same priority. The ring size must be larger than or - * equal to ODP_CONFIG_QUEUES / QUEUES_PER_PRIO, so that it can hold all - * queues in the worst case. */ -#define PRIO_QUEUE_RING_SIZE (ODP_CONFIG_QUEUES / QUEUES_PER_PRIO) - -/* Mask for wrapping around priority queue index */ -#define RING_MASK (PRIO_QUEUE_RING_SIZE - 1) +/* Maximum priority queue ring size. A ring must be large enough to store all + * queues in the worst case (all queues are scheduled, have the same priority + * and no spreading). */ +#define MAX_RING_SIZE ODP_CONFIG_QUEUES /* Priority queue empty, not a valid queue index. */ #define PRIO_QUEUE_EMPTY NULL_INDEX @@ -76,14 +76,14 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) && ODP_STATIC_ASSERT(CHECK_IS_POWER2(ODP_CONFIG_QUEUES), "Number_of_queues_is_not_power_of_two"); -/* Ring size must be power of two, so that MAX_QUEUE_IDX_MASK can be used. */ -ODP_STATIC_ASSERT(CHECK_IS_POWER2(PRIO_QUEUE_RING_SIZE), +/* Ring size must be power of two, so that mask can be used. 
*/ +ODP_STATIC_ASSERT(CHECK_IS_POWER2(MAX_RING_SIZE), "Ring_size_is_not_power_of_two"); /* Mask of queues per priority */ typedef uint8_t pri_mask_t; -ODP_STATIC_ASSERT((8 * sizeof(pri_mask_t)) >= QUEUES_PER_PRIO, +ODP_STATIC_ASSERT((8 * sizeof(pri_mask_t)) >= MAX_SPREAD, "pri_mask_t_is_too_small"); /* Start of named groups in group mask arrays */ @@ -147,7 +147,7 @@ typedef struct ODP_ALIGNED_CACHE { ring_t ring; /* Ring data: queue indexes */ - uint32_t queue_index[PRIO_QUEUE_RING_SIZE]; + uint32_t queue_index[MAX_RING_SIZE]; } prio_queue_t; @@ -168,14 +168,20 @@ typedef struct { pri_mask_t pri_mask[NUM_PRIO]; odp_spinlock_t mask_lock; - prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][QUEUES_PER_PRIO]; + prio_queue_t prio_q[NUM_SCHED_GRPS][NUM_PRIO][MAX_SPREAD]; odp_shm_t shm; - uint32_t pri_count[NUM_PRIO][QUEUES_PER_PRIO]; + + struct { + uint8_t num_spread; + } config; + + uint32_t pri_count[NUM_PRIO][MAX_SPREAD]; odp_thrmask_t mask_all; odp_spinlock_t grp_lock; odp_atomic_u32_t grp_epoch; + uint32_t ring_mask; struct { char name[ODP_SCHED_GROUP_NAME_LEN]; @@ -186,7 +192,7 @@ typedef struct { struct { uint8_t grp; uint8_t prio; - uint8_t queue_per_prio; + uint8_t spread; uint8_t sync; uint8_t order_lock_count; uint8_t poll_pktin; @@ -206,8 +212,7 @@ typedef struct { /* Check that queue[] variables are large enough */ ODP_STATIC_ASSERT(NUM_SCHED_GRPS <= 256, "Group_does_not_fit_8_bits"); ODP_STATIC_ASSERT(NUM_PRIO <= 256, "Prio_does_not_fit_8_bits"); -ODP_STATIC_ASSERT(QUEUES_PER_PRIO <= 256, - "Queues_per_prio_does_not_fit_8_bits"); +ODP_STATIC_ASSERT(MAX_SPREAD <= 256, "Spread_does_not_fit_8_bits"); ODP_STATIC_ASSERT(CONFIG_QUEUE_MAX_ORD_LOCKS <= 256, "Ordered_lock_count_does_not_fit_8_bits"); ODP_STATIC_ASSERT(NUM_PKTIO <= 256, "Pktio_index_does_not_fit_8_bits"); @@ -221,11 +226,41 @@ static __thread sched_local_t sched_local; /* Function prototypes */ static inline void schedule_release_context(void); +static int read_config_file(sched_global_t *sched) +{ + 
const char *str; + int val = 0; + + ODP_PRINT("Scheduler config:\n"); + + str = "sched_basic.prio_spread"; + if (!_odp_libconfig_lookup_int(str, &val)) { + ODP_ERR("Config option '%s' not found.\n", str); + return -1; + } + + if (val > MAX_SPREAD || val < MIN_SPREAD) { + ODP_ERR("Bad value %s = %u\n", str, val); + return -1; + } + + sched->config.num_spread = val; + ODP_PRINT(" %s: %i\n\n", str, val); + + return 0; +} + +static inline uint8_t prio_spread_index(uint32_t index) +{ + return index % sched->config.num_spread; +} + static void sched_local_init(void) { int i; - uint8_t id; - uint8_t offset = 0; + uint8_t spread; + uint8_t num_spread = sched->config.num_spread; + uint8_t offset = 1; memset(&sched_local, 0, sizeof(sched_local_t)); @@ -234,17 +269,17 @@ static void sched_local_init(void) sched_local.stash_qi = PRIO_QUEUE_EMPTY; sched_local.ordered.src_queue = NULL_INDEX; - id = sched_local.thr & (QUEUES_PER_PRIO - 1); + spread = prio_spread_index(sched_local.thr); for (i = 0; i < WEIGHT_TBL_SIZE; i++) { - sched_local.weight_tbl[i] = id; + sched_local.weight_tbl[i] = spread; - if (i % PREFER_RATIO == 0) { + if (num_spread > 1 && (i % PREFER_RATIO) == 0) { + sched_local.weight_tbl[i] = prio_spread_index(spread + + offset); offset++; - sched_local.weight_tbl[i] = (id + offset) & - (QUEUES_PER_PRIO - 1); - if (offset == QUEUES_PER_PRIO - 1) - offset = 0; + if (offset == num_spread) + offset = 1; } } } @@ -269,19 +304,24 @@ static int schedule_init_global(void) memset(sched, 0, sizeof(sched_global_t)); + if (read_config_file(sched)) { + odp_shm_free(shm); + return -1; + } + sched->shm = shm; odp_spinlock_init(&sched->mask_lock); for (grp = 0; grp < NUM_SCHED_GRPS; grp++) { for (i = 0; i < NUM_PRIO; i++) { - for (j = 0; j < QUEUES_PER_PRIO; j++) { + for (j = 0; j < MAX_SPREAD; j++) { prio_queue_t *prio_q; int k; prio_q = &sched->prio_q[grp][i][j]; ring_init(&prio_q->ring); - for (k = 0; k < PRIO_QUEUE_RING_SIZE; k++) { + for (k = 0; k < MAX_RING_SIZE; k++) { 
prio_q->queue_index[k] = PRIO_QUEUE_EMPTY; } @@ -322,14 +362,15 @@ static int schedule_term_global(void) int ret = 0; int rc = 0; int i, j, grp; + uint32_t ring_mask = sched->ring_mask; for (grp = 0; grp < NUM_SCHED_GRPS; grp++) { for (i = 0; i < NUM_PRIO; i++) { - for (j = 0; j < QUEUES_PER_PRIO; j++) { + for (j = 0; j < MAX_SPREAD; j++) { ring_t *ring = &sched->prio_q[grp][i][j].ring; uint32_t qi; - while ((qi = ring_deq(ring, RING_MASK)) != + while ((qi = ring_deq(ring, ring_mask)) != RING_EMPTY) { odp_event_t events[1]; int num; @@ -413,11 +454,6 @@ static uint32_t schedule_max_ordered_locks(void) return CONFIG_QUEUE_MAX_ORD_LOCKS; } -static inline int queue_per_prio(uint32_t queue_index) -{ - return ((QUEUES_PER_PRIO - 1) & queue_index); -} - static void pri_set(int id, int prio) { odp_spinlock_lock(&sched->mask_lock); @@ -441,33 +477,39 @@ static void pri_clr(int id, int prio) static void pri_set_queue(uint32_t queue_index, int prio) { - int id = queue_per_prio(queue_index); + uint8_t id = prio_spread_index(queue_index); return pri_set(id, prio); } static void pri_clr_queue(uint32_t queue_index, int prio) { - int id = queue_per_prio(queue_index); + uint8_t id = prio_spread_index(queue_index); pri_clr(id, prio); } static int schedule_init_queue(uint32_t queue_index, const odp_schedule_param_t *sched_param) { + uint32_t ring_size; int i; int prio = sched_param->prio; pri_set_queue(queue_index, prio); sched->queue[queue_index].grp = sched_param->group; sched->queue[queue_index].prio = prio; - sched->queue[queue_index].queue_per_prio = queue_per_prio(queue_index); + sched->queue[queue_index].spread = prio_spread_index(queue_index); sched->queue[queue_index].sync = sched_param->sync; sched->queue[queue_index].order_lock_count = sched_param->lock_count; sched->queue[queue_index].poll_pktin = 0; sched->queue[queue_index].pktio_index = 0; sched->queue[queue_index].pktin_index = 0; + ring_size = MAX_RING_SIZE / sched->config.num_spread; + ring_size = 
ROUNDUP_POWER2_U32(ring_size); + ODP_ASSERT(ring_size <= MAX_RING_SIZE); + sched->ring_mask = ring_size - 1; + odp_atomic_init_u64(&sched->order[queue_index].ctx, 0); odp_atomic_init_u64(&sched->order[queue_index].next_ctx, 0); @@ -492,9 +534,9 @@ static void schedule_destroy_queue(uint32_t queue_index) int prio = sched->queue[queue_index].prio; pri_clr_queue(queue_index, prio); - sched->queue[queue_index].grp = 0; - sched->queue[queue_index].prio = 0; - sched->queue[queue_index].queue_per_prio = 0; + sched->queue[queue_index].grp = 0; + sched->queue[queue_index].prio = 0; + sched->queue[queue_index].spread = 0; if (queue_is_ordered(queue_index) && odp_atomic_load_u64(&sched->order[queue_index].ctx) != @@ -504,12 +546,12 @@ static void schedule_destroy_queue(uint32_t queue_index) static int schedule_sched_queue(uint32_t queue_index) { - int grp = sched->queue[queue_index].grp; - int prio = sched->queue[queue_index].prio; - int queue_per_prio = sched->queue[queue_index].queue_per_prio; - ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring; + int grp = sched->queue[queue_index].grp; + int prio = sched->queue[queue_index].prio; + int spread = sched->queue[queue_index].spread; + ring_t *ring = &sched->prio_q[grp][prio][spread].ring; - ring_enq(ring, RING_MASK, queue_index); + ring_enq(ring, sched->ring_mask, queue_index); return 0; } @@ -540,13 +582,14 @@ static void schedule_release_atomic(void) uint32_t qi = sched_local.stash_qi; if (qi != PRIO_QUEUE_EMPTY && sched_local.stash_num == 0) { - int grp = sched->queue[qi].grp; - int prio = sched->queue[qi].prio; - int queue_per_prio = sched->queue[qi].queue_per_prio; - ring_t *ring = &sched->prio_q[grp][prio][queue_per_prio].ring; + int grp = sched->queue[qi].grp; + int prio = sched->queue[qi].prio; + int spread = sched->queue[qi].spread; + ring_t *ring = &sched->prio_q[grp][prio][spread].ring; /* Release current atomic queue */ - ring_enq(ring, RING_MASK, qi); + ring_enq(ring, sched->ring_mask, qi); + 
sched_local.stash_qi = PRIO_QUEUE_EMPTY; } } @@ -773,8 +816,10 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[], int prio, i; int ret; int id; - unsigned int max_deq = MAX_DEQ; uint32_t qi; + unsigned int max_deq = MAX_DEQ; + int num_spread = sched->config.num_spread; + uint32_t ring_mask = sched->ring_mask; /* Schedule events */ for (prio = 0; prio < NUM_PRIO; prio++) { @@ -785,14 +830,14 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[], /* Select the first ring based on weights */ id = first; - for (i = 0; i < QUEUES_PER_PRIO;) { + for (i = 0; i < num_spread;) { int num; int ordered; odp_queue_t handle; ring_t *ring; int pktin; - if (id >= QUEUES_PER_PRIO) + if (id >= num_spread) id = 0; /* No queues created for this priority queue */ @@ -805,7 +850,7 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[], /* Get queue index from the priority queue */ ring = &sched->prio_q[grp][prio][id].ring; - qi = ring_deq(ring, RING_MASK); + qi = ring_deq(ring, ring_mask); /* Priority queue empty */ if (qi == RING_EMPTY) { @@ -854,7 +899,7 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[], continue; if (num_pkt == 0 || !stash) { - ring_enq(ring, RING_MASK, qi); + ring_enq(ring, ring_mask, qi); break; } @@ -880,14 +925,14 @@ static inline int do_schedule_grp(odp_queue_t *out_queue, odp_event_t out_ev[], sched_local.ordered.src_queue = qi; /* Continue scheduling ordered queues */ - ring_enq(ring, RING_MASK, qi); + ring_enq(ring, ring_mask, qi); } else if (queue_is_atomic(qi)) { /* Hold queue during atomic access */ sched_local.stash_qi = qi; } else { /* Continue scheduling the queue */ - ring_enq(ring, RING_MASK, qi); + ring_enq(ring, ring_mask, qi); } handle = queue_from_index(qi); -- cgit v1.2.3 From 12d238d96318cc9d46dd5a2893894824a633805b Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Mon, 12 Mar 2018 16:54:41 +0200 Subject: linux-gen: sched: 
decouple spread and group table sizes The only thing common between these tables was the shared round counter. With separate counters, tables can have different sizes. In particular, the spread table size needs to be increased and must also support sizes that are not a power of two (configurable spread values). Signed-off-by: Petri Savolainen Reviewed-by: Balasubramanian Manoharan Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_schedule_basic.c | 48 +++++++++++++++++------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c index e6d28c6df..3bcafdb4f 100644 --- a/platform/linux-generic/odp_schedule_basic.c +++ b/platform/linux-generic/odp_schedule_basic.c @@ -42,6 +42,9 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) && /* Number of scheduling groups */ #define NUM_SCHED_GRPS 32 +/* Group weight table size */ +#define GRP_WEIGHT_TBL_SIZE NUM_SCHED_GRPS + /* Maximum priority queue spread */ #define MAX_SPREAD 4 @@ -52,8 +55,8 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) && * of the prefer queue. 
*/ #define PREFER_RATIO 64 -/* Size of poll weight table */ -#define WEIGHT_TBL_SIZE ((MAX_SPREAD - 1) * PREFER_RATIO) +/* Spread weight table */ +#define SPREAD_TBL_SIZE ((MAX_SPREAD - 1) * PREFER_RATIO) /* Maximum number of packet IO interfaces */ #define NUM_PKTIO ODP_CONFIG_PKTIO_ENTRIES @@ -116,17 +119,18 @@ typedef struct { int thr; uint16_t stash_num; uint16_t stash_index; - uint16_t pause; - uint16_t round; + uint16_t grp_round; + uint16_t spread_round; uint32_t stash_qi; odp_queue_t stash_queue; odp_event_t stash_ev[MAX_DEQ]; uint32_t grp_epoch; - int num_grp; + uint16_t num_grp; + uint16_t pause; uint8_t grp[NUM_SCHED_GRPS]; - uint8_t weight_tbl[WEIGHT_TBL_SIZE]; - uint8_t grp_weight[WEIGHT_TBL_SIZE]; + uint8_t spread_tbl[SPREAD_TBL_SIZE]; + uint8_t grp_weight[GRP_WEIGHT_TBL_SIZE]; struct { /* Source queue index */ @@ -182,6 +186,7 @@ typedef struct { odp_spinlock_t grp_lock; odp_atomic_u32_t grp_epoch; uint32_t ring_mask; + uint16_t max_spread; struct { char name[ODP_SCHED_GROUP_NAME_LEN]; @@ -216,6 +221,7 @@ ODP_STATIC_ASSERT(MAX_SPREAD <= 256, "Spread_does_not_fit_8_bits"); ODP_STATIC_ASSERT(CONFIG_QUEUE_MAX_ORD_LOCKS <= 256, "Ordered_lock_count_does_not_fit_8_bits"); ODP_STATIC_ASSERT(NUM_PKTIO <= 256, "Pktio_index_does_not_fit_8_bits"); +ODP_STATIC_ASSERT(CHECK_IS_POWER2(GRP_WEIGHT_TBL_SIZE), "Not_power_of_2"); /* Global scheduler context */ static sched_global_t *sched; @@ -271,11 +277,11 @@ static void sched_local_init(void) spread = prio_spread_index(sched_local.thr); - for (i = 0; i < WEIGHT_TBL_SIZE; i++) { - sched_local.weight_tbl[i] = spread; + for (i = 0; i < SPREAD_TBL_SIZE; i++) { + sched_local.spread_tbl[i] = spread; if (num_spread > 1 && (i % PREFER_RATIO) == 0) { - sched_local.weight_tbl[i] = prio_spread_index(spread + + sched_local.spread_tbl[i] = prio_spread_index(spread + offset); offset++; if (offset == num_spread) @@ -309,6 +315,8 @@ static int schedule_init_global(void) return -1; } + /* When num_spread == 1, only spread_tbl[0] is 
used. */ + sched->max_spread = (sched->config.num_spread - 1) * PREFER_RATIO; sched->shm = shm; odp_spinlock_init(&sched->mask_lock); @@ -442,7 +450,7 @@ static inline int grp_update_tbl(void) odp_spinlock_unlock(&sched->grp_lock); /* Update group weights. Round robin over all thread's groups. */ - for (i = 0; i < WEIGHT_TBL_SIZE; i++) + for (i = 0; i < GRP_WEIGHT_TBL_SIZE; i++) sched_local.grp_weight[i] = i % num; sched_local.num_grp = num; @@ -961,7 +969,7 @@ static inline int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[], int i, num_grp; int ret; int first, grp_id; - uint16_t round; + uint16_t spread_round, grp_round; uint32_t epoch; if (sched_local.stash_num) { @@ -978,15 +986,17 @@ static inline int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[], if (odp_unlikely(sched_local.pause)) return 0; - /* Each thread prefers a priority queue. Poll weight table avoids + /* Each thread prefers a priority queue. Spread weight table avoids * starvation of other priority queues on low thread counts. 
*/ - round = sched_local.round + 1; + spread_round = sched_local.spread_round; + grp_round = (sched_local.grp_round++) & (GRP_WEIGHT_TBL_SIZE - 1); - if (odp_unlikely(round == WEIGHT_TBL_SIZE)) - round = 0; + if (odp_unlikely(spread_round + 1 >= sched->max_spread)) + sched_local.spread_round = 0; + else + sched_local.spread_round = spread_round + 1; - sched_local.round = round; - first = sched_local.weight_tbl[round]; + first = sched_local.spread_tbl[spread_round]; epoch = odp_atomic_load_acq_u32(&sched->grp_epoch); num_grp = sched_local.num_grp; @@ -996,7 +1006,7 @@ static inline int do_schedule(odp_queue_t *out_queue, odp_event_t out_ev[], sched_local.grp_epoch = epoch; } - grp_id = sched_local.grp_weight[round]; + grp_id = sched_local.grp_weight[grp_round]; /* Schedule queues per group and priority */ for (i = 0; i < num_grp; i++) { -- cgit v1.2.3 From 7e0c523ad6f209f238277e4116c21ba696edb7f4 Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Mon, 12 Mar 2018 17:05:32 +0200 Subject: linux-gen: sched: increase max spread Allow user to configure larger than the default spread value. 
Signed-off-by: Petri Savolainen Reviewed-by: Balasubramanian Manoharan Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_schedule_basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/linux-generic/odp_schedule_basic.c b/platform/linux-generic/odp_schedule_basic.c index 3bcafdb4f..b50462a55 100644 --- a/platform/linux-generic/odp_schedule_basic.c +++ b/platform/linux-generic/odp_schedule_basic.c @@ -46,7 +46,7 @@ ODP_STATIC_ASSERT((ODP_SCHED_PRIO_NORMAL > 0) && #define GRP_WEIGHT_TBL_SIZE NUM_SCHED_GRPS /* Maximum priority queue spread */ -#define MAX_SPREAD 4 +#define MAX_SPREAD 8 /* Minimum priority queue spread */ #define MIN_SPREAD 1 -- cgit v1.2.3 From 71ca38c00c11ebb6b4dede6d09ebdd28eb45d890 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Tue, 13 Mar 2018 08:44:50 +0200 Subject: linux-gen: dpdk: bump supported dpdk version to v17.11 Bump supported DPDK version to LTS version 17.11. Also adds dependencies for optional Mellanox PMD drivers. Signed-off-by: Matias Elo Reviewed-by: Bogdan Pricope Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- .travis.yml | 10 ++++++---- DEPENDENCIES | 2 +- m4/odp_dpdk.m4 | 2 ++ scripts/build-pktio-dpdk | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1f5ef42e6..2f47ed79e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,7 +49,7 @@ env: # for individual commit validation. But you you want to track tests history # you need generated new one at https://codecov.io specific for your repo. 
- CODECOV_TOKEN=a733c34c-5f5c-4ff1-af4b-e9f5edb1ab5e - - DPDK_VERS="17.08" + - DPDK_VERS="17.11" matrix: - CONF="" - CONF="--disable-abi-compat" @@ -59,8 +59,6 @@ env: - CONF="--disable-host-optimization" - CONF="--disable-host-optimization --disable-abi-compat" - DPDK_SHARED="y" CONF="--disable-static-applications" - - DPDK_VERS="17.11" CONF="" - - DPDK_VERS="17.11" DPDK_SHARED="y" CONF="--disable-static-applications" compiler: - gcc @@ -196,7 +194,7 @@ install: fi DPDK_TARGET="${DPDK_TARGET}gcc" if [ ! -f "dpdk/${TARGET}/usr/local/lib/libdpdk.$LIBDPDKEXT" ]; then - git -c advice.detachedHead=false clone -q --depth=1 --single-branch --branch=v${DPDK_VERS} http://dpdk.org/git/dpdk dpdk + git -c advice.detachedHead=false clone -q --depth=1 --single-branch --branch=${DPDK_VERS} http://dpdk.org/git/dpdk-stable dpdk pushd dpdk git log --oneline --decorate # AArch64 && ARMv7 fixup @@ -207,6 +205,10 @@ install: make config T=${DPDK_TARGET} O=${TARGET} pushd ${TARGET} sed -ri 's,(CONFIG_RTE_LIBRTE_PMD_PCAP=).*,\1y,' .config + # OCTEON TX driver includes ARM v8.1 instructions + sed -ri 's,(CONFIG_RTE_LIBRTE_OCTEONTX_PMD=).*,\1n,' .config + sed -ri 's,(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF=).*,\1n,' .config + sed -ri 's,(CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL=).*,\1n,' .config if test -n "${DPDK_MACHINE}" ; then sed -ri 's,(CONFIG_RTE_MACHINE=).*,\1"'${DPDK_MACHINE}'",' .config fi diff --git a/DEPENDENCIES b/DEPENDENCIES index 2cd8ccb4c..f328e787d 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -174,7 +174,7 @@ Prerequisites for building the OpenDataPlane (ODP) API 3.4.1 Building DPDK and ODP with DPDK pktio support - DPDK packet I/O has been tested to work with DPDK v17.08. + DPDK packet I/O has been tested to work with DPDK v17.11. 
Follow steps in ./scripts/build-pktio-dpdk diff --git a/m4/odp_dpdk.m4 b/m4/odp_dpdk.m4 index b94c9b557..dccc6569c 100644 --- a/m4/odp_dpdk.m4 +++ b/m4/odp_dpdk.m4 @@ -9,6 +9,8 @@ cur_driver=`basename "$filename" .a | sed -e 's/^lib//'` AS_VAR_APPEND([DPDK_PMDS], [-l$cur_driver,]) AS_CASE([$cur_driver], [rte_pmd_nfp], [AS_VAR_APPEND([DPDK_LIBS], [" -lm"])], + [rte_pmd_mlx4], [AS_VAR_APPEND([DPDK_LIBS], [" -lmlx4 -libverbs"])], + [rte_pmd_mlx5], [AS_VAR_APPEND([DPDK_LIBS], [" -lmlx5 -libverbs"])], [rte_pmd_pcap], [AS_VAR_APPEND([DPDK_LIBS], [" -lpcap"])], [rte_pmd_openssl], [AS_VAR_APPEND([DPDK_LIBS], [" -lcrypto"])]) done diff --git a/scripts/build-pktio-dpdk b/scripts/build-pktio-dpdk index 26afd97c9..b0c0a4d0e 100755 --- a/scripts/build-pktio-dpdk +++ b/scripts/build-pktio-dpdk @@ -16,7 +16,7 @@ if [ "$?" != "0" ]; then exit 1 fi -git -c advice.detachedHead=false clone -q --depth=1 --single-branch --branch=v17.08 http://dpdk.org/git/dpdk dpdk +git -c advice.detachedHead=false clone -q --depth=1 --single-branch --branch=17.11 http://dpdk.org/git/dpdk-stable dpdk pushd dpdk git log --oneline --decorate -- cgit v1.2.3 From 4aacfd4d72b80a22d449d865a6c4f063afa823ab Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Thu, 15 Mar 2018 16:47:09 +0200 Subject: linux-gen: pktio: add error prints Added error prints for failures due to a bad handle. 
Signed-off-by: Petri Savolainen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_packet_io.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/platform/linux-generic/odp_packet_io.c b/platform/linux-generic/odp_packet_io.c index 56e9f6860..5b019579a 100644 --- a/platform/linux-generic/odp_packet_io.c +++ b/platform/linux-generic/odp_packet_io.c @@ -376,8 +376,10 @@ int odp_pktio_close(odp_pktio_t hdl) int res; entry = get_pktio_entry(hdl); - if (entry == NULL) + if (entry == NULL) { + ODP_ERR("Bad handle\n"); return -1; + } if (entry->s.state == PKTIO_STATE_STARTED) { ODP_DBG("Missing odp_pktio_stop() before close.\n"); @@ -416,8 +418,10 @@ int odp_pktio_config(odp_pktio_t hdl, const odp_pktio_config_t *config) int res = 0; entry = get_pktio_entry(hdl); - if (!entry) + if (!entry) { + ODP_ERR("Bad handle\n"); return -1; + } if (config == NULL) { odp_pktio_config_init(&default_config); @@ -466,12 +470,15 @@ int odp_pktio_start(odp_pktio_t hdl) int res = 0; entry = get_pktio_entry(hdl); - if (!entry) + if (!entry) { + ODP_ERR("Bad handle\n"); return -1; + } lock_entry(entry); if (entry->s.state == PKTIO_STATE_STARTED) { unlock_entry(entry); + ODP_ERR("Already started\n"); return -1; } if (entry->s.ops->start) @@ -513,8 +520,10 @@ static int _pktio_stop(pktio_entry_t *entry) int res = 0; odp_pktin_mode_t mode = entry->s.param.in_mode; - if (entry->s.state != PKTIO_STATE_STARTED) + if (entry->s.state != PKTIO_STATE_STARTED) { + ODP_ERR("Not started\n"); return -1; + } if (entry->s.ops->stop) res = entry->s.ops->stop(entry); @@ -536,8 +545,10 @@ int odp_pktio_stop(odp_pktio_t hdl) int res; entry = get_pktio_entry(hdl); - if (!entry) + if (!entry) { + ODP_ERR("Bad handle\n"); return -1; + } lock_entry(entry); res = _pktio_stop(entry); -- cgit v1.2.3 From b08aa94548163ae4d4bac2724e68ca9102667f6a Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Fri, 16 Mar 2018 11:37:20 +0200 Subject: 
linux-gen: pktio: add index to pktio print Print also pktio index. It is often more useful information to the user than handle (which may be e.g. a pointer). Signed-off-by: Petri Savolainen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_packet_io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/platform/linux-generic/odp_packet_io.c b/platform/linux-generic/odp_packet_io.c index 5b019579a..bf7aef148 100644 --- a/platform/linux-generic/odp_packet_io.c +++ b/platform/linux-generic/odp_packet_io.c @@ -1169,13 +1169,15 @@ void odp_pktio_print(odp_pktio_t hdl) len += snprintf(&str[len], n - len, "pktio\n"); - len += snprintf(&str[len], n - len, - " handle %" PRIu64 "\n", - odp_pktio_to_u64(hdl)); len += snprintf(&str[len], n - len, " name %s\n", entry->s.name); len += snprintf(&str[len], n - len, " type %s\n", entry->s.ops->name); + len += snprintf(&str[len], n - len, + " index %i\n", _odp_pktio_index(hdl)); + len += snprintf(&str[len], n - len, + " handle (u64) %" PRIu64 "\n", + odp_pktio_to_u64(hdl)); len += snprintf(&str[len], n - len, " state %s\n", entry->s.state == PKTIO_STATE_STARTED ? "start" : -- cgit v1.2.3 From f80f28c4046b650c180bcb9b41c859c0c8e2564e Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Thu, 15 Mar 2018 16:43:55 +0200 Subject: test: sched_pktio: new scheduler performance test with pktio Added new test which can be used to test scheduler performance with packet IO. Future development includes multiple application pipeline stages through queues. 
Signed-off-by: Petri Savolainen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/.gitignore | 1 + test/performance/Makefile.am | 2 + test/performance/odp_sched_pktio.c | 722 +++++++++++++++++++++++++++++++++++++ 3 files changed, 725 insertions(+) create mode 100644 test/performance/odp_sched_pktio.c diff --git a/test/performance/.gitignore b/test/performance/.gitignore index 72035e002..259f690cc 100644 --- a/test/performance/.gitignore +++ b/test/performance/.gitignore @@ -7,4 +7,5 @@ odp_l2fwd odp_pktio_ordered odp_pktio_perf odp_sched_latency +odp_sched_pktio odp_scheduling diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index e50c840b3..458c8fb0e 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -9,6 +9,7 @@ EXECUTABLES = odp_bench_packet \ COMPILE_ONLY = odp_l2fwd \ odp_pktio_ordered \ odp_sched_latency \ + odp_sched_pktio \ odp_scheduling TESTSCRIPTS = odp_l2fwd_run.sh \ @@ -31,6 +32,7 @@ odp_bench_packet_SOURCES = odp_bench_packet.c odp_crypto_SOURCES = odp_crypto.c odp_pktio_ordered_SOURCES = odp_pktio_ordered.c dummy_crc.h odp_sched_latency_SOURCES = odp_sched_latency.c +odp_sched_pktio_SOURCES = odp_sched_pktio.c odp_scheduling_SOURCES = odp_scheduling.c odp_pktio_perf_SOURCES = odp_pktio_perf.c diff --git a/test/performance/odp_sched_pktio.c b/test/performance/odp_sched_pktio.c new file mode 100644 index 000000000..cf6e17f4c --- /dev/null +++ b/test/performance/odp_sched_pktio.c @@ -0,0 +1,722 @@ +/* Copyright (c) 2018, Linaro Limited + * All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include + +#include +#include + +#define MAX_WORKERS 64 +#define MAX_PKTIOS 32 +#define MAX_PKTIO_NAME 31 +#define MAX_PKTIO_QUEUES MAX_WORKERS +#define MAX_PKT_LEN 1514 +#define MAX_PKT_NUM (16 * 1024) +#define MIN_PKT_SEG_LEN 64 +#define BURST_SIZE 32 +#define CHECK_PERIOD 10000 +#define MAX_PKTIO_INDEXES 256 + +typedef struct { + int worker_id; + void *test_global_ptr; +} worker_arg_t; + +typedef struct { + volatile int stop_workers; + odp_barrier_t worker_start; + + struct { + int num_worker; + int num_pktio; + } opt; + + int max_workers; + odp_cpumask_t cpumask; + odp_instance_t instance; + + int worker_cpu[MAX_WORKERS]; + + odp_pool_t pool; + uint32_t pkt_len; + uint32_t pkt_num; + + unsigned int num_input_queues; + unsigned int num_output_queues; + + struct { + char name[MAX_PKTIO_NAME + 1]; + odp_pktio_t pktio; + int pktio_index; + int started; + odph_ethaddr_t my_addr; + odp_queue_t input_queue[MAX_PKTIO_QUEUES]; + odp_pktout_queue_t pktout[MAX_PKTIO_QUEUES]; + + } pktio[MAX_PKTIOS]; + + worker_arg_t worker_arg[MAX_WORKERS]; + + /* Maps pktio input index to pktio[] index for output */ + uint8_t pktio_map[MAX_PKTIO_INDEXES]; + +} test_global_t; + +static test_global_t *test_global; + +static inline void set_dst_eth_addr(odph_ethaddr_t *eth_addr, int index) +{ + eth_addr->addr[0] = 0x02; + eth_addr->addr[1] = 0; + eth_addr->addr[2] = 0; + eth_addr->addr[3] = 0; + eth_addr->addr[4] = 0; + eth_addr->addr[5] = index; +} + +static inline void fill_eth_addr(odp_packet_t pkt[], int num, + test_global_t *test_global, int out) +{ + odph_ethhdr_t *eth; + int i; + + for (i = 0; i < num; ++i) { + eth = odp_packet_data(pkt[i]); + + eth->src = test_global->pktio[out].my_addr; + set_dst_eth_addr(&eth->dst, out); + } +} + +static int worker_thread(void *arg) +{ + odp_event_t ev[BURST_SIZE]; + int num, sent, drop, in, out; + odp_pktout_queue_t pktout; + worker_arg_t *worker_arg = arg; + 
test_global_t *test_global = worker_arg->test_global_ptr; + int worker_id = worker_arg->worker_id; + uint32_t polls = 0; + + printf("Worker %i started\n", worker_id); + + /* Wait for other workers to start */ + odp_barrier_wait(&test_global->worker_start); + + while (1) { + odp_packet_t pkt[BURST_SIZE]; + + num = odp_schedule_multi(NULL, ODP_SCHED_NO_WAIT, + ev, BURST_SIZE); + + polls++; + + if (polls == CHECK_PERIOD) { + polls = 0; + if (test_global->stop_workers) + break; + } + + if (num <= 0) + continue; + + odp_packet_from_event_multi(pkt, ev, num); + + in = odp_packet_input_index(pkt[0]); + out = test_global->pktio_map[in]; + pktout = test_global->pktio[out].pktout[worker_id]; + + fill_eth_addr(pkt, num, test_global, out); + + sent = odp_pktout_send(pktout, pkt, num); + + if (odp_unlikely(sent < 0)) + sent = 0; + + drop = num - sent; + + if (odp_unlikely(drop)) + odp_packet_free_multi(&pkt[sent], drop); + } + + printf("Worker %i stopped\n", worker_id); + + return 0; +} + +static void sig_handler(int signo) +{ + (void)signo; + + if (test_global) { + test_global->stop_workers = 1; + odp_mb_full(); + } +} + +/* Get rid of path in filename - only for unix-type paths using '/' */ +#define NO_PATH(x) (strrchr((x), '/') ? strrchr((x), '/') + 1 : (x)) + +static void print_usage(const char *progname) +{ + printf("\n" + "Scheduler with packet IO test application.\n" + "\n" + "Usage: %s [options]\n" + "\n" + "OPTIONS:\n" + " -i, --interface Packet IO interfaces (comma-separated, no spaces)\n" + " -c, --count Worker thread count. 
Default: 1\n" + " -h, --help Display help and exit.\n\n", + NO_PATH(progname)); +} + +static int parse_options(int argc, char *argv[], test_global_t *test_global) +{ + int i, opt, long_index; + char *name, *str; + int len, str_len; + const struct option longopts[] = { + {"interface", required_argument, NULL, 'i'}, + {"count", required_argument, NULL, 'c'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + const char *shortopts = "+i:c:h"; + int ret = 0; + + test_global->opt.num_worker = 1; + + /* let helper collect its own arguments (e.g. --odph_proc) */ + odph_parse_options(argc, argv, shortopts, longopts); + + while (1) { + opt = getopt_long(argc, argv, shortopts, longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'i': + i = 0; + str = optarg; + str_len = strlen(str); + + while (str_len > 0) { + len = strcspn(str, ","); + str_len -= len + 1; + + if (i == MAX_PKTIOS) { + printf("Error: Too many interfaces\n"); + ret = -1; + break; + } + + if (len > MAX_PKTIO_NAME) { + printf("Error: Too long interface name %s\n", + str); + ret = -1; + break; + } + + name = test_global->pktio[i].name; + memcpy(name, str, len); + str += len + 1; + i++; + } + + test_global->opt.num_pktio = i; + + break; + case 'c': + test_global->opt.num_worker = atoi(optarg); + break; + case 'h': + print_usage(argv[0]); + ret = -1; + break; + default: + ret = -1; + break; + } + } + + return ret; +} + +static int config_setup(test_global_t *test_global) +{ + int i, cpu; + odp_pool_capability_t pool_capa; + uint32_t pkt_len, pkt_num; + odp_cpumask_t *cpumask = &test_global->cpumask; + + test_global->max_workers = odp_cpumask_default_worker(cpumask, 0); + + if (test_global->opt.num_worker > test_global->max_workers || + test_global->opt.num_worker > MAX_WORKERS) { + printf("Error: Too many workers %i.\n", + test_global->opt.num_worker); + return -1; + } + + cpu = odp_cpumask_first(cpumask); + for (i = 0; i < test_global->opt.num_worker; 
++i) { + test_global->worker_cpu[i] = cpu; + cpu = odp_cpumask_next(cpumask, cpu); + } + + if (test_global->opt.num_pktio == 0) { + printf("Error: At least one pktio interface needed.\n"); + return -1; + } + + if (MAX_PKTIO_INDEXES <= odp_pktio_max_index()) { + printf("Error: Larger pktio_map[] table needed: %u\n", + odp_pktio_max_index()); + return -1; + } + + if (odp_pool_capability(&pool_capa)) { + printf("Error: Pool capability failed.\n"); + return -1; + } + + pkt_len = MAX_PKT_LEN; + pkt_num = MAX_PKT_NUM; + + if (pool_capa.pkt.max_len && pkt_len > pool_capa.pkt.max_len) + pkt_len = pool_capa.pkt.max_len; + + if (pool_capa.pkt.max_num && pkt_num > pool_capa.pkt.max_num) + pkt_num = pool_capa.pkt.max_num; + + test_global->pkt_len = pkt_len; + test_global->pkt_num = pkt_num; + + if (test_global->num_input_queues == 0) + test_global->num_input_queues = test_global->opt.num_worker; + + if (test_global->num_output_queues == 0) + test_global->num_output_queues = test_global->opt.num_worker; + + return 0; +} + +static void print_config(test_global_t *test_global) +{ + char cpumask_str[ODP_CPUMASK_STR_SIZE]; + int i; + + odp_cpumask_to_str(&test_global->cpumask, cpumask_str, + ODP_CPUMASK_STR_SIZE); + + printf("\n" + "Test configuration:\n" + " max workers: %i\n" + " available worker cpus: %s\n" + " num workers: %i\n" + " worker cpus: ", + test_global->max_workers, + cpumask_str, + test_global->opt.num_worker); + + for (i = 0; i < test_global->opt.num_worker; i++) + printf(" %i", test_global->worker_cpu[i]); + + printf("\n" + " num interfaces: %i\n" + " interface names: ", test_global->opt.num_pktio); + + for (i = 0; i < test_global->opt.num_pktio; i++) + printf(" %s", test_global->pktio[i].name); + + printf("\n" + " num input queues: %i\n" + " num output queues: %i\n", + test_global->num_input_queues, test_global->num_output_queues); + + printf("\n"); +} + +static int open_pktios(test_global_t *test_global) +{ + odp_pool_param_t pool_param; + odp_pktio_param_t 
pktio_param; + odp_pool_t pool; + odp_pktio_t pktio; + odp_pktio_capability_t pktio_capa; + odp_pktio_config_t pktio_config; + odp_pktin_queue_param_t pktin_param; + odp_pktout_queue_param_t pktout_param; + odp_schedule_sync_t sched_sync; + unsigned int num_input, num_output; + char *name; + int i, num_pktio, ret; + + num_pktio = test_global->opt.num_pktio; + num_input = test_global->num_input_queues; + num_output = test_global->num_output_queues; + + odp_pool_param_init(&pool_param); + pool_param.pkt.seg_len = MIN_PKT_SEG_LEN; + pool_param.pkt.len = test_global->pkt_len; + pool_param.pkt.num = test_global->pkt_num; + pool_param.type = ODP_POOL_PACKET; + + pool = odp_pool_create("packet pool", &pool_param); + + test_global->pool = pool; + + if (pool == ODP_POOL_INVALID) { + printf("Error: Pool create.\n"); + return -1; + } + + odp_pktio_param_init(&pktio_param); + pktio_param.in_mode = ODP_PKTIN_MODE_SCHED; + pktio_param.out_mode = ODP_PKTOUT_MODE_DIRECT; + + sched_sync = ODP_SCHED_SYNC_ATOMIC; + + for (i = 0; i < num_pktio; i++) + test_global->pktio[i].pktio = ODP_PKTIO_INVALID; + + /* Open and configure interfaces */ + for (i = 0; i < num_pktio; i++) { + name = test_global->pktio[i].name; + pktio = odp_pktio_open(name, pool, &pktio_param); + + if (pktio == ODP_PKTIO_INVALID) { + printf("Error (%s): Pktio open failed.\n", name); + return -1; + } + + test_global->pktio[i].pktio = pktio; + test_global->pktio[i].pktio_index = odp_pktio_index(pktio); + + ret = odp_pktio_mac_addr(pktio, + test_global->pktio[i].my_addr.addr, + ODPH_ETHADDR_LEN); + if (ret != ODPH_ETHADDR_LEN) { + printf("Error (%s): Bad MAC address len.\n", name); + return -1; + } + + odp_pktio_print(pktio); + + if (odp_pktio_capability(pktio, &pktio_capa)) { + printf("Error (%s): Pktio capa failed.\n", name); + return -1; + } + + if (num_input > pktio_capa.max_input_queues) { + printf("Error (%s): Too many input queues: %u\n", + name, num_input); + return -1; + } + + if (num_output > 
pktio_capa.max_output_queues) { + printf("Error (%s): Too many output queues: %u\n", + name, num_output); + return -1; + } + + odp_pktio_config_init(&pktio_config); + pktio_config.parser.layer = ODP_PROTO_LAYER_NONE; + + odp_pktio_config(pktio, &pktio_config); + + odp_pktin_queue_param_init(&pktin_param); + + pktin_param.queue_param.sched.prio = ODP_SCHED_PRIO_DEFAULT; + pktin_param.queue_param.sched.sync = sched_sync; + pktin_param.queue_param.sched.group = ODP_SCHED_GROUP_ALL; + + if (num_input > 1) { + pktin_param.hash_enable = 1; + pktin_param.hash_proto.proto.ipv4_udp = 1; + } + + pktin_param.num_queues = num_input; + + if (odp_pktin_queue_config(pktio, &pktin_param)) { + printf("Error (%s): Pktin config failed.\n", name); + return -1; + } + + if (odp_pktin_event_queue(pktio, + test_global->pktio[i].input_queue, + num_input) != (int)num_input) { + printf("Error (%s): Input queue query failed.\n", name); + return -1; + } + + odp_pktout_queue_param_init(&pktout_param); + pktout_param.num_queues = num_output; + pktout_param.op_mode = ODP_PKTIO_OP_MT_UNSAFE; + + if (odp_pktout_queue_config(pktio, &pktout_param)) { + printf("Error (%s): Pktout config failed.\n", name); + return -1; + } + + if (odp_pktout_queue(pktio, + test_global->pktio[i].pktout, + num_output) != (int)num_output) { + printf("Error (%s): Output queue query failed.\n", + name); + return -1; + } + } + + return 0; +} + +static void link_pktios(test_global_t *test_global) +{ + int i, num_pktio, input, output; + + num_pktio = test_global->opt.num_pktio; + + printf("Forwarding table (pktio indexes)\n"); + + /* If single interface loopback, otherwise forward to the next + * interface. 
*/ + for (i = 0; i < num_pktio; i++) { + input = test_global->pktio[i].pktio_index; + output = (i + 1) % num_pktio; + test_global->pktio_map[input] = output; + printf(" input %i, output %i\n", + input, + test_global->pktio[output].pktio_index); + } + + printf("\n"); +} + +static int start_pktios(test_global_t *test_global) +{ + int i; + + for (i = 0; i < test_global->opt.num_pktio; i++) { + if (odp_pktio_start(test_global->pktio[i].pktio)) { + printf("Error (%s): Pktio start failed.\n", + test_global->pktio[i].name); + + return -1; + } + + test_global->pktio[i].started = 1; + } + + return 0; +} + +static int stop_pktios(test_global_t *test_global) +{ + odp_pktio_t pktio; + int i, ret = 0; + + for (i = 0; i < test_global->opt.num_pktio; i++) { + pktio = test_global->pktio[i].pktio; + + if (pktio == ODP_PKTIO_INVALID || + test_global->pktio[i].started == 0) + continue; + + if (odp_pktio_stop(pktio)) { + printf("Error (%s): Pktio stop failed.\n", + test_global->pktio[i].name); + ret = -1; + } + } + + return ret; +} + +static void empty_queues(void) +{ + odp_event_t ev; + uint64_t wait_time = odp_schedule_wait_time(ODP_TIME_SEC_IN_NS / 2); + + /* Drop all events from all queues */ + while (1) { + ev = odp_schedule(NULL, wait_time); + + if (ev == ODP_EVENT_INVALID) + break; + + odp_event_free(ev); + } +} + +static int close_pktios(test_global_t *test_global) +{ + odp_pktio_t pktio; + odp_pool_t pool; + int i, ret = 0; + + for (i = 0; i < test_global->opt.num_pktio; i++) { + pktio = test_global->pktio[i].pktio; + + if (pktio == ODP_PKTIO_INVALID) + continue; + + if (odp_pktio_close(pktio)) { + printf("Error (%s): Pktio close failed.\n", + test_global->pktio[i].name); + ret = -1; + } + } + + pool = test_global->pool; + + if (pool == ODP_POOL_INVALID) + return ret; + + if (odp_pool_destroy(pool)) { + printf("Error: Pool destroy failed.\n"); + ret = -1; + } + + return ret; +} + +static void start_workers(odph_odpthread_t thread[], + test_global_t *test_global) +{ + int i; + 
odp_cpumask_t cpumask; + odph_odpthread_params_t param; + int num = test_global->opt.num_worker; + + memset(&param, 0, sizeof(odph_odpthread_params_t)); + param.start = worker_thread; + param.thr_type = ODP_THREAD_WORKER; + param.instance = test_global->instance; + + memset(thread, 0, num * sizeof(odph_odpthread_t)); + + for (i = 0; i < num; i++) { + odp_cpumask_zero(&cpumask); + odp_cpumask_set(&cpumask, test_global->worker_cpu[i]); + test_global->worker_arg[i].worker_id = i; + test_global->worker_arg[i].test_global_ptr = test_global; + param.arg = &test_global->worker_arg[i]; + + odph_odpthreads_create(&thread[i], &cpumask, &param); + } +} + +static void wait_workers(odph_odpthread_t thread[], test_global_t *test_global) +{ + int i; + + for (i = 0; i < test_global->opt.num_worker; ++i) + odph_odpthreads_join(&thread[i]); +} + +int main(int argc, char *argv[]) +{ + odp_instance_t instance; + odp_init_t init; + odp_shm_t shm; + odph_odpthread_t thread[MAX_WORKERS]; + + signal(SIGINT, sig_handler); + + /* List features not to be used (may optimize performance) */ + odp_init_param_init(&init); + init.not_used.feat.cls = 1; + init.not_used.feat.crypto = 1; + init.not_used.feat.ipsec = 1; + init.not_used.feat.timer = 1; + init.not_used.feat.tm = 1; + + /* Init ODP before calling anything else */ + if (odp_init_global(&instance, &init, NULL)) { + printf("Error: Global init failed.\n"); + return -1; + } + + /* Init this thread */ + if (odp_init_local(instance, ODP_THREAD_CONTROL)) { + printf("Error: Local init failed.\n"); + return -1; + } + + /* Reserve memory for args from shared mem */ + shm = odp_shm_reserve("test_global", sizeof(test_global_t), + ODP_CACHE_LINE_SIZE, 0); + + if (shm == ODP_SHM_INVALID) { + printf("Error: shm reserve failed.\n"); + return -1; + } + + test_global = odp_shm_addr(shm); + memset(test_global, 0, sizeof(test_global_t)); + + test_global->instance = instance; + test_global->pool = ODP_POOL_INVALID; + + if (parse_options(argc, argv, test_global)) + 
goto quit; + + odp_sys_info_print(); + + if (config_setup(test_global)) + goto quit; + + print_config(test_global); + + if (open_pktios(test_global)) + goto quit; + + link_pktios(test_global); + + odp_barrier_init(&test_global->worker_start, + test_global->opt.num_worker + 1); + + start_workers(thread, test_global); + + /* Synchronize pktio configuration with workers. Worker are now ready + * to process packets. */ + odp_barrier_wait(&test_global->worker_start); + + if (start_pktios(test_global)) { + test_global->stop_workers = 1; + odp_mb_full(); + } + + wait_workers(thread, test_global); + +quit: + stop_pktios(test_global); + empty_queues(); + close_pktios(test_global); + + if (odp_shm_free(shm)) { + printf("Error: shm free failed.\n"); + return -1; + } + + if (odp_term_local()) { + printf("Error: term local failed.\n"); + return -1; + } + + if (odp_term_global(instance)) { + printf("Error: term global failed.\n"); + return -1; + } + + return 0; +} -- cgit v1.2.3 From 72febbae5ea55a5fd051978fbfa0f669cf0e99fc Mon Sep 17 00:00:00 2001 From: Bogdan Pricope Date: Thu, 22 Mar 2018 16:57:49 +0200 Subject: linux-gen: pktio: dpdk: accept UDPv4 packets with all-zero csum Accept IPv4 UDP packets with all-zero checksum field even if DPDK reported it as bad checksum value. This patch fixes bug: https://bugs.linaro.org/show_bug.cgi?id=3685. 
Signed-off-by: Bogdan Pricope Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/pktio/dpdk.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/pktio/dpdk.c index 7b9fed72d..83dbec4f5 100644 --- a/platform/linux-generic/pktio/dpdk.c +++ b/platform/linux-generic/pktio/dpdk.c @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -419,6 +420,7 @@ MEMPOOL_REGISTER_OPS(ops_stack); #define IP4_CSUM_RESULT(m) (m->ol_flags & PKT_RX_IP_CKSUM_MASK) #define L4_CSUM_RESULT(m) (m->ol_flags & PKT_RX_L4_CKSUM_MASK) #define HAS_L4_PROTO(m, proto) ((m->packet_type & RTE_PTYPE_L4_MASK) == proto) +#define UDP4_CSUM(_p) (((_odp_udphdr_t *)_odp_packet_l4_ptr(_p, NULL))->chksum) #define PKTIN_CSUM_BITS 0x1C @@ -451,6 +453,12 @@ static inline int pkt_set_ol_rx(odp_pktin_config_opt_t *pktin_cfg, if (packet_csum_result == PKT_RX_L4_CKSUM_GOOD) { pkt_hdr->p.input_flags.l4_chksum_done = 1; } else if (packet_csum_result != PKT_RX_L4_CKSUM_UNKNOWN) { + if (pkt_hdr->p.input_flags.ipv4 && + pkt_hdr->p.input_flags.udp && + !UDP4_CSUM(packet_handle(pkt_hdr))) { + pkt_hdr->p.input_flags.l4_chksum_done = 1; + return 0; + } if (pktin_cfg->bit.drop_udp_err) return -1; -- cgit v1.2.3 From 0225a8a396ddc7168be096f28b0a711184a48ef9 Mon Sep 17 00:00:00 2001 From: Bogdan Pricope Date: Thu, 22 Mar 2018 08:51:27 +0200 Subject: linux-gen: pktio: dpdk: fix IPv4 csum calculation when l4 offset is not set IPv4 header contains length of the header and options in IHL field. It can be used to replace L3 length calculation based on offsets. This patch fixes bug: https://bugs.linaro.org/show_bug.cgi?id=3686. 
Signed-off-by: Bogdan Pricope Reviewed-and-tested-by: Matias Elo Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/pktio/dpdk.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/pktio/dpdk.c index 83dbec4f5..d40ad954f 100644 --- a/platform/linux-generic/pktio/dpdk.c +++ b/platform/linux-generic/pktio/dpdk.c @@ -656,11 +656,7 @@ static inline void pkt_set_ol_tx(odp_pktout_config_opt_t *pktout_cfg, if (!ipv4_chksum_pkt && !udp_chksum_pkt && !tcp_chksum_pkt) return; - if (pkt_p->l4_offset == ODP_PACKET_OFFSET_INVALID) - return; - mbuf->l2_len = pkt_p->l3_offset - pkt_p->l2_offset; - mbuf->l3_len = pkt_p->l4_offset - pkt_p->l3_offset; if (l3_proto_v4) mbuf->ol_flags = PKT_TX_IPV4; @@ -671,8 +667,14 @@ static inline void pkt_set_ol_tx(odp_pktout_config_opt_t *pktout_cfg, mbuf->ol_flags |= PKT_TX_IP_CKSUM; ((struct ipv4_hdr *)l3_hdr)->hdr_checksum = 0; + mbuf->l3_len = _ODP_IPV4HDR_IHL(*(uint8_t *)l3_hdr) * 4; } + if (pkt_p->l4_offset == ODP_PACKET_OFFSET_INVALID) + return; + + mbuf->l3_len = pkt_p->l4_offset - pkt_p->l3_offset; + l4_hdr = (void *)(mbuf_data + pkt_p->l4_offset); if (udp_chksum_pkt) { -- cgit v1.2.3 From 59d636481fd1771107f3eb4632c2a9fc868afc11 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Mar 2018 15:50:22 +0300 Subject: tests: add IPsec performance test Use this tool to measure IPsec performance with different algorithms For now only outbound direction is supported. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/Makefile.am | 2 + test/performance/odp_ipsec.c | 1037 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1039 insertions(+) create mode 100644 test/performance/odp_ipsec.c diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index 458c8fb0e..c8f0bdecf 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -4,6 +4,7 @@ TESTS_ENVIRONMENT += TEST_DIR=${builddir} EXECUTABLES = odp_bench_packet \ odp_crypto \ + odp_ipsec \ odp_pktio_perf COMPILE_ONLY = odp_l2fwd \ @@ -30,6 +31,7 @@ bin_PROGRAMS = $(EXECUTABLES) $(COMPILE_ONLY) odp_bench_packet_SOURCES = odp_bench_packet.c odp_crypto_SOURCES = odp_crypto.c +odp_ipsec_SOURCES = odp_ipsec.c odp_pktio_ordered_SOURCES = odp_pktio_ordered.c dummy_crc.h odp_sched_latency_SOURCES = odp_sched_latency.c odp_sched_pktio_SOURCES = odp_sched_pktio.c diff --git a/test/performance/odp_ipsec.c b/test/performance/odp_ipsec.c new file mode 100644 index 000000000..6552c0e0b --- /dev/null +++ b/test/performance/odp_ipsec.c @@ -0,0 +1,1037 @@ +/* Copyright (c) 2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "config.h" + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif /* _GNU_SOURCE */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define app_err(fmt, ...) 
\ + fprintf(stderr, "%s:%d:%s(): Error: " fmt, __FILE__, \ + __LINE__, __func__, ##__VA_ARGS__) + +/** @def POOL_NUM_PKT + * Number of packets in the pool + */ +#define POOL_NUM_PKT 64 + +static uint8_t test_salt[16] = "0123456789abcdef"; + +static uint8_t test_key16[16] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, +}; + +static uint8_t test_key20[20] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, +}; + +static uint8_t test_key24[24] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18 +}; + +/** + * Structure that holds template for sa create call + * for different algorithms supported by test + */ +typedef struct { + const char *name; /**< Algorithm name */ + odp_ipsec_crypto_param_t crypto; /**< Prefilled SA crypto param */ +} ipsec_alg_config_t; + +/** + * Parsed command line crypto arguments. Describes test configuration. + */ +typedef struct { + /** + * If non zero prints content of packets. Enabled by -d or + * --debug option. + */ + int debug_packets; + + /** + * If non zero Try to run crypto operation in place. Note some + * implementation may not support such mode. Enabled by -n or + * --inplace option. + */ + int in_place; + + /** + * Maximum number of outstanding encryption requests. Note code + * poll for results over queue and if nothing is available it can + * submit more encryption requests up to maximum number specified by + * this option. Specified through -f or --flight option. + */ + int in_flight; + + /** + * Number of iteration to repeat crypto operation to get good + * average number. Specified through -i or --terations option. + * Default is 10000. + */ + int iteration_count; + + /** + * Payload size to test. If 0 set of predefined payload sizes + * is tested. 
Specified through -l or --payload option. + */ + unsigned int payload_length; + + /** + * Pointer to selected algorithm to test. If NULL all available + * algorithms are tested. Name of algorithm is passed through + * -a or --algorithm option. + */ + ipsec_alg_config_t *alg_config; + + /** + * Use scheduler to get completion events from crypto operation. + * Specified through -s argument. + * */ + int schedule; + + /* + * Poll completion queue for crypto completion events. + * Specified through -p argument. + */ + int poll; + + /* + * Use tunnel instead of transport mode. + * Specified through -t argument. + */ + int tunnel; +} ipsec_args_t; + +/* + * Helper structure that holds averages for test of one algorithm + * for given payload size. + */ +typedef struct { + /** + * Elapsed time for one crypto operation. + */ + double elapsed; + + /** + * CPU time spent per one crypto operation by whole process + * i.e include current and all other threads in process. + * It is filled with 'getrusage(RUSAGE_SELF, ...)' call. + */ + double rusage_self; + + /** + * CPU time spent per one crypto operation by current thread + * only. It is filled with 'getrusage(RUSAGE_THREAD, ...)' + * call. + */ + double rusage_thread; +} ipsec_run_result_t; + +/** + * Structure holds one snap to misc times of current process. + */ +typedef struct { + struct timeval tv; /**< Elapsed time */ + struct rusage ru_self; /**< Rusage value for whole process */ + struct rusage ru_thread; /**< Rusage value for current thread */ +} time_record_t; + +/** + * Set of predefined payloads. 
+ */ +static unsigned int global_payloads[] = { + 64, + 256, + 1024, + 8192, + 16384 +}; + +/** Number of payloads used in the test */ +static unsigned int global_num_payloads; + +/** + * Set of known algorithms to test + */ +static ipsec_alg_config_t algs_config[] = { + { + .name = "3des-cbc-null", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_3DES_CBC, + .cipher_key = { + .data = test_key24, + .length = sizeof(test_key24) + }, + .auth_alg = ODP_AUTH_ALG_NULL + }, + }, + { + .name = "3des-cbc-hmac-md5-96", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_3DES_CBC, + .cipher_key = { + .data = test_key24, + .length = sizeof(test_key24) + }, + .auth_alg = ODP_AUTH_ALG_MD5_HMAC, + .auth_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + }, + }, + { + .name = "null-hmac-md5-96", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_MD5_HMAC, + .auth_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + }, + }, + { + .name = "aes-cbc-null", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_AES_CBC, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .auth_alg = ODP_AUTH_ALG_NULL + }, + }, + { + .name = "aes-cbc-hmac-sha1-96", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_AES_CBC, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .auth_alg = ODP_AUTH_ALG_SHA1_HMAC, + .auth_key = { + .data = test_key20, + .length = sizeof(test_key20) + }, + }, + }, + { + .name = "null-hmac-sha1-96", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_SHA1_HMAC, + .auth_key = { + .data = test_key20, + .length = sizeof(test_key20) + }, + }, + }, + { + .name = "aes-gcm", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_AES_GCM, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .cipher_key_extra = { + .data = test_salt, + .length = 4, + }, + .auth_alg = ODP_AUTH_ALG_AES_GCM, + }, + }, +}; + +/** + * Find corresponding config for given name. 
Returns NULL + * if config for given name is not found. + */ +static ipsec_alg_config_t * +find_config_by_name(const char *name) +{ + unsigned int i; + ipsec_alg_config_t *ret = NULL; + + for (i = 0; i < (sizeof(algs_config) / sizeof(ipsec_alg_config_t)); + i++) { + if (strcmp(algs_config[i].name, name) == 0) { + ret = algs_config + i; + break; + } + } + return ret; +} + +/** + * Helper function that prints list of algorithms that this + * test understands. + */ +static void +print_config_names(const char *prefix) +{ + unsigned int i; + + for (i = 0; i < (sizeof(algs_config) / sizeof(ipsec_alg_config_t)); + i++) { + printf("%s %s\n", prefix, algs_config[i].name); + } +} + +/** + * Snap current time values and put them into 'rec'. + */ +static void +fill_time_record(time_record_t *rec) +{ + gettimeofday(&rec->tv, NULL); + getrusage(RUSAGE_SELF, &rec->ru_self); + getrusage(RUSAGE_THREAD, &rec->ru_thread); +} + +/** + * Calculated CPU time difference for given two rusage structures. + * Note it adds user space and system time together. + */ +static unsigned long long +get_rusage_diff(struct rusage *start, struct rusage *end) +{ + unsigned long long rusage_diff; + unsigned long long rusage_start; + unsigned long long rusage_end; + + rusage_start = (start->ru_utime.tv_sec * 1000000) + + (start->ru_utime.tv_usec); + rusage_start += (start->ru_stime.tv_sec * 1000000) + + (start->ru_stime.tv_usec); + + rusage_end = (end->ru_utime.tv_sec * 1000000) + + (end->ru_utime.tv_usec); + rusage_end += (end->ru_stime.tv_sec * 1000000) + + (end->ru_stime.tv_usec); + + rusage_diff = rusage_end - rusage_start; + + return rusage_diff; +} + +/** + * Get diff for RUSAGE_SELF (whole process) between two time snap + * records. + */ +static unsigned long long +get_rusage_self_diff(time_record_t *start, time_record_t *end) +{ + return get_rusage_diff(&start->ru_self, &end->ru_self); +} + +/** + * Get diff for RUSAGE_THREAD (current thread only) between two + * time snap records. 
+ */ +static unsigned long long +get_rusage_thread_diff(time_record_t *start, time_record_t *end) +{ + return get_rusage_diff(&start->ru_thread, &end->ru_thread); +} + +/** + * Get diff of elapsed time between two time snap records + */ +static unsigned long long +get_elapsed_usec(time_record_t *start, time_record_t *end) +{ + unsigned long long s; + unsigned long long e; + + s = (start->tv.tv_sec * 1000000) + (start->tv.tv_usec); + e = (end->tv.tv_sec * 1000000) + (end->tv.tv_usec); + + return e - s; +} + +/** + * Print header line for our report. + */ +static void +print_result_header(void) +{ + printf("\n%30.30s %15s %15s %15s %15s %15s %15s\n", + "algorithm", "avg over #", "payload (bytes)", "elapsed (us)", + "rusg self (us)", "rusg thrd (us)", "throughput (Kb)"); +} + +/** + * Print one line of our report. + */ +static void +print_result(ipsec_args_t *cargs, + unsigned int payload_length, + ipsec_alg_config_t *config, + ipsec_run_result_t *result) +{ + unsigned int throughput; + + throughput = (1000000.0 / result->elapsed) * payload_length / 1024; + printf("%30.30s %15d %15d %15.3f %15.3f %15.3f %15d\n", + config->name, cargs->iteration_count, payload_length, + result->elapsed, result->rusage_self, result->rusage_thread, + throughput); +} + +#define IPV4ADDR(a, b, c, d) odp_cpu_to_be_32((a << 24) | \ + (b << 16) | \ + (c << 8) | \ + (d << 0)) + +/** + * Create ODP IPsec SA for given config. 
+ */ +static odp_ipsec_sa_t +create_sa_from_config(ipsec_alg_config_t *config, + ipsec_args_t *cargs) +{ + odp_ipsec_sa_param_t param; + odp_queue_t out_queue; + + odp_ipsec_sa_param_init(&param); + memcpy(&param.crypto, &config->crypto, + sizeof(odp_ipsec_crypto_param_t)); + + param.proto = ODP_IPSEC_ESP; + param.dir = ODP_IPSEC_DIR_OUTBOUND; + + if (cargs->tunnel) { + uint32_t src = IPV4ADDR(10, 0, 111, 2); + uint32_t dst = IPV4ADDR(10, 0, 222, 2); + odp_ipsec_tunnel_param_t tunnel; + + memset(&tunnel, 0, sizeof(tunnel)); + tunnel.type = ODP_IPSEC_TUNNEL_IPV4; + tunnel.ipv4.src_addr = &src; + tunnel.ipv4.dst_addr = &dst; + tunnel.ipv4.ttl = 64; + + param.mode = ODP_IPSEC_MODE_TUNNEL; + param.outbound.tunnel = tunnel; + } else { + param.mode = ODP_IPSEC_MODE_TRANSPORT; + } + + if (cargs->schedule || cargs->poll) { + out_queue = odp_queue_lookup("ipsec-out"); + if (out_queue == ODP_QUEUE_INVALID) { + app_err("ipsec-out queue not found\n"); + return ODP_IPSEC_SA_INVALID; + } + param.dest_queue = out_queue; + } else { + param.dest_queue = ODP_QUEUE_INVALID; + } + + return odp_ipsec_sa_create(&param); +} + +static uint8_t test_data[] = { + /* IP */ + 0x45, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x01, 0xac, 0x27, 0xc0, 0xa8, 0x6f, 0x02, + 0xc0, 0xa8, 0xde, 0x02, + + /* ICMP */ + 0x08, 0x00, 0xfb, 0x37, 0x12, 0x34, 0x00, 0x00 +}; + +static odp_packet_t +make_packet(odp_pool_t pkt_pool, unsigned int payload_length) +{ + odp_packet_t pkt; + + if (payload_length < sizeof(test_data)) + return ODP_PACKET_INVALID; + + pkt = odp_packet_alloc(pkt_pool, payload_length); + if (pkt == ODP_PACKET_INVALID) { + app_err("failed to allocate buffer\n"); + return pkt; + } + + odp_packet_copy_from_mem(pkt, 0, sizeof(test_data), test_data); + odp_packet_l3_offset_set(pkt, 0); + + uint8_t *mem = odp_packet_data(pkt); + ((odph_ipv4hdr_t *)mem)->tot_len = odp_cpu_to_be_16(payload_length); + memset(mem + sizeof(test_data), 1, payload_length - sizeof(test_data)); + + return pkt; +} + +/** + * 
Run measurement iterations for given config and payload size. + * Result of run returned in 'result' out parameter. + */ +static int +run_measure_one(ipsec_args_t *cargs, + odp_ipsec_sa_t sa, + unsigned int payload_length, + time_record_t *start, + time_record_t *end) +{ + odp_ipsec_out_param_t param; + odp_pool_t pkt_pool; + odp_packet_t pkt = ODP_PACKET_INVALID; + int rc = 0; + + pkt_pool = odp_pool_lookup("packet_pool"); + if (pkt_pool == ODP_POOL_INVALID) { + app_err("pkt_pool not found\n"); + return -1; + } + + int packets_sent = 0; + int packets_received = 0; + + /* Initialize parameters block */ + memset(&param, 0, sizeof(param)); + param.num_sa = 1; + param.num_opt = 0; + param.sa = &sa; + + fill_time_record(start); + + while ((packets_sent < cargs->iteration_count) || + (packets_received < cargs->iteration_count)) { + if ((packets_sent < cargs->iteration_count) && + (packets_sent - packets_received < + cargs->in_flight)) { + odp_packet_t out_pkt; + int num_out = 1; + + pkt = make_packet(pkt_pool, payload_length); + if (ODP_PACKET_INVALID == pkt) + return -1; + + out_pkt = cargs->in_place ? pkt : ODP_PACKET_INVALID; + + if (cargs->debug_packets) + odp_packet_print_data(pkt, 0, + odp_packet_len(pkt)); + + rc = odp_ipsec_out(&pkt, 1, + &out_pkt, &num_out, + &param); + if (rc <= 0) { + app_err("failed odp_ipsec_out: rc = %d\n", + rc); + odp_packet_free(pkt); + break; + } + if (odp_packet_has_error(out_pkt)) { + odp_ipsec_packet_result_t result; + + odp_ipsec_result(&result, out_pkt); + app_err("Received error packet: %d\n", + result.status.error.all); + } + packets_sent += rc; + packets_received += num_out; + if (cargs->debug_packets) + odp_packet_print_data(out_pkt, 0, + odp_packet_len(out_pkt)); + odp_packet_free(out_pkt); + } + } + + fill_time_record(end); + + return rc < 0 ? 
rc : 0; +} + +static int +run_measure_one_async(ipsec_args_t *cargs, + odp_ipsec_sa_t sa, + unsigned int payload_length, + time_record_t *start, + time_record_t *end) +{ + odp_ipsec_out_param_t param; + odp_pool_t pkt_pool; + odp_queue_t out_queue; + odp_packet_t pkt = ODP_PACKET_INVALID; + int rc = 0; + + pkt_pool = odp_pool_lookup("packet_pool"); + if (pkt_pool == ODP_POOL_INVALID) { + app_err("pkt_pool not found\n"); + return -1; + } + + out_queue = odp_queue_lookup("ipsec-out"); + if (out_queue == ODP_QUEUE_INVALID) { + app_err("ipsec-out queue not found\n"); + return -1; + } + + int packets_sent = 0; + int packets_received = 0; + + /* Initialize parameters block */ + memset(&param, 0, sizeof(param)); + param.num_sa = 1; + param.num_opt = 0; + param.sa = &sa; + + fill_time_record(start); + + while ((packets_sent < cargs->iteration_count) || + (packets_received < cargs->iteration_count)) { + odp_event_t ev; + + if ((packets_sent < cargs->iteration_count) && + (packets_sent - packets_received < + cargs->in_flight)) { + pkt = make_packet(pkt_pool, payload_length); + if (ODP_PACKET_INVALID == pkt) + return -1; + + if (cargs->debug_packets) + odp_packet_print_data(pkt, 0, + odp_packet_len(pkt)); + + rc = odp_ipsec_out_enq(&pkt, 1, + &param); + if (rc <= 0) { + app_err("failed odp_crypto_packet_op_enq: rc = %d\n", + rc); + odp_packet_free(pkt); + break; + } + packets_sent += rc; + } + + if (cargs->schedule) + ev = odp_schedule(NULL, + ODP_SCHED_NO_WAIT); + else + ev = odp_queue_deq(out_queue); + + while (ev != ODP_EVENT_INVALID) { + odp_packet_t out_pkt; + odp_ipsec_packet_result_t result; + + out_pkt = odp_ipsec_packet_from_event(ev); + odp_ipsec_result(&result, out_pkt); + + if (cargs->debug_packets) + odp_packet_print_data(out_pkt, 0, + odp_packet_len(out_pkt)); + odp_packet_free(out_pkt); + packets_received++; + if (cargs->schedule) + ev = odp_schedule(NULL, + ODP_SCHED_NO_WAIT); + else + ev = odp_queue_deq(out_queue); + } + } + + fill_time_record(end); + + return rc < 0 
? rc : 0; +} + +/** + * Process one algorithm. Note if payload size is specified it is + * only one run. Or iterate over set of predefined payloads. + */ +static int +run_measure_one_config(ipsec_args_t *cargs, + ipsec_alg_config_t *config) +{ + odp_ipsec_sa_t sa; + int rc = 0; + unsigned int num_payloads = global_num_payloads; + unsigned int *payloads = global_payloads; + unsigned int i; + + sa = create_sa_from_config(config, cargs); + if (sa == ODP_IPSEC_SA_INVALID) { + app_err("IPsec SA create failed.\n"); + return -1; + } + + print_result_header(); + if (cargs->payload_length) { + num_payloads = 1; + payloads = &cargs->payload_length; + } + + for (i = 0; i < num_payloads; i++) { + double count; + ipsec_run_result_t result; + time_record_t start, end; + + if (cargs->schedule || cargs->poll) + rc = run_measure_one_async(cargs, sa, + payloads[i], + &start, &end); + else + rc = run_measure_one(cargs, sa, + payloads[i], + &start, &end); + if (rc) + break; + + count = get_elapsed_usec(&start, &end); + result.elapsed = count / cargs->iteration_count; + + count = get_rusage_self_diff(&start, &end); + result.rusage_self = count / cargs->iteration_count; + + count = get_rusage_thread_diff(&start, &end); + result.rusage_thread = count / cargs->iteration_count; + + print_result(cargs, payloads[i], + config, &result); + } + + odp_ipsec_sa_disable(sa); + if (cargs->schedule || cargs->poll) { + odp_queue_t out_queue = odp_queue_lookup("ipsec-out"); + odp_ipsec_status_t status; + + while (1) { + odp_event_t event = odp_queue_deq(out_queue); + + if (event != ODP_EVENT_INVALID && + odp_event_type(event) == ODP_EVENT_IPSEC_STATUS && + odp_ipsec_status(&status, event) == ODP_IPSEC_OK && + status.id == ODP_IPSEC_STATUS_SA_DISABLE && + status.sa == sa) + break; + } + } + odp_ipsec_sa_destroy(sa); + + return rc; +} + +typedef struct thr_arg { + ipsec_args_t ipsec_args; + ipsec_alg_config_t *ipsec_alg_config; +} thr_arg_t; + +static int run_thr_func(void *arg) +{ + thr_arg_t 
*thr_args = (thr_arg_t *)arg; + + run_measure_one_config(&thr_args->ipsec_args, + thr_args->ipsec_alg_config); + return 0; +} + +/** + * Prinf usage information + */ +static void usage(char *progname) +{ + printf("\n" + "Usage: %s OPTIONS\n" + " E.g. %s -i 100000\n" + "\n" + "OpenDataPlane crypto speed measure.\n" + "Optional OPTIONS\n" + " -a, --algorithm Specify algorithm name (default all)\n" + " Supported values are:\n", + progname, progname); + + print_config_names(" "); + printf(" -d, --debug Enable dump of processed packets.\n" + " -f, --flight Max number of packet processed in parallel (default 1)\n" + " -i, --iterations Number of iterations.\n" + " -n, --inplace Encrypt on place.\n" + " -l, --payload Payload length.\n" + " -s, --schedule Use scheduler for completion events.\n" + " -p, --poll Poll completion queue for completion events.\n" + " -t, --tunnel Use tunnel-mode IPsec transformation.\n" + " -h, --help Display help and exit.\n" + "\n"); +} + +static void parse_args(int argc, char *argv[], ipsec_args_t *cargs) +{ + int opt; + int long_index; + static const struct option longopts[] = { + {"algorithm", optional_argument, NULL, 'a'}, + {"debug", no_argument, NULL, 'd'}, + {"flight", optional_argument, NULL, 'f'}, + {"help", no_argument, NULL, 'h'}, + {"iterations", optional_argument, NULL, 'i'}, + {"inplace", no_argument, NULL, 'n'}, + {"payload", optional_argument, NULL, 'l'}, + {"sessions", optional_argument, NULL, 'm'}, + {"poll", no_argument, NULL, 'p'}, + {"schedule", no_argument, NULL, 's'}, + {"tunnel", no_argument, NULL, 't'}, + {NULL, 0, NULL, 0} + }; + + static const char *shortopts = "+a:c:df:hi:m:nl:spt"; + + /* let helper collect its own arguments (e.g. 
--odph_proc) */ + odph_parse_options(argc, argv, shortopts, longopts); + + cargs->in_place = 0; + cargs->in_flight = 1; + cargs->debug_packets = 0; + cargs->iteration_count = 10000; + cargs->payload_length = 0; + cargs->alg_config = NULL; + cargs->schedule = 0; + + opterr = 0; /* do not issue errors on helper options */ + + while (1) { + opt = getopt_long(argc, argv, shortopts, longopts, &long_index); + + if (opt == -1) + break; /* No more options */ + + switch (opt) { + case 'a': + cargs->alg_config = find_config_by_name(optarg); + if (!cargs->alg_config) { + printf("cannot test crypto '%s' configuration\n", + optarg); + usage(argv[0]); + exit(-1); + } + break; + case 'd': + cargs->debug_packets = 1; + break; + case 'i': + cargs->iteration_count = atoi(optarg); + break; + case 'f': + cargs->in_flight = atoi(optarg); + break; + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + case 'n': + cargs->in_place = 1; + break; + case 'l': + cargs->payload_length = atoi(optarg); + break; + case 's': + cargs->schedule = 1; + break; + case 'p': + cargs->poll = 1; + break; + case 't': + cargs->tunnel = 1; + break; + default: + break; + } + } + + optind = 1; /* reset 'extern optind' from the getopt lib */ + + if (cargs->schedule && cargs->poll) { + printf("-s (schedule) and -p (poll) options are not compatible\n"); + usage(argv[0]); + exit(-1); + } +} + +int main(int argc, char *argv[]) +{ + ipsec_args_t cargs; + odp_pool_t pool; + odp_queue_param_t qparam; + odp_pool_param_t param; + odp_queue_t out_queue = ODP_QUEUE_INVALID; + thr_arg_t thr_arg; + odp_cpumask_t cpumask; + char cpumaskstr[ODP_CPUMASK_STR_SIZE]; + int num_workers = 1; + odph_odpthread_t thr[num_workers]; + odp_instance_t instance; + odp_pool_capability_t capa; + odp_ipsec_config_t config; + uint32_t max_seg_len; + unsigned int i; + + memset(&cargs, 0, sizeof(cargs)); + + /* Parse and store the application arguments */ + parse_args(argc, argv, &cargs); + + /* Init ODP before calling anything else */ + 
if (odp_init_global(&instance, NULL, NULL)) { + app_err("ODP global init failed.\n"); + exit(EXIT_FAILURE); + } + + /* Init this thread */ + if (odp_init_local(instance, ODP_THREAD_WORKER)) { + app_err("ODP local init failed.\n"); + exit(EXIT_FAILURE); + } + + if (odp_pool_capability(&capa)) { + app_err("Pool capability request failed.\n"); + exit(EXIT_FAILURE); + } + + max_seg_len = capa.pkt.max_seg_len; + + for (i = 0; i < sizeof(global_payloads) / sizeof(unsigned int); i++) { + if (global_payloads[i] > max_seg_len) + break; + } + + global_num_payloads = i; + + /* Create packet pool */ + odp_pool_param_init(&param); + param.pkt.seg_len = max_seg_len; + param.pkt.len = max_seg_len; + param.pkt.num = POOL_NUM_PKT; + param.type = ODP_POOL_PACKET; + pool = odp_pool_create("packet_pool", &param); + + if (pool == ODP_POOL_INVALID) { + app_err("packet pool create failed.\n"); + exit(EXIT_FAILURE); + } + odp_pool_print(pool); + + odp_ipsec_config_init(&config); + config.max_num_sa = 2; + config.inbound.chksums.all_chksum = 0; + config.outbound.all_chksum = 0; + + odp_queue_param_init(&qparam); + if (cargs.schedule) { + qparam.type = ODP_QUEUE_TYPE_SCHED; + qparam.sched.prio = ODP_SCHED_PRIO_DEFAULT; + qparam.sched.sync = ODP_SCHED_SYNC_PARALLEL; + qparam.sched.group = ODP_SCHED_GROUP_ALL; + out_queue = odp_queue_create("ipsec-out", &qparam); + } else if (cargs.poll) { + qparam.type = ODP_QUEUE_TYPE_PLAIN; + out_queue = odp_queue_create("ipsec-out", &qparam); + } + if (cargs.schedule || cargs.poll) { + if (out_queue == ODP_QUEUE_INVALID) { + app_err("ipsec-out queue create failed.\n"); + exit(EXIT_FAILURE); + } + config.inbound_mode = ODP_IPSEC_OP_MODE_ASYNC; + config.outbound_mode = ODP_IPSEC_OP_MODE_ASYNC; + config.inbound.default_queue = out_queue; + } else { + config.inbound_mode = ODP_IPSEC_OP_MODE_SYNC; + config.outbound_mode = ODP_IPSEC_OP_MODE_SYNC; + config.inbound.default_queue = ODP_QUEUE_INVALID; + } + + if (cargs.schedule) { + printf("Run in async scheduled mode\n"); 
+ + thr_arg.ipsec_args = cargs; + thr_arg.ipsec_alg_config = cargs.alg_config; + num_workers = odp_cpumask_default_worker(&cpumask, + num_workers); + (void)odp_cpumask_to_str(&cpumask, cpumaskstr, + sizeof(cpumaskstr)); + printf("num worker threads: %i\n", + num_workers); + printf("first CPU: %i\n", + odp_cpumask_first(&cpumask)); + printf("cpu mask: %s\n", + cpumaskstr); + } else if (cargs.poll) { + printf("Run in async poll mode\n"); + } else { + printf("Run in sync mode\n"); + } + + memset(thr, 0, sizeof(thr)); + + if (cargs.alg_config) { + odph_odpthread_params_t thr_param; + + memset(&thr_param, 0, sizeof(thr_param)); + thr_param.start = run_thr_func; + thr_param.arg = &thr_arg; + thr_param.thr_type = ODP_THREAD_WORKER; + thr_param.instance = instance; + + if (cargs.schedule) { + odph_odpthreads_create(&thr[0], &cpumask, &thr_param); + odph_odpthreads_join(&thr[0]); + } else { + run_measure_one_config(&cargs, cargs.alg_config); + } + } else { + unsigned int i; + + for (i = 0; + i < (sizeof(algs_config) / sizeof(ipsec_alg_config_t)); + i++) { + run_measure_one_config(&cargs, algs_config + i); + } + } + + if (cargs.schedule || cargs.poll) + odp_queue_destroy(out_queue); + if (odp_pool_destroy(pool)) { + app_err("Error: pool destroy\n"); + exit(EXIT_FAILURE); + } + + if (odp_term_local()) { + app_err("Error: term local\n"); + exit(EXIT_FAILURE); + } + + if (odp_term_global(instance)) { + app_err("Error: term global\n"); + exit(EXIT_FAILURE); + } + + return 0; +} + -- cgit v1.2.3 From 68b1a52611e6ac9be306e5a17d009a6fd46c455e Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 23 Mar 2018 18:19:48 +0300 Subject: performance: ipsec: add AH measurements Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/odp_ipsec.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/performance/odp_ipsec.c b/test/performance/odp_ipsec.c index 6552c0e0b..6fdd033cb 
100644 --- a/test/performance/odp_ipsec.c +++ b/test/performance/odp_ipsec.c @@ -122,6 +122,12 @@ typedef struct { * Specified through -t argument. */ int tunnel; + + /* + * Use AH transformation. + * Specified through -u argument. + */ + int ah; } ipsec_args_t; /* @@ -778,6 +784,7 @@ static void usage(char *progname) " -s, --schedule Use scheduler for completion events.\n" " -p, --poll Poll completion queue for completion events.\n" " -t, --tunnel Use tunnel-mode IPsec transformation.\n" + " -u, --ah Use AH transformation instead of ESP.\n" " -h, --help Display help and exit.\n" "\n"); } @@ -798,10 +805,11 @@ static void parse_args(int argc, char *argv[], ipsec_args_t *cargs) {"poll", no_argument, NULL, 'p'}, {"schedule", no_argument, NULL, 's'}, {"tunnel", no_argument, NULL, 't'}, + {"ah", no_argument, NULL, 'u'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = "+a:c:df:hi:m:nl:spt"; + static const char *shortopts = "+a:c:df:hi:m:nl:sptu"; /* let helper collect its own arguments (e.g. 
--odph_proc) */ odph_parse_options(argc, argv, shortopts, longopts); @@ -813,6 +821,7 @@ static void parse_args(int argc, char *argv[], ipsec_args_t *cargs) cargs->payload_length = 0; cargs->alg_config = NULL; cargs->schedule = 0; + cargs->ah = 0; opterr = 0; /* do not issue errors on helper options */ @@ -860,6 +869,9 @@ static void parse_args(int argc, char *argv[], ipsec_args_t *cargs) case 't': cargs->tunnel = 1; break; + case 'u': + cargs->ah = 1; + break; default: break; } @@ -1011,6 +1023,10 @@ int main(int argc, char *argv[]) for (i = 0; i < (sizeof(algs_config) / sizeof(ipsec_alg_config_t)); i++) { + if (cargs.ah && + algs_config[i].crypto.cipher_alg != + ODP_CIPHER_ALG_NULL) + continue; run_measure_one_config(&cargs, algs_config + i); } } -- cgit v1.2.3 From 9f3311f92d28ef9fdf6ef3d8ec8b90efe7a832d6 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 23 Mar 2018 18:42:42 +0300 Subject: performance: ipsec: add more algorithms Run more performance tests on IPsec transformation. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/odp_ipsec.c | 117 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/test/performance/odp_ipsec.c b/test/performance/odp_ipsec.c index 6fdd033cb..587293dbe 100644 --- a/test/performance/odp_ipsec.c +++ b/test/performance/odp_ipsec.c @@ -51,6 +51,30 @@ static uint8_t test_key24[24] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x15, 0x16, 0x17, 0x18 }; +static uint8_t test_key32[32] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, + 0x1f, 0x20, +}; + +static uint8_t test_key64[64] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, + 0x1f, 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x4b, 0x2c, 0x2d, + 0x2e, 0x2f, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x55, 0x36, 0x37, + 0x38, 0x39, 0x5a, 0x3b, 0x3c, + 0x3d, 0x3e, 0x5f, 0x40, +}; + /** * Structure that holds template for sa create call * for different algorithms supported by test @@ -245,6 +269,32 @@ static ipsec_alg_config_t algs_config[] = { }, }, }, + { + .name = "aes-ctr-null", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_AES_CTR, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .auth_alg = ODP_AUTH_ALG_NULL + }, + }, + { + .name = "aes-ctr-hmac-sha1-96", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_AES_CTR, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .auth_alg = ODP_AUTH_ALG_SHA1_HMAC, + .auth_key = { + .data = test_key20, + .length = sizeof(test_key20) + }, + }, + }, { .name = "null-hmac-sha1-96", .crypto = { @@ -256,6 +306,43 @@ static ipsec_alg_config_t algs_config[] = { }, }, }, + { + .name = 
"null-hmac-sha256-128", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_SHA256_HMAC, + .auth_key = { + .data = test_key32, + .length = sizeof(test_key32) + }, + }, + }, + { + .name = "null-hmac-sha512-256", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_SHA512_HMAC, + .auth_key = { + .data = test_key64, + .length = sizeof(test_key64) + }, + }, + }, + { + .name = "null-aes-gmac", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_AES_GMAC, + .auth_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .cipher_key_extra = { + .data = test_salt, + .length = 4, + }, + }, + }, { .name = "aes-gcm", .crypto = { @@ -271,6 +358,36 @@ static ipsec_alg_config_t algs_config[] = { .auth_alg = ODP_AUTH_ALG_AES_GCM, }, }, + { + .name = "aes-ccm", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_AES_CCM, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .cipher_key_extra = { + .data = test_salt, + .length = 3, + }, + .auth_alg = ODP_AUTH_ALG_AES_CCM, + }, + }, + { + .name = "chacha20-poly1305", + .crypto = { + .cipher_alg = ODP_CIPHER_ALG_CHACHA20_POLY1305, + .cipher_key = { + .data = test_key32, + .length = sizeof(test_key32) + }, + .cipher_key_extra = { + .data = test_salt, + .length = 4, + }, + .auth_alg = ODP_AUTH_ALG_CHACHA20_POLY1305, + }, + }, }; /** -- cgit v1.2.3 From 2eaf47b69ea5f2095e26e0ea597d3e8702b0da06 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 23 Mar 2018 18:42:42 +0300 Subject: performance: crypto: add more algorithms Run more performance tests on crypto transformation. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/odp_crypto.c | 131 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/test/performance/odp_crypto.c b/test/performance/odp_crypto.c index e4f49fb76..8841c127e 100644 --- a/test/performance/odp_crypto.c +++ b/test/performance/odp_crypto.c @@ -51,6 +51,30 @@ static uint8_t test_key24[24] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x15, 0x16, 0x17, 0x18 }; +static uint8_t test_key32[32] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, + 0x1f, 0x20, +}; + +static uint8_t test_key64[64] = { 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, + 0x1f, 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x4b, 0x2c, 0x2d, + 0x2e, 0x2f, 0x30, 0x31, 0x32, + 0x33, 0x34, 0x55, 0x36, 0x37, + 0x38, 0x39, 0x5a, 0x3b, 0x3c, + 0x3d, 0x3e, 0x5f, 0x40, +}; + /** * Structure that holds template for session create call * for different algorithms supported by test @@ -279,6 +303,81 @@ static crypto_alg_config_t algs_config[] = { .auth_digest_len = 12, }, }, + { + .name = "aes-ctr-null", + .session = { + .cipher_alg = ODP_CIPHER_ALG_AES_CTR, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .cipher_iv = { + .data = test_iv, + .length = 16, + }, + .auth_alg = ODP_AUTH_ALG_NULL + }, + }, + { + .name = "aes-ctr-hmac-sha1-96", + .session = { + .cipher_alg = ODP_CIPHER_ALG_AES_CTR, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .cipher_iv = { + .data = test_iv, + .length = 16, + }, + .auth_alg = ODP_AUTH_ALG_SHA1_HMAC, + .auth_key = { + .data = test_key20, + .length = sizeof(test_key20) + }, + 
.auth_digest_len = 12, + }, + }, + { + .name = "null-hmac-sha256-128", + .session = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_SHA256_HMAC, + .auth_key = { + .data = test_key32, + .length = sizeof(test_key32) + }, + .auth_digest_len = 16, + }, + }, + { + .name = "null-hmac-sha512-256", + .session = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_SHA512_HMAC, + .auth_key = { + .data = test_key64, + .length = sizeof(test_key64) + }, + .auth_digest_len = 32, + }, + }, + { + .name = "null-aes-gmac", + .session = { + .cipher_alg = ODP_CIPHER_ALG_NULL, + .auth_alg = ODP_AUTH_ALG_AES_GMAC, + .auth_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .auth_iv = { + .data = test_iv, + .length = 12, + }, + .auth_digest_len = 16, + }, + }, { .name = "aes-gcm", .session = { @@ -295,6 +394,38 @@ static crypto_alg_config_t algs_config[] = { .auth_digest_len = 16, }, }, + { + .name = "aes-ccm", + .session = { + .cipher_alg = ODP_CIPHER_ALG_AES_CCM, + .cipher_key = { + .data = test_key16, + .length = sizeof(test_key16) + }, + .cipher_iv = { + .data = test_iv, + .length = 11, + }, + .auth_alg = ODP_AUTH_ALG_AES_CCM, + .auth_digest_len = 16, + }, + }, + { + .name = "chacha20-poly1305", + .session = { + .cipher_alg = ODP_CIPHER_ALG_CHACHA20_POLY1305, + .cipher_key = { + .data = test_key32, + .length = sizeof(test_key32) + }, + .cipher_iv = { + .data = test_iv, + .length = 12, + }, + .auth_alg = ODP_AUTH_ALG_CHACHA20_POLY1305, + .auth_digest_len = 16, + }, + }, }; /** -- cgit v1.2.3 From fc0a57306aa68a782e8611a9bd08ae10aaa5274a Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 23 Mar 2018 18:55:54 +0300 Subject: linux-gen: crypto: add IV length checks Check IV length on crypto session creation, fixing possible issues later, during en/decryption. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_crypto.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c index a7fddb5b4..21449cfea 100644 --- a/platform/linux-generic/odp_crypto.c +++ b/platform/linux-generic/odp_crypto.c @@ -661,9 +661,8 @@ static int process_cipher_param(odp_crypto_generic_session_t *session, return -1; /* Verify IV len is correct */ - if (!((0 == session->p.cipher_iv.length) || - ((uint32_t)EVP_CIPHER_iv_length(cipher) == - session->p.cipher_iv.length))) + if ((uint32_t)EVP_CIPHER_iv_length(cipher) != + session->p.cipher_iv.length) return -1; session->cipher.evp_cipher = cipher; @@ -791,6 +790,10 @@ static int process_aes_gcm_param(odp_crypto_generic_session_t *session, session->p.cipher_key.length) return -1; + /* Verify IV len is correct */ + if (12 != session->p.cipher_iv.length) + return -1; + memcpy(session->cipher.key_data, session->p.cipher_key.data, session->p.cipher_key.length); @@ -902,6 +905,10 @@ static int process_aes_gmac_param(odp_crypto_generic_session_t *session, session->p.auth_key.length) return -1; + /* Verify IV len is correct */ + if (12 != session->p.auth_iv.length) + return -1; + memcpy(session->auth.key, session->p.auth_key.data, session->p.auth_key.length); @@ -1058,6 +1065,11 @@ static int process_aes_ccm_param(odp_crypto_generic_session_t *session, session->p.cipher_key.length) return -1; + /* Verify IV len is correct */ + if (11 != session->p.cipher_iv.length && + 13 != session->p.cipher_iv.length) + return -1; + memcpy(session->cipher.key_data, session->p.cipher_key.data, session->p.cipher_key.length); @@ -1078,6 +1090,10 @@ static int process_aes_ccm_param(odp_crypto_generic_session_t *session, static int process_auth_hmac_param(odp_crypto_generic_session_t *session, const EVP_MD *evp_md) { + /* Verify IV len is correct 
*/ + if (0 != session->p.auth_iv.length) + return -1; + /* Set function */ if (ODP_CRYPTO_OP_ENCODE == session->p.op) session->auth.func = auth_hmac_gen; @@ -1106,6 +1122,9 @@ static int process_auth_cmac_param(odp_crypto_generic_session_t *session, session->p.auth_key.length) return -1; + if (0 != session->p.auth_iv.length) + return -1; + /* Set function */ if (ODP_CRYPTO_OP_ENCODE == session->p.op) session->auth.func = auth_cmac_gen; -- cgit v1.2.3 From d6de4dfcf1a3a6c861c87d47990f793c71328eda Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 23 Mar 2018 18:56:59 +0300 Subject: linux-gen: ipsec: add proper support for AES-CCM Make linux-generic's IPsec actually support AES-CCM algorithm. Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_ipsec_sad.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index 05865eb3a..8dab489cc 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -220,6 +220,8 @@ uint32_t _odp_ipsec_cipher_iv_len(odp_cipher_alg_t cipher) #endif case ODP_CIPHER_ALG_AES_GCM: return 12; + case ODP_CIPHER_ALG_AES_CCM: + return 11; case ODP_CIPHER_ALG_CHACHA20_POLY1305: return 12; default: @@ -252,6 +254,8 @@ uint32_t _odp_ipsec_auth_digest_len(odp_auth_alg_t auth) case ODP_AUTH_ALG_AES_GCM: case ODP_AUTH_ALG_AES_GMAC: return 16; + case ODP_AUTH_ALG_AES_CCM: + return 16; case ODP_AUTH_ALG_CHACHA20_POLY1305: return 16; default: @@ -411,6 +415,7 @@ odp_ipsec_sa_t odp_ipsec_sa_create(const odp_ipsec_sa_param_t *param) case ODP_CIPHER_ALG_AES128_GCM: #endif case ODP_CIPHER_ALG_AES_GCM: + case ODP_CIPHER_ALG_AES_CCM: ipsec_sa->use_counter_iv = 1; ipsec_sa->esp_iv_len = 8; ipsec_sa->esp_block_len = 16; -- cgit v1.2.3 From 06321dc028b83ea78b39eb9673859a40f9da37f7 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sat, 24 Mar 2018 
04:54:37 +0300 Subject: build: move odp scheduler setting to common m4 file Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- m4/odp_scheduler.m4 | 11 +++++++++++ platform/linux-generic/m4/configure.m4 | 2 +- platform/linux-generic/m4/odp_schedule.m4 | 6 ------ 3 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 m4/odp_scheduler.m4 delete mode 100644 platform/linux-generic/m4/odp_schedule.m4 diff --git a/m4/odp_scheduler.m4 b/m4/odp_scheduler.m4 new file mode 100644 index 000000000..23cc7da39 --- /dev/null +++ b/m4/odp_scheduler.m4 @@ -0,0 +1,11 @@ +# ODP_SCHEDULER +# ------------- +# Select default scheduler +AC_DEFUN([ODP_SCHEDULER], [dnl +AC_ARG_ENABLE([scheduler-default], + [AS_HELP_STRING([enable-scheduler-default], + [Choose default scheduler (default is basic)])], + [], [enable_scheduler_default=basic]) +AC_DEFINE_UNQUOTED([ODP_SCHEDULE_DEFAULT], ["$enable_scheduler_default"], + [Define to name default scheduler]) +]) # ODP_SCHEDULER diff --git a/platform/linux-generic/m4/configure.m4 b/platform/linux-generic/m4/configure.m4 index e3f276af4..d4aa9cd03 100644 --- a/platform/linux-generic/m4/configure.m4 +++ b/platform/linux-generic/m4/configure.m4 @@ -10,7 +10,7 @@ ODP_LIBCONFIG m4_include([platform/linux-generic/m4/odp_pcap.m4]) m4_include([platform/linux-generic/m4/odp_netmap.m4]) m4_include([platform/linux-generic/m4/odp_dpdk.m4]) -m4_include([platform/linux-generic/m4/odp_schedule.m4]) +ODP_SCHEDULER m4_include([platform/linux-generic/m4/performance.m4]) diff --git a/platform/linux-generic/m4/odp_schedule.m4 b/platform/linux-generic/m4/odp_schedule.m4 deleted file mode 100644 index 70be5a7d6..000000000 --- a/platform/linux-generic/m4/odp_schedule.m4 +++ /dev/null @@ -1,6 +0,0 @@ -AC_ARG_ENABLE([scheduler-default], - [AS_HELP_STRING([enable-scheduler-default], - [Choose default scheduler (default is basic)])], - [], [enable_scheduler_default=basic]) 
-AC_DEFINE_UNQUOTED([ODP_SCHEDULE_DEFAULT], ["$enable_scheduler_default"], - [Define to name default scheduler]) -- cgit v1.2.3 From 9a5a18af733c07109224e328ca0ac640ff49f845 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sat, 24 Mar 2018 05:42:25 +0300 Subject: build: another DPDK-linking fix Try our best to link with DPDK, if we are doing static linking of examples and tests and we detected shared DPDK library. Build the list of static libraries, in hope they are present on the system (like in Debian/Ubuntu DPDK packages). Linking can still fail, as we have warned during configure time. Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- example/Makefile.inc | 2 ++ m4/odp_dpdk.m4 | 5 ++++- test/Makefile.inc | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/example/Makefile.inc b/example/Makefile.inc index 3c8060da9..083c537f2 100644 --- a/example/Makefile.inc +++ b/example/Makefile.inc @@ -6,6 +6,8 @@ LDADD = $(LIB)/libodp-linux.la $(LIB)/libodphelper.la # Do not link to DPDK twice in case of dynamic linking with ODP if STATIC_APPS +LDADD += $(DPDK_LIBS_LT_STATIC) +else LDADD += $(DPDK_LIBS_LT) endif diff --git a/m4/odp_dpdk.m4 b/m4/odp_dpdk.m4 index dccc6569c..ec248e769 100644 --- a/m4/odp_dpdk.m4 +++ b/m4/odp_dpdk.m4 @@ -21,20 +21,22 @@ AS_VAR_APPEND([DPDK_PMDS], [--no-whole-archive]) # -------------------- # Set DPDK_LIBS/DPDK_LIBS_LT/DPDK_LIBS_LIBODP depending on DPDK setup AC_DEFUN([_ODP_DPDK_SET_LIBS], [dnl +ODP_DPDK_PMDS([$DPDK_PMD_PATH]) AS_IF([test "x$DPDK_SHARED" = "xyes"], [dnl # applications don't need to be linked to anything, just rpath DPDK_LIBS_LT="$DPDK_RPATH_LT" # static linking flags will need -ldpdk + DPDK_LIBS_LT_STATIC="$DPDK_LDFLAGS $DPDK_PMDS $DPDK_LIBS" DPDK_LIBS="-Wl,--no-as-needed,-ldpdk,--as-needed,`echo $DPDK_LIBS | sed -e 's/ /,/g'`" DPDK_LIBS="$DPDK_LDFLAGS $DPDK_RPATH $DPDK_LIBS" # link libodp-linux with -ldpdk DPDK_LIBS_LIBODP="$DPDK_LIBS" ], [dnl - 
ODP_DPDK_PMDS([$DPDK_PMD_PATH]) # build long list of libraries for applications, which should not be # rearranged by libtool DPDK_LIBS_LT="`echo $DPDK_LIBS | sed -e 's/^/-Wc,/' -e 's/ /,/g'`" DPDK_LIBS_LT="$DPDK_LDFLAGS $DPDK_PMDS $DPDK_LIBS_LT $DPDK_LIBS" + DPDK_LIBS_LT_STATIC="$DPDK_LIBS_LT" # static linking flags follow the suite DPDK_LIBS="$DPDK_LDFLAGS $DPDK_PMDS $DPDK_LIBS" # link libodp-linux with libtool linking flags @@ -43,6 +45,7 @@ AS_IF([test "x$DPDK_SHARED" = "xyes"], [dnl AC_SUBST([DPDK_LIBS]) AC_SUBST([DPDK_LIBS_LIBODP]) AC_SUBST([DPDK_LIBS_LT]) +AC_SUBST([DPDK_LIBS_LT_STATIC]) ]) # _ODP_DPDK_CHECK_LIB(LDFLAGS, [LIBS]) diff --git a/test/Makefile.inc b/test/Makefile.inc index 55a493a85..0706cac8c 100644 --- a/test/Makefile.inc +++ b/test/Makefile.inc @@ -21,6 +21,8 @@ AM_CPPFLAGS = \ # Do not link to DPDK twice in case of dynamic linking with ODP if STATIC_APPS +LDADD += $(DPDK_LIBS_LT_STATIC) +else LDADD += $(DPDK_LIBS_LT) endif -- cgit v1.2.3 From 63123149319eb0e379dc52a3f4691993d026f3bf Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 23 Mar 2018 04:36:41 +0300 Subject: build: don't use xxd to hexdump config file Use standard od and sed programs to hexdump config file, removing dependency on xxd. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- DEPENDENCIES | 2 -- m4/odp_libconfig.m4 | 17 +++++++++-------- platform/linux-generic/odp_libconfig.c | 4 +--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/DEPENDENCIES b/DEPENDENCIES index f328e787d..48f5a8397 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -21,8 +21,6 @@ Prerequisites for building the OpenDataPlane (ODP) API Libraries currently required to link: openssl, libatomic, libconfig - Required tools: xxd - 3.1 OpenSSL native compile For native compilation, simply load the necessary libraries using the appropriate diff --git a/m4/odp_libconfig.m4 b/m4/odp_libconfig.m4 index 632c271ef..c9d770bb5 100644 --- a/m4/odp_libconfig.m4 +++ b/m4/odp_libconfig.m4 @@ -8,12 +8,11 @@ AC_DEFUN([ODP_LIBCONFIG], PKG_CHECK_MODULES([LIBCONFIG], [libconfig]) ########################################################################## -# Check for xxd availability +# Check for od availability ########################################################################## -AC_CHECK_PROGS([XXD], [xxd]) -if test -z "$XXD"; - then AC_MSG_ERROR([Could not find 'xxd']) -fi +AC_CHECK_PROGS([OD], [od]) +AC_PROG_SED +AS_IF([test -z "$OD"], [AC_MSG_ERROR([Could not find 'od'])]) ########################################################################## # Create a header file odp_libconfig_config.h which containins null @@ -21,8 +20,10 @@ fi ########################################################################## AC_CONFIG_COMMANDS([platform/${with_platform}/include/odp_libconfig_config.h], [mkdir -p platform/${with_platform}/include - (cd ${srcdir}/config ; xxd -i odp-${with_platform}.conf) | \ - sed 's/\([[0-9a-f]]\)$/\0, 0x00/' > \ + (echo "static const char config_builtin[[]] = {"; \ + $OD -An -v -tx1 < ${srcdir}/config/odp-${with_platform}.conf | \ + $SED -e 's/[[0-9a-f]]\+/0x\0,/g' ; \ + echo "0x00 };") > \ platform/${with_platform}/include/odp_libconfig_config.h], - 
[with_platform=$with_platform]) + [with_platform=$with_platform OD=$OD SED=$SED]) ]) # ODP_LIBCONFIG diff --git a/platform/linux-generic/odp_libconfig.c b/platform/linux-generic/odp_libconfig.c index 3b3b31703..85acc5728 100644 --- a/platform/linux-generic/odp_libconfig.c +++ b/platform/linux-generic/odp_libconfig.c @@ -16,8 +16,6 @@ #include #include -#define CONF_STR_NAME ((const char *)odp_linux_generic_conf) - extern struct odp_global_data_s odp_global_data; int _odp_libconfig_init_global(void) @@ -33,7 +31,7 @@ int _odp_libconfig_init_global(void) config_init(config); config_init(config_rt); - if (!config_read_string(config, CONF_STR_NAME)) { + if (!config_read_string(config, config_builtin)) { ODP_ERR("Failed to read default config: %s(%d): %s\n", config_error_file(config), config_error_line(config), config_error_text(config)); -- cgit v1.2.3 From a9584da4284c04e45a9584a58c6dbeb404871119 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 28 Mar 2018 17:14:28 +0300 Subject: build: fix autoconf error caused by double-registering config item Autoconf uses provided tag as is, when registering config command. Require platform code to pass platform argument, so that different config commands will be registered. 
Signed-off-by: Dmitry Eremin-Solenikov Reviewed-and-tested-by: Matias Elo Signed-off-by: Maxim Uvarov --- m4/odp_libconfig.m4 | 12 ++++++------ platform/linux-generic/m4/configure.m4 | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/m4/odp_libconfig.m4 b/m4/odp_libconfig.m4 index c9d770bb5..302dc5066 100644 --- a/m4/odp_libconfig.m4 +++ b/m4/odp_libconfig.m4 @@ -1,5 +1,5 @@ -# ODP_LIBCONFIG -# ------------- +# ODP_LIBCONFIG(PLATFORM) +# ----------------------- AC_DEFUN([ODP_LIBCONFIG], [dnl ########################################################################## @@ -18,12 +18,12 @@ AS_IF([test -z "$OD"], [AC_MSG_ERROR([Could not find 'od'])]) # Create a header file odp_libconfig_config.h which containins null # terminated hex dump of odp-linux.conf ########################################################################## -AC_CONFIG_COMMANDS([platform/${with_platform}/include/odp_libconfig_config.h], -[mkdir -p platform/${with_platform}/include +AC_CONFIG_COMMANDS([platform/$1/include/odp_libconfig_config.h], +[mkdir -p platform/$1/include (echo "static const char config_builtin[[]] = {"; \ - $OD -An -v -tx1 < ${srcdir}/config/odp-${with_platform}.conf | \ + $OD -An -v -tx1 < ${srcdir}/config/odp-$1.conf | \ $SED -e 's/[[0-9a-f]]\+/0x\0,/g' ; \ echo "0x00 };") > \ - platform/${with_platform}/include/odp_libconfig_config.h], + platform/$1/include/odp_libconfig_config.h], [with_platform=$with_platform OD=$OD SED=$SED]) ]) # ODP_LIBCONFIG diff --git a/platform/linux-generic/m4/configure.m4 b/platform/linux-generic/m4/configure.m4 index d4aa9cd03..7cf0a9b67 100644 --- a/platform/linux-generic/m4/configure.m4 +++ b/platform/linux-generic/m4/configure.m4 @@ -6,7 +6,7 @@ ODP_ATOMIC ODP_PTHREAD ODP_TIMER ODP_OPENSSL -ODP_LIBCONFIG +ODP_LIBCONFIG([linux-generic]) m4_include([platform/linux-generic/m4/odp_pcap.m4]) m4_include([platform/linux-generic/m4/odp_netmap.m4]) m4_include([platform/linux-generic/m4/odp_dpdk.m4]) -- cgit v1.2.3 From 
674ea087fba016e55f03e3966fe64fc908141f8f Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Fri, 30 Mar 2018 19:09:07 +0300 Subject: add odp_ipsec to .gitignore Signed-off-by: Maxim Uvarov --- test/performance/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/test/performance/.gitignore b/test/performance/.gitignore index 259f690cc..4c4ab6ed5 100644 --- a/test/performance/.gitignore +++ b/test/performance/.gitignore @@ -3,6 +3,7 @@ odp_atomic odp_bench_packet odp_crypto +odp_ipsec odp_l2fwd odp_pktio_ordered odp_pktio_perf -- cgit v1.2.3 From 57df98eca15fe64a716f9f555714ef5fb9cd25e9 Mon Sep 17 00:00:00 2001 From: Bogdan Pricope Date: Thu, 29 Mar 2018 11:47:02 +0300 Subject: linux-gen: dpdk: fix runtime/default config read order Signed-off-by: Bogdan Pricope Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- .../linux-generic/include/odp_libconfig_internal.h | 5 +++ platform/linux-generic/odp_libconfig.c | 38 ++++++++++++++++++++++ platform/linux-generic/pktio/dpdk.c | 18 ++++------ 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/platform/linux-generic/include/odp_libconfig_internal.h b/platform/linux-generic/include/odp_libconfig_internal.h index 042917755..727f68863 100644 --- a/platform/linux-generic/include/odp_libconfig_internal.h +++ b/platform/linux-generic/include/odp_libconfig_internal.h @@ -22,6 +22,11 @@ int _odp_libconfig_term_global(void); int _odp_libconfig_lookup_int(const char *path, int *value); +int _odp_libconfig_lookup_ext_int(const char *base_path, + const char *local_path, + const char *name, + int *value); + #ifdef __cplusplus } #endif diff --git a/platform/linux-generic/odp_libconfig.c b/platform/linux-generic/odp_libconfig.c index 85acc5728..316ed067a 100644 --- a/platform/linux-generic/odp_libconfig.c +++ b/platform/linux-generic/odp_libconfig.c @@ -95,3 +95,41 @@ int _odp_libconfig_lookup_int(const char *path, int *value) return (ret_def == CONFIG_TRUE || ret_rt == CONFIG_TRUE) ? 
1 : 0; } + +static int lookup_int(config_t *cfg, + const char *base_path, + const char *local_path, + const char *name, + int *value) +{ + char path[256]; + + if (local_path) { + snprintf(path, sizeof(path), "%s.%s.%s", base_path, + local_path, name); + if (config_lookup_int(cfg, path, value) == CONFIG_TRUE) + return 1; + } + + snprintf(path, sizeof(path), "%s.%s", base_path, name); + if (config_lookup_int(cfg, path, value) == CONFIG_TRUE) + return 1; + + return 0; +} + +int _odp_libconfig_lookup_ext_int(const char *base_path, + const char *local_path, + const char *name, + int *value) +{ + if (lookup_int(&odp_global_data.libconfig_runtime, + base_path, local_path, name, value)) + return 1; + + if (lookup_int(&odp_global_data.libconfig_default, + base_path, local_path, name, value)) + return 1; + + return 0; +} diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/pktio/dpdk.c index d40ad954f..7bccab873 100644 --- a/platform/linux-generic/pktio/dpdk.c +++ b/platform/linux-generic/pktio/dpdk.c @@ -95,22 +95,16 @@ void refer_constructors(void) } #endif -static int lookup_opt(const char *path, const char *drv_name, int *val) +static int lookup_opt(const char *opt_name, const char *drv_name, int *val) { const char *base = "pktio_dpdk"; - char opt_path[256]; - int ret = 0; - - /* Default option */ - snprintf(opt_path, sizeof(opt_path), "%s.%s", base, path); - ret += _odp_libconfig_lookup_int(opt_path, val); - - /* Driver specific option overrides default option */ - snprintf(opt_path, sizeof(opt_path), "%s.%s.%s", base, drv_name, path); - ret += _odp_libconfig_lookup_int(opt_path, val); + int ret; + ret = _odp_libconfig_lookup_ext_int(base, drv_name, opt_name, val); if (ret == 0) - ODP_ERR("Unable to find DPDK configuration option: %s\n", path); + ODP_ERR("Unable to find DPDK configuration option: %s\n", + opt_name); + return ret; } -- cgit v1.2.3 From fbe07de54f3768f32bd39a0ea9aa2a79188b1186 Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Tue, 
3 Apr 2018 09:44:51 +0200 Subject: fdserver: handle interruption by signal in accept This patch fixes: https://bugs.linaro.org/show_bug.cgi?id=3690 Suggested-by: Janne Peltonen Signed-off-by: Josep Puigdemont Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/_fdserver.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/platform/linux-generic/_fdserver.c b/platform/linux-generic/_fdserver.c index 065736f01..af9ca4a08 100644 --- a/platform/linux-generic/_fdserver.c +++ b/platform/linux-generic/_fdserver.c @@ -559,8 +559,11 @@ static void wait_requests(int sock) addr_sz = sizeof(remote); c_socket = accept(sock, (struct sockaddr *)&remote, &addr_sz); if (c_socket == -1) { - ODP_ERR("wait_requests: %s\n", strerror(errno)); - return; + if (errno == EINTR) + continue; + + ODP_ERR("wait_requests: %s\n", strerror(errno)); + return; } if (handle_request(c_socket)) -- cgit v1.2.3 From a430caf661d4a0ad8d69c67dc7a20911e3169a25 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Thu, 5 Apr 2018 18:31:08 +0300 Subject: linux-gen: clean up ishm file naming Rename ivshmem-related files from the _ prefix to the odp_ prefix to better fit the project code style. No code change, just file renames.
Signed-off-by: Maxim Uvarov Reviewed-by: Bill Fischofer --- platform/linux-generic/Makefile.am | 64 +- platform/linux-generic/_fdserver.c | 699 -------- platform/linux-generic/_ishm.c | 1897 -------------------- platform/linux-generic/_ishmphy.c | 209 --- platform/linux-generic/_ishmpool.c | 807 --------- .../linux-generic/include/_fdserver_internal.h | 39 - platform/linux-generic/include/_ishm_internal.h | 54 - platform/linux-generic/include/_ishmphy_internal.h | 25 - .../linux-generic/include/_ishmpool_internal.h | 56 - .../linux-generic/include/odp_fdserver_internal.h | 39 + platform/linux-generic/include/odp_ishm_internal.h | 54 + .../linux-generic/include/odp_ishmphy_internal.h | 25 + .../linux-generic/include/odp_ishmpool_internal.h | 56 + .../include/odp_schedule_scalable_ordered.h | 2 +- platform/linux-generic/odp_fdserver.c | 699 ++++++++ platform/linux-generic/odp_ishm.c | 1897 ++++++++++++++++++++ platform/linux-generic/odp_ishmphy.c | 209 +++ platform/linux-generic/odp_ishmpool.c | 807 +++++++++ platform/linux-generic/odp_queue_scalable.c | 4 +- platform/linux-generic/odp_schedule_scalable.c | 4 +- platform/linux-generic/odp_shared_memory.c | 2 +- platform/linux-generic/pktio/ipc.c | 2 +- 22 files changed, 3824 insertions(+), 3826 deletions(-) delete mode 100644 platform/linux-generic/_fdserver.c delete mode 100644 platform/linux-generic/_ishm.c delete mode 100644 platform/linux-generic/_ishmphy.c delete mode 100644 platform/linux-generic/_ishmpool.c delete mode 100644 platform/linux-generic/include/_fdserver_internal.h delete mode 100644 platform/linux-generic/include/_ishm_internal.h delete mode 100644 platform/linux-generic/include/_ishmphy_internal.h delete mode 100644 platform/linux-generic/include/_ishmpool_internal.h create mode 100644 platform/linux-generic/include/odp_fdserver_internal.h create mode 100644 platform/linux-generic/include/odp_ishm_internal.h create mode 100644 platform/linux-generic/include/odp_ishmphy_internal.h create mode 
100644 platform/linux-generic/include/odp_ishmpool_internal.h create mode 100644 platform/linux-generic/odp_fdserver.c create mode 100644 platform/linux-generic/odp_ishm.c create mode 100644 platform/linux-generic/odp_ishmphy.c create mode 100644 platform/linux-generic/odp_ishmpool.c diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am index 7e57994d1..4f03eab15 100644 --- a/platform/linux-generic/Makefile.am +++ b/platform/linux-generic/Makefile.am @@ -82,15 +82,11 @@ endif noinst_HEADERS = \ arch/odp_arch_time_internal.h \ - include/_fdserver_internal.h \ - include/_ishm_internal.h \ - include/_ishmphy_internal.h \ - include/_ishmpool_internal.h \ include/odp_align_internal.h \ include/odp_atomic_internal.h \ - include/odp_buffer_inlines.h \ include/odp_bitmap_internal.h \ include/odp_bitset.h \ + include/odp_buffer_inlines.h \ include/odp_buffer_internal.h \ include/odp_classification_datamodel.h \ include/odp_classification_inlines.h \ @@ -98,37 +94,41 @@ noinst_HEADERS = \ include/odp_config_internal.h \ include/odp_debug_internal.h \ include/odp_errno_define.h \ + include/odp_fdserver_internal.h \ include/odp_forward_typedefs_internal.h \ include/odp_internal.h \ include/odp_ipsec_internal.h \ + include/odp_ishm_internal.h \ + include/odp_ishmphy_internal.h \ + include/odp_ishmpool_internal.h \ include/odp_libconfig_internal.h \ include/odp_llqueue.h \ include/odp_macros_internal.h \ include/odp_name_table_internal.h \ + include/odp_packet_dpdk.h \ include/odp_packet_internal.h \ include/odp_packet_io_internal.h \ include/odp_packet_io_ipc_internal.h \ include/odp_packet_io_ring_internal.h \ include/odp_packet_netmap.h \ - include/odp_packet_dpdk.h \ + include/odp_packet_null.h \ include/odp_packet_socket.h \ include/odp_packet_tap.h \ - include/odp_packet_null.h \ include/odp_pkt_queue_internal.h \ include/odp_pool_internal.h \ include/odp_posix_extensions.h \ - include/odp_queue_internal.h \ - 
include/odp_queue_scalable_internal.h \ include/odp_queue_if.h \ + include/odp_queue_internal.h \ include/odp_queue_lf.h \ + include/odp_queue_scalable_internal.h \ include/odp_ring_internal.h \ include/odp_ring_st_internal.h \ include/odp_schedule_if.h \ - include/odp_schedule_scalable.h \ include/odp_schedule_scalable_config.h \ + include/odp_schedule_scalable.h \ include/odp_schedule_scalable_ordered.h \ - include/odp_sorted_list_internal.h \ include/odp_shm_internal.h \ + include/odp_sorted_list_internal.h \ include/odp_timer_internal.h \ include/odp_timer_wheel_internal.h \ include/odp_traffic_mngr_internal.h \ @@ -138,15 +138,10 @@ noinst_HEADERS = \ include/protocols/tcp.h \ include/protocols/thash.h \ include/protocols/udp.h - nodist_noinst_HEADERS = \ include/odp_libconfig_config.h __LIB__libodp_linux_la_SOURCES = \ - _fdserver.c \ - _ishm.c \ - _ishmphy.c \ - _ishmpool.c \ odp_atomic.c \ odp_barrier.c \ odp_bitmap.c \ @@ -159,30 +154,21 @@ __LIB__libodp_linux_la_SOURCES = \ odp_crypto.c \ odp_errno.c \ odp_event.c \ + odp_fdserver.c \ odp_hash.c \ - odp_init.c \ odp_impl.c \ + odp_init.c \ odp_ipsec.c \ odp_ipsec_events.c \ odp_ipsec_sad.c \ + odp_ishm.c \ + odp_ishmphy.c \ + odp_ishmpool.c \ odp_libconfig.c \ odp_name_table.c \ odp_packet.c \ odp_packet_flags.c \ odp_packet_io.c \ - pktio/ethtool.c \ - pktio/io_ops.c \ - pktio/ipc.c \ - pktio/pktio_common.c \ - pktio/loop.c \ - pktio/netmap.c \ - pktio/null.c \ - pktio/dpdk.c \ - pktio/socket.c \ - pktio/socket_mmap.c \ - pktio/sysfs.c \ - pktio/tap.c \ - pktio/ring.c \ odp_pkt_queue.c \ odp_pool.c \ odp_queue_basic.c \ @@ -193,10 +179,10 @@ __LIB__libodp_linux_la_SOURCES = \ odp_rwlock_recursive.c \ odp_schedule_basic.c \ odp_schedule_if.c \ - odp_schedule_sp.c \ odp_schedule_iquery.c \ odp_schedule_scalable.c \ odp_schedule_scalable_ordered.c \ + odp_schedule_sp.c \ odp_shared_memory.c \ odp_sorted_list.c \ odp_spinlock.c \ @@ -209,8 +195,20 @@ __LIB__libodp_linux_la_SOURCES = \ odp_timer_wheel.c \ 
odp_traffic_mngr.c \ odp_version.c \ - odp_weak.c - + odp_weak.c \ + pktio/dpdk.c \ + pktio/ethtool.c \ + pktio/io_ops.c \ + pktio/ipc.c \ + pktio/loop.c \ + pktio/netmap.c \ + pktio/null.c \ + pktio/pktio_common.c \ + pktio/ring.c \ + pktio/socket.c \ + pktio/socket_mmap.c \ + pktio/sysfs.c \ + pktio/tap.c if ODP_ABI_COMPAT __LIB__libodp_linux_la_SOURCES += \ odp_atomic_api.c \ diff --git a/platform/linux-generic/_fdserver.c b/platform/linux-generic/_fdserver.c deleted file mode 100644 index af9ca4a08..000000000 --- a/platform/linux-generic/_fdserver.c +++ /dev/null @@ -1,699 +0,0 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include "config.h" - -/* - * This file implements a file descriptor sharing server enabling - * sharing of file descriptors between processes, regardless of fork time. - * - * File descriptors are process scoped, but they can be "sent and converted - * on the fly" between processes using special unix domain socket ancillary - * data. - * The receiving process gets a file descriptor "pointing" to the same thing - * as the one sent (but the value of the file descriptor itself may be different - * from the one sent). - * Because ODP applications are responsible for creating ODP threads (i.e. - * pthreads or linux processes), ODP has no control on the order things happen: - * Nothing prevent a thread A to fork B and C, and then C creating a pktio - * which will be used by A and B to send/receive packets. - * Assuming this pktio uses a file descriptor, the latter will need to be - * shared between the processes, despite the "non convenient" fork time. - * The shared memory allocator is likely to use this as well to be able to - * share memory regardless of fork() time. 
- * This server handles a table of {(context,key)<-> fd} pair, and is - * interfaced by the following functions: - * - * _odp_fdserver_register_fd(context, key, fd_to_send); - * _odp_fdserver_deregister_fd(context, key); - * _odp_fdserver_lookup_fd(context, key); - * - * which are used to register/deregister or querry for file descriptor based - * on a context and key value couple, which has to be unique. - * - * Note again that the file descriptors stored here are local to this server - * process and get converted both when registered or looked up. - */ - -#include -#include -#include -#include -#include <_fdserver_internal.h> -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define FDSERVER_SOCKPATH_MAXLEN 255 -#define FDSERVER_SOCK_FORMAT "%s/%s/odp-%d-fdserver" -#define FDSERVER_SOCKDIR_FORMAT "%s/%s" -#define FDSERVER_DEFAULT_DIR "/dev/shm" -#define FDSERVER_BACKLOG 5 - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -/* when accessing the client functions, clients should be mutexed: */ -static odp_spinlock_t *client_lock; - -/* define the tables of file descriptors handled by this server: */ -#define FDSERVER_MAX_ENTRIES 256 -typedef struct fdentry_s { - fd_server_context_e context; - uint64_t key; - int fd; -} fdentry_t; -static fdentry_t *fd_table; -static int fd_table_nb_entries; - -/* - * define the message struct used for communication between client and server - * (this single message is used in both direction) - * The file descriptors are sent out of band as ancillary data for conversion. 
- */ -typedef struct fd_server_msg { - int command; - fd_server_context_e context; - uint64_t key; -} fdserver_msg_t; -/* possible commands are: */ -#define FD_REGISTER_REQ 1 /* client -> server */ -#define FD_REGISTER_ACK 2 /* server -> client */ -#define FD_REGISTER_NACK 3 /* server -> client */ -#define FD_LOOKUP_REQ 4 /* client -> server */ -#define FD_LOOKUP_ACK 5 /* server -> client */ -#define FD_LOOKUP_NACK 6 /* server -> client */ -#define FD_DEREGISTER_REQ 7 /* client -> server */ -#define FD_DEREGISTER_ACK 8 /* server -> client */ -#define FD_DEREGISTER_NACK 9 /* server -> client */ -#define FD_SERVERSTOP_REQ 10 /* client -> server (stops) */ - -/* - * Client and server function: - * Send a fdserver_msg, possibly including a file descriptor, on the socket - * This function is used both by: - * -the client (sending a FD_REGISTER_REQ with a file descriptor to be shared, - * or FD_LOOKUP_REQ/FD_DEREGISTER_REQ without a file descriptor) - * -the server (sending FD_REGISTER_ACK/NACK, FD_LOOKUP_NACK, - * FD_DEREGISTER_ACK/NACK... without a fd or a - * FD_LOOKUP_ACK with a fd) - * This function make use of the ancillary data (control data) to pass and - * convert file descriptors over UNIX sockets - * Return -1 on error, 0 on success. 
- */ -static int send_fdserver_msg(int sock, int command, - fd_server_context_e context, uint64_t key, - int fd_to_send) -{ - struct msghdr socket_message; - struct iovec io_vector[1]; /* one msg frgmt only */ - struct cmsghdr *control_message = NULL; - int *fd_location; - fdserver_msg_t msg; - int res; - - char ancillary_data[CMSG_SPACE(sizeof(int))]; - - /* prepare the register request body (single framgent): */ - msg.command = command; - msg.context = context; - msg.key = key; - io_vector[0].iov_base = &msg; - io_vector[0].iov_len = sizeof(fdserver_msg_t); - - /* initialize socket message */ - memset(&socket_message, 0, sizeof(struct msghdr)); - socket_message.msg_iov = io_vector; - socket_message.msg_iovlen = 1; - - if (fd_to_send >= 0) { - /* provide space for the ancillary data */ - memset(ancillary_data, 0, CMSG_SPACE(sizeof(int))); - socket_message.msg_control = ancillary_data; - socket_message.msg_controllen = CMSG_SPACE(sizeof(int)); - - /* initialize a single ancillary data element for fd passing */ - control_message = CMSG_FIRSTHDR(&socket_message); - control_message->cmsg_level = SOL_SOCKET; - control_message->cmsg_type = SCM_RIGHTS; - control_message->cmsg_len = CMSG_LEN(sizeof(int)); - fd_location = (int *)(void *)CMSG_DATA(control_message); - *fd_location = fd_to_send; - } - res = sendmsg(sock, &socket_message, 0); - if (res < 0) { - ODP_ERR("send_fdserver_msg: %s\n", strerror(errno)); - return -1; - } - - return 0; -} - -/* - * Client and server function - * Receive a fdserver_msg, possibly including a file descriptor, on the - * given socket. - * This function is used both by: - * -the server (receiving a FD_REGISTER_REQ with a file descriptor to be shared, - * or FD_LOOKUP_REQ, FD_DEREGISTER_REQ without a file descriptor) - * -the client (receiving FD_REGISTER_ACK...without a fd or a FD_LOOKUP_ACK with - * a fd) - * This function make use of the ancillary data (control data) to pass and - * convert file descriptors over UNIX sockets. 
- * Return -1 on error, 0 on success. - */ -static int recv_fdserver_msg(int sock, int *command, - fd_server_context_e *context, uint64_t *key, - int *recvd_fd) -{ - struct msghdr socket_message; - struct iovec io_vector[1]; /* one msg frgmt only */ - struct cmsghdr *control_message = NULL; - int *fd_location; - fdserver_msg_t msg; - char ancillary_data[CMSG_SPACE(sizeof(int))]; - - memset(&socket_message, 0, sizeof(struct msghdr)); - memset(ancillary_data, 0, CMSG_SPACE(sizeof(int))); - - /* setup a place to fill in message contents */ - io_vector[0].iov_base = &msg; - io_vector[0].iov_len = sizeof(fdserver_msg_t); - socket_message.msg_iov = io_vector; - socket_message.msg_iovlen = 1; - - /* provide space for the ancillary data */ - socket_message.msg_control = ancillary_data; - socket_message.msg_controllen = CMSG_SPACE(sizeof(int)); - - /* receive the message */ - if (recvmsg(sock, &socket_message, MSG_CMSG_CLOEXEC) < 0) { - ODP_ERR("recv_fdserver_msg: %s\n", strerror(errno)); - return -1; - } - - *command = msg.command; - *context = msg.context; - *key = msg.key; - - /* grab the converted file descriptor (if any) */ - *recvd_fd = -1; - - if ((socket_message.msg_flags & MSG_CTRUNC) == MSG_CTRUNC) - return 0; - - /* iterate ancillary elements to find the file descriptor: */ - for (control_message = CMSG_FIRSTHDR(&socket_message); - control_message != NULL; - control_message = CMSG_NXTHDR(&socket_message, control_message)) { - if ((control_message->cmsg_level == SOL_SOCKET) && - (control_message->cmsg_type == SCM_RIGHTS)) { - fd_location = (int *)(void *)CMSG_DATA(control_message); - *recvd_fd = *fd_location; - break; - } - } - - return 0; -} - -/* opens and returns a connected socket to the server */ -static int get_socket(void) -{ - char sockpath[FDSERVER_SOCKPATH_MAXLEN]; - int s_sock; /* server socket */ - struct sockaddr_un remote; - int len; - - /* construct the named socket path: */ - snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCK_FORMAT, - 
odp_global_data.shm_dir, - odp_global_data.uid, - odp_global_data.main_pid); - - s_sock = socket(AF_UNIX, SOCK_STREAM, 0); - if (s_sock == -1) { - ODP_ERR("cannot connect to server: %s\n", strerror(errno)); - return -1; - } - - remote.sun_family = AF_UNIX; - strcpy(remote.sun_path, sockpath); - len = strlen(remote.sun_path) + sizeof(remote.sun_family); - if (connect(s_sock, (struct sockaddr *)&remote, len) == -1) { - ODP_ERR("cannot connect to server: %s\n", strerror(errno)); - close(s_sock); - return -1; - } - - return s_sock; -} - -/* - * Client function: - * Register a file descriptor to the server. Return -1 on error. - */ -int _odp_fdserver_register_fd(fd_server_context_e context, uint64_t key, - int fd_to_send) -{ - int s_sock; /* server socket */ - int res; - int command; - int fd; - - odp_spinlock_lock(client_lock); - - ODP_DBG("FD client register: pid=%d key=%" PRIu64 ", fd=%d\n", - getpid(), key, fd_to_send); - - s_sock = get_socket(); - if (s_sock < 0) { - odp_spinlock_unlock(client_lock); - return -1; - } - - res = send_fdserver_msg(s_sock, FD_REGISTER_REQ, context, key, - fd_to_send); - if (res < 0) { - ODP_ERR("fd registration failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - res = recv_fdserver_msg(s_sock, &command, &context, &key, &fd); - - if ((res < 0) || (command != FD_REGISTER_ACK)) { - ODP_ERR("fd registration failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - close(s_sock); - - odp_spinlock_unlock(client_lock); - return 0; -} - -/* - * Client function: - * Deregister a file descriptor from the server. Return -1 on error. 
- */ -int _odp_fdserver_deregister_fd(fd_server_context_e context, uint64_t key) -{ - int s_sock; /* server socket */ - int res; - int command; - int fd; - - odp_spinlock_lock(client_lock); - - ODP_DBG("FD client deregister: pid=%d key=%" PRIu64 "\n", - getpid(), key); - - s_sock = get_socket(); - if (s_sock < 0) { - odp_spinlock_unlock(client_lock); - return -1; - } - - res = send_fdserver_msg(s_sock, FD_DEREGISTER_REQ, context, key, -1); - if (res < 0) { - ODP_ERR("fd de-registration failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - res = recv_fdserver_msg(s_sock, &command, &context, &key, &fd); - - if ((res < 0) || (command != FD_DEREGISTER_ACK)) { - ODP_ERR("fd de-registration failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - close(s_sock); - - odp_spinlock_unlock(client_lock); - return 0; -} - -/* - * client function: - * lookup a file descriptor from the server. return -1 on error, - * or the file descriptor on success (>=0). 
- */ -int _odp_fdserver_lookup_fd(fd_server_context_e context, uint64_t key) -{ - int s_sock; /* server socket */ - int res; - int command; - int fd; - - odp_spinlock_lock(client_lock); - - s_sock = get_socket(); - if (s_sock < 0) { - odp_spinlock_unlock(client_lock); - return -1; - } - - res = send_fdserver_msg(s_sock, FD_LOOKUP_REQ, context, key, -1); - if (res < 0) { - ODP_ERR("fd lookup failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - res = recv_fdserver_msg(s_sock, &command, &context, &key, &fd); - - if ((res < 0) || (command != FD_LOOKUP_ACK)) { - ODP_ERR("fd lookup failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - close(s_sock); - ODP_DBG("FD client lookup: pid=%d, key=%" PRIu64 ", fd=%d\n", - getpid(), key, fd); - - odp_spinlock_unlock(client_lock); - return fd; -} - -/* - * request server terminaison: - */ -static int stop_server(void) -{ - int s_sock; /* server socket */ - int res; - - odp_spinlock_lock(client_lock); - - ODP_DBG("FD sending server stop request\n"); - - s_sock = get_socket(); - if (s_sock < 0) { - odp_spinlock_unlock(client_lock); - return -1; - } - - res = send_fdserver_msg(s_sock, FD_SERVERSTOP_REQ, 0, 0, -1); - if (res < 0) { - ODP_ERR("fd stop request failure\n"); - close(s_sock); - odp_spinlock_unlock(client_lock); - return -1; - } - - close(s_sock); - - odp_spinlock_unlock(client_lock); - return 0; -} - -/* - * server function - * receive a client request and handle it. - * Always returns 0 unless a stop request is received. 
- */ -static int handle_request(int client_sock) -{ - int command; - fd_server_context_e context; - uint64_t key; - int fd; - int i; - - /* get a client request: */ - recv_fdserver_msg(client_sock, &command, &context, &key, &fd); - switch (command) { - case FD_REGISTER_REQ: - if ((fd < 0) || (context >= FD_SRV_CTX_END)) { - ODP_ERR("Invalid register fd or context\n"); - send_fdserver_msg(client_sock, FD_REGISTER_NACK, - FD_SRV_CTX_NA, 0, -1); - return 0; - } - - /* store the file descriptor in table: */ - if (fd_table_nb_entries < FDSERVER_MAX_ENTRIES) { - fd_table[fd_table_nb_entries].context = context; - fd_table[fd_table_nb_entries].key = key; - fd_table[fd_table_nb_entries++].fd = fd; - ODP_DBG("storing {ctx=%d, key=%" PRIu64 "}->fd=%d\n", - context, key, fd); - } else { - ODP_ERR("FD table full\n"); - send_fdserver_msg(client_sock, FD_REGISTER_NACK, - FD_SRV_CTX_NA, 0, -1); - return 0; - } - - send_fdserver_msg(client_sock, FD_REGISTER_ACK, - FD_SRV_CTX_NA, 0, -1); - break; - - case FD_LOOKUP_REQ: - if (context >= FD_SRV_CTX_END) { - ODP_ERR("invalid lookup context\n"); - send_fdserver_msg(client_sock, FD_LOOKUP_NACK, - FD_SRV_CTX_NA, 0, -1); - return 0; - } - - /* search key in table and sent reply: */ - for (i = 0; i < fd_table_nb_entries; i++) { - if ((fd_table[i].context == context) && - (fd_table[i].key == key)) { - fd = fd_table[i].fd; - ODP_DBG("lookup {ctx=%d," - " key=%" PRIu64 "}->fd=%d\n", - context, key, fd); - send_fdserver_msg(client_sock, - FD_LOOKUP_ACK, context, key, - fd); - return 0; - } - } - - /* context+key not found... 
send nack */ - send_fdserver_msg(client_sock, FD_LOOKUP_NACK, context, key, - -1); - break; - - case FD_DEREGISTER_REQ: - if (context >= FD_SRV_CTX_END) { - ODP_ERR("invalid deregister context\n"); - send_fdserver_msg(client_sock, FD_DEREGISTER_NACK, - FD_SRV_CTX_NA, 0, -1); - return 0; - } - - /* search key in table and remove it if found, and reply: */ - for (i = 0; i < fd_table_nb_entries; i++) { - if ((fd_table[i].context == context) && - (fd_table[i].key == key)) { - ODP_DBG("drop {ctx=%d," - " key=%" PRIu64 "}->fd=%d\n", - context, key, fd_table[i].fd); - close(fd_table[i].fd); - fd_table[i] = fd_table[--fd_table_nb_entries]; - send_fdserver_msg(client_sock, - FD_DEREGISTER_ACK, - context, key, -1); - return 0; - } - } - - /* key not found... send nack */ - send_fdserver_msg(client_sock, FD_DEREGISTER_NACK, - context, key, -1); - break; - - case FD_SERVERSTOP_REQ: - ODP_DBG("Stoping FD server\n"); - return 1; - - default: - ODP_ERR("Unexpected request\n"); - break; - } - return 0; -} - -/* - * server function - * loop forever, handling client requests one by one - */ -static void wait_requests(int sock) -{ - int c_socket; /* client connection */ - unsigned int addr_sz; - struct sockaddr_un remote; - - for (;;) { - addr_sz = sizeof(remote); - c_socket = accept(sock, (struct sockaddr *)&remote, &addr_sz); - if (c_socket == -1) { - if (errno == EINTR) - continue; - - ODP_ERR("wait_requests: %s\n", strerror(errno)); - return; - } - - if (handle_request(c_socket)) - break; - close(c_socket); - } - close(c_socket); -} - -/* - * Create a unix domain socket and fork a process to listen to incoming - * requests. 
- */ -int _odp_fdserver_init_global(void) -{ - char sockpath[FDSERVER_SOCKPATH_MAXLEN]; - int sock; - struct sockaddr_un local; - pid_t server_pid; - int res; - - /* create the client spinlock that any client can see: */ - client_lock = mmap(NULL, sizeof(odp_spinlock_t), PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - - odp_spinlock_init(client_lock); - - snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCKDIR_FORMAT, - odp_global_data.shm_dir, - odp_global_data.uid); - - mkdir(sockpath, 0744); - - /* construct the server named socket path: */ - snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCK_FORMAT, - odp_global_data.shm_dir, - odp_global_data.uid, - odp_global_data.main_pid); - - /* create UNIX domain socket: */ - sock = socket(AF_UNIX, SOCK_STREAM, 0); - if (sock == -1) { - ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno)); - return -1; - } - - /* remove previous named socket if it already exists: */ - unlink(sockpath); - - /* bind to new named socket: */ - local.sun_family = AF_UNIX; - strncpy(local.sun_path, sockpath, sizeof(local.sun_path)); - res = bind(sock, (struct sockaddr *)&local, sizeof(struct sockaddr_un)); - if (res == -1) { - ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno)); - close(sock); - return -1; - } - - /* listen for incoming conections: */ - if (listen(sock, FDSERVER_BACKLOG) == -1) { - ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno)); - close(sock); - return -1; - } - - /* fork a server process: */ - server_pid = fork(); - if (server_pid == -1) { - ODP_ERR("Could not fork!\n"); - close(sock); - return -1; - } - - if (server_pid == 0) { /*child */ - /* TODO: pin the server on appropriate service cpu mask */ - /* when (if) we can agree on the usage of service mask */ - - /* request to be killed if parent dies, hence avoiding */ - /* orphans being "adopted" by the init process... 
*/ - prctl(PR_SET_PDEATHSIG, SIGTERM); - - /* allocate the space for the file descriptor<->key table: */ - fd_table = malloc(FDSERVER_MAX_ENTRIES * sizeof(fdentry_t)); - if (!fd_table) { - ODP_ERR("maloc failed!\n"); - exit(1); - } - - /* wait for clients requests */ - wait_requests(sock); /* Returns when server is stopped */ - close(sock); - - /* release the file descriptor table: */ - free(fd_table); - - exit(0); - } - - /* parent */ - close(sock); - return 0; -} - -/* - * Terminate the server - */ -int _odp_fdserver_term_global(void) -{ - int status; - char sockpath[FDSERVER_SOCKPATH_MAXLEN]; - - /* close the server and wait for child terminaison*/ - stop_server(); - wait(&status); - - /* construct the server named socket path: */ - snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCK_FORMAT, - odp_global_data.shm_dir, - odp_global_data.uid, - odp_global_data.main_pid); - - /* delete the UNIX domain socket: */ - unlink(sockpath); - - /* delete shm files directory */ - snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCKDIR_FORMAT, - odp_global_data.shm_dir, - odp_global_data.uid); - rmdir(sockpath); - - return 0; -} diff --git a/platform/linux-generic/_ishm.c b/platform/linux-generic/_ishm.c deleted file mode 100644 index ab112acea..000000000 --- a/platform/linux-generic/_ishm.c +++ /dev/null @@ -1,1897 +0,0 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include "config.h" - -/* This file handles the internal shared memory: internal shared memory - * is memory which is sharable by all ODP threads regardless of how the - * ODP thread is implemented (pthread or process) and regardless of fork() - * time. - * Moreover, when reserved with the _ODP_ISHM_SINGLE_VA flag, - * internal shared memory is guaranteed to always be located at the same virtual - * address, i.e. 
pointers to internal shared memory are fully shareable - * between odp threads (regardless of thread type or fork time) in that case. - * Internal shared memory is mainly meant to be used internaly within ODP - * (hence its name), but may also be allocated by odp applications and drivers, - * in the future (through these interfaces). - * To guarrentee this full pointer shareability (when reserved with the - * _ODP_ISHM_SINGLE_VA flag) internal shared memory is handled as follows: - * At global_init time, a huge virtual address space reservation is performed. - * Note that this is just reserving virtual space, not physical memory. - * Because all ODP threads (pthreads or processes) are descendants of the ODP - * instantiation process, this VA space is inherited by all ODP threads. - * When internal shmem reservation actually occurs, and - * when reserved with the _ODP_ISHM_SINGLE_VA flag, physical memory is - * allocated, and mapped (MAP_FIXED) to some part in the huge preallocated - * address space area: - * because this virtual address space is common to all ODP threads, we - * know this mapping will succeed, and not clash with anything else. - * Hence, an ODP threads which perform a lookup for the same ishm block - * can map it at the same VA address. - * When internal shared memory is released, the physical memory is released - * and the corresponding virtual space returned to its "pool" of preallocated - * virtual space (assuming it was allocated from there). - * Note, though, that, if 2 linux processes share the same ishm block, - * the virtual space is marked as released as soon as one of the processes - * releases the ishm block, but the physical memory space is actually released - * by the kernel once all processes have done a ishm operation (i,e. a sync). - * This is due to the fact that linux does not contain any syscall to unmap - * memory from a different process. 
- * - * This file contains functions to handle the VA area (handling fragmentation - * and defragmentation resulting from different allocs/release) and also - * define the functions to allocate, release and lookup internal shared - * memory: - * _odp_ishm_reserve(), _odp_ishm_free*() and _odp_ishm_lookup*()... - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include <_fdserver_internal.h> -#include <_ishm_internal.h> -#include <_ishmphy_internal.h> -#include <_ishmpool_internal.h> -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Maximum number of internal shared memory blocks. - * - * This is the number of separate ISHM areas that can be reserved concurrently - * (Note that freeing such blocks may take time, or possibly never happen - * if some of the block ownwers never procsync() after free). This number - * should take that into account) - */ -#define ISHM_MAX_NB_BLOCKS 128 - -/* - * Maximum internal shared memory block name length in chars - * probably taking the same number as SHM name size make sense at this stage - */ -#define ISHM_NAME_MAXLEN 128 - -/* - * Linux underlying file name: /odp--ishm- - * The part may be replaced by a sequence number if no specific - * name is given at reserve time - * is either /dev/shm or the hugepagefs mount point for default - * size. 
- * (searched at init time) - */ -#define ISHM_FILENAME_MAXLEN (ISHM_NAME_MAXLEN + 64) -#define ISHM_FILENAME_FORMAT "%s/odp-%d-ishm-%s" -#define ISHM_FILENAME_NORMAL_PAGE_DIR "/dev/shm" -#define _ODP_FILES_FMT "odp-%d-" - -/* - * when the memory is to be shared with an external entity (such as another - * ODP instance or an OS process not part of this ODP instance) then a - * export file is created describing the exported memory: this defines the - * location and the filename format of this description file - */ -#define ISHM_EXPTNAME_FORMAT "%s/%s/odp-%d-shm-%s" - -/* - * At worse case the virtual space gets so fragmented that there is - * a unallocated fragment between each allocated fragment: - * In that case, the number of fragments to take care of is twice the - * number of ISHM blocks + 1. - */ -#define ISHM_NB_FRAGMNTS (ISHM_MAX_NB_BLOCKS * 2 + 1) - -/* - * when a memory block is to be exported outside its ODP instance, - * an block 'attribute file' is created in /dev/shm/odp--shm-. - * The information given in this file is according to the following: - */ -#define EXPORT_FILE_LINE1_FMT "ODP exported shm block info:" -#define EXPORT_FILE_LINE2_FMT "ishm_blockname: %s" -#define EXPORT_FILE_LINE3_FMT "file: %s" -#define EXPORT_FILE_LINE4_FMT "length: %" PRIu64 -#define EXPORT_FILE_LINE5_FMT "flags: %" PRIu32 -#define EXPORT_FILE_LINE6_FMT "user_length: %" PRIu64 -#define EXPORT_FILE_LINE7_FMT "user_flags: %" PRIu32 -#define EXPORT_FILE_LINE8_FMT "align: %" PRIu32 -/* - * A fragment describes a piece of the shared virtual address space, - * and is allocated only when allocation is done with the _ODP_ISHM_SINGLE_VA - * flag: - * A fragment is said to be used when it actually does represent some - * portion of the virtual address space, and is said to be unused when - * it does not (so at start, one single fragment is used -describing the - * whole address space as unallocated-, and all others are unused). 
- * Fragments get used as address space fragmentation increases. - * A fragment is allocated if the piece of address space it - * describes is actually used by a shared memory block. - * Allocated fragments get their block_index set >=0. - */ -typedef struct ishm_fragment { - struct ishm_fragment *prev; /* not used when the fragment is unused */ - struct ishm_fragment *next; - void *start; /* start of segment (VA) */ - uintptr_t len; /* length of segment. multiple of page size */ - int block_index; /* -1 for unallocated fragments */ -} ishm_fragment_t; - -/* - * A block describes a piece of reserved memory: Any successful ishm_reserve() - * will allocate a block. A ishm_reserve() with the _ODP_ISHM_SINGLE_VA flag set - * will allocate both a block and a fragment. - * Blocks contain only global data common to all processes. - */ -typedef enum {UNKNOWN, HUGE, NORMAL, EXTERNAL} huge_flag_t; -typedef struct ishm_block { - char name[ISHM_NAME_MAXLEN]; /* name for the ishm block (if any) */ - char filename[ISHM_FILENAME_MAXLEN]; /* name of the .../odp-* file */ - char exptname[ISHM_FILENAME_MAXLEN]; /* name of the export file */ - uint32_t user_flags; /* any flags the user want to remember. */ - uint32_t flags; /* block creation flags. */ - uint32_t external_fd:1; /* block FD was externally provided */ - uint64_t user_len; /* length, as requested at reserve time. */ - void *start; /* only valid if _ODP_ISHM_SINGLE_VA is set*/ - uint64_t len; /* length. multiple of page size. 0 if free*/ - ishm_fragment_t *fragment; /* used when _ODP_ISHM_SINGLE_VA is used */ - huge_flag_t huge; /* page type: external means unknown here. */ - uint64_t seq; /* sequence number, incremented on alloc and free */ - uint64_t refcnt;/* number of linux processes mapping this block */ -} ishm_block_t; - -/* - * Table of blocks describing allocated internal shared memory - * This table is visible to every ODP thread (linux process or pthreads). 
- * (it is allocated shared at odp init time and is therefore inherited by all) - * Table index is used as handle, so it cannot move!. Entry is regarded as - * free when len==0 - */ -typedef struct { - odp_spinlock_t lock; - uint64_t dev_seq; /* used when creating device names */ - uint32_t odpthread_cnt; /* number of running ODP threads */ - ishm_block_t block[ISHM_MAX_NB_BLOCKS]; -} ishm_table_t; -static ishm_table_t *ishm_tbl; - -/* - * Process local table containing the list of (believed) allocated blocks seen - * from the current process. There is one such table per linux process. linux - * threads within a process shares this table. - * The contents within this table may become obsolete when other processes - * reserve/free ishm blocks. This is what the procsync() function - * catches by comparing the block sequence number with the one in this table. - * This table is filled at ishm_reserve and ishm_lookup time. - * Entries are removed at ishm_free or procsync time. - * Note that flags and len are present in this table and seems to be redundant - * with those present in the ishm block table: but this is not fully true: - * When ishm_sync() detects obsolete mappings and tries to remove them, - * the entry in the ishm block table is then obsolete, and the values which are - * found in this table must be used to perform the ummap. - * (and the values in the block tables are needed at lookup time...) - */ -typedef struct { - int thrd_refcnt; /* number of pthreads in this process, really */ - struct { - int block_index; /* entry in the ishm_tbl */ - uint32_t flags; /* flags used at creation time */ - uint64_t seq; - void *start; /* start of block (VA) */ - uint64_t len; /* length of block. 
multiple of page size */ - int fd; /* file descriptor used for this block */ - } entry[ISHM_MAX_NB_BLOCKS]; - int nb_entries; -} ishm_proctable_t; -static ishm_proctable_t *ishm_proctable; - -/* - * Table of fragments describing the common virtual address space: - * This table is visible to every ODP thread (linux process or pthreads). - * (it is allocated at odp init time and is therefore inherited by all) - */ -typedef struct { - ishm_fragment_t fragment[ISHM_NB_FRAGMNTS]; - ishm_fragment_t *used_fragmnts; /* ordered by increasing start addr */ - ishm_fragment_t *unused_fragmnts; -} ishm_ftable_t; -static ishm_ftable_t *ishm_ftbl; - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -/* prototypes: */ -static void procsync(void); - -/* - * Take a piece of the preallocated virtual space to fit "size" bytes. - * (best fit). Size must be rounded up to an integer number of pages size. - * Possibly split the fragment to keep track of remaining space. - * Returns the allocated fragment (best_fragment) and the corresponding address. - * External caller must ensure mutex before the call! - */ -static void *alloc_fragment(uintptr_t size, int block_index, intptr_t align, - ishm_fragment_t **best_fragmnt) -{ - ishm_fragment_t *fragmnt; - *best_fragmnt = NULL; - ishm_fragment_t *rem_fragmnt; - uintptr_t border;/* possible start of new fragment (next alignement) */ - intptr_t left; /* room remaining after, if the segment is allocated */ - uintptr_t remainder = odp_global_data.shm_max_memory; - - /* - * search for the best bit, i.e. 
search for the unallocated fragment - * would give less remainder if the new fragment was allocated within - * it: - */ - for (fragmnt = ishm_ftbl->used_fragmnts; - fragmnt; fragmnt = fragmnt->next) { - /* skip allocated segment: */ - if (fragmnt->block_index >= 0) - continue; - /* skip too short segment: */ - border = ((uintptr_t)fragmnt->start + align - 1) & (-align); - left = - ((uintptr_t)fragmnt->start + fragmnt->len) - (border + size); - if (left < 0) - continue; - /* remember best fit: */ - if ((uintptr_t)left < remainder) { - remainder = left; /* best, so far */ - *best_fragmnt = fragmnt; - } - } - - if (!(*best_fragmnt)) { - ODP_ERR("unable to get virtual address for shmem block!\n."); - return NULL; - } - - (*best_fragmnt)->block_index = block_index; - border = ((uintptr_t)(*best_fragmnt)->start + align - 1) & (-align); - - /* - * if there is room between previous fragment and new one, (due to - * alignment requirement) then fragment (split) the space between - * the end of the previous fragment and the beginning of the new one: - */ - if (border - (uintptr_t)(*best_fragmnt)->start > 0) { - /* fragment space, i.e. take a new fragment descriptor... 
*/ - rem_fragmnt = ishm_ftbl->unused_fragmnts; - if (!rem_fragmnt) { - ODP_ERR("unable to get shmem fragment descriptor!\n."); - return NULL; - } - ishm_ftbl->unused_fragmnts = rem_fragmnt->next; - - /* and link it between best_fragmnt->prev and best_fragmnt */ - if ((*best_fragmnt)->prev) - (*best_fragmnt)->prev->next = rem_fragmnt; - else - ishm_ftbl->used_fragmnts = rem_fragmnt; - rem_fragmnt->prev = (*best_fragmnt)->prev; - (*best_fragmnt)->prev = rem_fragmnt; - rem_fragmnt->next = (*best_fragmnt); - - /* update length: rem_fragmnt getting space before border */ - rem_fragmnt->block_index = -1; - rem_fragmnt->start = (*best_fragmnt)->start; - rem_fragmnt->len = border - (uintptr_t)(*best_fragmnt)->start; - (*best_fragmnt)->start = - (void *)((uintptr_t)rem_fragmnt->start + rem_fragmnt->len); - (*best_fragmnt)->len -= rem_fragmnt->len; - } - - /* if this was a perfect fit, i.e. no free space follows, we are done */ - if (remainder == 0) - return (*best_fragmnt)->start; - - /* otherwise, fragment space, i.e. take a new fragment descriptor... */ - rem_fragmnt = ishm_ftbl->unused_fragmnts; - if (!rem_fragmnt) { - ODP_ERR("unable to get shmem fragment descriptor!\n."); - return (*best_fragmnt)->start; - } - ishm_ftbl->unused_fragmnts = rem_fragmnt->next; - - /* ... double link it... */ - rem_fragmnt->next = (*best_fragmnt)->next; - rem_fragmnt->prev = (*best_fragmnt); - if ((*best_fragmnt)->next) - (*best_fragmnt)->next->prev = rem_fragmnt; - (*best_fragmnt)->next = rem_fragmnt; - - /* ... and keep track of the remainder */ - (*best_fragmnt)->len = size; - rem_fragmnt->len = remainder; - rem_fragmnt->start = (void *)((char *)(*best_fragmnt)->start + size); - rem_fragmnt->block_index = -1; - - return (*best_fragmnt)->start; -} - -/* - * Free a portion of virtual space. - * Possibly defragment, if the freed fragment is adjacent to another - * free virtual fragment. - * External caller must ensure mutex before the call! 
- */ -static void free_fragment(ishm_fragment_t *fragmnt) -{ - ishm_fragment_t *prev_f; - ishm_fragment_t *next_f; - - /* sanity check */ - if (!fragmnt) - return; - - prev_f = fragmnt->prev; - next_f = fragmnt->next; - - /* free the fragment */ - fragmnt->block_index = -1; - - /* check if the previous fragment is also free: if so, defragment */ - if (prev_f && (prev_f->block_index < 0)) { - fragmnt->start = prev_f->start; - fragmnt->len += prev_f->len; - if (prev_f->prev) { - prev_f->prev->next = fragmnt; - } else { - if (ishm_ftbl->used_fragmnts == prev_f) - ishm_ftbl->used_fragmnts = fragmnt; - else - ODP_ERR("corrupted fragment list!.\n"); - } - fragmnt->prev = prev_f->prev; - - /* put removed fragment in free list */ - prev_f->prev = NULL; - prev_f->next = ishm_ftbl->unused_fragmnts; - ishm_ftbl->unused_fragmnts = prev_f; - } - - /* check if the next fragment is also free: if so, defragment */ - if (next_f && (next_f->block_index < 0)) { - fragmnt->len += next_f->len; - if (next_f->next) - next_f->next->prev = fragmnt; - fragmnt->next = next_f->next; - - /* put removed fragment in free list */ - next_f->prev = NULL; - next_f->next = ishm_ftbl->unused_fragmnts; - ishm_ftbl->unused_fragmnts = next_f; - } -} - -/* - * Create file with size len. returns -1 on error - * Creates a file to /dev/shm/odp-- (for normal pages) - * or /mnt/huge/odp-- (for huge pages) - * Return the new file descriptor, or -1 on error. 
- */ -static int create_file(int block_index, huge_flag_t huge, uint64_t len, - uint32_t flags, uint32_t align) -{ - char *name; - int fd; - ishm_block_t *new_block; /* entry in the main block table */ - char seq_string[ISHM_FILENAME_MAXLEN]; /* used to construct filename*/ - char filename[ISHM_FILENAME_MAXLEN]; /* filename in /dev/shm or - * /mnt/huge */ - int oflag = O_RDWR | O_CREAT | O_TRUNC; /* flags for open */ - FILE *export_file; - char dir[ISHM_FILENAME_MAXLEN]; - - new_block = &ishm_tbl->block[block_index]; - name = new_block->name; - - /* create the filename: */ - snprintf(seq_string, ISHM_FILENAME_MAXLEN, "%08" PRIu64, - ishm_tbl->dev_seq++); - - /* huge dir must be known to create files there!: */ - if ((huge == HUGE) && - (!odp_global_data.hugepage_info.default_huge_page_dir)) - return -1; - - if (huge == HUGE) - snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s", - odp_global_data.hugepage_info.default_huge_page_dir, - odp_global_data.uid); - else - snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s", - odp_global_data.shm_dir, - odp_global_data.uid); - - snprintf(filename, ISHM_FILENAME_MAXLEN, - ISHM_FILENAME_FORMAT, - dir, - odp_global_data.main_pid, - (name && name[0]) ? 
name : seq_string); - - mkdir(dir, 0744); - - fd = open(filename, oflag, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (fd < 0) { - if (huge == HUGE) - ODP_DBG("open failed for %s: %s.\n", - filename, strerror(errno)); - else - ODP_ERR("open failed for %s: %s.\n", - filename, strerror(errno)); - return -1; - } - - if (ftruncate(fd, len) == -1) { - ODP_ERR("ftruncate failed: fd=%d, err=%s.\n", - fd, strerror(errno)); - close(fd); - unlink(filename); - return -1; - } - - - /* if _ODP_ISHM_EXPORT is set, create a description file for - * external ref: - */ - if (flags & _ODP_ISHM_EXPORT) { - strncpy(new_block->filename, filename, - ISHM_FILENAME_MAXLEN - 1); - snprintf(new_block->exptname, ISHM_FILENAME_MAXLEN, - ISHM_EXPTNAME_FORMAT, - odp_global_data.shm_dir, - odp_global_data.uid, - odp_global_data.main_pid, - (name && name[0]) ? name : seq_string); - export_file = fopen(new_block->exptname, "w"); - if (export_file == NULL) { - ODP_ERR("open failed: err=%s.\n", - strerror(errno)); - new_block->exptname[0] = 0; - } else { - fprintf(export_file, EXPORT_FILE_LINE1_FMT "\n"); - fprintf(export_file, EXPORT_FILE_LINE2_FMT "\n", name); - fprintf(export_file, EXPORT_FILE_LINE3_FMT "\n", - new_block->filename); - fprintf(export_file, EXPORT_FILE_LINE4_FMT "\n", len); - fprintf(export_file, EXPORT_FILE_LINE5_FMT "\n", flags); - fprintf(export_file, EXPORT_FILE_LINE6_FMT "\n", - new_block->user_len); - fprintf(export_file, EXPORT_FILE_LINE7_FMT "\n", - new_block->user_flags); - fprintf(export_file, EXPORT_FILE_LINE8_FMT "\n", align); - - fclose(export_file); - } - } else { - new_block->exptname[0] = 0; - /* remove the file from the filesystem, keeping its fd open */ - unlink(filename); - } - - return fd; -} - -/* delete the files related to a given ishm block: */ -static void delete_file(ishm_block_t *block) -{ - /* remove the .../odp-* file, unless fd was external: */ - if (block->filename[0] != 0) - unlink(block->filename); - /* also remove possible description file (if 
block was exported): */ - if (block->exptname[0] != 0) - unlink(block->exptname); -} - -/* - * performs the mapping, possibly allocating a fragment of the pre-reserved - * VA space if the _ODP_ISHM_SINGLE_VA flag was given. - * Sets fd, and returns the mapping address. - * This function will also set the _ODP_ISHM_SINGLE_VA flag if the alignment - * requires it - * Mutex must be assured by the caller. - */ -static void *do_map(int block_index, uint64_t len, uint32_t align, - uint32_t flags, huge_flag_t huge, int *fd) -{ - ishm_block_t *new_block; /* entry in the main block table */ - void *addr = NULL; - void *mapped_addr; - ishm_fragment_t *fragment = NULL; - - new_block = &ishm_tbl->block[block_index]; - - /* - * Creates a file to /dev/shm/odp-- (for normal pages) - * or /mnt/huge/odp-- (for huge pages) - * unless a fd was already given - */ - if (*fd < 0) { - *fd = create_file(block_index, huge, len, flags, align); - if (*fd < 0) - return NULL; - } else { - new_block->filename[0] = 0; - } - - /* allocate an address range in the prebooked VA area if needed */ - if (flags & _ODP_ISHM_SINGLE_VA) { - addr = alloc_fragment(len, block_index, align, &fragment); - if (!addr) { - ODP_ERR("alloc_fragment failed.\n"); - if (!new_block->external_fd) { - close(*fd); - *fd = -1; - delete_file(new_block); - } - return NULL; - } - ishm_tbl->block[block_index].fragment = fragment; - } - - /* try to mmap: */ - mapped_addr = _odp_ishmphy_map(*fd, addr, len, flags); - if (mapped_addr == NULL) { - if (flags & _ODP_ISHM_SINGLE_VA) - free_fragment(fragment); - if (!new_block->external_fd) { - close(*fd); - *fd = -1; - delete_file(new_block); - } - return NULL; - } - - return mapped_addr; -} - -/* - * Performs an extra mapping (for a process trying to see an existing block - * i.e. performing a lookup). - * Mutex must be assured by the caller. 
- */ -static void *do_remap(int block_index, int fd) -{ - void *mapped_addr; - ishm_fragment_t *fragment; - uint64_t len; - uint32_t flags; - - len = ishm_tbl->block[block_index].len; - flags = ishm_tbl->block[block_index].flags; - - if (flags & _ODP_ISHM_SINGLE_VA) { - fragment = ishm_tbl->block[block_index].fragment; - if (!fragment) { - ODP_ERR("invalid fragment failure.\n"); - return NULL; - } - - /* try to mmap: */ - mapped_addr = _odp_ishmphy_map(fd, fragment->start, len, flags); - if (mapped_addr == NULL) - return NULL; - return mapped_addr; - } - - /* try to mmap: */ - mapped_addr = _odp_ishmphy_map(fd, NULL, len, flags); - if (mapped_addr == NULL) - return NULL; - - return mapped_addr; -} - -/* - * Performs unmapping, possibly freeing a prereserved VA space fragment, - * if the _ODP_ISHM_SINGLE_VA flag was set at alloc time - * Mutex must be assured by the caller. - */ -static int do_unmap(void *start, uint64_t size, uint32_t flags, - int block_index) -{ - int ret; - - if (start) - ret = _odp_ishmphy_unmap(start, size, flags); - else - ret = 0; - - if ((block_index >= 0) && (flags & _ODP_ISHM_SINGLE_VA)) { - /* mark reserved address space as free */ - free_fragment(ishm_tbl->block[block_index].fragment); - } - - return ret; -} - -/* - * Search for a given used and allocated block name. - * (search is performed in the global ishm table) - * Returns the index of the found block (if any) or -1 if none. - * Mutex must be assured by the caller. - */ -static int find_block_by_name(const char *name) -{ - int i; - - if (name == NULL || name[0] == 0) - return -1; - - for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { - if ((ishm_tbl->block[i].len) && - (strcmp(name, ishm_tbl->block[i].name) == 0)) - return i; - } - - return -1; -} - -/* - * Search for a block by address (only works when flag _ODP_ISHM_SINGLE_VA - * was set at reserve() time, or if the block is already known by this - * process). - * Search is performed in the process table and in the global ishm table. 
- * The provided address does not have to be at start: any address - * within the fragment is OK. - * Returns the index to the found block (if any) or -1 if none. - * Mutex must be assured by the caller. - */ -static int find_block_by_address(void *addr) -{ - int block_index; - int i; - ishm_fragment_t *fragmnt; - - /* - * first check if there is already a process known block for this - * address - */ - for (i = 0; i < ishm_proctable->nb_entries; i++) { - block_index = ishm_proctable->entry[i].block_index; - if ((addr > ishm_proctable->entry[i].start) && - ((char *)addr < ((char *)ishm_proctable->entry[i].start + - ishm_tbl->block[block_index].len))) - return block_index; - } - - /* - * then check if there is a existing single VA block known by some other - * process and containing the given address - */ - for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { - if ((!ishm_tbl->block[i].len) || - (!(ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA))) - continue; - fragmnt = ishm_tbl->block[i].fragment; - if (!fragmnt) { - ODP_ERR("find_fragment: invalid NULL fragment\n"); - return -1; - } - if ((addr >= fragmnt->start) && - ((char *)addr < ((char *)fragmnt->start + fragmnt->len))) - return i; - } - - /* address does not belong to any accessible block: */ - return -1; -} - -/* - * Search a given ishm block in the process local table. Return its index - * in the process table or -1 if not found (meaning that the ishm table - * block index was not referenced in the process local table, i.e. the - * block is known by some other process, but not by the current process). - * Caller must assure mutex. - */ -static int procfind_block(int block_index) -{ - int i; - - for (i = 0; i < ishm_proctable->nb_entries; i++) { - if (ishm_proctable->entry[i].block_index == block_index) - return i; - } - return -1; -} - -/* - * Release the physical memory mapping for blocks which have been freed - * by other processes. Caller must ensure mutex. - * Mutex must be assured by the caller. 
- */ -static void procsync(void) -{ - int i = 0; - int last; - ishm_block_t *block; - - last = ishm_proctable->nb_entries; - while (i < last) { - /* if the process sequence number doesn't match the main - * table seq number, this entry is obsolete - */ - block = &ishm_tbl->block[ishm_proctable->entry[i].block_index]; - if (ishm_proctable->entry[i].seq != block->seq) { - /* obsolete entry: free memory and remove proc entry */ - close(ishm_proctable->entry[i].fd); - _odp_ishmphy_unmap(ishm_proctable->entry[i].start, - ishm_proctable->entry[i].len, - ishm_proctable->entry[i].flags); - ishm_proctable->entry[i] = - ishm_proctable->entry[--last]; - } else { - i++; - } - } - ishm_proctable->nb_entries = last; -} - -/* - * Allocate and map internal shared memory, or other objects: - * If a name is given, check that this name is not already in use. - * If ok, allocate a new shared memory block and map the - * provided fd in it (if fd >=0 was given). - * If no fd is provided, a shared memory file desc named - * /dev/shm/odp--ishm- is created and mapped. - * (the name is different for huge page file as they must be on hugepagefs) - * The function returns the index of the newly created block in the - * main block table (>=0) or -1 on error. - */ -int _odp_ishm_reserve(const char *name, uint64_t size, int fd, - uint32_t align, uint32_t flags, uint32_t user_flags) -{ - int new_index; /* index in the main block table*/ - ishm_block_t *new_block; /* entry in the main block table*/ - uint64_t page_sz; /* normal page size. usually 4K*/ - uint64_t page_hp_size; /* huge page size */ - uint32_t hp_align; - uint64_t len; /* mapped length */ - void *addr = NULL; /* mapping address */ - int new_proc_entry; - struct stat statbuf; - static int huge_error_printed; /* to avoid millions of error...*/ - - odp_spinlock_lock(&ishm_tbl->lock); - - /* update this process view... 
*/ - procsync(); - - /* Get system page sizes: page_hp_size is 0 if no huge page available*/ - page_sz = odp_sys_page_size(); - page_hp_size = odp_sys_huge_page_size(); - - /* grab a new entry: */ - for (new_index = 0; new_index < ISHM_MAX_NB_BLOCKS; new_index++) { - if (ishm_tbl->block[new_index].len == 0) { - /* Found free block */ - break; - } - } - - /* check if we have reached the maximum number of allocation: */ - if (new_index >= ISHM_MAX_NB_BLOCKS) { - odp_spinlock_unlock(&ishm_tbl->lock); - ODP_ERR("ISHM_MAX_NB_BLOCKS limit reached!\n"); - return -1; - } - - new_block = &ishm_tbl->block[new_index]; - - /* save block name (if any given): */ - if (name) - strncpy(new_block->name, name, ISHM_NAME_MAXLEN - 1); - else - new_block->name[0] = 0; - - /* save user data: */ - new_block->user_flags = user_flags; - new_block->user_len = size; - - /* If a file descriptor is provided, get the real size and map: */ - if (fd >= 0) { - if (fstat(fd, &statbuf) < 0) { - close(fd); - odp_spinlock_unlock(&ishm_tbl->lock); - ODP_ERR("_ishm_reserve failed (fstat failed: %s).\n", - strerror(errno)); - __odp_errno = errno; - return -1; - } - len = statbuf.st_size; - /* note that the huge page flag is meningless here as huge - * page is determined by the provided file descriptor: */ - addr = do_map(new_index, len, align, flags, EXTERNAL, &fd); - if (addr == NULL) { - close(fd); - odp_spinlock_unlock(&ishm_tbl->lock); - ODP_ERR("_ishm_reserve failed.\n"); - return -1; - } - new_block->huge = EXTERNAL; - new_block->external_fd = 1; - } else { - new_block->external_fd = 0; - } - - /* Otherwise, Try first huge pages when possible and needed: */ - if ((fd < 0) && page_hp_size && (size > page_sz)) { - /* at least, alignment in VA should match page size, but user - * can request more: If the user requirement exceeds the page - * size then we have to make sure the block will be mapped at - * the same address every where, otherwise alignment may be - * be wrong for some process */ - 
hp_align = align; - if (hp_align <= page_hp_size) - hp_align = page_hp_size; - else - flags |= _ODP_ISHM_SINGLE_VA; - - /* roundup to page size */ - len = (size + (page_hp_size - 1)) & (-page_hp_size); - addr = do_map(new_index, len, hp_align, flags, HUGE, &fd); - - if (addr == NULL) { - if (!huge_error_printed) { - ODP_ERR("No huge pages, fall back to normal " - "pages. " - "check: /proc/sys/vm/nr_hugepages.\n"); - huge_error_printed = 1; - } - } else { - new_block->huge = HUGE; - } - } - - /* Try normal pages if huge pages failed */ - if (fd < 0) { - /* at least, alignment in VA should match page size, but user - * can request more: If the user requirement exceeds the page - * size then we have to make sure the block will be mapped at - * the same address every where, otherwise alignment may be - * be wrong for some process */ - if (align <= odp_sys_page_size()) - align = odp_sys_page_size(); - else - flags |= _ODP_ISHM_SINGLE_VA; - - /* roundup to page size */ - len = (size + (page_sz - 1)) & (-page_sz); - addr = do_map(new_index, len, align, flags, NORMAL, &fd); - new_block->huge = NORMAL; - } - - /* if neither huge pages or normal pages works, we cannot proceed: */ - if ((fd < 0) || (addr == NULL) || (len == 0)) { - if ((!new_block->external_fd) && (fd >= 0)) - close(fd); - delete_file(new_block); - odp_spinlock_unlock(&ishm_tbl->lock); - ODP_ERR("_ishm_reserve failed.\n"); - return -1; - } - - /* remember block data and increment block seq number to mark change */ - new_block->len = len; - new_block->user_len = size; - new_block->flags = flags; - new_block->user_flags = user_flags; - new_block->seq++; - new_block->refcnt = 1; - new_block->start = addr; /* only for SINGLE_VA*/ - - /* the allocation succeeded: update the process local view */ - new_proc_entry = ishm_proctable->nb_entries++; - ishm_proctable->entry[new_proc_entry].block_index = new_index; - ishm_proctable->entry[new_proc_entry].flags = flags; - ishm_proctable->entry[new_proc_entry].seq = 
new_block->seq; - ishm_proctable->entry[new_proc_entry].start = addr; - ishm_proctable->entry[new_proc_entry].len = len; - ishm_proctable->entry[new_proc_entry].fd = fd; - - /* register the file descriptor to the file descriptor server. */ - _odp_fdserver_register_fd(FD_SRV_CTX_ISHM, new_index, fd); - - odp_spinlock_unlock(&ishm_tbl->lock); - return new_index; -} - -/* - * Try to map an memory block mapped by another ODP instance into the - * current ODP instance. - * returns 0 on success. - */ -int _odp_ishm_find_exported(const char *remote_name, pid_t external_odp_pid, - const char *local_name) -{ - char export_filename[ISHM_FILENAME_MAXLEN]; - char blockname[ISHM_FILENAME_MAXLEN]; - char filename[ISHM_FILENAME_MAXLEN]; - FILE *export_file; - uint64_t len; - uint32_t flags; - uint64_t user_len; - uint32_t user_flags; - uint32_t align; - int fd; - int block_index; - - /* try to read the block description file: */ - snprintf(export_filename, ISHM_FILENAME_MAXLEN, - ISHM_EXPTNAME_FORMAT, - odp_global_data.shm_dir, - odp_global_data.uid, - external_odp_pid, - remote_name); - - export_file = fopen(export_filename, "r"); - - if (export_file == NULL) { - ODP_ERR("Error opening %s.\n", export_filename); - return -1; - } - - if (fscanf(export_file, EXPORT_FILE_LINE1_FMT " ") != 0) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE2_FMT " ", blockname) != 1) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE3_FMT " ", filename) != 1) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE4_FMT " ", &len) != 1) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE5_FMT " ", &flags) != 1) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE6_FMT " ", &user_len) != 1) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE7_FMT " ", &user_flags) != 1) - goto error_exp_file; - - if (fscanf(export_file, EXPORT_FILE_LINE8_FMT " ", &align) != 1) - goto error_exp_file; - - fclose(export_file); - 
- /* now open the filename given in the description file: */ - fd = open(filename, O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (fd == -1) { - ODP_ERR("open failed for %s: %s.\n", - filename, strerror(errno)); - return -1; - } - - /* clear the _ODP_ISHM_EXPORT flag so we don't export that again*/ - flags &= ~(uint32_t)_ODP_ISHM_EXPORT; - - /* reserve the memory, providing the opened file descriptor: */ - block_index = _odp_ishm_reserve(local_name, 0, fd, align, flags, 0); - if (block_index < 0) { - close(fd); - return block_index; - } - - /* set inherited info: */ - ishm_tbl->block[block_index].user_flags = user_flags; - ishm_tbl->block[block_index].user_len = user_len; - - return block_index; - -error_exp_file: - fclose(export_file); - ODP_ERR("Error reading %s.\n", export_filename); - return -1; -} - -/* - * Free and unmap internal shared memory: - * The file descriptor is closed and the .../odp-* file deleted, - * unless fd was externally provided at reserve() time. - * return 0 if OK, and -1 on error. - * Mutex must be assured by the caller. 
- */ -static int block_free(int block_index) -{ - int proc_index; - ishm_block_t *block; /* entry in the main block table*/ - int last; - - if ((block_index < 0) || - (block_index >= ISHM_MAX_NB_BLOCKS) || - (ishm_tbl->block[block_index].len == 0)) { - ODP_ERR("Request to free an invalid block\n"); - return -1; - } - - block = &ishm_tbl->block[block_index]; - - proc_index = procfind_block(block_index); - if (proc_index >= 0) { - /* close the related fd */ - close(ishm_proctable->entry[proc_index].fd); - - /* remove the mapping and possible fragment */ - do_unmap(ishm_proctable->entry[proc_index].start, - block->len, - ishm_proctable->entry[proc_index].flags, - block_index); - - /* remove entry from process local table: */ - last = ishm_proctable->nb_entries - 1; - ishm_proctable->entry[proc_index] = - ishm_proctable->entry[last]; - ishm_proctable->nb_entries = last; - } else { - /* just possibly free the fragment as no mapping exist here: */ - do_unmap(NULL, 0, block->flags, block_index); - } - - /* remove all files related to this block: */ - delete_file(block); - - /* deregister the file descriptor from the file descriptor server. */ - _odp_fdserver_deregister_fd(FD_SRV_CTX_ISHM, block_index); - - /* mark the block as free in the main block table: */ - block->len = 0; - - /* mark the change so other processes see this entry as obsolete: */ - block->seq++; - - return 0; -} - -/* - * Free and unmap internal shared memory, identified by its block number: - * return -1 on error. 0 if OK. - */ -int _odp_ishm_free_by_index(int block_index) -{ - int ret; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - ret = block_free(block_index); - odp_spinlock_unlock(&ishm_tbl->lock); - return ret; -} - -/* - * free and unmap internal shared memory, identified by its block name: - * return -1 on error. 0 if OK. 
- */ -int _odp_ishm_free_by_name(const char *name) -{ - int block_index; - int ret; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - /* search the block in main ishm table */ - block_index = find_block_by_name(name); - if (block_index < 0) { - ODP_ERR("Request to free an non existing block..." - " (double free?)\n"); - odp_spinlock_unlock(&ishm_tbl->lock); - return -1; - } - - ret = block_free(block_index); - odp_spinlock_unlock(&ishm_tbl->lock); - return ret; -} - -/* - * Free and unmap internal shared memory identified by address: - * return -1 on error. 0 if OK. - */ -int _odp_ishm_free_by_address(void *addr) -{ - int block_index; - int ret; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - /* search the block in main ishm table */ - block_index = find_block_by_address(addr); - if (block_index < 0) { - ODP_ERR("Request to free an non existing block..." - " (double free?)\n"); - odp_spinlock_unlock(&ishm_tbl->lock); - return -1; - } - - ret = block_free(block_index); - - odp_spinlock_unlock(&ishm_tbl->lock); - return ret; -} - -/* - * Lookup for an ishm shared memory, identified by its block index - * in the main ishm block table. - * Map this ishm area in the process VA (if not already present). - * Returns the block user address or NULL on error. - * Mutex must be assured by the caller. 
- */ -static void *block_lookup(int block_index) -{ - int proc_index; - int fd = -1; - ishm_block_t *block; - void *mapped_addr; - int new_entry; - - if ((block_index < 0) || - (block_index >= ISHM_MAX_NB_BLOCKS) || - (ishm_tbl->block[block_index].len == 0)) { - ODP_ERR("Request to lookup an invalid block\n"); - return NULL; - } - - /* search it in process table: if there, this process knows it already*/ - proc_index = procfind_block(block_index); - if (proc_index >= 0) - return ishm_proctable->entry[proc_index].start; - - /* this ishm is not known by this process, yet: we create the mapping.*/ - fd = _odp_fdserver_lookup_fd(FD_SRV_CTX_ISHM, block_index); - if (fd < 0) { - ODP_ERR("Could not find ishm file descriptor (BUG!)\n"); - return NULL; - } - - /* perform the mapping */ - block = &ishm_tbl->block[block_index]; - - mapped_addr = do_remap(block_index, fd); - if (mapped_addr == NULL) { - ODP_ERR(" lookup: Could not map existing shared memory!\n"); - return NULL; - } - - /* the mapping succeeded: update the process local view */ - new_entry = ishm_proctable->nb_entries++; - ishm_proctable->entry[new_entry].block_index = block_index; - ishm_proctable->entry[new_entry].flags = block->flags; - ishm_proctable->entry[new_entry].seq = block->seq; - ishm_proctable->entry[new_entry].start = mapped_addr; - ishm_proctable->entry[new_entry].len = block->len; - ishm_proctable->entry[new_entry].fd = fd; - block->refcnt++; - - return mapped_addr; -} - -/* - * Lookup for an ishm shared memory, identified by its block_index. - * Maps this ishmem area in the process VA (if not already present). - * Returns the block user address, or NULL if the index - * does not match any known ishm blocks. - */ -void *_odp_ishm_lookup_by_index(int block_index) -{ - void *ret; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - ret = block_lookup(block_index); - odp_spinlock_unlock(&ishm_tbl->lock); - return ret; -} - -/* - * Lookup for an ishm shared memory, identified by its block name. 
- * Map this ishm area in the process VA (if not already present). - * Return the block index, or -1 if the index - * does not match any known ishm blocks. - */ -int _odp_ishm_lookup_by_name(const char *name) -{ - int block_index; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - /* search the block in main ishm table: return -1 if not found: */ - block_index = find_block_by_name(name); - if ((block_index < 0) || (!block_lookup(block_index))) { - odp_spinlock_unlock(&ishm_tbl->lock); - return -1; - } - - odp_spinlock_unlock(&ishm_tbl->lock); - return block_index; -} - -/* - * Lookup for an ishm shared memory block, identified by its VA address. - * This works only if the block has already been looked-up (mapped) by the - * current process or it it was created with the _ODP_ISHM_SINGLE_VA flag. - * Map this ishm area in the process VA (if not already present). - * Return the block index, or -1 if the address - * does not match any known ishm blocks. - */ -int _odp_ishm_lookup_by_address(void *addr) -{ - int block_index; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - /* search the block in main ishm table: return -1 if not found: */ - block_index = find_block_by_address(addr); - if ((block_index < 0) || (!block_lookup(block_index))) { - odp_spinlock_unlock(&ishm_tbl->lock); - return -1; - } - - odp_spinlock_unlock(&ishm_tbl->lock); - return block_index; -} - -/* - * Returns the VA address of a given block (which has to be known in the current - * process). Returns NULL if the block is unknown. 
- */ -void *_odp_ishm_address(int block_index) -{ - int proc_index; - void *addr; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - if ((block_index < 0) || - (block_index >= ISHM_MAX_NB_BLOCKS) || - (ishm_tbl->block[block_index].len == 0)) { - ODP_ERR("Request for address on an invalid block\n"); - odp_spinlock_unlock(&ishm_tbl->lock); - return NULL; - } - - proc_index = procfind_block(block_index); - if (proc_index < 0) { - odp_spinlock_unlock(&ishm_tbl->lock); - return NULL; - } - - addr = ishm_proctable->entry[proc_index].start; - odp_spinlock_unlock(&ishm_tbl->lock); - return addr; -} - -int _odp_ishm_info(int block_index, _odp_ishm_info_t *info) -{ - int proc_index; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - if ((block_index < 0) || - (block_index >= ISHM_MAX_NB_BLOCKS) || - (ishm_tbl->block[block_index].len == 0)) { - odp_spinlock_unlock(&ishm_tbl->lock); - ODP_ERR("Request for info on an invalid block\n"); - return -1; - } - - /* search it in process table: if not there, need to map*/ - proc_index = procfind_block(block_index); - if (proc_index < 0) { - odp_spinlock_unlock(&ishm_tbl->lock); - return -1; - } - - info->name = ishm_tbl->block[block_index].name; - info->addr = ishm_proctable->entry[proc_index].start; - info->size = ishm_tbl->block[block_index].user_len; - info->page_size = (ishm_tbl->block[block_index].huge == HUGE) ? - odp_sys_huge_page_size() : odp_sys_page_size(); - info->flags = ishm_tbl->block[block_index].flags; - info->user_flags = ishm_tbl->block[block_index].user_flags; - - odp_spinlock_unlock(&ishm_tbl->lock); - return 0; -} - -static int do_odp_ishm_init_local(void) -{ - int i; - int block_index; - - /* - * the ishm_process table is local to each linux process - * Check that no other linux threads (of same or ancestor processes) - * have already created the table, and create it if needed. - * We protect this with the general ishm lock to avoid - * init race condition of different running threads. 
- */ - odp_spinlock_lock(&ishm_tbl->lock); - ishm_tbl->odpthread_cnt++; /* count ODPthread (pthread or process) */ - if (!ishm_proctable) { - ishm_proctable = malloc(sizeof(ishm_proctable_t)); - if (!ishm_proctable) { - odp_spinlock_unlock(&ishm_tbl->lock); - return -1; - } - memset(ishm_proctable, 0, sizeof(ishm_proctable_t)); - } - if (syscall(SYS_gettid) != getpid()) - ishm_proctable->thrd_refcnt++; /* new linux thread */ - else - ishm_proctable->thrd_refcnt = 1;/* new linux process */ - - /* - * if this ODP thread is actually a new linux process, (as opposed - * to a pthread), i.e, we just forked, then all shmem blocks - * of the parent process are mapped into this child by inheritance. - * (The process local table is inherited as well). We hence have to - * increase the process refcount for each of the inherited mappings: - */ - if (syscall(SYS_gettid) == getpid()) { - for (i = 0; i < ishm_proctable->nb_entries; i++) { - block_index = ishm_proctable->entry[i].block_index; - ishm_tbl->block[block_index].refcnt++; - } - } - - odp_spinlock_unlock(&ishm_tbl->lock); - return 0; -} - -/* remove all files staring with "odp-" from a directory "dir" */ -int _odp_ishm_cleanup_files(const char *dirpath) -{ - struct dirent *e; - DIR *dir; - char userdir[PATH_MAX]; - char prefix[PATH_MAX]; - char *fullpath; - int d_len = strlen(dirpath); - int p_len; - int f_len; - - snprintf(userdir, PATH_MAX, "%s/%s", dirpath, odp_global_data.uid); - - dir = opendir(userdir); - if (!dir) { - /* ok if the dir does not exist. no much to delete then! 
*/ - ODP_DBG("opendir failed for %s: %s\n", - dirpath, strerror(errno)); - return 0; - } - snprintf(prefix, PATH_MAX, _ODP_FILES_FMT, odp_global_data.main_pid); - p_len = strlen(prefix); - while ((e = readdir(dir)) != NULL) { - if (strncmp(e->d_name, prefix, p_len) == 0) { - f_len = strlen(e->d_name); - fullpath = malloc(d_len + f_len + 2); - if (fullpath == NULL) { - closedir(dir); - return -1; - } - snprintf(fullpath, PATH_MAX, "%s/%s", - dirpath, e->d_name); - ODP_DBG("deleting obsolete file: %s\n", fullpath); - if (unlink(fullpath)) - ODP_ERR("unlink failed for %s: %s\n", - fullpath, strerror(errno)); - free(fullpath); - } - } - closedir(dir); - - return 0; -} - -int _odp_ishm_init_global(const odp_init_t *init) -{ - void *addr; - void *spce_addr; - int i; - uid_t uid; - char *hp_dir = odp_global_data.hugepage_info.default_huge_page_dir; - uint64_t align; - uint64_t max_memory = ODP_CONFIG_ISHM_VA_PREALLOC_SZ; - uint64_t internal = ODP_CONFIG_ISHM_VA_PREALLOC_SZ / 8; - - /* user requested memory size + some extra for internal use */ - if (init && init->shm.max_memory) - max_memory = init->shm.max_memory + internal; - - odp_global_data.shm_max_memory = max_memory; - odp_global_data.shm_max_size = max_memory - internal; - odp_global_data.main_pid = getpid(); - odp_global_data.shm_dir = getenv("ODP_SHM_DIR"); - if (odp_global_data.shm_dir) { - odp_global_data.shm_dir_from_env = 1; - } else { - odp_global_data.shm_dir = - calloc(1, sizeof(ISHM_FILENAME_NORMAL_PAGE_DIR)); - sprintf(odp_global_data.shm_dir, "%s", - ISHM_FILENAME_NORMAL_PAGE_DIR); - odp_global_data.shm_dir_from_env = 0; - } - - ODP_DBG("ishm: using dir %s\n", odp_global_data.shm_dir); - - uid = getuid(); - snprintf(odp_global_data.uid, UID_MAXLEN, "%d", - uid); - - if ((syscall(SYS_gettid)) != odp_global_data.main_pid) { - ODP_ERR("ishm init must be performed by the main " - "ODP process!\n."); - return -1; - } - - if (!hp_dir) { - ODP_DBG("NOTE: No support for huge pages\n"); - align = 
odp_sys_page_size(); - } else { - ODP_DBG("Huge pages mount point is: %s\n", hp_dir); - _odp_ishm_cleanup_files(hp_dir); - align = odp_sys_huge_page_size(); - } - - _odp_ishm_cleanup_files(odp_global_data.shm_dir); - - /* allocate space for the internal shared mem block table: */ - addr = mmap(NULL, sizeof(ishm_table_t), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ODP_ERR("unable to mmap the main block table\n."); - goto init_glob_err1; - } - ishm_tbl = addr; - memset(ishm_tbl, 0, sizeof(ishm_table_t)); - ishm_tbl->dev_seq = 0; - ishm_tbl->odpthread_cnt = 0; - odp_spinlock_init(&ishm_tbl->lock); - - /* allocate space for the internal shared mem fragment table: */ - addr = mmap(NULL, sizeof(ishm_ftable_t), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ODP_ERR("unable to mmap the main fragment table\n."); - goto init_glob_err2; - } - ishm_ftbl = addr; - memset(ishm_ftbl, 0, sizeof(ishm_ftable_t)); - - /* - *reserve the address space for _ODP_ISHM_SINGLE_VA reserved blocks, - * only address space! 
- */ - spce_addr = _odp_ishmphy_book_va(max_memory, align); - if (!spce_addr) { - ODP_ERR("unable to reserve virtual space\n."); - goto init_glob_err3; - } - - /* use the first fragment descriptor to describe to whole VA space: */ - ishm_ftbl->fragment[0].block_index = -1; - ishm_ftbl->fragment[0].start = spce_addr; - ishm_ftbl->fragment[0].len = max_memory; - ishm_ftbl->fragment[0].prev = NULL; - ishm_ftbl->fragment[0].next = NULL; - ishm_ftbl->used_fragmnts = &ishm_ftbl->fragment[0]; - - /* and put all other fragment descriptors in the unused list: */ - for (i = 1; i < ISHM_NB_FRAGMNTS - 1; i++) { - ishm_ftbl->fragment[i].prev = NULL; - ishm_ftbl->fragment[i].next = &ishm_ftbl->fragment[i + 1]; - } - ishm_ftbl->fragment[ISHM_NB_FRAGMNTS - 1].prev = NULL; - ishm_ftbl->fragment[ISHM_NB_FRAGMNTS - 1].next = NULL; - ishm_ftbl->unused_fragmnts = &ishm_ftbl->fragment[1]; - - /* - * We run _odp_ishm_init_local() directely here to give the - * possibility to run shm_reserve() before the odp_init_local() - * is performed for the main thread... 
Many init_global() functions - * indeed assume the availability of odp_shm_reserve()...: - */ - if (do_odp_ishm_init_local()) { - ODP_ERR("unable to init the main thread\n."); - goto init_glob_err4; - } - - /* get ready to create pools: */ - _odp_ishm_pool_init(); - - return 0; - -init_glob_err4: - if (_odp_ishmphy_unbook_va()) - ODP_ERR("unable to unbook virtual space\n."); -init_glob_err3: - if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0) - ODP_ERR("unable to munmap main fragment table\n."); -init_glob_err2: - if (munmap(ishm_tbl, sizeof(ishm_table_t)) < 0) - ODP_ERR("unable to munmap main block table\n."); -init_glob_err1: - return -1; -} - -int _odp_ishm_init_local(void) -{ - /* - * Do not re-run this for the main ODP process, as it has already - * been done in advance at _odp_ishm_init_global() time: - */ - if ((getpid() == odp_global_data.main_pid) && - (syscall(SYS_gettid) == getpid())) - return 0; - - return do_odp_ishm_init_local(); -} - -static int do_odp_ishm_term_local(void) -{ - int i; - int proc_table_refcnt = 0; - int block_index; - ishm_block_t *block; - - procsync(); - - ishm_tbl->odpthread_cnt--; /* decount ODPthread (pthread or process) */ - - /* - * The ishm_process table is local to each linux process - * Check that no other linux threads (of this linux process) - * still needs the table, and free it if so. - * We protect this with the general ishm lock to avoid - * term race condition of different running threads. - */ - proc_table_refcnt = --ishm_proctable->thrd_refcnt; - if (!proc_table_refcnt) { - /* - * this is the last thread of this process... - * All mappings for this process are about to be lost... - * Go through the table of visible blocks for this process, - * decreasing the refcnt of each visible blocks, and issuing - * warning for those no longer referenced by any process. 
- * Note that non-referenced blocks are not freed: this is - * deliberate as this would imply that the semantic of the - * freeing function would differ depending on whether we run - * with odp_thread as processes or pthreads. With this approach, - * the user should always free the blocks manually, which is - * more consistent - */ - for (i = 0; i < ishm_proctable->nb_entries; i++) { - block_index = ishm_proctable->entry[i].block_index; - block = &ishm_tbl->block[block_index]; - if ((--block->refcnt) <= 0) { - block->refcnt = 0; - ODP_DBG("Warning: block %d: name:%s " - "no longer referenced\n", - i, - ishm_tbl->block[i].name[0] ? - ishm_tbl->block[i].name : ""); - } - } - - free(ishm_proctable); - ishm_proctable = NULL; - } - - return 0; -} - -int _odp_ishm_term_local(void) -{ - int ret; - - odp_spinlock_lock(&ishm_tbl->lock); - - /* postpone last thread term to allow free() by global term functions:*/ - if (ishm_tbl->odpthread_cnt == 1) { - odp_spinlock_unlock(&ishm_tbl->lock); - return 0; - } - - ret = do_odp_ishm_term_local(); - odp_spinlock_unlock(&ishm_tbl->lock); - return ret; -} - -int _odp_ishm_term_global(void) -{ - int ret = 0; - int index; - ishm_block_t *block; - - if ((getpid() != odp_global_data.main_pid) || - (syscall(SYS_gettid) != getpid())) - ODP_ERR("odp_term_global() must be performed by the main " - "ODP process!\n."); - - /* cleanup possibly non freed memory (and complain a bit): */ - for (index = 0; index < ISHM_MAX_NB_BLOCKS; index++) { - block = &ishm_tbl->block[index]; - if (block->len != 0) { - ODP_ERR("block '%s' (file %s) was never freed " - "(cleaning up...).\n", - block->name, block->filename); - delete_file(block); - } - } - - /* perform the last thread terminate which was postponed: */ - ret = do_odp_ishm_term_local(); - - /* free the fragment table */ - if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0) { - ret |= -1; - ODP_ERR("unable to munmap fragment table\n."); - } - /* free the block table */ - if (munmap(ishm_tbl, 
sizeof(ishm_table_t)) < 0) { - ret |= -1; - ODP_ERR("unable to munmap main table\n."); - } - - /* free the reserved VA space */ - if (_odp_ishmphy_unbook_va()) - ret |= -1; - - if (!odp_global_data.shm_dir_from_env) - free(odp_global_data.shm_dir); - - return ret; -} - -/* - * Print the current ishm status (allocated blocks and VA space map) - * Return the number of allocated blocks (including those not mapped - * by the current odp thread). Also perform a number of sanity check. - * For debug. - */ -int _odp_ishm_status(const char *title) -{ - int i; - char flags[3]; - char huge; - int proc_index; - ishm_fragment_t *fragmnt; - int consecutive_unallocated = 0; /* should never exceed 1 */ - uintptr_t last_address = 0; - ishm_fragment_t *previous = NULL; - int nb_used_frgments = 0; - int nb_unused_frgments = 0; /* nb frag describing a VA area */ - int nb_allocated_frgments = 0; /* nb frag describing an allocated VA */ - int nb_blocks = 0; - int single_va_blocks = 0; - int max_name_len = 0; - - odp_spinlock_lock(&ishm_tbl->lock); - procsync(); - - /* find longest block name */ - for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { - int str_len; - - if (ishm_tbl->block[i].len <= 0) - continue; - - str_len = strlen(ishm_tbl->block[i].name); - - if (max_name_len < str_len) - max_name_len = str_len; - } - - ODP_PRINT("ishm blocks allocated at: %s\n", title); - - ODP_PRINT(" %-*s flag len user_len seq ref start fd" - " file\n", max_name_len, "name"); - - /* display block table: 1 line per entry +1 extra line if mapped here */ - for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) { - if (ishm_tbl->block[i].len <= 0) - continue; /* unused block */ - - nb_blocks++; - if (ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA) - single_va_blocks++; - - flags[0] = (ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA) ? - 'S' : '.'; - flags[1] = (ishm_tbl->block[i].flags & _ODP_ISHM_LOCK) ? 
- 'L' : '.'; - flags[2] = 0; - switch (ishm_tbl->block[i].huge) { - case HUGE: - huge = 'H'; - break; - case NORMAL: - huge = 'N'; - break; - case EXTERNAL: - huge = 'E'; - break; - default: - huge = '?'; - } - proc_index = procfind_block(i); - ODP_PRINT("%2i %-*s %s%c 0x%-08lx %-8lu %-3lu %-3lu", - i, max_name_len, ishm_tbl->block[i].name, - flags, huge, - ishm_tbl->block[i].len, - ishm_tbl->block[i].user_len, - ishm_tbl->block[i].seq, - ishm_tbl->block[i].refcnt); - - if (proc_index < 0) - continue; - - ODP_PRINT("%-08lx %-3d", - ishm_proctable->entry[proc_index].start, - ishm_proctable->entry[proc_index].fd); - - ODP_PRINT("%s\n", ishm_tbl->block[i].filename); - } - - /* display the virtual space allocations... : */ - ODP_PRINT("\nishm virtual space:\n"); - for (fragmnt = ishm_ftbl->used_fragmnts; - fragmnt; fragmnt = fragmnt->next) { - if (fragmnt->block_index >= 0) { - nb_allocated_frgments++; - ODP_PRINT(" %08p - %08p: ALLOCATED by block:%d\n", - (uintptr_t)fragmnt->start, - (uintptr_t)fragmnt->start + fragmnt->len - 1, - fragmnt->block_index); - consecutive_unallocated = 0; - } else { - ODP_PRINT(" %08p - %08p: NOT ALLOCATED\n", - (uintptr_t)fragmnt->start, - (uintptr_t)fragmnt->start + fragmnt->len - 1); - if (consecutive_unallocated++) - ODP_ERR("defragmentation error\n"); - } - - /* some other sanity checks: */ - if (fragmnt->prev != previous) - ODP_ERR("chaining error\n"); - - if (fragmnt != ishm_ftbl->used_fragmnts) { - if ((uintptr_t)fragmnt->start != last_address + 1) - ODP_ERR("lost space error\n"); - } - - last_address = (uintptr_t)fragmnt->start + fragmnt->len - 1; - previous = fragmnt; - nb_used_frgments++; - } - - /* - * the number of blocks with the single_VA flag set should match - * the number of used fragments: - */ - if (single_va_blocks != nb_allocated_frgments) - ODP_ERR("single_va_blocks != nb_allocated_fragments!\n"); - - /* compute the number of unused fragments*/ - for (fragmnt = ishm_ftbl->unused_fragmnts; - fragmnt; fragmnt = 
fragmnt->next) - nb_unused_frgments++; - - ODP_PRINT("ishm: %d fragment used. %d fragments unused. (total=%d)\n", - nb_used_frgments, nb_unused_frgments, - nb_used_frgments + nb_unused_frgments); - - if ((nb_used_frgments + nb_unused_frgments) != ISHM_NB_FRAGMNTS) - ODP_ERR("lost fragments!\n"); - - if (nb_blocks < ishm_proctable->nb_entries) - ODP_ERR("process known block cannot exceed main total sum!\n"); - - ODP_PRINT("\n"); - - odp_spinlock_unlock(&ishm_tbl->lock); - return nb_blocks; -} - -void _odp_ishm_print(int block_index) -{ - ishm_block_t *block; - const char *str; - - odp_spinlock_lock(&ishm_tbl->lock); - - if ((block_index < 0) || - (block_index >= ISHM_MAX_NB_BLOCKS) || - (ishm_tbl->block[block_index].len == 0)) { - odp_spinlock_unlock(&ishm_tbl->lock); - ODP_ERR("Request for info on an invalid block\n"); - return; - } - - block = &ishm_tbl->block[block_index]; - - ODP_PRINT("\nSHM block info\n--------------\n"); - ODP_PRINT(" name: %s\n", block->name); - ODP_PRINT(" file: %s\n", block->filename); - ODP_PRINT(" expt: %s\n", block->exptname); - ODP_PRINT(" user_flags: 0x%x\n", block->user_flags); - ODP_PRINT(" flags: 0x%x\n", block->flags); - ODP_PRINT(" user_len: %lu\n", block->user_len); - ODP_PRINT(" start: %p\n", block->start); - ODP_PRINT(" len: %lu\n", block->len); - - switch (block->huge) { - case HUGE: - str = "huge"; - break; - case NORMAL: - str = "normal"; - break; - case EXTERNAL: - str = "external"; - break; - default: - str = "??"; - } - - ODP_PRINT(" page type: %s\n", str); - ODP_PRINT(" seq: %lu\n", block->seq); - ODP_PRINT(" refcnt: %lu\n", block->refcnt); - ODP_PRINT("\n"); - - odp_spinlock_unlock(&ishm_tbl->lock); -} diff --git a/platform/linux-generic/_ishmphy.c b/platform/linux-generic/_ishmphy.c deleted file mode 100644 index 8dde2831e..000000000 --- a/platform/linux-generic/_ishmphy.c +++ /dev/null @@ -1,209 +0,0 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include "config.h" - -/* - * This file handles the lower end of the ishm memory allocator: - * It performs the physical mappings. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include <_ishm_internal.h> -#include <_ishmphy_internal.h> - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include <_ishmphy_internal.h> - -static void *common_va_address; -static uint64_t common_va_len; - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -/* Book some virtual address space - * This function is called at odp_init_global() time to pre-book some - * virtual address space inherited by all odpthreads (i.e. descendant - * processes and threads) and later used to guarantee the unicity the - * the mapping VA address when memory is reserver with the _ODP_ISHM_SINGLE_VA - * flag. - * returns the address of the mapping or NULL on error. - */ -void *_odp_ishmphy_book_va(uintptr_t len, intptr_t align) -{ - void *addr; - - addr = mmap(NULL, len + align, PROT_NONE, - MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - if (addr == MAP_FAILED) { - ODP_ERR("_ishmphy_book_va failure\n"); - return NULL; - } - - if (mprotect(addr, len, PROT_NONE)) - ODP_ERR("failure for protect\n"); - - ODP_DBG("VA Reserved: %p, len=%p\n", addr, len + align); - - common_va_address = addr; - common_va_len = len; - - /* return the nearest aligned address: */ - return (void *)(((uintptr_t)addr + align - 1) & (-align)); -} - -/* Un-book some virtual address space - * This function is called at odp_term_global() time to unbook - * the virtual address space booked by _ishmphy_book_va() - */ -int _odp_ishmphy_unbook_va(void) -{ - int ret; - - ret = munmap(common_va_address, common_va_len); - if (ret) - ODP_ERR("_unishmphy_book_va failure\n"); - return ret; -} - -/* - * do a mapping: - * Performs a mapping of the provided file descriptor to the process 
VA - * space. If the _ODP_ISHM_SINGLE_VA flag is set, 'start' is assumed to be - * the VA address where the mapping is to be done. - * If the flag is not set, a new VA address is taken. - * returns the address of the mapping or NULL on error. - */ -void *_odp_ishmphy_map(int fd, void *start, uint64_t size, - int flags) -{ - void *mapped_addr_tmp, *mapped_addr; - int mmap_flags = 0; - - if (flags & _ODP_ISHM_SINGLE_VA) { - if (!start) { - ODP_ERR("failure: missing address\n"); - return NULL; - } - /* maps over fragment of reserved VA: */ - /* first, try a normal map. If that works, remap it where it - * should (on the prereverved space), and remove the initial - * normal mapping: - * This is because it turned out that if a mapping fails - * on a the prereserved virtual address space, then - * the prereserved address space which was tried to be mapped - * on becomes available to the kernel again! This was not - * according to expectations: the assumption was that if a - * mapping fails, the system should remain unchanged, but this - * is obvioulsy not true (at least for huge pages when - * exhausted). - * So the strategy is to first map at a non reserved place - * (which can then be freed and returned to the kernel on - * failure) and peform a new map to the prereserved space on - * success (which is then guaranteed to work). - * The initial free maping can then be removed. - */ - mapped_addr = MAP_FAILED; - mapped_addr_tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | mmap_flags, fd, 0); - if (mapped_addr_tmp != MAP_FAILED) { - /* If OK, do new map at right fixed location... */ - mapped_addr = mmap(start, - size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED | mmap_flags, - fd, 0); - if (mapped_addr != start) - ODP_ERR("new map failed:%s\n", strerror(errno)); - /* ... 
and remove initial mapping: */ - if (munmap(mapped_addr_tmp, size)) - ODP_ERR("munmap failed:%s\n", strerror(errno)); - } - } else { - /* just do a new mapping in the VA space: */ - mapped_addr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | mmap_flags, fd, 0); - if ((mapped_addr >= common_va_address) && - ((char *)mapped_addr < - (char *)common_va_address + common_va_len)) { - ODP_ERR("VA SPACE OVERLAP!\n"); - } - } - - if (mapped_addr == MAP_FAILED) { - ODP_ERR("mmap failed:%s\n", strerror(errno)); - return NULL; - } - - /* if locking is requested, lock it...*/ - if (flags & _ODP_ISHM_LOCK) { - if (mlock(mapped_addr, size)) { - if (munmap(mapped_addr, size)) - ODP_ERR("munmap failed:%s\n", strerror(errno)); - ODP_ERR("mlock failed:%s\n", strerror(errno)); - return NULL; - } - } - return mapped_addr; -} - -/* free a mapping: - * If the _ODP_ISHM_SINGLE_VA flag was given at creation time the virtual - * address range must be returned to the preoallocated "pool". this is - * done by mapping non accessibly memory there (hence blocking the VA but - * releasing the physical memory). - * If the _ODP_ISHM_SINGLE_VA flag was not given, both physical memory and - * virtual address space are realeased by calling the normal munmap. - * return 0 on success or -1 on error. 
- */ -int _odp_ishmphy_unmap(void *start, uint64_t len, int flags) -{ - void *addr; - int ret; - int mmap_flgs; - - mmap_flgs = MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS | MAP_NORESERVE; - - /* if locking was requested, unlock...*/ - if (flags & _ODP_ISHM_LOCK) - munlock(start, len); - - if (flags & _ODP_ISHM_SINGLE_VA) { - /* map unnaccessible memory overwrites previous mapping - * and free the physical memory, but guarantees to block - * the VA range from other mappings - */ - addr = mmap(start, len, PROT_NONE, mmap_flgs, -1, 0); - if (addr == MAP_FAILED) { - ODP_ERR("_ishmphy_free failure for ISHM_SINGLE_VA\n"); - return -1; - } - if (mprotect(start, len, PROT_NONE)) - ODP_ERR("_ishmphy_free failure for protect\n"); - return 0; - } - - /* just release the mapping */ - ret = munmap(start, len); - if (ret) - ODP_ERR("_ishmphy_free failure: %s\n", strerror(errno)); - return ret; -} diff --git a/platform/linux-generic/_ishmpool.c b/platform/linux-generic/_ishmpool.c deleted file mode 100644 index 4ff24c026..000000000 --- a/platform/linux-generic/_ishmpool.c +++ /dev/null @@ -1,807 +0,0 @@ -/* Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -/* This file gathers the buddy and slab allocation functionality provided - * by _ishm. - * _odp_ishmpool_create() can be used to create a pool for buddy/slab - * allocation. _odp_ishmpool_create() will allocate a memory area using - * ishm_reserve() for both the control part (needed for tracking - * allocation/free...) and the user memory itself (part of which will be given - * at each ishmpool_alloc()). - * The element size provided at pool creation time determines whether - * to pool will of type buddy or slab. - * For buddy, all allocations are rounded to the nearest power of 2. - * - * The implementation of the buddy allocator is very traditional: it - * maintains N lists of free buffers. 
- * The control part actually contains these N queue heads, (N-M are actually - * used), the free buffers themselves being used for chaining (the chaining info - * is in the buffers: as they are "free" they should not be touched by the - * user). The control part also contains a array of bytes for remembering - * the size (actually the order) of the allocated buffers: - * There are 2^(N-M) such bytes, this number being the maximum number of - * allocated buffers (when all allocation are <= 2^M bytes) - * Buddy allocators handle fragmentation by splitting or merging blocks by 2. - * They guarantee a minimum efficiency of 50%, at worse case fragmentation. - * - * Slab implementation is even simpler, all free elements being queued in - * one single queue at init, taken from this queue when allocated and - * returned to this same queue when freed. - * - * The reason for not using malloc() is that malloc does not guarantee - * memory sharability between ODP threads (regardless of their implememtation) - * which ishm_reserve() can do. see the comments around - * _odp_ishmbud_pool_create() and ishm_reserve() for more details. - * - * This file is divided in 3 sections: the first one regroups functions - * needed by the buddy allocation. - * The second one regroups the functions needed by the slab allocator. - * The third section regroups the common functions exported externally. - */ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include <_ishm_internal.h> -#include <_ishmpool_internal.h> -#include -#include -#include -#include -#include - -#define BUDDY_MIN_SIZE 32 /* minimal buddy allocation size */ - -typedef _odp_ishm_pool_t pool_t; /* for shorter writing */ - -/* array of ishm block index used for pools. 
only used for pool - * lookup by name */ -#define MAX_NB_POOL 100 -static int pool_blk_idx[MAX_NB_POOL]; - -/* section 1: functions for buddy allocation: */ - -/* free buddy blocks contains the following structure, used to link the - * free blocks together. - */ -typedef struct bblock_t { - struct bblock_t *next; - uint32_t order; -} bblock_t; - -/* value set in the 'order' table when the block is not allocated: */ -#define BBLOCK_FREE 0 - -/* compute ceil(log2(size)) */ -static uint8_t clog2(uint64_t size) -{ - uint64_t sz; - uint32_t bit; - uint8_t res; - - sz = size; /* we start by computing res = log2(sz)... */ - res = 0; - for (bit = 32; bit ; bit >>= 1) { - if (sz >= ((uint64_t)1 << bit)) { - sz >>= bit; - res += bit; - } - } - if (((uint64_t)1 << res) < size) /* ...and then ceil(x) */ - res++; - - return res; -} - -/* - * given a bblock address, and an order value, returns the address - * of the buddy bblock (the other "half") - */ -static inline bblock_t *get_bblock_buddy(pool_t *bpool, bblock_t *addr, - uint8_t order) -{ - uintptr_t b; - - b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr); - b ^= 1 << order; - return (void *)(b + (uintptr_t)bpool->ctrl.user_addr); -} - -/* - * given a buddy block address, return its number (used for busy flags): - */ -static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr) -{ - uintptr_t b; - uint8_t min_order; - - min_order = bpool->ctrl.min_order; - b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr) >> min_order; - return b; -} - -/* remove bblock from the list for bblocks of rank order. 
The bblock to be - * removed is really expected to be on the list: not finding it is an error */ -static inline void remove_from_list(pool_t *bpool, uint8_t order, - bblock_t *bblock) -{ - bblock_t *curr; /* current bblock (when parsing list) */ - bblock_t *prev; /* previous bblock (when parsing list) */ - - curr = bpool->ctrl.free_heads[order]; - if (!curr) - goto remove_from_list_error; - - if (curr == bblock) { - bpool->ctrl.free_heads[order] = curr->next; - return; - } - - while (curr) { - if (curr == bblock) { - prev->next = curr->next; - return; - } - prev = curr; - curr = curr->next; - } - -remove_from_list_error: - ODP_ERR("List corrupted\n"); -} - -/* - * create a buddy memory pool of given size (actually nearest power of 2), - * where allocation will never be smaller than min_alloc. - * returns a pointer to the created buddy_pool - * The allocated area contains: - * - The _odp_ishm_pool_ctrl_t structure - * - The array of ((order - min_order) of free list heads - * - The array of 'order' values, remembering sizes of allocated bblocks - * - alignment to cache line - * - The user memory - */ -static pool_t *_odp_ishmbud_pool_create(const char *pool_name, int store_idx, - uint64_t size, - uint64_t min_alloc, int flags) -{ - uint8_t order; /* pool order = ceil(log2(size)) */ - uint8_t min_order; /* pool min_order = ceil(log2(min_alloc))*/ - uint32_t max_nb_bblock; /* max number of bblock, when smallest */ - uint32_t control_sz; /* size of control area */ - uint32_t free_head_sz; /* mem area needed for list heads */ - uint32_t saved_order_sz; /* mem area to remember given sizes */ - uint64_t user_sz; /* 2^order bytes */ - uint64_t total_sz; /* total size to request */ - int blk_idx; /* as returned by _ishm_resrve() */ - pool_t *bpool; - int i; - bblock_t *first_block; - - /* a bblock_t must fit in the buffers for linked chain! */ - if (min_alloc < sizeof(bblock_t)) - min_alloc = sizeof(bblock_t); - - /* pool order is such that 2^order = size. 
same for min_order */ - order = clog2(size); - min_order = clog2(min_alloc); - - /* check parameters obvious wishes: */ - if (order >= 64) - return NULL; - if (order < min_order) - return NULL; - - /* at worst case, all bblocks have smallest (2^min_order) size */ - max_nb_bblock = (1 << (order - min_order)); - - /* space needed for the control area (padded to cache line size)*/ - control_sz = ROUNDUP_CACHE_LINE(sizeof(_odp_ishm_pool_ctrl_t)); - - /* space needed for 'order' free bblock list heads: */ - /* Note that only lists from min_order to order are really used.*/ - free_head_sz = ROUNDUP_CACHE_LINE(sizeof(void *) * (order + 1)); - - /* space needed for order -i.e. size- storage of alloc'd bblock:*/ - saved_order_sz = ROUNDUP_CACHE_LINE(max_nb_bblock * sizeof(uint8_t)); - - /* space needed for user area is 2^order bytes: */ - user_sz = 1 << order; - - total_sz = control_sz + - free_head_sz + - saved_order_sz + - user_sz; - - /* allocate required memory: */ - blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1, - ODP_CACHE_LINE_SIZE, flags, 0); - if (blk_idx < 0) { - ODP_ERR("_odp_ishm_reserve failed."); - return NULL; - } - - bpool = _odp_ishm_address(blk_idx); - if (bpool == NULL) { - ODP_ERR("_odp_ishm_address failed."); - return NULL; - } - - /* store in pool array (needed for look up): */ - pool_blk_idx[store_idx] = blk_idx; - - /* remember block index, needed when pool is destroyed */ - bpool->ctrl.ishm_blk_idx = blk_idx; - - /* remember element size: 0 means unknown size, i.e. buddy alloation*/ - bpool->ctrl.element_sz = 0; - - /* prepare mutex: */ - odp_spinlock_init(&bpool->ctrl.lock); - - /* initialise pointers and things... 
*/ - bpool->ctrl.order = order; - bpool->ctrl.min_order = min_order; - bpool->ctrl.free_heads = - (void *)((uintptr_t)bpool + control_sz); - bpool->ctrl.alloced_order = - (uint8_t *)((uintptr_t)bpool->ctrl.free_heads + free_head_sz); - bpool->ctrl.user_addr = - (void *)((uintptr_t)bpool->ctrl.alloced_order + saved_order_sz); - - /* initialize all free list to NULL, except the top biggest element:*/ - for (i = 0; i < (order - min_order); i++) - bpool->ctrl.free_heads[i] = NULL; - bpool->ctrl.free_heads[order] = bpool->ctrl.user_addr; - first_block = (bblock_t *)bpool->ctrl.user_addr; - first_block->next = NULL; - first_block->order = order; - - /* set all 'order' of allocated bblocks to free: */ - memset(bpool->ctrl.alloced_order, BBLOCK_FREE, saved_order_sz); - - return bpool; -} - -/* allocated memory from the given buddy pool */ -static void *_odp_ishmbud_alloc(pool_t *bpool, uint64_t size) -{ - uint32_t rq_order; /* requested order */ - uint32_t try_order; - bblock_t *bblock; - bblock_t *buddy; - uintptr_t nr; - - /* if size is zero or too big reject: */ - if ((!size) && (size > (1U << bpool->ctrl.order))) { - ODP_ERR("Invalid alloc size (0 or larger than whole pool)\n"); - return NULL; - } - - /* compute ceil(log2(size)), to get the requested block order: */ - rq_order = clog2(size); - - /* make sure the requested order is bigger (or same) as minimum! */ - if (rq_order < bpool->ctrl.min_order) - rq_order = bpool->ctrl.min_order; - - /* mutex from here: */ - odp_spinlock_lock(&bpool->ctrl.lock); - - /* now, start trying to allocate a bblock of rq_order. 
If that - * fails keep trying larger orders until pool order is reached */ - bblock = NULL; - for (try_order = rq_order; try_order <= bpool->ctrl.order; - try_order++) { - if (bpool->ctrl.free_heads[try_order]) { - /* remove from list: */ - bblock = - (bblock_t *)(bpool->ctrl.free_heads[try_order]); - bpool->ctrl.free_heads[try_order] = bblock->next; - break; - } - } - - if (!bblock) { - odp_spinlock_unlock(&bpool->ctrl.lock); - ODP_ERR("Out of memory. (Buddy pool full)\n"); - return NULL; - } - - /* OK: we got a block, but possibbly too large (if try_order>rq_order) - * return the extra halves to the pool hence splitting the bblock at - * each 'extra' order: */ - while (try_order-- > rq_order) { - /* split: */ - buddy = (bblock_t *)((uintptr_t)bblock + (1 << try_order)); - buddy->order = try_order; - /* add to list: */ - buddy->next = bpool->ctrl.free_heads[try_order]; - bpool->ctrl.free_heads[try_order] = buddy; - /* mark as free (non allocated block get size 0): */ - nr = get_bblock_nr(bpool, buddy); - bpool->ctrl.alloced_order[nr] = BBLOCK_FREE; - } - - /* remember the size if the allocated block: */ - nr = get_bblock_nr(bpool, bblock); - bpool->ctrl.alloced_order[nr] = rq_order; - - /* and return the allocated block! 
*/ - odp_spinlock_unlock(&bpool->ctrl.lock); - return (void *)bblock; -} - -/* free a previously allocated buffer from a given buddy pool */ -static int _odp_ishmbud_free(pool_t *bpool, void *addr) -{ - uintptr_t user_start; /* start of user area */ - uintptr_t user_stop; /* stop of user area */ - uintptr_t mask; /* 2^min_order - 1 */ - bblock_t *bblock; /* bblock being freed */ - bblock_t *buddy; /* buddy bblock of bblock being freed */ - uint8_t order; /* order of block being freed */ - uintptr_t nr; /* block number */ - - /* freeing NULL is regarded as OK, though without any effect: */ - if (!addr) - return 0; - - user_start = (uintptr_t)bpool->ctrl.user_addr; - user_stop = user_start + ((uintptr_t)1 << bpool->ctrl.order); - mask = ((uintptr_t)1 << bpool->ctrl.min_order) - 1; - - /* some sanity checks: check that given address is within pool and - * that relative address has 2^min_order granularity: */ - if (((uintptr_t)addr < user_start) || - ((uintptr_t)addr > user_stop) || - (((uintptr_t)addr - user_start) & mask)) { - ODP_ERR("Invalid address to be freed\n"); - return -1; - } - - /* mutex from here: */ - odp_spinlock_lock(&bpool->ctrl.lock); - - /* collect saved block order and make sure bblock was allocated */ - bblock = (bblock_t *)addr; - nr = get_bblock_nr(bpool, bblock); - order = bpool->ctrl.alloced_order[nr]; - if (order == BBLOCK_FREE) { - ODP_ERR("Double free error\n"); - odp_spinlock_unlock(&bpool->ctrl.lock); - return -1; - } - - /* this looks like a valid free, mark at least this as free: */ - bpool->ctrl.alloced_order[nr] = BBLOCK_FREE; - - /* go up in orders, trying to merge buddies... 
*/ - while (order < bpool->ctrl.order) { - buddy = get_bblock_buddy(bpool, bblock, order); - /*if buddy is not free: no further merge possible */ - nr = get_bblock_nr(bpool, buddy); - if (bpool->ctrl.alloced_order[nr] != BBLOCK_FREE) - break; - /*merge only bblock of same order:*/ - if (buddy->order != order) - break; - /*merge: remove buddy from free list: */ - remove_from_list(bpool, order, buddy); - /*merge: make sure we point at start of block: */ - if (bblock > buddy) - bblock = buddy; - /*merge: size of bloack has dubbled: increse order: */ - order++; - } - - /* insert the bblock into its correct free block list: */ - bblock->next = bpool->ctrl.free_heads[order]; - bpool->ctrl.free_heads[order] = bblock; - - /* remember the (possibly now merged) block order: */ - bblock->order = order; - - odp_spinlock_unlock(&bpool->ctrl.lock); - return 0; -} - -/* print buddy pool status and performs sanity checks */ -static int _odp_ishmbud_pool_status(const char *title, pool_t *bpool) -{ - uint8_t order, pool_order, pool_min_order; - uint64_t free_q_nb_bblocks[64]; - uint64_t allocated_nb_bblocks[64]; - uint64_t free_q_nb_bblocks_bytes[64]; - uint64_t allocated_nb_bblocks_bytes[64]; - uint64_t total_bytes_free; - uint64_t total_bytes_allocated; - uint64_t nr; - bblock_t *bblock; - int res = 0; - - odp_spinlock_lock(&bpool->ctrl.lock); - - pool_order = bpool->ctrl.order; - pool_min_order = bpool->ctrl.min_order; - - ODP_DBG("\n%s\n", title); - ODP_DBG("Pool Type: BUDDY\n"); - ODP_DBG("pool size: %" PRIu64 " (bytes)\n", (1UL << pool_order)); - ODP_DBG("pool order: %d\n", (int)pool_order); - ODP_DBG("pool min_order: %d\n", (int)pool_min_order); - - /* a pool wholse order is more than 64 cannot even be reached on 64 - * bit machines! 
*/ - if (pool_order > 64) { - odp_spinlock_unlock(&bpool->ctrl.lock); - return -1; - } - - total_bytes_free = 0; - total_bytes_allocated = 0; - - /* for each queue */ - for (order = pool_min_order; order <= pool_order; order++) { - free_q_nb_bblocks[order] = 0; - free_q_nb_bblocks_bytes[order] = 0; - allocated_nb_bblocks[order] = 0; - allocated_nb_bblocks_bytes[order] = 0; - - /* get the number of buffs in the free queue for this order: */ - bblock = bpool->ctrl.free_heads[order]; - while (bblock) { - free_q_nb_bblocks[order]++; - free_q_nb_bblocks_bytes[order] += (1 << order); - bblock = bblock->next; - } - - total_bytes_free += free_q_nb_bblocks_bytes[order]; - - /* get the number of allocated buffers of this order */ - for (nr = 0; - nr < (1U << (pool_order - pool_min_order)); nr++) { - if (bpool->ctrl.alloced_order[nr] == order) - allocated_nb_bblocks[order]++; - } - - allocated_nb_bblocks_bytes[order] = - allocated_nb_bblocks[order] * (1 << order); - - total_bytes_allocated += allocated_nb_bblocks_bytes[order]; - - ODP_DBG("Order %d => Free: %" PRIu64 " buffers " - "(%" PRIu64" bytes) " - "Allocated %" PRIu64 " buffers (%" PRIu64 " bytes) " - "Total: %" PRIu64 " bytes\n", - (int)order, free_q_nb_bblocks[order], - free_q_nb_bblocks_bytes[order], - allocated_nb_bblocks[order], - allocated_nb_bblocks_bytes[order], - free_q_nb_bblocks_bytes[order] + - allocated_nb_bblocks_bytes[order]); - } - - ODP_DBG("Allocated space: %" PRIu64 " (bytes)\n", - total_bytes_allocated); - ODP_DBG("Free space: %" PRIu64 " (bytes)\n", total_bytes_free); - - if (total_bytes_free + total_bytes_allocated != (1U << pool_order)) { - ODP_DBG("Lost bytes on this pool!\n"); - res = -1; - } - - if (res) - ODP_DBG("Pool inconsistent!\n"); - - odp_spinlock_unlock(&bpool->ctrl.lock); - return res; -} - -/* section 2: functions for slab allocation: */ - -/* free slab blocks contains the following structure, used to link the - * free blocks together. 
- */ -typedef struct sblock_t { - struct sblock_t *next; -} sblock_t; - -/* - * create a slab memory pool of given size (rounded up to the nearest integer - * number of element, where each element has size 'elt_size'). - * returns a pointer to the created slab pool. - * The allocated area contains: - * - The _odp_ishm_pool_ctrl_t structure - * - alignment to cache line - * - The user memory - */ -static pool_t *_odp_ishmslab_pool_create(const char *pool_name, int store_idx, - uint64_t size, - uint64_t elt_size, int flags) -{ - uint32_t nb_sblock; /* number of elements in the pool */ - uint32_t control_sz; /* size of control area */ - uint64_t total_sz; /* total size to request */ - uint64_t user_sz; /* 2^order bytes */ - int blk_idx; /* as returned by _ishm_reserve() */ - pool_t *spool; - unsigned int i; - sblock_t *block; - - /* a sblock_t must fit in the buffers for linked chain! */ - if (elt_size < sizeof(bblock_t)) { - elt_size = sizeof(bblock_t); - size = size * (sizeof(bblock_t) / elt_size + - ((sizeof(bblock_t) % elt_size) ? 1 : 0)); - } - - /* nb of element fitting in the pool is just ceil(size/elt_size)*/ - nb_sblock = (size / elt_size) + ((size % elt_size) ? 
1 : 0); - - /* space needed for the control area (padded to cache line size)*/ - control_sz = ROUNDUP_CACHE_LINE(sizeof(_odp_ishm_pool_ctrl_t)); - - /* space needed for user area is : */ - user_sz = nb_sblock * elt_size; - - total_sz = control_sz + - user_sz; - - /* allocate required memory: */ - blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1, - ODP_CACHE_LINE_SIZE, flags, 0); - if (blk_idx < 0) { - ODP_ERR("_odp_ishm_reserve failed."); - return NULL; - } - - spool = _odp_ishm_address(blk_idx); - if (spool == NULL) { - ODP_ERR("_odp_ishm_address failed."); - return NULL; - } - - /* store in pool array (needed for look up): */ - pool_blk_idx[store_idx] = blk_idx; - - /* remember block index, needed when pool is destroyed */ - spool->ctrl.ishm_blk_idx = blk_idx; - - /* remember element (sblock) size and their number: */ - spool->ctrl.element_sz = elt_size; - spool->ctrl.nb_elem = nb_sblock; - - /* prepare mutex: */ - odp_spinlock_init(&spool->ctrl.lock); - - /* initialise pointers and things... */ - spool->ctrl.user_addr = - (void *)((uintptr_t)spool + control_sz); - - /* initialise the free list with the list of all elements:*/ - spool->ctrl.free_head = spool->ctrl.user_addr; - for (i = 0; i < nb_sblock - 1; i++) { - block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr + - i * (uintptr_t)elt_size); - block->next = (sblock_t *)((uintptr_t)block + - (uintptr_t)elt_size); - } - block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr + - (nb_sblock - 1) * (uintptr_t)elt_size); - block->next = NULL; - - return spool; -} - -/* allocated memory from the given slab pool */ -static void *_odp_ishmslab_alloc(pool_t *spool, uint64_t size) -{ - void *ret; - sblock_t *block; - - if (size > spool->ctrl.element_sz) - return NULL; - - odp_spinlock_lock(&spool->ctrl.lock); - ret = spool->ctrl.free_head; - if (!ret) { - odp_spinlock_unlock(&spool->ctrl.lock); - ODP_ERR("Out of memory. 
(Slab pool full)\n"); - return NULL; - } - - block = (sblock_t *)ret; - spool->ctrl.free_head = block->next; - - odp_spinlock_unlock(&spool->ctrl.lock); - return ret; -} - -/* free a previously allocated buffer from a given slab pool */ -static int _odp_ishmslab_free(pool_t *spool, void *addr) -{ - uintptr_t user_start; /* start of user area */ - uintptr_t user_stop; /* stop of user area */ - sblock_t *block; - - /* freeing NULL is regarded as OK, though without any effect: */ - if (!addr) - return 0; - - user_start = (uintptr_t)spool->ctrl.user_addr; - user_stop = user_start + spool->ctrl.element_sz * spool->ctrl.nb_elem; - - /* some sanity checks: check that given address is within pool and - * that relative address has element_sz granularity: */ - if (((uintptr_t)addr < user_start) || - ((uintptr_t)addr > user_stop) || - (((uintptr_t)addr - user_start) % spool->ctrl.element_sz)) { - ODP_ERR("Invalid address to be freed\n"); - return -1; - } - - odp_spinlock_lock(&spool->ctrl.lock); - block = (sblock_t *)addr; - block->next = (sblock_t *)spool->ctrl.free_head; - spool->ctrl.free_head = addr; - odp_spinlock_unlock(&spool->ctrl.lock); - - return 0; -} - -/* print slab pool status and performs sanity checks */ -static int _odp_ishmslab_pool_status(const char *title, pool_t *spool) -{ - sblock_t *sblock; - uint64_t nb_free_elts; /* number of free elements */ - - odp_spinlock_lock(&spool->ctrl.lock); - - ODP_DBG("\n%s\n", title); - ODP_DBG("Pool Type: FIXED SIZE\n"); - ODP_DBG("pool size: %" PRIu64 " (bytes)\n", - spool->ctrl.nb_elem * spool->ctrl.element_sz); - - /* count the number of free elements in the free list: */ - nb_free_elts = 0; - sblock = (sblock_t *)spool->ctrl.free_head; - while (sblock) { - nb_free_elts++; - sblock = sblock->next; - } - - ODP_DBG("%" PRIu64 "/%" PRIu64 " available elements.\n", - nb_free_elts, spool->ctrl.nb_elem); - - odp_spinlock_unlock(&spool->ctrl.lock); - return 0; -} - -/* section 3: common, external functions: */ - -/* create a 
pool: either with fixed alloc size (if max_alloc/min_alloc<2) or - * of variable block size (if max_alloc == 0) */ -pool_t *_odp_ishm_pool_create(const char *pool_name, uint64_t size, - uint64_t min_alloc, uint64_t max_alloc, int flags) -{ - int store_idx; - uint64_t real_pool_sz; - - if (min_alloc > max_alloc) { - ODP_ERR("invalid parameter: min_alloc > max_alloc"); - return NULL; - } - - /* search for a free index in pool_blk_idx for the pool */ - for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) { - if (pool_blk_idx[store_idx] < 0) - break; - } - if (store_idx == MAX_NB_POOL) { - ODP_ERR("Max number of pool reached (MAX_NB_POOL)"); - return NULL; - } - - if ((min_alloc == 0) || ((max_alloc / min_alloc) > 2)) { - /* alloc variation is not constant enough: we go for a buddy - * allocator. The pool efficiency may go as low as 50% - * so we double the required size to make sure we can satisfy - * the user request */ - real_pool_sz = 2 * size; - return _odp_ishmbud_pool_create(pool_name, store_idx, - real_pool_sz, - BUDDY_MIN_SIZE, flags); - } else { - /* min and max are close enough so we go for constant size - * allocator: - * make sure the pool can fit the required size, even when - * only min_alloc allocation are performed: */ - real_pool_sz = ((size / min_alloc) + - ((size % min_alloc) ? 1 : 0)) - * max_alloc; - return _odp_ishmslab_pool_create(pool_name, store_idx, - real_pool_sz, - max_alloc, flags); - } -} - -/* destroy a pool. everything goes away. no operation on the pool should - * follow. 
*/ -int _odp_ishm_pool_destroy(pool_t *pool) -{ - int store_idx; - - for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) { - if (pool_blk_idx[store_idx] == pool->ctrl.ishm_blk_idx) { - pool_blk_idx[store_idx] = -1; - break; - } - } - - return _odp_ishm_free_by_index(pool->ctrl.ishm_blk_idx); -} - -/* allocated a buffer from a pool */ -void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size) -{ - if (!pool->ctrl.element_sz) - return _odp_ishmbud_alloc(pool, size); - else - return _odp_ishmslab_alloc(pool, size); -} - -/* free a previously allocated buffer from a pool */ -int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr) -{ - if (!pool->ctrl.element_sz) - return _odp_ishmbud_free(pool, addr); - else - return _odp_ishmslab_free(pool, addr); -} - -/* Print a pool status */ -int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool) -{ - if (!pool->ctrl.element_sz) - return _odp_ishmbud_pool_status(title, pool); - else - return _odp_ishmslab_pool_status(title, pool); -} - -void _odp_ishm_pool_init(void) -{ - int i; - - for (i = 0; i < MAX_NB_POOL; i++) - pool_blk_idx[i] = -1; -} - -_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name) -{ - int block_idx; - int store_idx; - - /* search for a _ishm block with the given name */ - block_idx = _odp_ishm_lookup_by_name(pool_name); - if (block_idx < 0) - return NULL; - - /* a block with that name exists: make sure it is within - * the registered pools */ - for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) { - if (pool_blk_idx[store_idx] == block_idx) - return _odp_ishm_address(block_idx); - } - - return NULL; -} diff --git a/platform/linux-generic/include/_fdserver_internal.h b/platform/linux-generic/include/_fdserver_internal.h deleted file mode 100644 index 8518a5b7e..000000000 --- a/platform/linux-generic/include/_fdserver_internal.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef _FD_SERVER_INTERNAL_H -#define _FD_SERVER_INTERNAL_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * the following enum defines the different contextes by which the - * FD server may be used: In the FD server, the keys used to store/retrieve - * a file descriptor are actually context based: - * Both the context and the key are stored at fd registration time, - * and both the context and the key are used to retrieve a fd. - * In other words a context identifies a FD server usage, so that different - * unrelated fd server users do not have to guarantee key unicity between - * them. - */ -typedef enum fd_server_context { - FD_SRV_CTX_NA, /* Not Applicable */ - FD_SRV_CTX_ISHM, - FD_SRV_CTX_END, /* upper enum limit */ -} fd_server_context_e; - -int _odp_fdserver_register_fd(fd_server_context_e context, uint64_t key, - int fd); -int _odp_fdserver_deregister_fd(fd_server_context_e context, uint64_t key); -int _odp_fdserver_lookup_fd(fd_server_context_e context, uint64_t key); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/platform/linux-generic/include/_ishm_internal.h b/platform/linux-generic/include/_ishm_internal.h deleted file mode 100644 index 56c7f5a93..000000000 --- a/platform/linux-generic/include/_ishm_internal.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef ODP_ISHM_INTERNAL_H_ -#define ODP_ISHM_INTERNAL_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* flags available at ishm_reserve: */ -#define _ODP_ISHM_SINGLE_VA 1 -#define _ODP_ISHM_LOCK 2 -#define _ODP_ISHM_EXPORT 4 /*create export descr file in /tmp */ - -/** - * Shared memory block info - */ -typedef struct _odp_ishm_info_t { - const char *name; /**< Block name */ - void *addr; /**< Block address */ - uint64_t size; /**< Block size in bytes */ - uint64_t page_size; /**< Memory page size */ - uint32_t flags; /**< _ODP_ISHM_* flags */ - uint32_t user_flags;/**< user specific flags */ -} _odp_ishm_info_t; - -int _odp_ishm_reserve(const char *name, uint64_t size, int fd, uint32_t align, - uint32_t flags, uint32_t user_flags); -int _odp_ishm_free_by_index(int block_index); -int _odp_ishm_free_by_name(const char *name); -int _odp_ishm_free_by_address(void *addr); -void *_odp_ishm_lookup_by_index(int block_index); -int _odp_ishm_lookup_by_name(const char *name); -int _odp_ishm_lookup_by_address(void *addr); -int _odp_ishm_find_exported(const char *remote_name, - pid_t external_odp_pid, - const char *local_name); -void *_odp_ishm_address(int block_index); -int _odp_ishm_info(int block_index, _odp_ishm_info_t *info); -int _odp_ishm_status(const char *title); -int _odp_ishm_cleanup_files(const char *dirpath); -void _odp_ishm_print(int block_index); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/platform/linux-generic/include/_ishmphy_internal.h b/platform/linux-generic/include/_ishmphy_internal.h deleted file mode 100644 index 05e3fcec7..000000000 --- a/platform/linux-generic/include/_ishmphy_internal.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef _ISHMPHY_INTERNAL_H -#define _ISHMPHY_INTERNAL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -void *_odp_ishmphy_book_va(uintptr_t len, intptr_t align); -int _odp_ishmphy_unbook_va(void); -void *_odp_ishmphy_map(int fd, void *start, uint64_t size, int flags); -int _odp_ishmphy_unmap(void *start, uint64_t len, int flags); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/platform/linux-generic/include/_ishmpool_internal.h b/platform/linux-generic/include/_ishmpool_internal.h deleted file mode 100644 index 94bcddaeb..000000000 --- a/platform/linux-generic/include/_ishmpool_internal.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2017-2018, Linaro Limited - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef ODP_ISHMBUDDY_INTERNAL_H_ -#define ODP_ISHMBUDDY_INTERNAL_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef struct _odp_ishm_pool_ctrl_t { - uint32_t element_sz; /* 0 for buddy pools, >0 for slab. */ - int ishm_blk_idx; /* the block index returned by _ishm_resrve()*/ - odp_spinlock_t lock; /* for pool access mutex */ - void *user_addr; /* user pool area ('real user pool') */ - union { - struct { /* things needed for buddy pools: */ - uint8_t order; /* pool is 2^order bytes long */ - uint8_t min_order; /*alloc won't go below 2^min_order*/ - void **free_heads; /* 'order' free list heads. 
*/ - uint8_t *alloced_order; /* size of blocks, 0=free */ - }; - struct { /* things needed for slab pools: */ - void *free_head; /* free element list head */ - uint64_t nb_elem;/* total number of elements in pool */ - }; - }; -} _odp_ishm_pool_ctrl_t; - -typedef struct _odp_ishm_pool_t { - _odp_ishm_pool_ctrl_t ctrl; /* control part */ - uint8_t mem[1]; /* area for heads, saved alloc'd orders, data*/ -} _odp_ishm_pool_t; - -_odp_ishm_pool_t *_odp_ishm_pool_create(const char *pool_name, - uint64_t size, - uint64_t min_alloc, - uint64_t max_alloc, int flags); -int _odp_ishm_pool_destroy(_odp_ishm_pool_t *pool); -void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size); -int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr); -int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool); -_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name); -void _odp_ishm_pool_init(void); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/platform/linux-generic/include/odp_fdserver_internal.h b/platform/linux-generic/include/odp_fdserver_internal.h new file mode 100644 index 000000000..8518a5b7e --- /dev/null +++ b/platform/linux-generic/include/odp_fdserver_internal.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2016-2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _FD_SERVER_INTERNAL_H +#define _FD_SERVER_INTERNAL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * the following enum defines the different contextes by which the + * FD server may be used: In the FD server, the keys used to store/retrieve + * a file descriptor are actually context based: + * Both the context and the key are stored at fd registration time, + * and both the context and the key are used to retrieve a fd. + * In other words a context identifies a FD server usage, so that different + * unrelated fd server users do not have to guarantee key unicity between + * them. 
+ */ +typedef enum fd_server_context { + FD_SRV_CTX_NA, /* Not Applicable */ + FD_SRV_CTX_ISHM, + FD_SRV_CTX_END, /* upper enum limit */ +} fd_server_context_e; + +int _odp_fdserver_register_fd(fd_server_context_e context, uint64_t key, + int fd); +int _odp_fdserver_deregister_fd(fd_server_context_e context, uint64_t key); +int _odp_fdserver_lookup_fd(fd_server_context_e context, uint64_t key); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/include/odp_ishm_internal.h b/platform/linux-generic/include/odp_ishm_internal.h new file mode 100644 index 000000000..56c7f5a93 --- /dev/null +++ b/platform/linux-generic/include/odp_ishm_internal.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2016-2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef ODP_ISHM_INTERNAL_H_ +#define ODP_ISHM_INTERNAL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* flags available at ishm_reserve: */ +#define _ODP_ISHM_SINGLE_VA 1 +#define _ODP_ISHM_LOCK 2 +#define _ODP_ISHM_EXPORT 4 /*create export descr file in /tmp */ + +/** + * Shared memory block info + */ +typedef struct _odp_ishm_info_t { + const char *name; /**< Block name */ + void *addr; /**< Block address */ + uint64_t size; /**< Block size in bytes */ + uint64_t page_size; /**< Memory page size */ + uint32_t flags; /**< _ODP_ISHM_* flags */ + uint32_t user_flags;/**< user specific flags */ +} _odp_ishm_info_t; + +int _odp_ishm_reserve(const char *name, uint64_t size, int fd, uint32_t align, + uint32_t flags, uint32_t user_flags); +int _odp_ishm_free_by_index(int block_index); +int _odp_ishm_free_by_name(const char *name); +int _odp_ishm_free_by_address(void *addr); +void *_odp_ishm_lookup_by_index(int block_index); +int _odp_ishm_lookup_by_name(const char *name); +int _odp_ishm_lookup_by_address(void *addr); +int _odp_ishm_find_exported(const char *remote_name, + pid_t external_odp_pid, + const char *local_name); +void *_odp_ishm_address(int 
block_index); +int _odp_ishm_info(int block_index, _odp_ishm_info_t *info); +int _odp_ishm_status(const char *title); +int _odp_ishm_cleanup_files(const char *dirpath); +void _odp_ishm_print(int block_index); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/include/odp_ishmphy_internal.h b/platform/linux-generic/include/odp_ishmphy_internal.h new file mode 100644 index 000000000..05e3fcec7 --- /dev/null +++ b/platform/linux-generic/include/odp_ishmphy_internal.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2016-2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef _ISHMPHY_INTERNAL_H +#define _ISHMPHY_INTERNAL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void *_odp_ishmphy_book_va(uintptr_t len, intptr_t align); +int _odp_ishmphy_unbook_va(void); +void *_odp_ishmphy_map(int fd, void *start, uint64_t size, int flags); +int _odp_ishmphy_unmap(void *start, uint64_t len, int flags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/include/odp_ishmpool_internal.h b/platform/linux-generic/include/odp_ishmpool_internal.h new file mode 100644 index 000000000..94bcddaeb --- /dev/null +++ b/platform/linux-generic/include/odp_ishmpool_internal.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2017-2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef ODP_ISHMBUDDY_INTERNAL_H_ +#define ODP_ISHMBUDDY_INTERNAL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +typedef struct _odp_ishm_pool_ctrl_t { + uint32_t element_sz; /* 0 for buddy pools, >0 for slab. 
*/ + int ishm_blk_idx; /* the block index returned by _ishm_resrve()*/ + odp_spinlock_t lock; /* for pool access mutex */ + void *user_addr; /* user pool area ('real user pool') */ + union { + struct { /* things needed for buddy pools: */ + uint8_t order; /* pool is 2^order bytes long */ + uint8_t min_order; /*alloc won't go below 2^min_order*/ + void **free_heads; /* 'order' free list heads. */ + uint8_t *alloced_order; /* size of blocks, 0=free */ + }; + struct { /* things needed for slab pools: */ + void *free_head; /* free element list head */ + uint64_t nb_elem;/* total number of elements in pool */ + }; + }; +} _odp_ishm_pool_ctrl_t; + +typedef struct _odp_ishm_pool_t { + _odp_ishm_pool_ctrl_t ctrl; /* control part */ + uint8_t mem[1]; /* area for heads, saved alloc'd orders, data*/ +} _odp_ishm_pool_t; + +_odp_ishm_pool_t *_odp_ishm_pool_create(const char *pool_name, + uint64_t size, + uint64_t min_alloc, + uint64_t max_alloc, int flags); +int _odp_ishm_pool_destroy(_odp_ishm_pool_t *pool); +void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size); +int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr); +int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool); +_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name); +void _odp_ishm_pool_init(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/include/odp_schedule_scalable_ordered.h b/platform/linux-generic/include/odp_schedule_scalable_ordered.h index fb4720a51..17d4f7eab 100644 --- a/platform/linux-generic/include/odp_schedule_scalable_ordered.h +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h @@ -14,7 +14,7 @@ #include #include #include -#include <_ishmpool_internal.h> +#include /* High level functioning of reordering * Datastructures - diff --git a/platform/linux-generic/odp_fdserver.c b/platform/linux-generic/odp_fdserver.c new file mode 100644 index 000000000..f8d40ff9a --- /dev/null +++ 
b/platform/linux-generic/odp_fdserver.c @@ -0,0 +1,699 @@ +/* Copyright (c) 2016-2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "config.h" + +/* + * This file implements a file descriptor sharing server enabling + * sharing of file descriptors between processes, regardless of fork time. + * + * File descriptors are process scoped, but they can be "sent and converted + * on the fly" between processes using special unix domain socket ancillary + * data. + * The receiving process gets a file descriptor "pointing" to the same thing + * as the one sent (but the value of the file descriptor itself may be different + * from the one sent). + * Because ODP applications are responsible for creating ODP threads (i.e. + * pthreads or linux processes), ODP has no control on the order things happen: + * Nothing prevent a thread A to fork B and C, and then C creating a pktio + * which will be used by A and B to send/receive packets. + * Assuming this pktio uses a file descriptor, the latter will need to be + * shared between the processes, despite the "non convenient" fork time. + * The shared memory allocator is likely to use this as well to be able to + * share memory regardless of fork() time. + * This server handles a table of {(context,key)<-> fd} pair, and is + * interfaced by the following functions: + * + * _odp_fdserver_register_fd(context, key, fd_to_send); + * _odp_fdserver_deregister_fd(context, key); + * _odp_fdserver_lookup_fd(context, key); + * + * which are used to register/deregister or querry for file descriptor based + * on a context and key value couple, which has to be unique. + * + * Note again that the file descriptors stored here are local to this server + * process and get converted both when registered or looked up. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FDSERVER_SOCKPATH_MAXLEN 255 +#define FDSERVER_SOCK_FORMAT "%s/%s/odp-%d-fdserver" +#define FDSERVER_SOCKDIR_FORMAT "%s/%s" +#define FDSERVER_DEFAULT_DIR "/dev/shm" +#define FDSERVER_BACKLOG 5 + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* when accessing the client functions, clients should be mutexed: */ +static odp_spinlock_t *client_lock; + +/* define the tables of file descriptors handled by this server: */ +#define FDSERVER_MAX_ENTRIES 256 +typedef struct fdentry_s { + fd_server_context_e context; + uint64_t key; + int fd; +} fdentry_t; +static fdentry_t *fd_table; +static int fd_table_nb_entries; + +/* + * define the message struct used for communication between client and server + * (this single message is used in both direction) + * The file descriptors are sent out of band as ancillary data for conversion. 
+ */
+typedef struct fd_server_msg {
+	int command; /* one of the FD_xxx command codes defined below */
+	fd_server_context_e context; /* context the key belongs to */
+	uint64_t key; /* key paired with the context */
+} fdserver_msg_t;
+/* possible commands are: */
+#define FD_REGISTER_REQ		1  /* client -> server */
+#define FD_REGISTER_ACK		2  /* server -> client */
+#define FD_REGISTER_NACK	3  /* server -> client */
+#define FD_LOOKUP_REQ		4  /* client -> server */
+#define FD_LOOKUP_ACK		5  /* server -> client */
+#define FD_LOOKUP_NACK		6  /* server -> client */
+#define FD_DEREGISTER_REQ	7  /* client -> server */
+#define FD_DEREGISTER_ACK	8  /* server -> client */
+#define FD_DEREGISTER_NACK	9  /* server -> client */
+#define FD_SERVERSTOP_REQ	10 /* client -> server (stops) */
+
+/*
+ * Client and server function:
+ * Send a fdserver_msg, possibly including a file descriptor, on the socket.
+ * This function is used both by:
+ * -the client (sending a FD_REGISTER_REQ with a file descriptor to be shared,
+ *  or FD_LOOKUP_REQ/FD_DEREGISTER_REQ without a file descriptor)
+ * -the server (sending FD_REGISTER_ACK/NACK, FD_LOOKUP_NACK,
+ *  FD_DEREGISTER_ACK/NACK... without a fd or a
+ *  FD_LOOKUP_ACK with a fd)
+ * This function makes use of the ancillary data (control data) to pass and
+ * convert file descriptors over UNIX sockets.
+ * Return -1 on error, 0 on success.
+ */
+static int send_fdserver_msg(int sock, int command,
+			     fd_server_context_e context, uint64_t key,
+			     int fd_to_send)
+{
+	struct msghdr socket_message;
+	struct iovec io_vector[1]; /* one message fragment only */
+	struct cmsghdr *control_message;
+	fdserver_msg_t msg;
+	int res;
+
+	/*
+	 * Buffer for the ancillary data. The union forces the alignment
+	 * required by struct cmsghdr, which a plain char array does not
+	 * guarantee (see cmsg(3)).
+	 */
+	union {
+		char buf[CMSG_SPACE(sizeof(int))];
+		struct cmsghdr align;
+	} ancillary_data;
+
+	/* prepare the message body (single fragment): */
+	msg.command = command;
+	msg.context = context;
+	msg.key = key;
+	io_vector[0].iov_base = &msg;
+	io_vector[0].iov_len = sizeof(fdserver_msg_t);
+
+	/* initialize socket message */
+	memset(&socket_message, 0, sizeof(struct msghdr));
+	socket_message.msg_iov = io_vector;
+	socket_message.msg_iovlen = 1;
+
+	/* a negative fd_to_send means: no file descriptor is attached */
+	if (fd_to_send >= 0) {
+		/* provide space for the ancillary data */
+		memset(&ancillary_data, 0, sizeof(ancillary_data));
+		socket_message.msg_control = ancillary_data.buf;
+		socket_message.msg_controllen = sizeof(ancillary_data.buf);
+
+		/* initialize a single ancillary data element for fd passing */
+		control_message = CMSG_FIRSTHDR(&socket_message);
+		control_message->cmsg_level = SOL_SOCKET;
+		control_message->cmsg_type = SCM_RIGHTS;
+		control_message->cmsg_len = CMSG_LEN(sizeof(int));
+		/* copy rather than store through a casted pointer */
+		memcpy(CMSG_DATA(control_message), &fd_to_send,
+		       sizeof(fd_to_send));
+	}
+	res = sendmsg(sock, &socket_message, 0);
+	if (res < 0) {
+		ODP_ERR("send_fdserver_msg: %s\n", strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Client and server function
+ * Receive a fdserver_msg, possibly including a file descriptor, on the
+ * given socket.
+ * This function is used both by:
+ * -the server (receiving a FD_REGISTER_REQ with a file descriptor to be shared,
+ *  or FD_LOOKUP_REQ, FD_DEREGISTER_REQ without a file descriptor)
+ * -the client (receiving FD_REGISTER_ACK...without a fd or a FD_LOOKUP_ACK with
+ *  a fd)
+ * This function makes use of the ancillary data (control data) to pass and
+ * convert file descriptors over UNIX sockets.
+ * Return -1 on error (including a closed connection or an incomplete
+ * message), 0 on success.
+ */
+static int recv_fdserver_msg(int sock, int *command,
+			     fd_server_context_e *context, uint64_t *key,
+			     int *recvd_fd)
+{
+	struct msghdr socket_message;
+	struct iovec io_vector[1]; /* one message fragment only */
+	struct cmsghdr *control_message = NULL;
+	fdserver_msg_t msg;
+	ssize_t res;
+
+	/* the union forces the alignment needed by struct cmsghdr (cmsg(3)) */
+	union {
+		char buf[CMSG_SPACE(sizeof(int))];
+		struct cmsghdr align;
+	} ancillary_data;
+
+	memset(&socket_message, 0, sizeof(struct msghdr));
+	memset(&ancillary_data, 0, sizeof(ancillary_data));
+
+	/* setup a place to fill in message contents */
+	io_vector[0].iov_base = &msg;
+	io_vector[0].iov_len = sizeof(fdserver_msg_t);
+	socket_message.msg_iov = io_vector;
+	socket_message.msg_iovlen = 1;
+
+	/* provide space for the ancillary data */
+	socket_message.msg_control = ancillary_data.buf;
+	socket_message.msg_controllen = sizeof(ancillary_data.buf);
+
+	/* receive the message */
+	res = recvmsg(sock, &socket_message, MSG_CMSG_CLOEXEC);
+	if (res < 0) {
+		ODP_ERR("recv_fdserver_msg: %s\n", strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * 0 means that the peer closed the connection, and anything shorter
+	 * than a whole message leaves msg partly unset: in both cases the
+	 * fields of msg must not be read.
+	 */
+	if ((size_t)res != sizeof(fdserver_msg_t)) {
+		ODP_ERR("recv_fdserver_msg: incomplete message\n");
+		return -1;
+	}
+
+	*command = msg.command;
+	*context = msg.context;
+	*key = msg.key;
+
+	/* grab the converted file descriptor (if any) */
+	*recvd_fd = -1;
+
+	/* truncated control data: report the message without any fd */
+	if ((socket_message.msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
+		return 0;
+
+	/* iterate ancillary elements to find the file descriptor: */
+	for (control_message = CMSG_FIRSTHDR(&socket_message);
+	     control_message != NULL;
+	     control_message = CMSG_NXTHDR(&socket_message, control_message)) {
+		if ((control_message->cmsg_level == SOL_SOCKET) &&
+		    (control_message->cmsg_type == SCM_RIGHTS)) {
+			memcpy(recvd_fd, CMSG_DATA(control_message),
+			       sizeof(int));
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* opens and returns a connected socket to the server (-1 on error) */
+static int get_socket(void)
+{
+	char sockpath[FDSERVER_SOCKPATH_MAXLEN];
+	int s_sock; /* server socket */
+	struct sockaddr_un remote;
+	int len;
+
+	/* construct the named socket path: */
+	len = snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN,
+		       FDSERVER_SOCK_FORMAT, odp_global_data.shm_dir,
+		       odp_global_data.uid, odp_global_data.main_pid);
+
+	/*
+	 * sockpath may hold more characters than sun_path can: refuse such
+	 * a path instead of overflowing sun_path with strcpy() below.
+	 */
+	if ((len < 0) || ((size_t)len >= sizeof(remote.sun_path))) {
+		ODP_ERR("fdserver socket path too long\n");
+		return -1;
+	}
+
+	s_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (s_sock == -1) {
+		ODP_ERR("cannot connect to server: %s\n", strerror(errno));
+		return -1;
+	}
+
+	remote.sun_family = AF_UNIX;
+	strcpy(remote.sun_path, sockpath); /* fits: length checked above */
+	len = strlen(remote.sun_path) + sizeof(remote.sun_family);
+	if (connect(s_sock, (struct sockaddr *)&remote, len) == -1) {
+		ODP_ERR("cannot connect to server: %s\n", strerror(errno));
+		close(s_sock);
+		return -1;
+	}
+
+	return s_sock;
+}
+
+/*
+ * Client function:
+ * Register a file descriptor to the server. Return -1 on error.
+ */
+int _odp_fdserver_register_fd(fd_server_context_e context, uint64_t key,
+			      int fd_to_send)
+{
+	int s_sock; /* server socket */
+	int res;
+	int command;
+	int fd;
+
+	/* the lock is held for the whole request/reply exchange */
+	odp_spinlock_lock(client_lock);
+
+	ODP_DBG("FD client register: pid=%d key=%" PRIu64 ", fd=%d\n",
+		getpid(), key, fd_to_send);
+
+	s_sock = get_socket();
+	if (s_sock < 0) {
+		odp_spinlock_unlock(client_lock);
+		return -1;
+	}
+
+	res =  send_fdserver_msg(s_sock, FD_REGISTER_REQ, context, key,
+				 fd_to_send);
+	if (res < 0) {
+		ODP_ERR("fd registration failure\n");
+		close(s_sock);
+		odp_spinlock_unlock(client_lock);
+		return -1;
+	}
+
+	res = recv_fdserver_msg(s_sock, &command, &context, &key, &fd);
+
+	/* command is only looked at when the reply was received */
+	if ((res < 0) || (command != FD_REGISTER_ACK)) {
+		ODP_ERR("fd registration failure\n");
+		close(s_sock);
+		odp_spinlock_unlock(client_lock);
+		return -1;
+	}
+
+	close(s_sock);
+
+	odp_spinlock_unlock(client_lock);
+	return 0;
+}
+
+/*
+ * Client function:
+ * Deregister a file descriptor from the server. Return -1 on error.
+ */
+int _odp_fdserver_deregister_fd(fd_server_context_e context, uint64_t key)
+{
+	int srv;	/* connection to the fd server */
+	int reply;	/* command code answered by the server */
+	int unused_fd;	/* fd slot needed by recv_fdserver_msg() */
+	int ret = -1;
+
+	/* the lock is held for the whole request/reply exchange */
+	odp_spinlock_lock(client_lock);
+
+	ODP_DBG("FD client deregister: pid=%d key=%" PRIu64 "\n",
+		getpid(), key);
+
+	srv = get_socket();
+	if (srv < 0)
+		goto unlock;
+
+	/* send the request, then expect a positive acknowledge: */
+	if ((send_fdserver_msg(srv, FD_DEREGISTER_REQ, context, key, -1) < 0) ||
+	    (recv_fdserver_msg(srv, &reply, &context, &key, &unused_fd) < 0) ||
+	    (reply != FD_DEREGISTER_ACK)) {
+		ODP_ERR("fd de-registration failure\n");
+	} else {
+		ret = 0;
+	}
+
+	close(srv);
+
+unlock:
+	odp_spinlock_unlock(client_lock);
+	return ret;
+}
+
+/*
+ * client function:
+ * lookup a file descriptor from the server. return -1 on error,
+ * or the file descriptor on success (>=0).
+ */
+int _odp_fdserver_lookup_fd(fd_server_context_e context, uint64_t key)
+{
+	int srv;	/* connection to the fd server */
+	int reply;	/* command code answered by the server */
+	int fd;		/* descriptor delivered with the answer */
+	int ret = -1;
+
+	/* the lock is held for the whole request/reply exchange */
+	odp_spinlock_lock(client_lock);
+
+	srv = get_socket();
+	if (srv < 0)
+		goto unlock;
+
+	/* send the request, then expect a positive acknowledge: */
+	if ((send_fdserver_msg(srv, FD_LOOKUP_REQ, context, key, -1) < 0) ||
+	    (recv_fdserver_msg(srv, &reply, &context, &key, &fd) < 0) ||
+	    (reply != FD_LOOKUP_ACK)) {
+		ODP_ERR("fd lookup failure\n");
+		close(srv);
+		goto unlock;
+	}
+
+	close(srv);
+	ODP_DBG("FD client lookup: pid=%d, key=%" PRIu64 ", fd=%d\n",
+		getpid(), key, fd);
+	ret = fd;
+
+unlock:
+	odp_spinlock_unlock(client_lock);
+	return ret;
+}
+
+/*
+ * Ask the server to stop. No reply is read back.
+ * Return -1 on error, 0 when the request was sent.
+ */
+static int stop_server(void)
+{
+	int srv;	/* connection to the fd server */
+	int ret = -1;
+
+	odp_spinlock_lock(client_lock);
+
+	ODP_DBG("FD sending server stop request\n");
+
+	srv = get_socket();
+	if (srv >= 0) {
+		if (send_fdserver_msg(srv, FD_SERVERSTOP_REQ, 0, 0, -1) < 0) {
+			ODP_ERR("fd stop request failure\n");
+		} else {
+			ret = 0;
+		}
+
+		close(srv);
+	}
+
+	odp_spinlock_unlock(client_lock);
+	return ret;
+}
+
+/*
+ * server function
+ * receive a client request and handle it.
+ * Always returns 0 unless a stop request is received.
+ */
+static int handle_request(int client_sock)
+{
+	int command;
+	fd_server_context_e context;
+	uint64_t key;
+	int fd;
+	int i;
+
+	/*
+	 * get a client request: when the reception fails, command, context,
+	 * key and fd are not set, so the request is dropped here.
+	 */
+	if (recv_fdserver_msg(client_sock, &command, &context, &key, &fd) < 0)
+		return 0;
+
+	switch (command) {
+	case FD_REGISTER_REQ:
+		if ((fd < 0) || (context >= FD_SRV_CTX_END)) {
+			ODP_ERR("Invalid register fd or context\n");
+			/* a received fd which is not stored must be closed */
+			if (fd >= 0)
+				close(fd);
+			send_fdserver_msg(client_sock, FD_REGISTER_NACK,
+					  FD_SRV_CTX_NA, 0, -1);
+			return 0;
+		}
+
+		/* store the file descriptor in table: */
+		if (fd_table_nb_entries < FDSERVER_MAX_ENTRIES) {
+			fd_table[fd_table_nb_entries].context = context;
+			fd_table[fd_table_nb_entries].key     = key;
+			fd_table[fd_table_nb_entries++].fd    = fd;
+			ODP_DBG("storing {ctx=%d, key=%" PRIu64 "}->fd=%d\n",
+				context, key, fd);
+		} else {
+			ODP_ERR("FD table full\n");
+			/* not stored: close it so that it does not leak */
+			close(fd);
+			send_fdserver_msg(client_sock, FD_REGISTER_NACK,
+					  FD_SRV_CTX_NA, 0, -1);
+			return 0;
+		}
+
+		send_fdserver_msg(client_sock, FD_REGISTER_ACK,
+				  FD_SRV_CTX_NA, 0, -1);
+		break;
+
+	case FD_LOOKUP_REQ:
+		if (context >= FD_SRV_CTX_END) {
+			ODP_ERR("invalid lookup context\n");
+			send_fdserver_msg(client_sock, FD_LOOKUP_NACK,
+					  FD_SRV_CTX_NA, 0, -1);
+			return 0;
+		}
+
+		/* search key in table and send reply: */
+		for (i = 0; i < fd_table_nb_entries; i++) {
+			if ((fd_table[i].context == context) &&
+			    (fd_table[i].key == key)) {
+				fd = fd_table[i].fd;
+				ODP_DBG("lookup {ctx=%d,"
+					" key=%" PRIu64 "}->fd=%d\n",
+					context, key, fd);
+				send_fdserver_msg(client_sock,
+						  FD_LOOKUP_ACK, context, key,
+						  fd);
+				return 0;
+			}
+		}
+
+		/* context+key not found... send nack */
+		send_fdserver_msg(client_sock, FD_LOOKUP_NACK, context, key,
+				  -1);
+		break;
+
+	case FD_DEREGISTER_REQ:
+		if (context >= FD_SRV_CTX_END) {
+			ODP_ERR("invalid deregister context\n");
+			send_fdserver_msg(client_sock, FD_DEREGISTER_NACK,
+					  FD_SRV_CTX_NA, 0, -1);
+			return 0;
+		}
+
+		/* search key in table and remove it if found, and reply: */
+		for (i = 0; i < fd_table_nb_entries; i++) {
+			if ((fd_table[i].context == context) &&
+			    (fd_table[i].key == key)) {
+				ODP_DBG("drop {ctx=%d,"
+					" key=%" PRIu64 "}->fd=%d\n",
+					context, key, fd_table[i].fd);
+				close(fd_table[i].fd);
+				/* the last entry takes the freed slot */
+				fd_table[i] = fd_table[--fd_table_nb_entries];
+				send_fdserver_msg(client_sock,
+						  FD_DEREGISTER_ACK,
+						  context, key, -1);
+				return 0;
+			}
+		}
+
+		/* key not found... send nack */
+		send_fdserver_msg(client_sock, FD_DEREGISTER_NACK,
+				  context, key, -1);
+		break;
+
+	case FD_SERVERSTOP_REQ:
+		ODP_DBG("Stoping FD server\n");
+		return 1;
+
+	default:
+		ODP_ERR("Unexpected request\n");
+		break;
+	}
+	return 0;
+}
+
+/*
+ * server function
+ * loop handling client requests one by one, until a stop request is
+ * received or accept() fails (for a reason other than EINTR)
+ */
+static void wait_requests(int sock)
+{
+	int c_socket; /* client connection */
+	unsigned int addr_sz;
+	struct sockaddr_un remote;
+
+	for (;;) {
+		addr_sz = sizeof(remote);
+		c_socket = accept(sock, (struct sockaddr *)&remote, &addr_sz);
+		if (c_socket == -1) {
+			if (errno == EINTR)
+				continue;
+
+			ODP_ERR("wait_requests: %s\n", strerror(errno));
+			return;
+		}
+
+		/* non zero means: stop request received */
+		if (handle_request(c_socket))
+			break;
+		close(c_socket);
+	}
+	/* close the connection which carried the stop request */
+	close(c_socket);
+}
+
+/*
+ * Create a unix domain socket and fork a process to listen to incoming
+ * requests.
+ */
+int _odp_fdserver_init_global(void)
+{
+	char sockpath[FDSERVER_SOCKPATH_MAXLEN];
+	int sock;
+	struct sockaddr_un local;
+	pid_t server_pid;
+	int res;
+
+	/* create the client spinlock that any client can see: */
+	client_lock = mmap(NULL, sizeof(odp_spinlock_t), PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	/* mmap() reports failure with MAP_FAILED, which must never be handed
+	 * to odp_spinlock_init() */
+	if (client_lock == MAP_FAILED) {
+		ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno));
+		return -1;
+	}
+
+	odp_spinlock_init(client_lock);
+
+	/* directory holding the named socket (it may already exist): */
+	snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCKDIR_FORMAT,
+		 odp_global_data.shm_dir,
+		 odp_global_data.uid);
+
+	mkdir(sockpath, 0744);
+
+	/* construct the server named socket path: */
+	snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCK_FORMAT,
+		 odp_global_data.shm_dir,
+		 odp_global_data.uid,
+		 odp_global_data.main_pid);
+
+	/* create UNIX domain socket: */
+	sock = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sock == -1) {
+		ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno));
+		return -1;
+	}
+
+	/* remove previous named socket if it already exists: */
+	unlink(sockpath);
+
+	/* bind to new named socket: */
+	/* NOTE(review): strncpy() leaves sun_path unterminated when sockpath
+	 * does not fit; the client side is not visible here, so the copy is
+	 * left as it was to keep both ends building the same address. */
+	local.sun_family = AF_UNIX;
+	strncpy(local.sun_path, sockpath, sizeof(local.sun_path));
+	res = bind(sock, (struct sockaddr *)&local, sizeof(struct sockaddr_un));
+	if (res == -1) {
+		ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno));
+		close(sock);
+		return -1;
+	}
+
+	/* listen for incoming connections: */
+	if (listen(sock, FDSERVER_BACKLOG) == -1) {
+		ODP_ERR("_odp_fdserver_init_global: %s\n", strerror(errno));
+		close(sock);
+		return -1;
+	}
+
+	/* fork a server process: */
+	server_pid = fork();
+	if (server_pid == -1) {
+		ODP_ERR("Could not fork!\n");
+		close(sock);
+		return -1;
+	}
+
+	if (server_pid == 0) { /*child */
+		/* TODO: pin the server on appropriate service cpu mask */
+		/* when (if) we can agree on the usage of service mask  */
+
+		/* request to be killed if parent dies, hence avoiding  */
+		/* orphans being "adopted" by the init process...	*/
+		prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+		/* allocate the space for the file descriptor<->key table: */
+		fd_table = malloc(FDSERVER_MAX_ENTRIES * sizeof(fdentry_t));
+		if (!fd_table) {
+			ODP_ERR("maloc failed!\n");
+			exit(1);
+		}
+
+		/* wait for clients requests */
+		wait_requests(sock); /* Returns when server is stopped  */
+		close(sock);
+
+		/* release the file descriptor table: */
+		free(fd_table);
+
+		exit(0);
+	}
+
+	/* parent: the listening socket now belongs to the server process */
+	close(sock);
+	return 0;
+}
+
+/*
+ * Terminate the server
+ */
+int _odp_fdserver_term_global(void)
+{
+	int status;
+	char sockpath[FDSERVER_SOCKPATH_MAXLEN];
+
+	/* close the server and wait for child termination */
+	stop_server();
+	wait(&status);
+
+	/* construct the server named socket path: */
+	snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCK_FORMAT,
+		 odp_global_data.shm_dir,
+		 odp_global_data.uid,
+		 odp_global_data.main_pid);
+
+	/* delete the UNIX domain socket: */
+	unlink(sockpath);
+
+	/* delete shm files directory */
+	snprintf(sockpath, FDSERVER_SOCKPATH_MAXLEN, FDSERVER_SOCKDIR_FORMAT,
+		 odp_global_data.shm_dir,
+		 odp_global_data.uid);
+	rmdir(sockpath);
+
+	return 0;
+}
diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c
new file mode 100644
index 000000000..3f123c901
--- /dev/null
+++ b/platform/linux-generic/odp_ishm.c
@@ -0,0 +1,1897 @@
+/* Copyright (c) 2016-2018, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "config.h"
+
+/* This file handles the internal shared memory: internal shared memory
+ * is memory which is sharable by all ODP threads regardless of how the
+ * ODP thread is implemented (pthread or process) and regardless of fork()
+ * time.
+ * Moreover, when reserved with the _ODP_ISHM_SINGLE_VA flag,
+ * internal shared memory is guaranteed to always be located at the same virtual
+ * address, i.e.
pointers to internal shared memory are fully shareable + * between odp threads (regardless of thread type or fork time) in that case. + * Internal shared memory is mainly meant to be used internally within ODP + * (hence its name), but may also be allocated by odp applications and drivers, + * in the future (through these interfaces). + * To guarantee this full pointer shareability (when reserved with the + * _ODP_ISHM_SINGLE_VA flag) internal shared memory is handled as follows: + * At global_init time, a huge virtual address space reservation is performed. + * Note that this is just reserving virtual space, not physical memory. + * Because all ODP threads (pthreads or processes) are descendants of the ODP + * instantiation process, this VA space is inherited by all ODP threads. + * When internal shmem reservation actually occurs, and + * when reserved with the _ODP_ISHM_SINGLE_VA flag, physical memory is + * allocated, and mapped (MAP_FIXED) to some part in the huge preallocated + * address space area: + * because this virtual address space is common to all ODP threads, we + * know this mapping will succeed, and not clash with anything else. + * Hence, any ODP thread which performs a lookup for the same ishm block + * can map it at the same VA address. + * When internal shared memory is released, the physical memory is released + * and the corresponding virtual space returned to its "pool" of preallocated + * virtual space (assuming it was allocated from there). + * Note, though, that, if 2 linux processes share the same ishm block, + * the virtual space is marked as released as soon as one of the processes + * releases the ishm block, but the physical memory space is actually released + * by the kernel once all processes have done an ishm operation (i.e. a sync). + * This is due to the fact that linux does not contain any syscall to unmap + * memory from a different process. 
+ *
+ * This file contains functions to handle the VA area (handling fragmentation
+ * and defragmentation resulting from different allocs/release) and also
+ * define the functions to allocate, release and lookup internal shared
+ * memory:
+ * _odp_ishm_reserve(), _odp_ishm_free*() and _odp_ishm_lookup*()...
+ */
+/* NOTE(review): the header names of the #include lines below are missing
+ * from this copy of the patch; restore them from the original commit. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Maximum number of internal shared memory blocks.
+ *
+ * This is the number of separate ISHM areas that can be reserved concurrently
+ * (Note that freeing such blocks may take time, or possibly never happen
+ * if some of the block owners never procsync() after free. This number
+ * should take that into account)
+ */
+#define ISHM_MAX_NB_BLOCKS 128
+
+/*
+ * Maximum internal shared memory block name length in chars
+ * probably taking the same number as SHM name size makes sense at this stage
+ */
+#define ISHM_NAME_MAXLEN 128
+
+/*
+ * Linux underlying file name: <directory>/odp-<odp_pid>-ishm-<name>
+ * The <name> part may be replaced by a sequence number if no specific
+ * name is given at reserve time
+ * <directory> is either /dev/shm or the hugepagefs mount point for default
+ * size.
+ * (searched at init time)
+ */
+#define ISHM_FILENAME_MAXLEN (ISHM_NAME_MAXLEN + 64)
+#define ISHM_FILENAME_FORMAT "%s/odp-%d-ishm-%s"
+#define ISHM_FILENAME_NORMAL_PAGE_DIR "/dev/shm"
+#define _ODP_FILES_FMT "odp-%d-"
+
+/*
+ * when the memory is to be shared with an external entity (such as another
+ * ODP instance or an OS process not part of this ODP instance) then an
+ * export file is created describing the exported memory: this defines the
+ * location and the filename format of this description file
+ */
+#define ISHM_EXPTNAME_FORMAT "%s/%s/odp-%d-shm-%s"
+
+/*
+ * In the worst case the virtual space gets so fragmented that there is
+ * an unallocated fragment between each allocated fragment:
+ * In that case, the number of fragments to take care of is twice the
+ * number of ISHM blocks + 1.
+ */
+#define ISHM_NB_FRAGMNTS (ISHM_MAX_NB_BLOCKS * 2 + 1)
+
+/*
+ * when a memory block is to be exported outside its ODP instance,
+ * a block 'attribute file' is created in /dev/shm/odp-<odp_pid>-shm-<name>
+ * (see ISHM_EXPTNAME_FORMAT for the exact path layout).
+ * The information given in this file is according to the following:
+ */
+#define EXPORT_FILE_LINE1_FMT "ODP exported shm block info:"
+#define EXPORT_FILE_LINE2_FMT "ishm_blockname: %s"
+#define EXPORT_FILE_LINE3_FMT "file: %s"
+#define EXPORT_FILE_LINE4_FMT "length: %" PRIu64
+#define EXPORT_FILE_LINE5_FMT "flags: %" PRIu32
+#define EXPORT_FILE_LINE6_FMT "user_length: %" PRIu64
+#define EXPORT_FILE_LINE7_FMT "user_flags: %" PRIu32
+#define EXPORT_FILE_LINE8_FMT "align: %" PRIu32
+/*
+ * A fragment describes a piece of the shared virtual address space,
+ * and is allocated only when allocation is done with the _ODP_ISHM_SINGLE_VA
+ * flag:
+ * A fragment is said to be used when it actually does represent some
+ * portion of the virtual address space, and is said to be unused when
+ * it does not (so at start, one single fragment is used -describing the
+ * whole address space as unallocated-, and all others are unused).
+ * Fragments get used as address space fragmentation increases.
+ * A fragment is allocated if the piece of address space it
+ * describes is actually used by a shared memory block.
+ * Allocated fragments get their block_index set >=0.
+ */
+typedef struct ishm_fragment {
+	struct ishm_fragment *prev; /* not used when the fragment is unused */
+	struct ishm_fragment *next; /* next in the used or in the unused list */
+	void *start;		/* start of segment (VA) */
+	uintptr_t   len;	/* length of segment. multiple of page size */
+	int   block_index;	/* -1 for unallocated fragments */
+} ishm_fragment_t;
+
+/*
+ * A block describes a piece of reserved memory: Any successful ishm_reserve()
+ * will allocate a block. An ishm_reserve() with the _ODP_ISHM_SINGLE_VA flag
+ * set will allocate both a block and a fragment.
+ * Blocks contain only global data common to all processes.
+ */
+/* kind of pages (or origin) of the memory backing a block: */
+typedef enum {UNKNOWN, HUGE, NORMAL, EXTERNAL} huge_flag_t;
+typedef struct ishm_block {
+	char name[ISHM_NAME_MAXLEN];    /* name for the ishm block (if any) */
+	char filename[ISHM_FILENAME_MAXLEN]; /* name of the .../odp-* file  */
+	char exptname[ISHM_FILENAME_MAXLEN]; /* name of the export file     */
+	uint32_t user_flags;     /* any flags the user want to remember.    */
+	uint32_t flags;          /* block creation flags.                   */
+	uint32_t external_fd:1;  /* block FD was externally provided        */
+	uint64_t user_len;	 /* length, as requested at reserve time.   */
+	void *start;		 /* only valid if _ODP_ISHM_SINGLE_VA is set*/
+	uint64_t len;		 /* length. multiple of page size. 0 if free*/
+	ishm_fragment_t *fragment; /* used when _ODP_ISHM_SINGLE_VA is used */
+	huge_flag_t huge;	/* page type: external means unknown here. */
+	uint64_t seq;	/* sequence number, incremented on alloc and free   */
+	uint64_t refcnt;/* number of linux processes mapping this block     */
+} ishm_block_t;
+
+/*
+ * Table of blocks describing allocated internal shared memory
+ * This table is visible to every ODP thread (linux process or pthreads).
+ * (it is allocated shared at odp init time and is therefore inherited by all)
+ * Table index is used as handle, so it cannot move!. Entry is regarded as
+ * free when len==0
+ */
+typedef struct {
+	odp_spinlock_t  lock;
+	uint64_t dev_seq;	/* used when creating device names */
+	uint32_t odpthread_cnt;	/* number of running ODP threads   */
+	ishm_block_t  block[ISHM_MAX_NB_BLOCKS];
+} ishm_table_t;
+static ishm_table_t *ishm_tbl;
+
+/*
+ * Process local table containing the list of (believed) allocated blocks seen
+ * from the current process. There is one such table per linux process. linux
+ * threads within a process shares this table.
+ * The contents within this table may become obsolete when other processes
+ * reserve/free ishm blocks. This is what the procsync() function
+ * catches by comparing the block sequence number with the one in this table.
+ * This table is filled at ishm_reserve and ishm_lookup time.
+ * Entries are removed at ishm_free or procsync time.
+ * Note that flags and len are present in this table and seems to be redundant
+ * with those present in the ishm block table: but this is not fully true:
+ * When ishm_sync() detects obsolete mappings and tries to remove them,
+ * the entry in the ishm block table is then obsolete, and the values which are
+ * found in this table must be used to perform the unmap.
+ * (and the values in the block tables are needed at lookup time...)
+ */
+typedef struct {
+	int thrd_refcnt;  /* number of pthreads in this process, really */
+	struct {
+		int   block_index; /* entry in the ishm_tbl       */
+		uint32_t flags;	   /* flags used at creation time */
+		uint64_t seq;
+		void *start; /* start of block (VA)			     */
+		uint64_t len;   /* length of block. multiple of page size    */
+		int fd;		/* file descriptor used for this block	     */
+	} entry[ISHM_MAX_NB_BLOCKS];
+	int nb_entries;
+} ishm_proctable_t;
+static ishm_proctable_t *ishm_proctable;
+
+/*
+ * Table of fragments describing the common virtual address space:
+ * This table is visible to every ODP thread (linux process or pthreads).
+ * (it is allocated at odp init time and is therefore inherited by all)
+ */
+typedef struct {
+	ishm_fragment_t fragment[ISHM_NB_FRAGMNTS];
+	ishm_fragment_t *used_fragmnts;   /* ordered by increasing start addr */
+	ishm_fragment_t *unused_fragmnts;
+} ishm_ftable_t;
+static ishm_ftable_t *ishm_ftbl;
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/* prototypes: */
+static void procsync(void);
+
+/*
+ * Take a piece of the preallocated virtual space to fit "size" bytes.
+ * (best fit). Size must be rounded up to an integer number of pages size.
+ * Possibly split the fragment to keep track of remaining space.
+ * Returns the allocated fragment (best_fragment) and the corresponding address.
+ * On failure NULL is returned, *best_fragmnt is set to NULL and no fragment
+ * is left marked as allocated.
+ * External caller must ensure mutex before the call!
+ */
+static void *alloc_fragment(uintptr_t size, int block_index, intptr_t align,
+			    ishm_fragment_t **best_fragmnt)
+{
+	ishm_fragment_t *fragmnt;
+	ishm_fragment_t *rem_fragmnt;
+	uintptr_t border;/* possible start of new fragment (next alignment)  */
+	intptr_t left;	 /* room remaining after, if the segment is allocated */
+	uintptr_t remainder = odp_global_data.shm_max_memory;
+
+	*best_fragmnt = NULL;
+
+	/*
+	 * search for the best fit, i.e. search for the unallocated fragment
+	 * which would give the smallest remainder if the new fragment was
+	 * allocated within it:
+	 */
+	for (fragmnt = ishm_ftbl->used_fragmnts;
+	     fragmnt; fragmnt = fragmnt->next) {
+		/* skip allocated segment: */
+		if (fragmnt->block_index >= 0)
+			continue;
+		/* skip too short segment: */
+		border = ((uintptr_t)fragmnt->start + align - 1) & (-align);
+		left =
+		   ((uintptr_t)fragmnt->start + fragmnt->len) - (border + size);
+		if (left < 0)
+			continue;
+		/* remember best fit: */
+		if ((uintptr_t)left < remainder) {
+			remainder = left; /* best, so far */
+			*best_fragmnt = fragmnt;
+		}
+	}
+
+	if (!(*best_fragmnt)) {
+		ODP_ERR("unable to get virtual address for shmem block!\n.");
+		return NULL;
+	}
+
+	border = ((uintptr_t)(*best_fragmnt)->start + align - 1) & (-align);
+
+	/*
+	 * if there is room between previous fragment and new one, (due to
+	 * alignment requirement) then fragment (split) the space between
+	 * the end of the previous fragment and the beginning of the new one:
+	 */
+	if (border - (uintptr_t)(*best_fragmnt)->start > 0) {
+		/* fragment space, i.e. take a new fragment descriptor... */
+		rem_fragmnt = ishm_ftbl->unused_fragmnts;
+		if (!rem_fragmnt) {
+			ODP_ERR("unable to get shmem fragment descriptor!\n.");
+			/* nothing has been modified yet: the selected fragment
+			 * is still free and must not be reported as taken */
+			*best_fragmnt = NULL;
+			return NULL;
+		}
+		ishm_ftbl->unused_fragmnts = rem_fragmnt->next;
+
+		/* and link it between best_fragmnt->prev and best_fragmnt */
+		if ((*best_fragmnt)->prev)
+			(*best_fragmnt)->prev->next = rem_fragmnt;
+		else
+			ishm_ftbl->used_fragmnts = rem_fragmnt;
+		rem_fragmnt->prev = (*best_fragmnt)->prev;
+		(*best_fragmnt)->prev = rem_fragmnt;
+		rem_fragmnt->next = (*best_fragmnt);
+
+		/* update length: rem_fragmnt getting space before border */
+		rem_fragmnt->block_index = -1;
+		rem_fragmnt->start = (*best_fragmnt)->start;
+		rem_fragmnt->len = border - (uintptr_t)(*best_fragmnt)->start;
+		(*best_fragmnt)->start =
+			(void *)((uintptr_t)rem_fragmnt->start + rem_fragmnt->len);
+		(*best_fragmnt)->len -= rem_fragmnt->len;
+	}
+
+	/* from here on the call cannot fail: hand the fragment to the block.
+	 * (marking it earlier left it owned by nobody when the alignment
+	 * split above could not get a descriptor) */
+	(*best_fragmnt)->block_index = block_index;
+
+	/* if this was a perfect fit, i.e. no free space follows, we are done */
+	if (remainder == 0)
+		return (*best_fragmnt)->start;
+
+	/* otherwise, fragment space, i.e. take a new fragment descriptor... */
+	rem_fragmnt = ishm_ftbl->unused_fragmnts;
+	if (!rem_fragmnt) {
+		ODP_ERR("unable to get shmem fragment descriptor!\n.");
+		/* no split possible: the block keeps the whole fragment */
+		return (*best_fragmnt)->start;
+	}
+	ishm_ftbl->unused_fragmnts = rem_fragmnt->next;
+
+	/* ... double link it... */
+	rem_fragmnt->next = (*best_fragmnt)->next;
+	rem_fragmnt->prev = (*best_fragmnt);
+	if ((*best_fragmnt)->next)
+		(*best_fragmnt)->next->prev = rem_fragmnt;
+	(*best_fragmnt)->next = rem_fragmnt;
+
+	/* ... and keep track of the remainder */
+	(*best_fragmnt)->len = size;
+	rem_fragmnt->len = remainder;
+	rem_fragmnt->start = (void *)((char *)(*best_fragmnt)->start + size);
+	rem_fragmnt->block_index = -1;
+
+	return (*best_fragmnt)->start;
+}
+
+/*
+ * Free a portion of virtual space.
+ * Possibly defragment, if the freed fragment is adjacent to another
+ * free virtual fragment.
+ * External caller must ensure mutex before the call!
+ */
+static void free_fragment(ishm_fragment_t *fragmnt)
+{
+	ishm_fragment_t *before;
+	ishm_fragment_t *after;
+
+	/* nothing to release */
+	if (fragmnt == NULL)
+		return;
+
+	/* neighbours, as they are before any merge takes place */
+	before = fragmnt->prev;
+	after = fragmnt->next;
+
+	/* the address range is no longer owned by any block */
+	fragmnt->block_index = -1;
+
+	/* merge with a free fragment located just below, if any */
+	if ((before != NULL) && (before->block_index < 0)) {
+		fragmnt->start = before->start;
+		fragmnt->len += before->len;
+		if (before->prev != NULL)
+			before->prev->next = fragmnt;
+		else if (ishm_ftbl->used_fragmnts == before)
+			ishm_ftbl->used_fragmnts = fragmnt;
+		else
+			ODP_ERR("corrupted fragment list!.\n");
+		fragmnt->prev = before->prev;
+
+		/* the absorbed descriptor goes back to the unused list */
+		before->prev = NULL;
+		before->next = ishm_ftbl->unused_fragmnts;
+		ishm_ftbl->unused_fragmnts = before;
+	}
+
+	/* merge with a free fragment located just above, if any */
+	if ((after != NULL) && (after->block_index < 0)) {
+		fragmnt->len += after->len;
+		if (after->next != NULL)
+			after->next->prev = fragmnt;
+		fragmnt->next = after->next;
+
+		/* the absorbed descriptor goes back to the unused list */
+		after->prev = NULL;
+		after->next = ishm_ftbl->unused_fragmnts;
+		ishm_ftbl->unused_fragmnts = after;
+	}
+}
+
+/*
+ * Create file with size len. returns -1 on error
+ * Creates a file <shm_dir>/<uid>/odp-<pid>-ishm-<name_or_sequence>
+ * (for normal pages)
+ * or <huge_page_dir>/<uid>/odp-<pid>-ishm-<name_or_sequence> (for huge pages)
+ * Return the new file descriptor, or -1 on error.
+ */
+static int create_file(int block_index, huge_flag_t huge, uint64_t len,
+		       uint32_t flags, uint32_t align)
+{
+	char *name;
+	int  fd;
+	ishm_block_t *new_block;	      /* entry in the main block table */
+	char seq_string[ISHM_FILENAME_MAXLEN];   /* used to construct filename*/
+	char filename[ISHM_FILENAME_MAXLEN];  /* filename in /dev/shm or
+					       * /mnt/huge */
+	int  oflag = O_RDWR | O_CREAT | O_TRUNC; /* flags for open	      */
+	FILE *export_file;
+	char dir[ISHM_FILENAME_MAXLEN];
+
+	new_block = &ishm_tbl->block[block_index];
+	name = new_block->name;
+
+	/* delete_file() unlinks whatever non-empty names the block carries:
+	 * start from empty names so that only files created by this call can
+	 * ever be removed on its behalf.
+	 * NOTE(review): presumes an earlier user of this table slot may have
+	 * left its names behind; the free path is not visible here. */
+	new_block->filename[0] = 0;
+	new_block->exptname[0] = 0;
+
+	/* create the filename: */
+	snprintf(seq_string, ISHM_FILENAME_MAXLEN, "%08" PRIu64,
+		 ishm_tbl->dev_seq++);
+
+	/* huge dir must be known to create files there!: */
+	if ((huge == HUGE) &&
+	    (!odp_global_data.hugepage_info.default_huge_page_dir))
+		return -1;
+
+	if (huge == HUGE)
+		snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s",
+			 odp_global_data.hugepage_info.default_huge_page_dir,
+			 odp_global_data.uid);
+	else
+		snprintf(dir, ISHM_FILENAME_MAXLEN, "%s/%s",
+			 odp_global_data.shm_dir,
+			 odp_global_data.uid);
+
+	/* the block name is used when there is one, a sequence number if not */
+	snprintf(filename, ISHM_FILENAME_MAXLEN,
+		 ISHM_FILENAME_FORMAT,
+		 dir,
+		 odp_global_data.main_pid,
+		 (name && name[0]) ? name : seq_string);
+
+	mkdir(dir, 0744);
+
+	fd = open(filename, oflag, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+	if (fd < 0) {
+		/* a failure on huge pages is only logged as debug */
+		if (huge == HUGE)
+			ODP_DBG("open failed for %s: %s.\n",
+				filename, strerror(errno));
+		else
+			ODP_ERR("open failed for %s: %s.\n",
+				filename, strerror(errno));
+		return -1;
+	}
+
+	if (ftruncate(fd, len) == -1) {
+		ODP_ERR("ftruncate failed: fd=%d, err=%s.\n",
+			fd, strerror(errno));
+		close(fd);
+		unlink(filename);
+		return -1;
+	}
+
+	/* if _ODP_ISHM_EXPORT is set, create a description file for
+	 * external ref:
+	 */
+	if (flags & _ODP_ISHM_EXPORT) {
+		strncpy(new_block->filename, filename,
+			ISHM_FILENAME_MAXLEN - 1);
+		new_block->filename[ISHM_FILENAME_MAXLEN - 1] = 0;
+		snprintf(new_block->exptname, ISHM_FILENAME_MAXLEN,
+			 ISHM_EXPTNAME_FORMAT,
+			 odp_global_data.shm_dir,
+			 odp_global_data.uid,
+			 odp_global_data.main_pid,
+			 (name && name[0]) ? name : seq_string);
+		export_file = fopen(new_block->exptname, "w");
+		if (export_file == NULL) {
+			/* the block stays usable, it is just not described */
+			ODP_ERR("open failed: err=%s.\n",
+				strerror(errno));
+			new_block->exptname[0] = 0;
+		} else {
+			fprintf(export_file, EXPORT_FILE_LINE1_FMT "\n");
+			fprintf(export_file, EXPORT_FILE_LINE2_FMT "\n", name);
+			fprintf(export_file, EXPORT_FILE_LINE3_FMT "\n",
+				new_block->filename);
+			fprintf(export_file, EXPORT_FILE_LINE4_FMT "\n", len);
+			fprintf(export_file, EXPORT_FILE_LINE5_FMT "\n", flags);
+			fprintf(export_file, EXPORT_FILE_LINE6_FMT "\n",
+				new_block->user_len);
+			fprintf(export_file, EXPORT_FILE_LINE7_FMT "\n",
+				new_block->user_flags);
+			fprintf(export_file, EXPORT_FILE_LINE8_FMT "\n", align);
+
+			fclose(export_file);
+		}
+	} else {
+		/* remove the file from the filesystem, keeping its fd open:
+		 * nothing is left on disk, so the block keeps no name */
+		unlink(filename);
+	}
+
+	return fd;
+}
+
+/* delete the files related to a given ishm block: */
+static void delete_file(ishm_block_t *block)
+{
+	/* remove the .../odp-* file, unless fd was external: */
+	if (block->filename[0] != 0)
+		unlink(block->filename);
+	/* also remove possible description file (if block was exported): */
+	if (block->exptname[0] != 0)
+		unlink(block->exptname);
+}
+
+/*
+ * performs the mapping, possibly allocating a fragment of the pre-reserved
+ * VA space if the _ODP_ISHM_SINGLE_VA flag was given.
+ * Sets fd, and returns the mapping address (NULL on failure).
+ * The flags are taken as given: it is up to the caller to add
+ * _ODP_ISHM_SINGLE_VA when the requested alignment requires it.
+ * Mutex must be assured by the caller.
+ */
+static void *do_map(int block_index, uint64_t len, uint32_t align,
+		    uint32_t flags, huge_flag_t huge, int *fd)
+{
+	ishm_block_t *new_block;	      /* entry in the main block table */
+	void *addr = NULL;
+	void *mapped_addr;
+	ishm_fragment_t *fragment = NULL;
+
+	new_block = &ishm_tbl->block[block_index];
+
+	/*
+	 * Creates a file <shm_dir>/<uid>/odp-<pid>-ishm-<name_or_sequence>
+	 * (for normal pages) or the same below the huge page directory
+	 * (for huge pages) unless a fd was already given
+	 */
+	if (*fd < 0) {
+		*fd = create_file(block_index, huge, len, flags, align);
+		if (*fd < 0)
+			return NULL;
+	} else {
+		new_block->filename[0] = 0;
+	}
+
+	/* allocate an address range in the prebooked VA area if needed */
+	if (flags & _ODP_ISHM_SINGLE_VA) {
+		addr = alloc_fragment(len, block_index, align, &fragment);
+		if (!addr) {
+			ODP_ERR("alloc_fragment failed.\n");
+			/* an externally provided fd is left to its owner */
+			if (!new_block->external_fd) {
+				close(*fd);
+				*fd = -1;
+				delete_file(new_block);
+			}
+			return NULL;
+		}
+		ishm_tbl->block[block_index].fragment = fragment;
+	}
+
+	/* try to mmap: */
+	mapped_addr = _odp_ishmphy_map(*fd, addr, len, flags);
+	if (mapped_addr == NULL) {
+		if (flags & _ODP_ISHM_SINGLE_VA)
+			free_fragment(fragment);
+		if (!new_block->external_fd) {
+			close(*fd);
+			*fd = -1;
+			delete_file(new_block);
+		}
+		return NULL;
+	}
+
+	return mapped_addr;
+}
+
+/*
+ * Performs an extra mapping (for a process trying to see an existing block
+ * i.e. performing a lookup).
+ * Mutex must be assured by the caller.
+ */
+static void *do_remap(int block_index, int fd)
+{
+	ishm_block_t *blk = &ishm_tbl->block[block_index];
+	uint64_t len = blk->len;
+	uint32_t flags = blk->flags;
+	void *where = NULL; /* no address is imposed unless SINGLE_VA is set */
+
+	if (flags & _ODP_ISHM_SINGLE_VA) {
+		/* the block must be mapped where its fragment says */
+		if (!blk->fragment) {
+			ODP_ERR("invalid fragment failure.\n");
+			return NULL;
+		}
+		where = blk->fragment->start;
+	}
+
+	/* try to mmap (NULL is handed back as is on failure): */
+	return _odp_ishmphy_map(fd, where, len, flags);
+}
+
+/*
+ * Performs unmapping, possibly freeing a prereserved VA space fragment,
+ * if the _ODP_ISHM_SINGLE_VA flag was set at alloc time
+ * Mutex must be assured by the caller.
+ */
+static int do_unmap(void *start, uint64_t size, uint32_t flags,
+		    int block_index)
+{
+	int ret = 0;
+
+	/* without a start address there is nothing to unmap */
+	if (start)
+		ret = _odp_ishmphy_unmap(start, size, flags);
+
+	/* mark reserved address space as free */
+	if ((flags & _ODP_ISHM_SINGLE_VA) && (block_index >= 0))
+		free_fragment(ishm_tbl->block[block_index].fragment);
+
+	return ret;
+}
+
+/*
+ * Search for a given used and allocated block name.
+ * (search is performed in the global ishm table)
+ * Returns the index of the found block (if any) or -1 if none.
+ * Mutex must be assured by the caller.
+ */
+static int find_block_by_name(const char *name)
+{
+	int idx;
+
+	/* unnamed blocks cannot be looked up */
+	if (!name || !name[0])
+		return -1;
+
+	for (idx = 0; idx < ISHM_MAX_NB_BLOCKS; idx++) {
+		/* len == 0 marks a free table entry */
+		if (!ishm_tbl->block[idx].len)
+			continue;
+		if (!strcmp(name, ishm_tbl->block[idx].name))
+			return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Search for a block by address (only works when flag _ODP_ISHM_SINGLE_VA
+ * was set at reserve() time, or if the block is already known by this
+ * process).
+ * Search is performed in the process table and in the global ishm table.
+ * The provided address does not have to be at start: any address
+ * within the fragment is OK (the start address itself included).
+ * Returns the index to the found block (if any) or -1 if none.
+ * Mutex must be assured by the caller.
+ */
+static int find_block_by_address(void *addr)
+{
+	int block_index;
+	int i;
+	ishm_fragment_t *fragmnt;
+
+	/*
+	 * first check if there is already a process known block for this
+	 * address
+	 */
+	for (i = 0; i < ishm_proctable->nb_entries; i++) {
+		block_index = ishm_proctable->entry[i].block_index;
+		/* ">=": the first byte of a block belongs to the block, as
+		 * in the fragment test further down */
+		if ((addr >= ishm_proctable->entry[i].start) &&
+		    ((char *)addr < ((char *)ishm_proctable->entry[i].start +
+				     ishm_tbl->block[block_index].len)))
+			return block_index;
+	}
+
+	/*
+	 * then check if there is an existing single VA block known by some
+	 * other process and containing the given address
+	 */
+	for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) {
+		/* skip free entries and blocks without a common address */
+		if ((!ishm_tbl->block[i].len) ||
+		    (!(ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA)))
+			continue;
+		fragmnt = ishm_tbl->block[i].fragment;
+		if (!fragmnt) {
+			ODP_ERR("find_fragment: invalid NULL fragment\n");
+			return -1;
+		}
+		if ((addr >= fragmnt->start) &&
+		    ((char *)addr < ((char *)fragmnt->start + fragmnt->len)))
+			return i;
+	}
+
+	/* address does not belong to any accessible block: */
+	return -1;
+}
+
+/*
+ * Search a given ishm block in the process local table. Return its index
+ * in the process table or -1 if not found (meaning that the ishm table
+ * block index was not referenced in the process local table, i.e. the
+ * block is known by some other process, but not by the current process).
+ * Caller must assure mutex.
+ */
+static int procfind_block(int block_index)
+{
+	int i;
+
+	for (i = 0; i < ishm_proctable->nb_entries; i++) {
+		if (ishm_proctable->entry[i].block_index == block_index)
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * Release the physical memory mapping for blocks which have been freed
+ * by other processes. Caller must ensure mutex.
+ * Mutex must be assured by the caller.
+ */ +static void procsync(void) +{ + int i = 0; + int last; + ishm_block_t *block; + + last = ishm_proctable->nb_entries; + while (i < last) { + /* if the process sequence number doesn't match the main + * table seq number, this entry is obsolete + */ + block = &ishm_tbl->block[ishm_proctable->entry[i].block_index]; + if (ishm_proctable->entry[i].seq != block->seq) { + /* obsolete entry: free memory and remove proc entry */ + close(ishm_proctable->entry[i].fd); + _odp_ishmphy_unmap(ishm_proctable->entry[i].start, + ishm_proctable->entry[i].len, + ishm_proctable->entry[i].flags); + ishm_proctable->entry[i] = + ishm_proctable->entry[--last]; + } else { + i++; + } + } + ishm_proctable->nb_entries = last; +} + +/* + * Allocate and map internal shared memory, or other objects: + * If a name is given, check that this name is not already in use. + * If ok, allocate a new shared memory block and map the + * provided fd in it (if fd >=0 was given). + * If no fd is provided, a shared memory file desc named + * /dev/shm/odp--ishm- is created and mapped. + * (the name is different for huge page file as they must be on hugepagefs) + * The function returns the index of the newly created block in the + * main block table (>=0) or -1 on error. + */ +int _odp_ishm_reserve(const char *name, uint64_t size, int fd, + uint32_t align, uint32_t flags, uint32_t user_flags) +{ + int new_index; /* index in the main block table*/ + ishm_block_t *new_block; /* entry in the main block table*/ + uint64_t page_sz; /* normal page size. usually 4K*/ + uint64_t page_hp_size; /* huge page size */ + uint32_t hp_align; + uint64_t len; /* mapped length */ + void *addr = NULL; /* mapping address */ + int new_proc_entry; + struct stat statbuf; + static int huge_error_printed; /* to avoid millions of error...*/ + + odp_spinlock_lock(&ishm_tbl->lock); + + /* update this process view... 
*/ + procsync(); + + /* Get system page sizes: page_hp_size is 0 if no huge page available*/ + page_sz = odp_sys_page_size(); + page_hp_size = odp_sys_huge_page_size(); + + /* grab a new entry: */ + for (new_index = 0; new_index < ISHM_MAX_NB_BLOCKS; new_index++) { + if (ishm_tbl->block[new_index].len == 0) { + /* Found free block */ + break; + } + } + + /* check if we have reached the maximum number of allocation: */ + if (new_index >= ISHM_MAX_NB_BLOCKS) { + odp_spinlock_unlock(&ishm_tbl->lock); + ODP_ERR("ISHM_MAX_NB_BLOCKS limit reached!\n"); + return -1; + } + + new_block = &ishm_tbl->block[new_index]; + + /* save block name (if any given): */ + if (name) + strncpy(new_block->name, name, ISHM_NAME_MAXLEN - 1); + else + new_block->name[0] = 0; + + /* save user data: */ + new_block->user_flags = user_flags; + new_block->user_len = size; + + /* If a file descriptor is provided, get the real size and map: */ + if (fd >= 0) { + if (fstat(fd, &statbuf) < 0) { + close(fd); + odp_spinlock_unlock(&ishm_tbl->lock); + ODP_ERR("_ishm_reserve failed (fstat failed: %s).\n", + strerror(errno)); + __odp_errno = errno; + return -1; + } + len = statbuf.st_size; + /* note that the huge page flag is meningless here as huge + * page is determined by the provided file descriptor: */ + addr = do_map(new_index, len, align, flags, EXTERNAL, &fd); + if (addr == NULL) { + close(fd); + odp_spinlock_unlock(&ishm_tbl->lock); + ODP_ERR("_ishm_reserve failed.\n"); + return -1; + } + new_block->huge = EXTERNAL; + new_block->external_fd = 1; + } else { + new_block->external_fd = 0; + } + + /* Otherwise, Try first huge pages when possible and needed: */ + if ((fd < 0) && page_hp_size && (size > page_sz)) { + /* at least, alignment in VA should match page size, but user + * can request more: If the user requirement exceeds the page + * size then we have to make sure the block will be mapped at + * the same address every where, otherwise alignment may be + * be wrong for some process */ + 
hp_align = align; + if (hp_align <= page_hp_size) + hp_align = page_hp_size; + else + flags |= _ODP_ISHM_SINGLE_VA; + + /* roundup to page size */ + len = (size + (page_hp_size - 1)) & (-page_hp_size); + addr = do_map(new_index, len, hp_align, flags, HUGE, &fd); + + if (addr == NULL) { + if (!huge_error_printed) { + ODP_ERR("No huge pages, fall back to normal " + "pages. " + "check: /proc/sys/vm/nr_hugepages.\n"); + huge_error_printed = 1; + } + } else { + new_block->huge = HUGE; + } + } + + /* Try normal pages if huge pages failed */ + if (fd < 0) { + /* at least, alignment in VA should match page size, but user + * can request more: If the user requirement exceeds the page + * size then we have to make sure the block will be mapped at + * the same address every where, otherwise alignment may be + * be wrong for some process */ + if (align <= odp_sys_page_size()) + align = odp_sys_page_size(); + else + flags |= _ODP_ISHM_SINGLE_VA; + + /* roundup to page size */ + len = (size + (page_sz - 1)) & (-page_sz); + addr = do_map(new_index, len, align, flags, NORMAL, &fd); + new_block->huge = NORMAL; + } + + /* if neither huge pages or normal pages works, we cannot proceed: */ + if ((fd < 0) || (addr == NULL) || (len == 0)) { + if ((!new_block->external_fd) && (fd >= 0)) + close(fd); + delete_file(new_block); + odp_spinlock_unlock(&ishm_tbl->lock); + ODP_ERR("_ishm_reserve failed.\n"); + return -1; + } + + /* remember block data and increment block seq number to mark change */ + new_block->len = len; + new_block->user_len = size; + new_block->flags = flags; + new_block->user_flags = user_flags; + new_block->seq++; + new_block->refcnt = 1; + new_block->start = addr; /* only for SINGLE_VA*/ + + /* the allocation succeeded: update the process local view */ + new_proc_entry = ishm_proctable->nb_entries++; + ishm_proctable->entry[new_proc_entry].block_index = new_index; + ishm_proctable->entry[new_proc_entry].flags = flags; + ishm_proctable->entry[new_proc_entry].seq = 
new_block->seq;
+	ishm_proctable->entry[new_proc_entry].start = addr;
+	ishm_proctable->entry[new_proc_entry].len = len;
+	ishm_proctable->entry[new_proc_entry].fd = fd;
+
+	/* register the file descriptor to the file descriptor server. */
+	_odp_fdserver_register_fd(FD_SRV_CTX_ISHM, new_index, fd);
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return new_index;
+}
+
+/*
+ * Import a memory block exported by another ODP instance into the
+ * current ODP instance.
+ * The name of the export description file is built from the shm directory,
+ * the uid, 'external_odp_pid' and 'remote_name'. The backing file named in
+ * that description is opened and handed to _odp_ishm_reserve() under the
+ * name 'local_name'.
+ * Returns the local block index (>= 0) on success and a negative value
+ * on error.
+ */
+int _odp_ishm_find_exported(const char *remote_name, pid_t external_odp_pid,
+			    const char *local_name)
+{
+	char desc_path[ISHM_FILENAME_MAXLEN];
+	char remote_blockname[ISHM_FILENAME_MAXLEN];
+	char backing_file[ISHM_FILENAME_MAXLEN];
+	FILE *desc;
+	uint64_t len;
+	uint64_t user_len;
+	uint32_t flags;
+	uint32_t user_flags;
+	uint32_t align;
+	int idx;
+	int fd;
+
+	/* locate and open the block description file: */
+	snprintf(desc_path, ISHM_FILENAME_MAXLEN,
+		 ISHM_EXPTNAME_FORMAT,
+		 odp_global_data.shm_dir,
+		 odp_global_data.uid,
+		 external_odp_pid,
+		 remote_name);
+
+	desc = fopen(desc_path, "r");
+	if (desc == NULL) {
+		ODP_ERR("Error opening %s.\n", desc_path);
+		return -1;
+	}
+
+	/*
+	 * parse the description, one line at a time and in file order:
+	 * parsing stops at the first line which does not match.
+	 */
+	if (fscanf(desc, EXPORT_FILE_LINE1_FMT " ") != 0 ||
+	    fscanf(desc, EXPORT_FILE_LINE2_FMT " ", remote_blockname) != 1 ||
+	    fscanf(desc, EXPORT_FILE_LINE3_FMT " ", backing_file) != 1 ||
+	    fscanf(desc, EXPORT_FILE_LINE4_FMT " ", &len) != 1 ||
+	    fscanf(desc, EXPORT_FILE_LINE5_FMT " ", &flags) != 1 ||
+	    fscanf(desc, EXPORT_FILE_LINE6_FMT " ", &user_len) != 1 ||
+	    fscanf(desc, EXPORT_FILE_LINE7_FMT " ", &user_flags) != 1 ||
+	    fscanf(desc, EXPORT_FILE_LINE8_FMT " ", &align) != 1) {
+		fclose(desc);
+		ODP_ERR("Error reading %s.\n", desc_path);
+		return -1;
+	}
+
+	fclose(desc);
+
+	/* open the file backing the exported block: */
+	fd = open(backing_file, O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+	if (fd < 0) {
+		ODP_ERR("open failed for %s: %s.\n",
+			backing_file, strerror(errno));
+		return -1;
+	}
+
+	/* the imported block must not be exported a second time: */
+	flags &= ~(uint32_t)_ODP_ISHM_EXPORT;
+
+	/* reserve the block on top of the file descriptor just opened: */
+	idx = _odp_ishm_reserve(local_name, 0, fd, align, flags, 0);
+	if (idx < 0) {
+		close(fd);
+		return idx;
+	}
+
+	/* the user visible attributes are those of the exported block: */
+	ishm_tbl->block[idx].user_flags = user_flags;
+	ishm_tbl->block[idx].user_len = user_len;
+
+	return idx;
+}
+
+/*
+ * Free and unmap internal shared memory:
+ * The file descriptor is closed and the .../odp-* file deleted,
+ * unless fd was externally provided at reserve() time.
+ * return 0 if OK, and -1 on error.
+ * Mutex must be assured by the caller.
+ */
+static int block_free(int block_index)
+{
+	ishm_block_t *blk; /* entry in the main block table */
+	int local_idx;     /* entry in the process local table, if any */
+	int tail;
+
+	/* only a valid index referring to a block in use can be freed: */
+	if (block_index < 0 || block_index >= ISHM_MAX_NB_BLOCKS ||
+	    ishm_tbl->block[block_index].len == 0) {
+		ODP_ERR("Request to free an invalid block\n");
+		return -1;
+	}
+
+	blk = &ishm_tbl->block[block_index];
+	local_idx = procfind_block(block_index);
+
+	if (local_idx < 0) {
+		/* not mapped in this process: only the fragment may need
+		 * to be released */
+		do_unmap(NULL, 0, blk->flags, block_index);
+	} else {
+		/* mapped here: close the fd and drop the mapping (and the
+		 * possible fragment)... */
+		close(ishm_proctable->entry[local_idx].fd);
+		do_unmap(ishm_proctable->entry[local_idx].start,
+			 blk->len,
+			 ishm_proctable->entry[local_idx].flags,
+			 block_index);
+
+		/* ...then fill the hole in the process local table with
+		 * its last entry */
+		tail = ishm_proctable->nb_entries - 1;
+		ishm_proctable->entry[local_idx] = ishm_proctable->entry[tail];
+		ishm_proctable->nb_entries = tail;
+	}
+
+	/* remove all files related to this block: */
+	delete_file(blk);
+
+	/* the fd server must forget about this block as well: */
+	_odp_fdserver_deregister_fd(FD_SRV_CTX_ISHM, block_index);
+
+	/* a zero length marks the slot as free in the main table, and the
+	 * new sequence number makes the entry obsolete for other processes */
+	blk->len = 0;
+	blk->seq++;
+
+	return 0;
+}
+
+/*
+ * Free and unmap internal shared memory, identified by its block number:
+ * the ishm lock is held while the block is freed.
+ * return -1 on error. 0 if OK.
+ */
+int _odp_ishm_free_by_index(int block_index)
+{
+	int status;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+	status = block_free(block_index);
+	odp_spinlock_unlock(&ishm_tbl->lock);
+
+	return status;
+}
+
+/*
+ * free and unmap internal shared memory, identified by its block name:
+ * return -1 on error. 0 if OK.
+ */
+int _odp_ishm_free_by_name(const char *name)
+{
+	int idx;
+	int status = -1;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	/* the name is resolved in the main ishm table: */
+	idx = find_block_by_name(name);
+	if (idx >= 0)
+		status = block_free(idx);
+	else
+		ODP_ERR("Request to free an non existing block..."
+			" (double free?)\n");
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return status;
+}
+
+/*
+ * Free and unmap internal shared memory identified by address:
+ * return -1 on error. 0 if OK.
+ */
+int _odp_ishm_free_by_address(void *addr)
+{
+	int idx;
+	int status = -1;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	/* the address is resolved in the main ishm table: */
+	idx = find_block_by_address(addr);
+	if (idx >= 0)
+		status = block_free(idx);
+	else
+		ODP_ERR("Request to free an non existing block..."
+			" (double free?)\n");
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return status;
+}
+
+/*
+ * Lookup for an ishm shared memory, identified by its block index
+ * in the main ishm block table.
+ * Map this ishm area in the process VA (if not already present).
+ * Returns the block user address or NULL on error.
+ * Mutex must be assured by the caller.
+ */
+static void *block_lookup(int block_index)
+{
+	int proc_index;
+	int fd = -1;
+	ishm_block_t *block;
+	void *mapped_addr;
+	int new_entry;
+
+	if ((block_index < 0) ||
+	    (block_index >= ISHM_MAX_NB_BLOCKS) ||
+	    (ishm_tbl->block[block_index].len == 0)) {
+		ODP_ERR("Request to lookup an invalid block\n");
+		return NULL;
+	}
+
+	/* search it in process table: if there, this process knows it already*/
+	proc_index = procfind_block(block_index);
+	if (proc_index >= 0)
+		return ishm_proctable->entry[proc_index].start;
+
+	/* this ishm is not known by this process, yet: we create the mapping.*/
+	fd = _odp_fdserver_lookup_fd(FD_SRV_CTX_ISHM, block_index);
+	if (fd < 0) {
+		ODP_ERR("Could not find ishm file descriptor (BUG!)\n");
+		return NULL;
+	}
+
+	/* perform the mapping */
+	block = &ishm_tbl->block[block_index];
+
+	mapped_addr = do_remap(block_index, fd);
+	if (mapped_addr == NULL) {
+		ODP_ERR(" lookup: Could not map existing shared memory!\n");
+		/*
+		 * The descriptor is only kept (and later closed by
+		 * block_free()) once it is recorded in the process table
+		 * below: on failure nobody else would close it.
+		 * NOTE(review): assumes do_remap() leaves fd open on failure.
+		 */
+		close(fd);
+		return NULL;
+	}
+
+	/* the mapping succeeded: update the process local view */
+	new_entry = ishm_proctable->nb_entries++;
+	ishm_proctable->entry[new_entry].block_index = block_index;
+	ishm_proctable->entry[new_entry].flags = block->flags;
+	ishm_proctable->entry[new_entry].seq = block->seq;
+	ishm_proctable->entry[new_entry].start = mapped_addr;
+	ishm_proctable->entry[new_entry].len = block->len;
+	ishm_proctable->entry[new_entry].fd = fd;
+	/* one more process is now referencing this block: */
+	block->refcnt++;
+
+	return mapped_addr;
+}
+
+/*
+ * Lookup for an ishm shared memory, identified by its block_index.
+ * Maps this ishmem area in the process VA (if not already present).
+ * Returns the block user address, or NULL if the index
+ * does not match any known ishm blocks.
+ */
+void *_odp_ishm_lookup_by_index(int block_index)
+{
+	void *ret;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	ret = block_lookup(block_index);
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return ret;
+}
+
+/*
+ * Lookup for an ishm shared memory, identified by its block name.
+ * Map this ishm area in the process VA (if not already present).
+ * Return the block index, or -1 if the name does not match any known
+ * ishm block or if the block cannot be mapped.
+ */
+int _odp_ishm_lookup_by_name(const char *name)
+{
+	int idx;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	/* resolve the name in the main ishm table, then make sure the
+	 * block is mapped in this process: any failure yields -1 */
+	idx = find_block_by_name(name);
+	if (idx < 0 || block_lookup(idx) == NULL)
+		idx = -1;
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return idx;
+}
+
+/*
+ * Lookup for an ishm shared memory block, identified by its VA address.
+ * This works only if the block has already been looked-up (mapped) by the
+ * current process or if it was created with the _ODP_ISHM_SINGLE_VA flag.
+ * Map this ishm area in the process VA (if not already present).
+ * Return the block index, or -1 if the address
+ * does not match any known ishm blocks.
+ */
+int _odp_ishm_lookup_by_address(void *addr)
+{
+	int idx;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	/* resolve the address in the main ishm table, then make sure the
+	 * block is mapped in this process: any failure yields -1 */
+	idx = find_block_by_address(addr);
+	if (idx < 0 || block_lookup(idx) == NULL)
+		idx = -1;
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return idx;
+}
+
+/*
+ * Returns the VA address of a given block (which has to be known in the current
+ * process). Returns NULL if the block is unknown.
+ */
+void *_odp_ishm_address(int block_index)
+{
+	void *va = NULL;
+	int local_idx;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	if (block_index < 0 || block_index >= ISHM_MAX_NB_BLOCKS ||
+	    ishm_tbl->block[block_index].len == 0) {
+		ODP_ERR("Request for address on an invalid block\n");
+	} else {
+		/* the address is only known if the block is mapped here: */
+		local_idx = procfind_block(block_index);
+		if (local_idx >= 0)
+			va = ishm_proctable->entry[local_idx].start;
+	}
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return va;
+}
+
+/*
+ * Fill 'info' with the description of a given block, which has to be
+ * mapped in the current process.
+ * Returns 0 on success, -1 if the block is invalid or not mapped here.
+ */
+int _odp_ishm_info(int block_index, _odp_ishm_info_t *info)
+{
+	ishm_block_t *blk;
+	int local_idx;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	if (block_index < 0 || block_index >= ISHM_MAX_NB_BLOCKS ||
+	    ishm_tbl->block[block_index].len == 0) {
+		odp_spinlock_unlock(&ishm_tbl->lock);
+		ODP_ERR("Request for info on an invalid block\n");
+		return -1;
+	}
+
+	/* no info is returned for a block which is not mapped here: */
+	local_idx = procfind_block(block_index);
+	if (local_idx < 0) {
+		odp_spinlock_unlock(&ishm_tbl->lock);
+		return -1;
+	}
+
+	blk = &ishm_tbl->block[block_index];
+
+	info->name = blk->name;
+	info->addr = ishm_proctable->entry[local_idx].start;
+	info->size = blk->user_len;
+	if (blk->huge == HUGE)
+		info->page_size = odp_sys_huge_page_size();
+	else
+		info->page_size = odp_sys_page_size();
+	info->flags = blk->flags;
+	info->user_flags = blk->user_flags;
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return 0;
+}
+
+static int do_odp_ishm_init_local(void)
+{
+	int i;
+	int block_index;
+
+	/*
+	 * the ishm_process table is local to each linux process
+	 * Check that no other linux threads (of same or ancestor processes)
+	 * have already created the table, and create it if needed.
+	 * We protect this with the general ishm lock to avoid
+	 * init race condition of different running threads.
+	 */
+	odp_spinlock_lock(&ishm_tbl->lock);
+	ishm_tbl->odpthread_cnt++; /* count ODPthread (pthread or process) */
+	if (!ishm_proctable) {
+		ishm_proctable = malloc(sizeof(ishm_proctable_t));
+		if (!ishm_proctable) {
+			/* this ODP thread failed to init: do not count it */
+			ishm_tbl->odpthread_cnt--;
+			odp_spinlock_unlock(&ishm_tbl->lock);
+			return -1;
+		}
+		memset(ishm_proctable, 0, sizeof(ishm_proctable_t));
+	}
+	if (syscall(SYS_gettid) != getpid())
+		ishm_proctable->thrd_refcnt++;	/* new linux thread  */
+	else
+		ishm_proctable->thrd_refcnt = 1;/* new linux process */
+
+	/*
+	 * if this ODP thread is actually a new linux process, (as opposed
+	 * to a pthread), i.e, we just forked, then all shmem blocks
+	 * of the parent process are mapped into this child by inheritance.
+	 * (The process local table is inherited as well). We hence have to
+	 * increase the process refcount for each of the inherited mappings:
+	 */
+	if (syscall(SYS_gettid) == getpid()) {
+		for (i = 0; i < ishm_proctable->nb_entries; i++) {
+			block_index = ishm_proctable->entry[i].block_index;
+			ishm_tbl->block[block_index].refcnt++;
+		}
+	}
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return 0;
+}
+
+/*
+ * Remove the files left behind by this ODP instance in the per-user
+ * sub-directory "dirpath/uid": every entry whose name starts with the
+ * prefix built from _ODP_FILES_FMT and the main pid is unlinked.
+ * Returns 0 on success (a missing directory is not an error) and -1 if
+ * memory could not be allocated.
+ */
+int _odp_ishm_cleanup_files(const char *dirpath)
+{
+	struct dirent *e;
+	DIR *dir;
+	char userdir[PATH_MAX];
+	char prefix[PATH_MAX];
+	char *fullpath;
+	int d_len;
+	int p_len;
+	int f_len;
+
+	snprintf(userdir, PATH_MAX, "%s/%s", dirpath, odp_global_data.uid);
+	d_len = strlen(userdir);
+
+	dir = opendir(userdir);
+	if (!dir) {
+		/* ok if the dir does not exist. no much to delete then! */
+		ODP_DBG("opendir failed for %s: %s\n",
+			userdir, strerror(errno));
+		return 0;
+	}
+	snprintf(prefix, PATH_MAX, _ODP_FILES_FMT, odp_global_data.main_pid);
+	p_len = strlen(prefix);
+	while ((e = readdir(dir)) != NULL) {
+		if (strncmp(e->d_name, prefix, p_len) == 0) {
+			/* room for "userdir" + '/' + "d_name" + '\0' */
+			f_len = strlen(e->d_name);
+			fullpath = malloc(d_len + f_len + 2);
+			if (fullpath == NULL) {
+				closedir(dir);
+				return -1;
+			}
+			/*
+			 * the entries just read live in userdir (not in
+			 * dirpath), and the buffer is exactly as large as
+			 * what was allocated above:
+			 */
+			snprintf(fullpath, d_len + f_len + 2, "%s/%s",
+				 userdir, e->d_name);
+			ODP_DBG("deleting obsolete file: %s\n", fullpath);
+			if (unlink(fullpath))
+				ODP_ERR("unlink failed for %s: %s\n",
+					fullpath, strerror(errno));
+			free(fullpath);
+		}
+	}
+	closedir(dir);
+
+	return 0;
+}
+
+int _odp_ishm_init_global(const odp_init_t *init)
+{
+	void *addr;
+	void *spce_addr;
+	int i;
+	uid_t uid;
+	char *hp_dir = odp_global_data.hugepage_info.default_huge_page_dir;
+	uint64_t align;
+	uint64_t max_memory = ODP_CONFIG_ISHM_VA_PREALLOC_SZ;
+	uint64_t internal = ODP_CONFIG_ISHM_VA_PREALLOC_SZ / 8;
+
+	/* user requested memory size + some extra for internal use */
+	if (init && init->shm.max_memory)
+		max_memory = init->shm.max_memory + internal;
+
+	odp_global_data.shm_max_memory = max_memory;
+	odp_global_data.shm_max_size = max_memory - internal;
+	odp_global_data.main_pid = getpid();
+	odp_global_data.shm_dir = getenv("ODP_SHM_DIR");
+	if (odp_global_data.shm_dir) {
+		odp_global_data.shm_dir_from_env = 1;
+	} else {
+		/* no directory given by the environment: use a private
+		 * copy of the default one */
+		odp_global_data.shm_dir_from_env = 0;
+		odp_global_data.shm_dir =
+		    calloc(1, sizeof(ISHM_FILENAME_NORMAL_PAGE_DIR));
+		if (!odp_global_data.shm_dir) {
+			ODP_ERR("unable to allocate the shm dir name\n");
+			return -1;
+		}
+		sprintf(odp_global_data.shm_dir, "%s",
+			ISHM_FILENAME_NORMAL_PAGE_DIR);
+	}
+
+	ODP_DBG("ishm: using dir %s\n", odp_global_data.shm_dir);
+
+	uid = getuid();
+	snprintf(odp_global_data.uid, UID_MAXLEN, "%d",
+		 uid);
+
+	if ((syscall(SYS_gettid)) != odp_global_data.main_pid) {
+		ODP_ERR("ishm init must be performed by the main "
+			"ODP process!\n.");
+		return -1;
+	}
+
+	if (!hp_dir) {
+		ODP_DBG("NOTE: No support for huge pages\n");
+		align =
odp_sys_page_size();
+	} else {
+		ODP_DBG("Huge pages mount point is: %s\n", hp_dir);
+		_odp_ishm_cleanup_files(hp_dir);
+		align = odp_sys_huge_page_size();
+	}
+
+	/* remove the files possibly left in the shm directory as well: */
+	_odp_ishm_cleanup_files(odp_global_data.shm_dir);
+
+	/* allocate space for the internal shared mem block table: */
+	addr = mmap(NULL, sizeof(ishm_table_t),
+		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		ODP_ERR("unable to mmap the main block table\n.");
+		goto init_glob_err1;
+	}
+	ishm_tbl = addr;
+	memset(ishm_tbl, 0, sizeof(ishm_table_t));
+	ishm_tbl->dev_seq = 0;
+	ishm_tbl->odpthread_cnt = 0;
+	odp_spinlock_init(&ishm_tbl->lock);
+
+	/* allocate space for the internal shared mem fragment table: */
+	addr = mmap(NULL, sizeof(ishm_ftable_t),
+		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		ODP_ERR("unable to mmap the main fragment table\n.");
+		goto init_glob_err2;
+	}
+	ishm_ftbl = addr;
+	memset(ishm_ftbl, 0, sizeof(ishm_ftable_t));
+
+	/*
+	 * reserve the address space for _ODP_ISHM_SINGLE_VA reserved blocks,
+	 * only address space!
+	 */
+	spce_addr = _odp_ishmphy_book_va(max_memory, align);
+	if (!spce_addr) {
+		ODP_ERR("unable to reserve virtual space\n.");
+		goto init_glob_err3;
+	}
+
+	/*
+	 * use the first fragment descriptor to describe the whole VA space:
+	 * a block_index of -1 marks it as not allocated to any block.
+	 */
+	ishm_ftbl->fragment[0].block_index = -1;
+	ishm_ftbl->fragment[0].start = spce_addr;
+	ishm_ftbl->fragment[0].len = max_memory;
+	ishm_ftbl->fragment[0].prev = NULL;
+	ishm_ftbl->fragment[0].next = NULL;
+	ishm_ftbl->used_fragmnts = &ishm_ftbl->fragment[0];
+
+	/*
+	 * and put all other fragment descriptors in the unused list:
+	 * descriptors 1 .. ISHM_NB_FRAGMNTS - 2 are chained by the loop,
+	 * the last one terminates the list.
+	 */
+	for (i = 1; i < ISHM_NB_FRAGMNTS - 1; i++) {
+		ishm_ftbl->fragment[i].prev = NULL;
+		ishm_ftbl->fragment[i].next = &ishm_ftbl->fragment[i + 1];
+	}
+	ishm_ftbl->fragment[ISHM_NB_FRAGMNTS - 1].prev = NULL;
+	ishm_ftbl->fragment[ISHM_NB_FRAGMNTS - 1].next = NULL;
+	ishm_ftbl->unused_fragmnts = &ishm_ftbl->fragment[1];
+
+	/*
+	 * We run _odp_ishm_init_local() directly here to give the
+	 * possibility to run shm_reserve() before the odp_init_local()
+	 * is performed for the main thread... Many init_global() functions
+	 * indeed assume the availability of odp_shm_reserve()...:
+	 */
+	if (do_odp_ishm_init_local()) {
+		ODP_ERR("unable to init the main thread\n.");
+		goto init_glob_err4;
+	}
+
+	/* get ready to create pools: */
+	_odp_ishm_pool_init();
+
+	return 0;
+
+	/*
+	 * error unwinding: each label releases what was set up before the
+	 * failing step and falls through to the labels below it.
+	 */
+init_glob_err4:
+	if (_odp_ishmphy_unbook_va())
+		ODP_ERR("unable to unbook virtual space\n.");
+init_glob_err3:
+	if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0)
+		ODP_ERR("unable to munmap main fragment table\n.");
+init_glob_err2:
+	if (munmap(ishm_tbl, sizeof(ishm_table_t)) < 0)
+		ODP_ERR("unable to munmap main block table\n.");
+init_glob_err1:
+	return -1;
+}
+
+/*
+ * Per ODP thread initialisation of ishm.
+ * Returns 0 on success, -1 on error.
+ */
+int _odp_ishm_init_local(void)
+{
+	/*
+	 * Do not re-run this for the main ODP process, as it has already
+	 * been done in advance at _odp_ishm_init_global() time:
+	 */
+	if ((getpid() == odp_global_data.main_pid) &&
+	    (syscall(SYS_gettid) == getpid()))
+		return 0;
+
+	return do_odp_ishm_init_local();
+}
+
+/*
+ * Per ODP thread termination of ishm: the ishm lock is not taken here
+ * (_odp_ishm_term_local() takes it around its call).
+ * Always returns 0.
+ */
+static int do_odp_ishm_term_local(void)
+{
+	int i;
+	int proc_table_refcnt = 0;
+	int block_index;
+	ishm_block_t *block;
+
+	procsync();
+
+	ishm_tbl->odpthread_cnt--; /* decount ODPthread (pthread or process) */
+
+	/*
+	 * The ishm_process table is local to each linux process
+	 * Check that no other linux threads (of this linux process)
+	 * still needs the table, and free it if so.
+	 * We protect this with the general ishm lock to avoid
+	 * term race condition of different running threads.
+	 */
+	proc_table_refcnt = --ishm_proctable->thrd_refcnt;
+	if (!proc_table_refcnt) {
+		/*
+		 * this is the last thread of this process...
+		 * All mappings for this process are about to be lost...
+		 * Go through the table of visible blocks for this process,
+		 * decreasing the refcnt of each visible blocks, and issuing
+		 * warning for those no longer referenced by any process.
+		 * Note that non-referenced blocks are not freed: this is
+		 * deliberate as this would imply that the semantic of the
+		 * freeing function would differ depending on whether we run
+		 * with odp_thread as processes or pthreads. With this approach,
+		 * the user should always free the blocks manually, which is
+		 * more consistent
+		 */
+		for (i = 0; i < ishm_proctable->nb_entries; i++) {
+			block_index = ishm_proctable->entry[i].block_index;
+			block = &ishm_tbl->block[block_index];
+			if ((--block->refcnt) <= 0) {
+				block->refcnt = 0;
+				/*
+				 * 'i' walks the process local table: the
+				 * block itself is identified by block_index
+				 * in the main table.
+				 */
+				ODP_DBG("Warning: block %d: name:%s "
+					"no longer referenced\n",
+					block_index,
+					block->name[0] ?
+						block->name : "");
+			}
+		}
+
+		free(ishm_proctable);
+		ishm_proctable = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Per ODP thread termination of ishm.
+ * The termination of the last ODP thread is not performed here: it is
+ * left to _odp_ishm_term_global().
+ * Returns 0.
+ */
+int _odp_ishm_term_local(void)
+{
+	int ret;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+
+	/* postpone last thread term to allow free() by global term functions:*/
+	if (ishm_tbl->odpthread_cnt == 1) {
+		odp_spinlock_unlock(&ishm_tbl->lock);
+		return 0;
+	}
+
+	ret = do_odp_ishm_term_local();
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return ret;
+}
+
+/*
+ * Global termination of ishm: the files of the blocks which were never
+ * freed are deleted, the postponed termination of the last thread is
+ * performed and the tables and the reserved VA space are released.
+ * Returns 0 on success, a non zero value if something could not be released.
+ */
+int _odp_ishm_term_global(void)
+{
+	int ret = 0;
+	int index;
+	ishm_block_t *block;
+
+	if ((getpid() != odp_global_data.main_pid) ||
+	    (syscall(SYS_gettid) != getpid()))
+		ODP_ERR("odp_term_global() must be performed by the main "
+			"ODP process!\n.");
+
+	/* cleanup possibly non freed memory (and complain a bit): */
+	for (index = 0; index < ISHM_MAX_NB_BLOCKS; index++) {
+		block = &ishm_tbl->block[index];
+		if (block->len != 0) {
+			ODP_ERR("block '%s' (file %s) was never freed "
+				"(cleaning up...).\n",
+				block->name, block->filename);
+			delete_file(block);
+		}
+	}
+
+	/* perform the last thread terminate which was postponed: */
+	ret = do_odp_ishm_term_local();
+
+	/* free the fragment table */
+	if (munmap(ishm_ftbl, sizeof(ishm_ftable_t)) < 0) {
+		ret |= -1;
+		ODP_ERR("unable to munmap fragment table\n.");
+	}
+	/* free the block table */
+	if (munmap(ishm_tbl,
sizeof(ishm_table_t)) < 0) {
+		ret |= -1;
+		ODP_ERR("unable to munmap main table\n.");
+	}
+
+	/* free the reserved VA space */
+	if (_odp_ishmphy_unbook_va())
+		ret |= -1;
+
+	/* the directory name is only owned when it did not come from the
+	 * environment */
+	if (!odp_global_data.shm_dir_from_env)
+		free(odp_global_data.shm_dir);
+
+	return ret;
+}
+
+/*
+ * Print the current ishm status (allocated blocks and VA space map)
+ * Return the number of allocated blocks (including those not mapped
+ * by the current odp thread). Also perform a number of sanity check.
+ * For debug.
+ */
+int _odp_ishm_status(const char *title)
+{
+	int i;
+	char flags[3];
+	char huge;
+	int proc_index;
+	ishm_fragment_t *fragmnt;
+	int consecutive_unallocated = 0; /* should never exceed 1 */
+	uintptr_t last_address = 0;
+	ishm_fragment_t *previous = NULL;
+	int nb_used_frgments = 0;
+	int nb_unused_frgments = 0; /* nb frag describing a VA area */
+	int nb_allocated_frgments = 0; /* nb frag describing an allocated VA */
+	int nb_blocks = 0;
+	int single_va_blocks = 0;
+	int max_name_len = 0;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+	procsync();
+
+	/* find longest block name */
+	for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) {
+		int str_len;
+
+		if (ishm_tbl->block[i].len <= 0)
+			continue;
+
+		str_len = strlen(ishm_tbl->block[i].name);
+
+		if (max_name_len < str_len)
+			max_name_len = str_len;
+	}
+
+	ODP_PRINT("ishm blocks allocated at: %s\n", title);
+
+	ODP_PRINT(" %-*s flag len user_len seq ref start fd"
+		  " file\n", max_name_len, "name");
+
+	/* display block table: 1 line per entry, completed with the local
+	 * mapping info if the block is mapped here */
+	for (i = 0; i < ISHM_MAX_NB_BLOCKS; i++) {
+		if (ishm_tbl->block[i].len <= 0)
+			continue; /* unused block */
+
+		nb_blocks++;
+		if (ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA)
+			single_va_blocks++;
+
+		flags[0] = (ishm_tbl->block[i].flags & _ODP_ISHM_SINGLE_VA) ?
+			   'S' : '.';
+		flags[1] = (ishm_tbl->block[i].flags & _ODP_ISHM_LOCK) ?
+			   'L' : '.';
+		flags[2] = 0;
+		switch (ishm_tbl->block[i].huge) {
+		case HUGE:
+			huge = 'H';
+			break;
+		case NORMAL:
+			huge = 'N';
+			break;
+		case EXTERNAL:
+			huge = 'E';
+			break;
+		default:
+			huge = '?';
+		}
+		proc_index = procfind_block(i);
+		ODP_PRINT("%2i %-*s %s%c 0x%-08lx %-8lu %-3lu %-3lu",
+			  i, max_name_len, ishm_tbl->block[i].name,
+			  flags, huge,
+			  ishm_tbl->block[i].len,
+			  ishm_tbl->block[i].user_len,
+			  ishm_tbl->block[i].seq,
+			  ishm_tbl->block[i].refcnt);
+
+		if (proc_index < 0) {
+			/* not mapped here: nothing more to show, but the
+			 * line still has to be terminated */
+			ODP_PRINT("\n");
+			continue;
+		}
+
+		ODP_PRINT("%-08lx %-3d",
+			  ishm_proctable->entry[proc_index].start,
+			  ishm_proctable->entry[proc_index].fd);
+
+		ODP_PRINT("%s\n", ishm_tbl->block[i].filename);
+	}
+
+	/* display the virtual space allocations... : */
+	ODP_PRINT("\nishm virtual space:\n");
+	for (fragmnt = ishm_ftbl->used_fragmnts;
+	     fragmnt; fragmnt = fragmnt->next) {
+		if (fragmnt->block_index >= 0) {
+			nb_allocated_frgments++;
+			ODP_PRINT(" %08p - %08p: ALLOCATED by block:%d\n",
+				  (uintptr_t)fragmnt->start,
+				  (uintptr_t)fragmnt->start + fragmnt->len - 1,
+				  fragmnt->block_index);
+			consecutive_unallocated = 0;
+		} else {
+			ODP_PRINT(" %08p - %08p: NOT ALLOCATED\n",
+				  (uintptr_t)fragmnt->start,
+				  (uintptr_t)fragmnt->start + fragmnt->len - 1);
+			/* two free fragments in a row should have been
+			 * merged */
+			if (consecutive_unallocated++)
+				ODP_ERR("defragmentation error\n");
+		}
+
+		/* some other sanity checks: */
+		if (fragmnt->prev != previous)
+			ODP_ERR("chaining error\n");
+
+		/* each fragment must start right after the previous one: */
+		if (fragmnt != ishm_ftbl->used_fragmnts) {
+			if ((uintptr_t)fragmnt->start != last_address + 1)
+				ODP_ERR("lost space error\n");
+		}
+
+		last_address = (uintptr_t)fragmnt->start + fragmnt->len - 1;
+		previous = fragmnt;
+		nb_used_frgments++;
+	}
+
+	/*
+	 * the number of blocks with the single_VA flag set should match
+	 * the number of allocated fragments:
+	 */
+	if (single_va_blocks != nb_allocated_frgments)
+		ODP_ERR("single_va_blocks != nb_allocated_fragments!\n");
+
+	/* compute the number of unused fragments*/
+	for (fragmnt = ishm_ftbl->unused_fragmnts;
+	     fragmnt; fragmnt = fragmnt->next)
+		nb_unused_frgments++;
+
+	ODP_PRINT("ishm: %d fragment used. %d fragments unused. (total=%d)\n",
+		  nb_used_frgments, nb_unused_frgments,
+		  nb_used_frgments + nb_unused_frgments);
+
+	if ((nb_used_frgments + nb_unused_frgments) != ISHM_NB_FRAGMNTS)
+		ODP_ERR("lost fragments!\n");
+
+	if (nb_blocks < ishm_proctable->nb_entries)
+		ODP_ERR("process known block cannot exceed main total sum!\n");
+
+	ODP_PRINT("\n");
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+	return nb_blocks;
+}
+
+/*
+ * Print the main table entry of a given block. For debug.
+ */
+void _odp_ishm_print(int block_index)
+{
+	ishm_block_t *block;
+	const char *str;
+
+	odp_spinlock_lock(&ishm_tbl->lock);
+
+	if ((block_index < 0) ||
+	    (block_index >= ISHM_MAX_NB_BLOCKS) ||
+	    (ishm_tbl->block[block_index].len == 0)) {
+		odp_spinlock_unlock(&ishm_tbl->lock);
+		ODP_ERR("Request for info on an invalid block\n");
+		return;
+	}
+
+	block = &ishm_tbl->block[block_index];
+
+	ODP_PRINT("\nSHM block info\n--------------\n");
+	ODP_PRINT(" name: %s\n", block->name);
+	ODP_PRINT(" file: %s\n", block->filename);
+	ODP_PRINT(" expt: %s\n", block->exptname);
+	ODP_PRINT(" user_flags: 0x%x\n", block->user_flags);
+	ODP_PRINT(" flags: 0x%x\n", block->flags);
+	ODP_PRINT(" user_len: %lu\n", block->user_len);
+	ODP_PRINT(" start: %p\n", block->start);
+	ODP_PRINT(" len: %lu\n", block->len);
+
+	switch (block->huge) {
+	case HUGE:
+		str = "huge";
+		break;
+	case NORMAL:
+		str = "normal";
+		break;
+	case EXTERNAL:
+		str = "external";
+		break;
+	default:
+		str = "??";
+	}
+
+	ODP_PRINT(" page type: %s\n", str);
+	ODP_PRINT(" seq: %lu\n", block->seq);
+	ODP_PRINT(" refcnt: %lu\n", block->refcnt);
+	ODP_PRINT("\n");
+
+	odp_spinlock_unlock(&ishm_tbl->lock);
+}
diff --git a/platform/linux-generic/odp_ishmphy.c b/platform/linux-generic/odp_ishmphy.c
new file mode 100644
index 000000000..6207ce757
--- /dev/null
+++ b/platform/linux-generic/odp_ishmphy.c
@@ -0,0 +1,209 @@
+/* Copyright (c) 2016-2018, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "config.h"
+
+/*
+ * This file handles the lower end of the ishm memory allocator:
+ * It performs the physical mappings.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* start and length of the whole pre-booked VA mapping (alignment slack
+ * included), as needed to unmap it */
+static void *common_va_address;
+static uint64_t common_va_len;
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/* Book some virtual address space
+ * This function is called at odp_init_global() time to pre-book some
+ * virtual address space inherited by all odpthreads (i.e. descendant
+ * processes and threads) and later used to guarantee the unicity of the
+ * mapping VA address when memory is reserved with the _ODP_ISHM_SINGLE_VA
+ * flag.
+ * 'len + align' bytes are booked so that an address aligned on 'align'
+ * followed by 'len' bytes always fits in the booked area.
+ * returns the (aligned) address of the mapping or NULL on error.
+ */
+void *_odp_ishmphy_book_va(uintptr_t len, intptr_t align)
+{
+	void *addr;
+
+	addr = mmap(NULL, len + align, PROT_NONE,
+		    MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+	if (addr == MAP_FAILED) {
+		ODP_ERR("_ishmphy_book_va failure\n");
+		return NULL;
+	}
+
+	if (mprotect(addr, len, PROT_NONE))
+		ODP_ERR("failure for protect\n");
+
+	/* %p expects a pointer: the length is converted accordingly */
+	ODP_DBG("VA Reserved: %p, len=%p\n", addr, (void *)(len + align));
+
+	/*
+	 * remember the whole mapping, alignment slack included, so that
+	 * _odp_ishmphy_unbook_va() releases everything which was mapped:
+	 */
+	common_va_address = addr;
+	common_va_len = len + align;
+
+	/* return the nearest aligned address: */
+	return (void *)(((uintptr_t)addr + align - 1) & (-align));
+}
+
+/* Un-book some virtual address space
+ * This function is called at odp_term_global() time to unbook
+ * the virtual address space booked by _ishmphy_book_va()
+ * returns 0 on success, or the (non zero) munmap() result on error.
+ */
+int _odp_ishmphy_unbook_va(void)
+{
+	int ret;
+
+	ret = munmap(common_va_address, common_va_len);
+	if (ret)
+		ODP_ERR("_unishmphy_book_va failure\n");
+	return ret;
+}
+
+/*
+ * do a mapping:
+ * Performs a mapping of the provided file descriptor to the process VA
+ * space.
If the _ODP_ISHM_SINGLE_VA flag is set, 'start' is assumed to be
+ * the VA address where the mapping is to be done.
+ * If the flag is not set, a new VA address is taken.
+ * If the _ODP_ISHM_LOCK flag is set, the mapped memory is locked.
+ * returns the address of the mapping or NULL on error.
+ */
+void *_odp_ishmphy_map(int fd, void *start, uint64_t size,
+		       int flags)
+{
+	void *mapped_addr_tmp, *mapped_addr;
+	int mmap_flags = 0;
+	int mlock_errno;
+
+	if (flags & _ODP_ISHM_SINGLE_VA) {
+		if (!start) {
+			ODP_ERR("failure: missing address\n");
+			return NULL;
+		}
+		/* maps over fragment of reserved VA: */
+		/* first, try a normal map. If that works, remap it where it
+		 * should (on the prereserved space), and remove the initial
+		 * normal mapping:
+		 * This is because it turned out that if a mapping fails
+		 * on the prereserved virtual address space, then
+		 * the prereserved address space which was tried to be mapped
+		 * on becomes available to the kernel again! This was not
+		 * according to expectations: the assumption was that if a
+		 * mapping fails, the system should remain unchanged, but this
+		 * is obviously not true (at least for huge pages when
+		 * exhausted).
+		 * So the strategy is to first map at a non reserved place
+		 * (which can then be freed and returned to the kernel on
+		 * failure) and perform a new map to the prereserved space on
+		 * success (which is then guaranteed to work).
+		 * The initial free mapping can then be removed.
+		 */
+		mapped_addr = MAP_FAILED;
+		mapped_addr_tmp = mmap(NULL, size, PROT_READ | PROT_WRITE,
+				       MAP_SHARED | mmap_flags, fd, 0);
+		if (mapped_addr_tmp != MAP_FAILED) {
+			/* If OK, do new map at right fixed location... */
+			mapped_addr = mmap(start,
+					   size, PROT_READ | PROT_WRITE,
+					   MAP_SHARED | MAP_FIXED | mmap_flags,
+					   fd, 0);
+			if (mapped_addr != start)
+				ODP_ERR("new map failed:%s\n", strerror(errno));
+			/* ... and remove initial mapping: */
+			if (munmap(mapped_addr_tmp, size))
+				ODP_ERR("munmap failed:%s\n", strerror(errno));
+		}
+	} else {
+		/* just do a new mapping in the VA space: */
+		mapped_addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+				   MAP_SHARED | mmap_flags, fd, 0);
+		/* the kernel should never hand out the pre-booked VA: */
+		if ((mapped_addr >= common_va_address) &&
+		    ((char *)mapped_addr <
+			(char *)common_va_address + common_va_len)) {
+			ODP_ERR("VA SPACE OVERLAP!\n");
+		}
+	}
+
+	if (mapped_addr == MAP_FAILED) {
+		ODP_ERR("mmap failed:%s\n", strerror(errno));
+		return NULL;
+	}
+
+	/* if locking is requested, lock it...*/
+	if (flags & _ODP_ISHM_LOCK) {
+		if (mlock(mapped_addr, size)) {
+			/* munmap() below may overwrite errno: keep the
+			 * reason of the mlock() failure for the report */
+			mlock_errno = errno;
+			if (munmap(mapped_addr, size))
+				ODP_ERR("munmap failed:%s\n", strerror(errno));
+			ODP_ERR("mlock failed:%s\n", strerror(mlock_errno));
+			return NULL;
+		}
+	}
+	return mapped_addr;
+}
+
+/* free a mapping:
+ * If the _ODP_ISHM_SINGLE_VA flag was given at creation time the virtual
+ * address range must be returned to the preallocated "pool". This is
+ * done by mapping non accessible memory there (hence blocking the VA but
+ * releasing the physical memory).
+ * If the _ODP_ISHM_SINGLE_VA flag was not given, both physical memory and
+ * virtual address space are released by calling the normal munmap.
+ * return 0 on success or -1 on error.
+ */ +int _odp_ishmphy_unmap(void *start, uint64_t len, int flags) +{ + void *addr; + int ret; + int mmap_flgs; + + mmap_flgs = MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS | MAP_NORESERVE; + + /* if locking was requested, unlock...*/ + if (flags & _ODP_ISHM_LOCK) + munlock(start, len); + + if (flags & _ODP_ISHM_SINGLE_VA) { + /* map unnaccessible memory overwrites previous mapping + * and free the physical memory, but guarantees to block + * the VA range from other mappings + */ + addr = mmap(start, len, PROT_NONE, mmap_flgs, -1, 0); + if (addr == MAP_FAILED) { + ODP_ERR("_ishmphy_free failure for ISHM_SINGLE_VA\n"); + return -1; + } + if (mprotect(start, len, PROT_NONE)) + ODP_ERR("_ishmphy_free failure for protect\n"); + return 0; + } + + /* just release the mapping */ + ret = munmap(start, len); + if (ret) + ODP_ERR("_ishmphy_free failure: %s\n", strerror(errno)); + return ret; +} diff --git a/platform/linux-generic/odp_ishmpool.c b/platform/linux-generic/odp_ishmpool.c new file mode 100644 index 000000000..04a0e535a --- /dev/null +++ b/platform/linux-generic/odp_ishmpool.c @@ -0,0 +1,807 @@ +/* Copyright (c) 2017-2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* This file gathers the buddy and slab allocation functionality provided + * by _ishm. + * _odp_ishmpool_create() can be used to create a pool for buddy/slab + * allocation. _odp_ishmpool_create() will allocate a memory area using + * ishm_reserve() for both the control part (needed for tracking + * allocation/free...) and the user memory itself (part of which will be given + * at each ishmpool_alloc()). + * The element size provided at pool creation time determines whether + * to pool will of type buddy or slab. + * For buddy, all allocations are rounded to the nearest power of 2. + * + * The implementation of the buddy allocator is very traditional: it + * maintains N lists of free buffers. 
+ * The control part actually contains these N queue heads, (N-M are actually + * used), the free buffers themselves being used for chaining (the chaining info + * is in the buffers: as they are "free" they should not be touched by the + * user). The control part also contains a array of bytes for remembering + * the size (actually the order) of the allocated buffers: + * There are 2^(N-M) such bytes, this number being the maximum number of + * allocated buffers (when all allocation are <= 2^M bytes) + * Buddy allocators handle fragmentation by splitting or merging blocks by 2. + * They guarantee a minimum efficiency of 50%, at worse case fragmentation. + * + * Slab implementation is even simpler, all free elements being queued in + * one single queue at init, taken from this queue when allocated and + * returned to this same queue when freed. + * + * The reason for not using malloc() is that malloc does not guarantee + * memory sharability between ODP threads (regardless of their implememtation) + * which ishm_reserve() can do. see the comments around + * _odp_ishmbud_pool_create() and ishm_reserve() for more details. + * + * This file is divided in 3 sections: the first one regroups functions + * needed by the buddy allocation. + * The second one regroups the functions needed by the slab allocator. + * The third section regroups the common functions exported externally. + */ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BUDDY_MIN_SIZE 32 /* minimal buddy allocation size */ + +typedef _odp_ishm_pool_t pool_t; /* for shorter writing */ + +/* array of ishm block index used for pools. 
only used for pool + * lookup by name */ +#define MAX_NB_POOL 100 +static int pool_blk_idx[MAX_NB_POOL]; + +/* section 1: functions for buddy allocation: */ + +/* free buddy blocks contains the following structure, used to link the + * free blocks together. + */ +typedef struct bblock_t { + struct bblock_t *next; + uint32_t order; +} bblock_t; + +/* value set in the 'order' table when the block is not allocated: */ +#define BBLOCK_FREE 0 + +/* compute ceil(log2(size)) */ +static uint8_t clog2(uint64_t size) +{ + uint64_t sz; + uint32_t bit; + uint8_t res; + + sz = size; /* we start by computing res = log2(sz)... */ + res = 0; + for (bit = 32; bit ; bit >>= 1) { + if (sz >= ((uint64_t)1 << bit)) { + sz >>= bit; + res += bit; + } + } + if (((uint64_t)1 << res) < size) /* ...and then ceil(x) */ + res++; + + return res; +} + +/* + * given a bblock address, and an order value, returns the address + * of the buddy bblock (the other "half") + */ +static inline bblock_t *get_bblock_buddy(pool_t *bpool, bblock_t *addr, + uint8_t order) +{ + uintptr_t b; + + b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr); + b ^= 1 << order; + return (void *)(b + (uintptr_t)bpool->ctrl.user_addr); +} + +/* + * given a buddy block address, return its number (used for busy flags): + */ +static inline uintptr_t get_bblock_nr(pool_t *bpool, void *addr) +{ + uintptr_t b; + uint8_t min_order; + + min_order = bpool->ctrl.min_order; + b = ((uintptr_t)addr - (uintptr_t)bpool->ctrl.user_addr) >> min_order; + return b; +} + +/* remove bblock from the list for bblocks of rank order. 
The bblock to be + * removed is really expected to be on the list: not finding it is an error */ +static inline void remove_from_list(pool_t *bpool, uint8_t order, + bblock_t *bblock) +{ + bblock_t *curr; /* current bblock (when parsing list) */ + bblock_t *prev; /* previous bblock (when parsing list) */ + + curr = bpool->ctrl.free_heads[order]; + if (!curr) + goto remove_from_list_error; + + if (curr == bblock) { + bpool->ctrl.free_heads[order] = curr->next; + return; + } + + while (curr) { + if (curr == bblock) { + prev->next = curr->next; + return; + } + prev = curr; + curr = curr->next; + } + +remove_from_list_error: + ODP_ERR("List corrupted\n"); +} + +/* + * create a buddy memory pool of given size (actually nearest power of 2), + * where allocation will never be smaller than min_alloc. + * returns a pointer to the created buddy_pool + * The allocated area contains: + * - The _odp_ishm_pool_ctrl_t structure + * - The array of ((order - min_order) of free list heads + * - The array of 'order' values, remembering sizes of allocated bblocks + * - alignment to cache line + * - The user memory + */ +static pool_t *_odp_ishmbud_pool_create(const char *pool_name, int store_idx, + uint64_t size, + uint64_t min_alloc, int flags) +{ + uint8_t order; /* pool order = ceil(log2(size)) */ + uint8_t min_order; /* pool min_order = ceil(log2(min_alloc))*/ + uint32_t max_nb_bblock; /* max number of bblock, when smallest */ + uint32_t control_sz; /* size of control area */ + uint32_t free_head_sz; /* mem area needed for list heads */ + uint32_t saved_order_sz; /* mem area to remember given sizes */ + uint64_t user_sz; /* 2^order bytes */ + uint64_t total_sz; /* total size to request */ + int blk_idx; /* as returned by _ishm_resrve() */ + pool_t *bpool; + int i; + bblock_t *first_block; + + /* a bblock_t must fit in the buffers for linked chain! */ + if (min_alloc < sizeof(bblock_t)) + min_alloc = sizeof(bblock_t); + + /* pool order is such that 2^order = size. 
same for min_order */ + order = clog2(size); + min_order = clog2(min_alloc); + + /* check parameters obvious wishes: */ + if (order >= 64) + return NULL; + if (order < min_order) + return NULL; + + /* at worst case, all bblocks have smallest (2^min_order) size */ + max_nb_bblock = (1 << (order - min_order)); + + /* space needed for the control area (padded to cache line size)*/ + control_sz = ROUNDUP_CACHE_LINE(sizeof(_odp_ishm_pool_ctrl_t)); + + /* space needed for 'order' free bblock list heads: */ + /* Note that only lists from min_order to order are really used.*/ + free_head_sz = ROUNDUP_CACHE_LINE(sizeof(void *) * (order + 1)); + + /* space needed for order -i.e. size- storage of alloc'd bblock:*/ + saved_order_sz = ROUNDUP_CACHE_LINE(max_nb_bblock * sizeof(uint8_t)); + + /* space needed for user area is 2^order bytes: */ + user_sz = 1 << order; + + total_sz = control_sz + + free_head_sz + + saved_order_sz + + user_sz; + + /* allocate required memory: */ + blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1, + ODP_CACHE_LINE_SIZE, flags, 0); + if (blk_idx < 0) { + ODP_ERR("_odp_ishm_reserve failed."); + return NULL; + } + + bpool = _odp_ishm_address(blk_idx); + if (bpool == NULL) { + ODP_ERR("_odp_ishm_address failed."); + return NULL; + } + + /* store in pool array (needed for look up): */ + pool_blk_idx[store_idx] = blk_idx; + + /* remember block index, needed when pool is destroyed */ + bpool->ctrl.ishm_blk_idx = blk_idx; + + /* remember element size: 0 means unknown size, i.e. buddy alloation*/ + bpool->ctrl.element_sz = 0; + + /* prepare mutex: */ + odp_spinlock_init(&bpool->ctrl.lock); + + /* initialise pointers and things... 
*/ + bpool->ctrl.order = order; + bpool->ctrl.min_order = min_order; + bpool->ctrl.free_heads = + (void *)((uintptr_t)bpool + control_sz); + bpool->ctrl.alloced_order = + (uint8_t *)((uintptr_t)bpool->ctrl.free_heads + free_head_sz); + bpool->ctrl.user_addr = + (void *)((uintptr_t)bpool->ctrl.alloced_order + saved_order_sz); + + /* initialize all free list to NULL, except the top biggest element:*/ + for (i = 0; i < (order - min_order); i++) + bpool->ctrl.free_heads[i] = NULL; + bpool->ctrl.free_heads[order] = bpool->ctrl.user_addr; + first_block = (bblock_t *)bpool->ctrl.user_addr; + first_block->next = NULL; + first_block->order = order; + + /* set all 'order' of allocated bblocks to free: */ + memset(bpool->ctrl.alloced_order, BBLOCK_FREE, saved_order_sz); + + return bpool; +} + +/* allocated memory from the given buddy pool */ +static void *_odp_ishmbud_alloc(pool_t *bpool, uint64_t size) +{ + uint32_t rq_order; /* requested order */ + uint32_t try_order; + bblock_t *bblock; + bblock_t *buddy; + uintptr_t nr; + + /* if size is zero or too big reject: */ + if ((!size) && (size > (1U << bpool->ctrl.order))) { + ODP_ERR("Invalid alloc size (0 or larger than whole pool)\n"); + return NULL; + } + + /* compute ceil(log2(size)), to get the requested block order: */ + rq_order = clog2(size); + + /* make sure the requested order is bigger (or same) as minimum! */ + if (rq_order < bpool->ctrl.min_order) + rq_order = bpool->ctrl.min_order; + + /* mutex from here: */ + odp_spinlock_lock(&bpool->ctrl.lock); + + /* now, start trying to allocate a bblock of rq_order. 
If that + * fails keep trying larger orders until pool order is reached */ + bblock = NULL; + for (try_order = rq_order; try_order <= bpool->ctrl.order; + try_order++) { + if (bpool->ctrl.free_heads[try_order]) { + /* remove from list: */ + bblock = + (bblock_t *)(bpool->ctrl.free_heads[try_order]); + bpool->ctrl.free_heads[try_order] = bblock->next; + break; + } + } + + if (!bblock) { + odp_spinlock_unlock(&bpool->ctrl.lock); + ODP_ERR("Out of memory. (Buddy pool full)\n"); + return NULL; + } + + /* OK: we got a block, but possibbly too large (if try_order>rq_order) + * return the extra halves to the pool hence splitting the bblock at + * each 'extra' order: */ + while (try_order-- > rq_order) { + /* split: */ + buddy = (bblock_t *)((uintptr_t)bblock + (1 << try_order)); + buddy->order = try_order; + /* add to list: */ + buddy->next = bpool->ctrl.free_heads[try_order]; + bpool->ctrl.free_heads[try_order] = buddy; + /* mark as free (non allocated block get size 0): */ + nr = get_bblock_nr(bpool, buddy); + bpool->ctrl.alloced_order[nr] = BBLOCK_FREE; + } + + /* remember the size if the allocated block: */ + nr = get_bblock_nr(bpool, bblock); + bpool->ctrl.alloced_order[nr] = rq_order; + + /* and return the allocated block! 
*/ + odp_spinlock_unlock(&bpool->ctrl.lock); + return (void *)bblock; +} + +/* free a previously allocated buffer from a given buddy pool */ +static int _odp_ishmbud_free(pool_t *bpool, void *addr) +{ + uintptr_t user_start; /* start of user area */ + uintptr_t user_stop; /* stop of user area */ + uintptr_t mask; /* 2^min_order - 1 */ + bblock_t *bblock; /* bblock being freed */ + bblock_t *buddy; /* buddy bblock of bblock being freed */ + uint8_t order; /* order of block being freed */ + uintptr_t nr; /* block number */ + + /* freeing NULL is regarded as OK, though without any effect: */ + if (!addr) + return 0; + + user_start = (uintptr_t)bpool->ctrl.user_addr; + user_stop = user_start + ((uintptr_t)1 << bpool->ctrl.order); + mask = ((uintptr_t)1 << bpool->ctrl.min_order) - 1; + + /* some sanity checks: check that given address is within pool and + * that relative address has 2^min_order granularity: */ + if (((uintptr_t)addr < user_start) || + ((uintptr_t)addr > user_stop) || + (((uintptr_t)addr - user_start) & mask)) { + ODP_ERR("Invalid address to be freed\n"); + return -1; + } + + /* mutex from here: */ + odp_spinlock_lock(&bpool->ctrl.lock); + + /* collect saved block order and make sure bblock was allocated */ + bblock = (bblock_t *)addr; + nr = get_bblock_nr(bpool, bblock); + order = bpool->ctrl.alloced_order[nr]; + if (order == BBLOCK_FREE) { + ODP_ERR("Double free error\n"); + odp_spinlock_unlock(&bpool->ctrl.lock); + return -1; + } + + /* this looks like a valid free, mark at least this as free: */ + bpool->ctrl.alloced_order[nr] = BBLOCK_FREE; + + /* go up in orders, trying to merge buddies... 
*/ + while (order < bpool->ctrl.order) { + buddy = get_bblock_buddy(bpool, bblock, order); + /*if buddy is not free: no further merge possible */ + nr = get_bblock_nr(bpool, buddy); + if (bpool->ctrl.alloced_order[nr] != BBLOCK_FREE) + break; + /*merge only bblock of same order:*/ + if (buddy->order != order) + break; + /*merge: remove buddy from free list: */ + remove_from_list(bpool, order, buddy); + /*merge: make sure we point at start of block: */ + if (bblock > buddy) + bblock = buddy; + /*merge: size of bloack has dubbled: increse order: */ + order++; + } + + /* insert the bblock into its correct free block list: */ + bblock->next = bpool->ctrl.free_heads[order]; + bpool->ctrl.free_heads[order] = bblock; + + /* remember the (possibly now merged) block order: */ + bblock->order = order; + + odp_spinlock_unlock(&bpool->ctrl.lock); + return 0; +} + +/* print buddy pool status and performs sanity checks */ +static int _odp_ishmbud_pool_status(const char *title, pool_t *bpool) +{ + uint8_t order, pool_order, pool_min_order; + uint64_t free_q_nb_bblocks[64]; + uint64_t allocated_nb_bblocks[64]; + uint64_t free_q_nb_bblocks_bytes[64]; + uint64_t allocated_nb_bblocks_bytes[64]; + uint64_t total_bytes_free; + uint64_t total_bytes_allocated; + uint64_t nr; + bblock_t *bblock; + int res = 0; + + odp_spinlock_lock(&bpool->ctrl.lock); + + pool_order = bpool->ctrl.order; + pool_min_order = bpool->ctrl.min_order; + + ODP_DBG("\n%s\n", title); + ODP_DBG("Pool Type: BUDDY\n"); + ODP_DBG("pool size: %" PRIu64 " (bytes)\n", (1UL << pool_order)); + ODP_DBG("pool order: %d\n", (int)pool_order); + ODP_DBG("pool min_order: %d\n", (int)pool_min_order); + + /* a pool wholse order is more than 64 cannot even be reached on 64 + * bit machines! 
*/ + if (pool_order > 64) { + odp_spinlock_unlock(&bpool->ctrl.lock); + return -1; + } + + total_bytes_free = 0; + total_bytes_allocated = 0; + + /* for each queue */ + for (order = pool_min_order; order <= pool_order; order++) { + free_q_nb_bblocks[order] = 0; + free_q_nb_bblocks_bytes[order] = 0; + allocated_nb_bblocks[order] = 0; + allocated_nb_bblocks_bytes[order] = 0; + + /* get the number of buffs in the free queue for this order: */ + bblock = bpool->ctrl.free_heads[order]; + while (bblock) { + free_q_nb_bblocks[order]++; + free_q_nb_bblocks_bytes[order] += (1 << order); + bblock = bblock->next; + } + + total_bytes_free += free_q_nb_bblocks_bytes[order]; + + /* get the number of allocated buffers of this order */ + for (nr = 0; + nr < (1U << (pool_order - pool_min_order)); nr++) { + if (bpool->ctrl.alloced_order[nr] == order) + allocated_nb_bblocks[order]++; + } + + allocated_nb_bblocks_bytes[order] = + allocated_nb_bblocks[order] * (1 << order); + + total_bytes_allocated += allocated_nb_bblocks_bytes[order]; + + ODP_DBG("Order %d => Free: %" PRIu64 " buffers " + "(%" PRIu64" bytes) " + "Allocated %" PRIu64 " buffers (%" PRIu64 " bytes) " + "Total: %" PRIu64 " bytes\n", + (int)order, free_q_nb_bblocks[order], + free_q_nb_bblocks_bytes[order], + allocated_nb_bblocks[order], + allocated_nb_bblocks_bytes[order], + free_q_nb_bblocks_bytes[order] + + allocated_nb_bblocks_bytes[order]); + } + + ODP_DBG("Allocated space: %" PRIu64 " (bytes)\n", + total_bytes_allocated); + ODP_DBG("Free space: %" PRIu64 " (bytes)\n", total_bytes_free); + + if (total_bytes_free + total_bytes_allocated != (1U << pool_order)) { + ODP_DBG("Lost bytes on this pool!\n"); + res = -1; + } + + if (res) + ODP_DBG("Pool inconsistent!\n"); + + odp_spinlock_unlock(&bpool->ctrl.lock); + return res; +} + +/* section 2: functions for slab allocation: */ + +/* free slab blocks contains the following structure, used to link the + * free blocks together. 
+ */ +typedef struct sblock_t { + struct sblock_t *next; +} sblock_t; + +/* + * create a slab memory pool of given size (rounded up to the nearest integer + * number of element, where each element has size 'elt_size'). + * returns a pointer to the created slab pool. + * The allocated area contains: + * - The _odp_ishm_pool_ctrl_t structure + * - alignment to cache line + * - The user memory + */ +static pool_t *_odp_ishmslab_pool_create(const char *pool_name, int store_idx, + uint64_t size, + uint64_t elt_size, int flags) +{ + uint32_t nb_sblock; /* number of elements in the pool */ + uint32_t control_sz; /* size of control area */ + uint64_t total_sz; /* total size to request */ + uint64_t user_sz; /* 2^order bytes */ + int blk_idx; /* as returned by _ishm_reserve() */ + pool_t *spool; + unsigned int i; + sblock_t *block; + + /* a sblock_t must fit in the buffers for linked chain! */ + if (elt_size < sizeof(bblock_t)) { + elt_size = sizeof(bblock_t); + size = size * (sizeof(bblock_t) / elt_size + + ((sizeof(bblock_t) % elt_size) ? 1 : 0)); + } + + /* nb of element fitting in the pool is just ceil(size/elt_size)*/ + nb_sblock = (size / elt_size) + ((size % elt_size) ? 
1 : 0); + + /* space needed for the control area (padded to cache line size)*/ + control_sz = ROUNDUP_CACHE_LINE(sizeof(_odp_ishm_pool_ctrl_t)); + + /* space needed for user area is : */ + user_sz = nb_sblock * elt_size; + + total_sz = control_sz + + user_sz; + + /* allocate required memory: */ + blk_idx = _odp_ishm_reserve(pool_name, total_sz, -1, + ODP_CACHE_LINE_SIZE, flags, 0); + if (blk_idx < 0) { + ODP_ERR("_odp_ishm_reserve failed."); + return NULL; + } + + spool = _odp_ishm_address(blk_idx); + if (spool == NULL) { + ODP_ERR("_odp_ishm_address failed."); + return NULL; + } + + /* store in pool array (needed for look up): */ + pool_blk_idx[store_idx] = blk_idx; + + /* remember block index, needed when pool is destroyed */ + spool->ctrl.ishm_blk_idx = blk_idx; + + /* remember element (sblock) size and their number: */ + spool->ctrl.element_sz = elt_size; + spool->ctrl.nb_elem = nb_sblock; + + /* prepare mutex: */ + odp_spinlock_init(&spool->ctrl.lock); + + /* initialise pointers and things... */ + spool->ctrl.user_addr = + (void *)((uintptr_t)spool + control_sz); + + /* initialise the free list with the list of all elements:*/ + spool->ctrl.free_head = spool->ctrl.user_addr; + for (i = 0; i < nb_sblock - 1; i++) { + block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr + + i * (uintptr_t)elt_size); + block->next = (sblock_t *)((uintptr_t)block + + (uintptr_t)elt_size); + } + block = (sblock_t *)((uintptr_t)spool->ctrl.user_addr + + (nb_sblock - 1) * (uintptr_t)elt_size); + block->next = NULL; + + return spool; +} + +/* allocated memory from the given slab pool */ +static void *_odp_ishmslab_alloc(pool_t *spool, uint64_t size) +{ + void *ret; + sblock_t *block; + + if (size > spool->ctrl.element_sz) + return NULL; + + odp_spinlock_lock(&spool->ctrl.lock); + ret = spool->ctrl.free_head; + if (!ret) { + odp_spinlock_unlock(&spool->ctrl.lock); + ODP_ERR("Out of memory. 
(Slab pool full)\n"); + return NULL; + } + + block = (sblock_t *)ret; + spool->ctrl.free_head = block->next; + + odp_spinlock_unlock(&spool->ctrl.lock); + return ret; +} + +/* free a previously allocated buffer from a given slab pool */ +static int _odp_ishmslab_free(pool_t *spool, void *addr) +{ + uintptr_t user_start; /* start of user area */ + uintptr_t user_stop; /* stop of user area */ + sblock_t *block; + + /* freeing NULL is regarded as OK, though without any effect: */ + if (!addr) + return 0; + + user_start = (uintptr_t)spool->ctrl.user_addr; + user_stop = user_start + spool->ctrl.element_sz * spool->ctrl.nb_elem; + + /* some sanity checks: check that given address is within pool and + * that relative address has element_sz granularity: */ + if (((uintptr_t)addr < user_start) || + ((uintptr_t)addr > user_stop) || + (((uintptr_t)addr - user_start) % spool->ctrl.element_sz)) { + ODP_ERR("Invalid address to be freed\n"); + return -1; + } + + odp_spinlock_lock(&spool->ctrl.lock); + block = (sblock_t *)addr; + block->next = (sblock_t *)spool->ctrl.free_head; + spool->ctrl.free_head = addr; + odp_spinlock_unlock(&spool->ctrl.lock); + + return 0; +} + +/* print slab pool status and performs sanity checks */ +static int _odp_ishmslab_pool_status(const char *title, pool_t *spool) +{ + sblock_t *sblock; + uint64_t nb_free_elts; /* number of free elements */ + + odp_spinlock_lock(&spool->ctrl.lock); + + ODP_DBG("\n%s\n", title); + ODP_DBG("Pool Type: FIXED SIZE\n"); + ODP_DBG("pool size: %" PRIu64 " (bytes)\n", + spool->ctrl.nb_elem * spool->ctrl.element_sz); + + /* count the number of free elements in the free list: */ + nb_free_elts = 0; + sblock = (sblock_t *)spool->ctrl.free_head; + while (sblock) { + nb_free_elts++; + sblock = sblock->next; + } + + ODP_DBG("%" PRIu64 "/%" PRIu64 " available elements.\n", + nb_free_elts, spool->ctrl.nb_elem); + + odp_spinlock_unlock(&spool->ctrl.lock); + return 0; +} + +/* section 3: common, external functions: */ + +/* create a 
pool: either with fixed alloc size (if max_alloc/min_alloc<2) or + * of variable block size (if max_alloc == 0) */ +pool_t *_odp_ishm_pool_create(const char *pool_name, uint64_t size, + uint64_t min_alloc, uint64_t max_alloc, int flags) +{ + int store_idx; + uint64_t real_pool_sz; + + if (min_alloc > max_alloc) { + ODP_ERR("invalid parameter: min_alloc > max_alloc"); + return NULL; + } + + /* search for a free index in pool_blk_idx for the pool */ + for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) { + if (pool_blk_idx[store_idx] < 0) + break; + } + if (store_idx == MAX_NB_POOL) { + ODP_ERR("Max number of pool reached (MAX_NB_POOL)"); + return NULL; + } + + if ((min_alloc == 0) || ((max_alloc / min_alloc) > 2)) { + /* alloc variation is not constant enough: we go for a buddy + * allocator. The pool efficiency may go as low as 50% + * so we double the required size to make sure we can satisfy + * the user request */ + real_pool_sz = 2 * size; + return _odp_ishmbud_pool_create(pool_name, store_idx, + real_pool_sz, + BUDDY_MIN_SIZE, flags); + } else { + /* min and max are close enough so we go for constant size + * allocator: + * make sure the pool can fit the required size, even when + * only min_alloc allocation are performed: */ + real_pool_sz = ((size / min_alloc) + + ((size % min_alloc) ? 1 : 0)) + * max_alloc; + return _odp_ishmslab_pool_create(pool_name, store_idx, + real_pool_sz, + max_alloc, flags); + } +} + +/* destroy a pool. everything goes away. no operation on the pool should + * follow. 
*/ +int _odp_ishm_pool_destroy(pool_t *pool) +{ + int store_idx; + + for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) { + if (pool_blk_idx[store_idx] == pool->ctrl.ishm_blk_idx) { + pool_blk_idx[store_idx] = -1; + break; + } + } + + return _odp_ishm_free_by_index(pool->ctrl.ishm_blk_idx); +} + +/* allocated a buffer from a pool */ +void *_odp_ishm_pool_alloc(_odp_ishm_pool_t *pool, uint64_t size) +{ + if (!pool->ctrl.element_sz) + return _odp_ishmbud_alloc(pool, size); + else + return _odp_ishmslab_alloc(pool, size); +} + +/* free a previously allocated buffer from a pool */ +int _odp_ishm_pool_free(_odp_ishm_pool_t *pool, void *addr) +{ + if (!pool->ctrl.element_sz) + return _odp_ishmbud_free(pool, addr); + else + return _odp_ishmslab_free(pool, addr); +} + +/* Print a pool status */ +int _odp_ishm_pool_status(const char *title, _odp_ishm_pool_t *pool) +{ + if (!pool->ctrl.element_sz) + return _odp_ishmbud_pool_status(title, pool); + else + return _odp_ishmslab_pool_status(title, pool); +} + +void _odp_ishm_pool_init(void) +{ + int i; + + for (i = 0; i < MAX_NB_POOL; i++) + pool_blk_idx[i] = -1; +} + +_odp_ishm_pool_t *_odp_ishm_pool_lookup(const char *pool_name) +{ + int block_idx; + int store_idx; + + /* search for a _ishm block with the given name */ + block_idx = _odp_ishm_lookup_by_name(pool_name); + if (block_idx < 0) + return NULL; + + /* a block with that name exists: make sure it is within + * the registered pools */ + for (store_idx = 0; store_idx < MAX_NB_POOL; store_idx++) { + if (pool_blk_idx[store_idx] == block_idx) + return _odp_ishm_address(block_idx); + } + + return NULL; +} diff --git a/platform/linux-generic/odp_queue_scalable.c b/platform/linux-generic/odp_queue_scalable.c index 895133cd5..2a5e01d47 100644 --- a/platform/linux-generic/odp_queue_scalable.c +++ b/platform/linux-generic/odp_queue_scalable.c @@ -25,8 +25,8 @@ #include #include #include -#include <_ishm_internal.h> -#include <_ishmpool_internal.h> +#include +#include 
#include #include diff --git a/platform/linux-generic/odp_schedule_scalable.c b/platform/linux-generic/odp_schedule_scalable.c index f5974442d..023ab1edd 100644 --- a/platform/linux-generic/odp_schedule_scalable.c +++ b/platform/linux-generic/odp_schedule_scalable.c @@ -21,8 +21,8 @@ #include #include #include -#include <_ishm_internal.h> -#include <_ishmpool_internal.h> +#include +#include #include #include diff --git a/platform/linux-generic/odp_shared_memory.c b/platform/linux-generic/odp_shared_memory.c index c9b04dfdb..edf261af7 100644 --- a/platform/linux-generic/odp_shared_memory.c +++ b/platform/linux-generic/odp_shared_memory.c @@ -11,7 +11,7 @@ #include #include #include -#include <_ishm_internal.h> +#include #include #include diff --git a/platform/linux-generic/pktio/ipc.c b/platform/linux-generic/pktio/ipc.c index 6dcc7a593..08e3e4bc9 100644 --- a/platform/linux-generic/pktio/ipc.c +++ b/platform/linux-generic/pktio/ipc.c @@ -11,7 +11,7 @@ #include #include #include -#include <_ishm_internal.h> +#include #include #include -- cgit v1.2.3 From ef5998dfd0ba1405fef6dd1a7f0e9adf686fca89 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Fri, 6 Apr 2018 16:17:01 +0300 Subject: linux-gen: hide debug prints from fd server Debug prints are very specific to fdserver internals and useless until you debug fdserver code itself. Add option to uncomment this prints if needed. Signed-off-by: Maxim Uvarov Reviewed-by: Bill Fischofer --- platform/linux-generic/odp_fdserver.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/platform/linux-generic/odp_fdserver.c b/platform/linux-generic/odp_fdserver.c index f8d40ff9a..a06fb5081 100644 --- a/platform/linux-generic/odp_fdserver.c +++ b/platform/linux-generic/odp_fdserver.c @@ -70,6 +70,14 @@ #define MAP_ANONYMOUS MAP_ANON #endif +#define FD_ODP_DEBUG_PRINT 0 + +#define FD_ODP_DBG(fmt, ...) 
\ + do { \ + if (FD_ODP_DEBUG_PRINT == 1) \ + ODP_DBG(fmt, ##__VA_ARGS__);\ + } while (0) + /* when accessing the client functions, clients should be mutexed: */ static odp_spinlock_t *client_lock; @@ -280,8 +288,8 @@ int _odp_fdserver_register_fd(fd_server_context_e context, uint64_t key, odp_spinlock_lock(client_lock); - ODP_DBG("FD client register: pid=%d key=%" PRIu64 ", fd=%d\n", - getpid(), key, fd_to_send); + FD_ODP_DBG("FD client register: pid=%d key=%" PRIu64 ", fd=%d\n", + getpid(), key, fd_to_send); s_sock = get_socket(); if (s_sock < 0) { @@ -326,8 +334,8 @@ int _odp_fdserver_deregister_fd(fd_server_context_e context, uint64_t key) odp_spinlock_lock(client_lock); - ODP_DBG("FD client deregister: pid=%d key=%" PRIu64 "\n", - getpid(), key); + FD_ODP_DBG("FD client deregister: pid=%d key=%" PRIu64 "\n", + getpid(), key); s_sock = get_socket(); if (s_sock < 0) { @@ -413,7 +421,7 @@ static int stop_server(void) odp_spinlock_lock(client_lock); - ODP_DBG("FD sending server stop request\n"); + FD_ODP_DBG("FD sending server stop request\n"); s_sock = get_socket(); if (s_sock < 0) { @@ -464,8 +472,8 @@ static int handle_request(int client_sock) fd_table[fd_table_nb_entries].context = context; fd_table[fd_table_nb_entries].key = key; fd_table[fd_table_nb_entries++].fd = fd; - ODP_DBG("storing {ctx=%d, key=%" PRIu64 "}->fd=%d\n", - context, key, fd); + FD_ODP_DBG("storing {ctx=%d, key=%" PRIu64 "}->fd=%d\n", + context, key, fd); } else { ODP_ERR("FD table full\n"); send_fdserver_msg(client_sock, FD_REGISTER_NACK, @@ -517,9 +525,9 @@ static int handle_request(int client_sock) for (i = 0; i < fd_table_nb_entries; i++) { if ((fd_table[i].context == context) && (fd_table[i].key == key)) { - ODP_DBG("drop {ctx=%d," - " key=%" PRIu64 "}->fd=%d\n", - context, key, fd_table[i].fd); + FD_ODP_DBG("drop {ctx=%d," + " key=%" PRIu64 "}->fd=%d\n", + context, key, fd_table[i].fd); close(fd_table[i].fd); fd_table[i] = fd_table[--fd_table_nb_entries]; 
send_fdserver_msg(client_sock, @@ -535,7 +543,7 @@ static int handle_request(int client_sock) break; case FD_SERVERSTOP_REQ: - ODP_DBG("Stoping FD server\n"); + FD_ODP_DBG("Stoping FD server\n"); return 1; default: -- cgit v1.2.3 From ed4e1bfea15d8502e50b6c210f38dd3f13683d2f Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Fri, 6 Apr 2018 16:21:34 +0300 Subject: linux-gen: ishm: remove useless debug print in case of huge page it is ok that file was not created, no need to generate confusing debug message here. Signed-off-by: Maxim Uvarov Reviewed-by: Bill Fischofer --- platform/linux-generic/odp_ishm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c index 3f123c901..5f5e5eefb 100644 --- a/platform/linux-generic/odp_ishm.c +++ b/platform/linux-generic/odp_ishm.c @@ -456,10 +456,7 @@ static int create_file(int block_index, huge_flag_t huge, uint64_t len, fd = open(filename, oflag, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd < 0) { - if (huge == HUGE) - ODP_DBG("open failed for %s: %s.\n", - filename, strerror(errno)); - else + if (huge != HUGE) ODP_ERR("open failed for %s: %s.\n", filename, strerror(errno)); return -1; -- cgit v1.2.3 From 92ebb26bae583ff864bc4bef6a2776329527a069 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Fri, 6 Apr 2018 16:40:17 +0300 Subject: travis: use -M option for generated patches generate patches with -M (move) option to not validate moved files from stratch. Signed-off-by: Maxim Uvarov Reviewed-by: Bill Fischofer --- scripts/ci-checkpatches.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ci-checkpatches.sh b/scripts/ci-checkpatches.sh index 798efe052..383045cd7 100755 --- a/scripts/ci-checkpatches.sh +++ b/scripts/ci-checkpatches.sh @@ -7,14 +7,14 @@ echo "Run checkpatch for ${PATCHES}" # validate only the latest commit if it's not merge commit. 
if [ "$PATCHES" = "" ]; then - git format-patch -1 HEAD; + git format-patch -1 -M HEAD; perl ./scripts/checkpatch.pl *.patch; exit $? fi git show --summary HEAD| grep -q '^Merge:'; if [ $? -ne 0 ]; then - git format-patch -1 HEAD; + git format-patch -1 -M HEAD; perl ./scripts/checkpatch.pl *.patch; exit $? fi -- cgit v1.2.3 From 84f8013995d4b96ac9471cc92285c76b50464415 Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Tue, 27 Mar 2018 12:55:57 +0300 Subject: test: sched_pktio: add option to collect statistics Options -s enables statistics collection and changes return value to kilo-packets. Return value can be used in scripts to validate correct test execution. Signed-off-by: Petri Savolainen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/odp_sched_pktio.c | 84 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/test/performance/odp_sched_pktio.c b/test/performance/odp_sched_pktio.c index cf6e17f4c..16d14b195 100644 --- a/test/performance/odp_sched_pktio.c +++ b/test/performance/odp_sched_pktio.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -22,12 +24,18 @@ #define BURST_SIZE 32 #define CHECK_PERIOD 10000 #define MAX_PKTIO_INDEXES 256 +#define TEST_PASSED_LIMIT 5000 typedef struct { int worker_id; void *test_global_ptr; } worker_arg_t; +typedef struct ODP_ALIGNED_CACHE { + uint64_t rx_pkt; + uint64_t tx_pkt; +} worker_stat_t; + typedef struct { volatile int stop_workers; odp_barrier_t worker_start; @@ -35,6 +43,7 @@ typedef struct { struct { int num_worker; int num_pktio; + uint8_t collect_stat; } opt; int max_workers; @@ -66,6 +75,10 @@ typedef struct { /* Maps pktio input index to pktio[] index for output */ uint8_t pktio_map[MAX_PKTIO_INDEXES]; + worker_stat_t worker_stat[MAX_WORKERS]; + uint64_t rx_pkt_sum; + uint64_t tx_pkt_sum; + } test_global_t; static test_global_t *test_global; @@ -143,6 +156,11 @@ static int worker_thread(void *arg) if 
(odp_unlikely(drop)) odp_packet_free_multi(&pkt[sent], drop); + + if (odp_unlikely(test_global->opt.collect_stat)) { + test_global->worker_stat[worker_id].rx_pkt += num; + test_global->worker_stat[worker_id].tx_pkt += sent; + } } printf("Worker %i stopped\n", worker_id); @@ -172,7 +190,8 @@ static void print_usage(const char *progname) "\n" "OPTIONS:\n" " -i, --interface Packet IO interfaces (comma-separated, no spaces)\n" - " -c, --count Worker thread count. Default: 1\n" + " -c, --count Worker thread count. Default: 1\n" + " -s, --stat Collect statistics.\n" " -h, --help Display help and exit.\n\n", NO_PATH(progname)); } @@ -185,10 +204,11 @@ static int parse_options(int argc, char *argv[], test_global_t *test_global) const struct option longopts[] = { {"interface", required_argument, NULL, 'i'}, {"count", required_argument, NULL, 'c'}, + {"stat", no_argument, NULL, 's'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - const char *shortopts = "+i:c:h"; + const char *shortopts = "+i:c:sh"; int ret = 0; test_global->opt.num_worker = 1; @@ -237,6 +257,9 @@ static int parse_options(int argc, char *argv[], test_global_t *test_global) case 'c': test_global->opt.num_worker = atoi(optarg); break; + case 's': + test_global->opt.collect_stat = 1; + break; case 'h': print_usage(argv[0]); ret = -1; @@ -342,9 +365,47 @@ static void print_config(test_global_t *test_global) " num output queues: %i\n", test_global->num_input_queues, test_global->num_output_queues); + printf(" collect statistics: %u\n", test_global->opt.collect_stat); + printf("\n"); } +static void print_stat(test_global_t *test_global, uint64_t nsec) +{ + int i; + uint64_t rx, tx, drop; + uint64_t rx_sum = 0; + uint64_t tx_sum = 0; + double sec = 0.0; + + printf("\nTest statistics\n"); + printf(" worker rx_pkt tx_pkt dropped\n"); + + for (i = 0; i < test_global->opt.num_worker; i++) { + rx = test_global->worker_stat[i].rx_pkt; + tx = test_global->worker_stat[i].tx_pkt; + rx_sum += rx; + tx_sum += tx; + 
+ printf(" %6i %16" PRIu64 " %16" PRIu64 " %16" PRIu64 "\n", + i, rx, tx, rx - tx); + } + + test_global->rx_pkt_sum = rx_sum; + test_global->tx_pkt_sum = tx_sum; + drop = rx_sum - tx_sum; + + printf(" --------------------------------------------------\n"); + printf(" total %16" PRIu64 " %16" PRIu64 " %16" PRIu64 "\n\n", + rx_sum, tx_sum, drop); + + sec = nsec / 1000000000.0; + printf(" Total test time: %.2f sec\n", sec); + printf(" Rx packet rate: %.2f pps\n", rx_sum / sec); + printf(" Tx packet rate: %.2f pps\n", tx_sum / sec); + printf(" Drop rate: %.2f pps\n\n", drop / sec); +} + static int open_pktios(test_global_t *test_global) { odp_pool_param_t pool_param; @@ -628,7 +689,9 @@ int main(int argc, char *argv[]) odp_instance_t instance; odp_init_t init; odp_shm_t shm; + odp_time_t t1, t2; odph_odpthread_t thread[MAX_WORKERS]; + int ret = 0; signal(SIGINT, sig_handler); @@ -696,13 +759,28 @@ int main(int argc, char *argv[]) odp_mb_full(); } + t1 = odp_time_local(); + wait_workers(thread, test_global); + t2 = odp_time_local(); + quit: stop_pktios(test_global); empty_queues(); close_pktios(test_global); + if (test_global->opt.collect_stat) { + print_stat(test_global, odp_time_diff_ns(t2, t1)); + + /* Encode return value for validation test usage. */ + if (test_global->rx_pkt_sum > TEST_PASSED_LIMIT) + ret += 1; + + if (test_global->tx_pkt_sum > TEST_PASSED_LIMIT) + ret += 2; + } + if (odp_shm_free(shm)) { printf("Error: shm free failed.\n"); return -1; @@ -718,5 +796,5 @@ quit: return -1; } - return 0; + return ret; } -- cgit v1.2.3 From 654507c6eeed83dd84210f7e481ded34f5d4cdca Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Tue, 27 Mar 2018 14:25:12 +0300 Subject: test: sched_pktio: run in validation test suite Add script to run the test as part of validation test suite. The script is based on odp_l2fwd_run.sh. Use only single worker as that requires only one rx/tx queue from pktio. Test passes if at least 1000 packets are received in 5 seconds. 
Signed-off-by: Petri Savolainen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- test/performance/Makefile.am | 1 + test/performance/odp_sched_pktio_run.sh | 101 ++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100755 test/performance/odp_sched_pktio_run.sh diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index c8f0bdecf..811cc1c0b 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -15,6 +15,7 @@ COMPILE_ONLY = odp_l2fwd \ TESTSCRIPTS = odp_l2fwd_run.sh \ odp_sched_latency_run.sh \ + odp_sched_pktio_run.sh \ odp_scheduling_run.sh if HAVE_PCAP diff --git a/test/performance/odp_sched_pktio_run.sh b/test/performance/odp_sched_pktio_run.sh new file mode 100755 index 000000000..db14fb598 --- /dev/null +++ b/test/performance/odp_sched_pktio_run.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# +# Copyright (c) 2018, Linaro Limited +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +# directory where test binaries have been built +TEST_DIR="${TEST_DIR:-$PWD}" +# directory where test sources are, including scripts +TEST_SRC_DIR=$(dirname $0) + +PATH=$TEST_DIR:$TEST_DIR/../../example/generator:$PATH + +# exit codes expected by automake for skipped tests +TEST_SKIPPED=77 + +VALIDATION_TESTDIR=platform/$ODP_PLATFORM/test/validation +PLATFORM_VALIDATION=${TEST_SRC_DIR}/../../$VALIDATION_TESTDIR + +FLOOD_MODE=0 + +# Use installed pktio env or for make check take it from platform directory +if [ -f "./pktio_env" ]; then + . ./pktio_env +elif [ "$ODP_PLATFORM" = "" ]; then + echo "$0: error: ODP_PLATFORM must be defined" + # not skipped as this should never happen via "make check" + exit 1 +elif [ -f ${PLATFORM_VALIDATION}/api/pktio/pktio_env ]; then + . ${PLATFORM_VALIDATION}/api/pktio/pktio_env +else + echo "BUG: unable to find pktio_env!" + echo "pktio_env has to be in current directory or " + echo "in platform/\$ODP_PLATFORM/test." 
+ echo "ODP_PLATFORM=\"$ODP_PLATFORM\"" + exit 1 +fi + +run_sched_pktio() +{ + setup_pktio_env clean # install trap to call cleanup_pktio_env + + if [ $? -ne 0 ]; then + echo "setup_pktio_env error $?" + exit $TEST_SKIPPED + fi + + type odp_generator > /dev/null + if [ $? -ne 0 ]; then + echo "odp_generator not installed. Aborting." + cleanup_pktio_env + exit 1 + fi + + # 1 worker + odp_sched_pktio${EXEEXT} -i $IF1,$IF2 -c 1 -s & + + TEST_PID=$! + + sleep 1 + + # Run generator with one worker + (odp_generator${EXEEXT} --interval $FLOOD_MODE -I $IF0 \ + --srcip 192.168.0.1 --dstip 192.168.0.2 \ + -m u -w 1 2>&1 > /dev/null) \ + 2>&1 > /dev/null & + + GEN_PID=$! + + # Run test for 5 sec + sleep 5 + + kill ${GEN_PID} + wait ${GEN_PID} + + # Kill with SIGINT to output statistics + kill -2 ${TEST_PID} + wait ${TEST_PID} + + ret=$? + + if [ $ret -eq 3 ]; then + echo "PASS: received and transmitted over 5000 packets" + ret=0 + else + echo "FAIL: less than thousand rx or tx packets $ret" + ret=1 + fi + + cleanup_pktio_env + + exit $ret +} + +case "$1" in + setup) setup_pktio_env ;; + cleanup) cleanup_pktio_env ;; + *) run_sched_pktio ;; +esac -- cgit v1.2.3 From 83fda5a447335b95bdd991187d4efb0a15a5709a Mon Sep 17 00:00:00 2001 From: Petri Savolainen Date: Tue, 27 Mar 2018 16:53:08 +0300 Subject: api: packet: UDP checksum value of zero UDP checksum value of zero results OK status when checksum is checked. Signed-off-by: Petri Savolainen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- include/odp/api/spec/packet.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/odp/api/spec/packet.h b/include/odp/api/spec/packet.h index e1f2f2218..66665e121 100644 --- a/include/odp/api/spec/packet.h +++ b/include/odp/api/spec/packet.h @@ -1711,6 +1711,9 @@ odp_packet_chksum_status_t odp_packet_l3_chksum_status(odp_packet_t pkt); * attempt. 
It depends on packet input (or IPSEC) configuration, packet content * and implementation capabilities if checksum check is attempted for a packet. * + * When a UDP packet does not have a checksum (e.g. checksum field of a UDP/IPv4 + * packet is zero), checksum check result is ODP_PACKET_CHKSUM_OK. + * * @param pkt Packet handle * * @return L4 checksum check status -- cgit v1.2.3 From 61d16e162788ac0923c544b4d31bb847fa3d9189 Mon Sep 17 00:00:00 2001 From: yhe Date: Wed, 21 Mar 2018 22:06:58 +0800 Subject: linux-gen:crypto:implement AES-XCBC-MAC and SHA384-HMAC implement the algorithm AES-XCBC-MAC and SHA384-HMAC Signed-off-by: Tom He Reviewed-by: Maxim Uvarov Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_crypto.c | 189 +++++++++++++++++++++++++++++++++ platform/linux-generic/odp_ipsec_sad.c | 4 + 2 files changed, 193 insertions(+) diff --git a/platform/linux-generic/odp_crypto.c b/platform/linux-generic/odp_crypto.c index 21449cfea..852f02125 100644 --- a/platform/linux-generic/odp_crypto.c +++ b/platform/linux-generic/odp_crypto.c @@ -35,6 +35,8 @@ #endif #define MAX_SESSIONS 32 +#define AES_BLOCK_SIZE 16 +#define AES_KEY_LENGTH 16 /* * Cipher algorithm capabilities @@ -95,10 +97,18 @@ static const odp_crypto_auth_capability_t auth_capa_sha256_hmac[] = { {.digest_len = 16, .key_len = 32, .aad_len = {.min = 0, .max = 0, .inc = 0} }, {.digest_len = 32, .key_len = 32, .aad_len = {.min = 0, .max = 0, .inc = 0} } }; +static const odp_crypto_auth_capability_t auth_capa_sha384_hmac[] = { +{.digest_len = 24, .key_len = 48, .aad_len = {.min = 0, .max = 0, .inc = 0} }, +{.digest_len = 48, .key_len = 48, .aad_len = {.min = 0, .max = 0, .inc = 0} } }; + static const odp_crypto_auth_capability_t auth_capa_sha512_hmac[] = { {.digest_len = 32, .key_len = 64, .aad_len = {.min = 0, .max = 0, .inc = 0} }, {.digest_len = 64, .key_len = 64, .aad_len = {.min = 0, .max = 0, .inc = 0} } }; +static const odp_crypto_auth_capability_t 
auth_capa_aes_xcbc[] = { +{.digest_len = 12, .key_len = 16, .aad_len = {.min = 0, .max = 0, .inc = 0} }, +{.digest_len = 16, .key_len = 16, .aad_len = {.min = 0, .max = 0, .inc = 0} } }; + static const odp_crypto_auth_capability_t auth_capa_aes_gcm[] = { {.digest_len = 16, .key_len = 0, .aad_len = {.min = 8, .max = 12, .inc = 4} } }; @@ -308,6 +318,169 @@ void packet_hmac(odp_packet_t pkt, HMAC_Final(ctx, hash, NULL); } +static void xor_block(uint8_t *res, const uint8_t *op) +{ + int i; + + for (i = 0; i < AES_BLOCK_SIZE; i++) + res[i] ^= op[i]; +} + +static void memxor(uint8_t *res, const uint8_t *op, size_t len) +{ + for (size_t i = 0; i < len; i++) + res[i] ^= op[i]; +} + +static +void packet_aes_xcbc_mac(odp_packet_t pkt, + const odp_crypto_packet_op_param_t *param, + odp_crypto_generic_session_t *session, + uint8_t *hash) +{ + uint8_t e[AES_BLOCK_SIZE] = {0}; + size_t eoff = 0; + uint32_t offset = param->auth_range.offset; + uint32_t len = param->auth_range.length; + uint32_t seglen = 0; + uint32_t datalen = 0; + int dummy_len = 0; + EVP_CIPHER_CTX *ctx; + void *mapaddr; + uint8_t *data = NULL; + + ODP_ASSERT(offset + len <= odp_packet_len(pkt)); + ODP_ASSERT(session != NULL); + ODP_ASSERT(sizeof(session->auth.key) >= 3 * AES_KEY_LENGTH); + + ctx = EVP_CIPHER_CTX_new(); + EVP_EncryptInit_ex(ctx, session->auth.evp_cipher, + NULL, session->auth.key, NULL); + + while (len > 0) { + mapaddr = odp_packet_offset(pkt, offset, &seglen, NULL); + datalen = seglen >= len ? 
len : seglen; + data = (uint8_t *)mapaddr; + offset += datalen; + len -= datalen; + if (eoff != 0) { + if (eoff + datalen > AES_BLOCK_SIZE) { + memxor(e + eoff, data, AES_BLOCK_SIZE - eoff); + datalen -= (AES_BLOCK_SIZE - eoff); + eoff = 0; + EVP_EncryptUpdate(ctx, + e, &dummy_len, e, sizeof(e)); + } else { + memxor(e + eoff, data, datalen); + eoff += datalen; + continue; + } + } + while (datalen > AES_BLOCK_SIZE) { + xor_block(e, data); + EVP_EncryptUpdate(ctx, e, &dummy_len, e, sizeof(e)); + data += AES_BLOCK_SIZE; + datalen -= AES_BLOCK_SIZE; + } + /* Segmentation handle */ + if (datalen > 0) { + memxor(e, data, datalen); + eoff = datalen; + } + } + + if (eoff == AES_BLOCK_SIZE) { + xor_block(e, session->auth.key + AES_KEY_LENGTH); + } else { + e[eoff] ^= 0x80; + xor_block(e, session->auth.key + AES_KEY_LENGTH * 2); + } + EVP_EncryptUpdate(ctx, hash, &dummy_len, e, sizeof(e)); + EVP_CIPHER_CTX_free(ctx); +} + +static +odp_crypto_alg_err_t auth_xcbcmac_gen(odp_packet_t pkt, + const odp_crypto_packet_op_param_t *param, + odp_crypto_generic_session_t *session) +{ + uint8_t hash[EVP_MAX_MD_SIZE]; + + /* Hash it */ + packet_aes_xcbc_mac(pkt, param, session, hash); + + /* Copy to the output location */ + odp_packet_copy_from_mem(pkt, + param->hash_result_offset, + session->p.auth_digest_len, + hash); + + return ODP_CRYPTO_ALG_ERR_NONE; +} + +static odp_crypto_alg_err_t +auth_xcbcmac_check(odp_packet_t pkt, + const odp_crypto_packet_op_param_t *param, + odp_crypto_generic_session_t *session) +{ + uint32_t bytes = session->p.auth_digest_len; + uint8_t hash_in[EVP_MAX_MD_SIZE]; + uint8_t hash_out[EVP_MAX_MD_SIZE]; + + /* Copy current value out and clear it before authentication */ + odp_packet_copy_to_mem(pkt, param->hash_result_offset, + bytes, hash_in); + + _odp_packet_set_data(pkt, param->hash_result_offset, + 0, bytes); + + /* Hash it */ + packet_aes_xcbc_mac(pkt, param, session, hash_out); + + /* Verify match */ + if (0 != memcmp(hash_in, hash_out, bytes)) + return 
ODP_CRYPTO_ALG_ERR_ICV_CHECK; + + /* Matched */ + return ODP_CRYPTO_ALG_ERR_NONE; +} + +static int process_aesxcbc_param(odp_crypto_generic_session_t *session, + const EVP_CIPHER *cipher) +{ + uint32_t k1[4] = { 0x01010101, 0x01010101, 0x01010101, 0x01010101 }; + uint32_t k2[4] = { 0x02020202, 0x02020202, 0x02020202, 0x02020202 }; + uint32_t k3[4] = { 0x03030303, 0x03030303, 0x03030303, 0x03030303 }; + EVP_CIPHER_CTX *ctx; + int dummy_len = 0; + + /* Set function */ + if (ODP_CRYPTO_OP_ENCODE == session->p.op) + session->auth.func = auth_xcbcmac_gen; + else + session->auth.func = auth_xcbcmac_check; + session->auth.init = null_crypto_init_routine; + + session->auth.evp_cipher = cipher; + ctx = EVP_CIPHER_CTX_new(); + EVP_EncryptInit_ex(ctx, session->auth.evp_cipher, NULL, + session->p.auth_key.data, NULL); + /* K1 = 0x01010101010101010101010101010101 encrypted with Key K */ + EVP_EncryptUpdate(ctx, session->auth.key, + &dummy_len, (uint8_t *)k1, AES_BLOCK_SIZE); + + /* K2 = 0x02020202020202020202020202020202 encrypted with Key K */ + EVP_EncryptUpdate(ctx, session->auth.key + AES_KEY_LENGTH, + &dummy_len, (uint8_t *)k2, AES_BLOCK_SIZE); + + /* K3 = 0x03030303030303030303030303030303 encrypted with Key K */ + EVP_EncryptUpdate(ctx, session->auth.key + AES_KEY_LENGTH * 2, + &dummy_len, (uint8_t *)k3, AES_BLOCK_SIZE); + + EVP_CIPHER_CTX_free(ctx); + return 0; +} + static odp_crypto_alg_err_t auth_hmac_gen(odp_packet_t pkt, const odp_crypto_packet_op_param_t *param, @@ -1171,7 +1344,9 @@ int odp_crypto_capability(odp_crypto_capability_t *capa) capa->auths.bit.md5_hmac = 1; capa->auths.bit.sha1_hmac = 1; capa->auths.bit.sha256_hmac = 1; + capa->auths.bit.sha384_hmac = 1; capa->auths.bit.sha512_hmac = 1; + capa->auths.bit.aes_xcbc_mac = 1; capa->auths.bit.aes_gcm = 1; capa->auths.bit.aes_ccm = 1; capa->auths.bit.aes_gmac = 1; @@ -1268,10 +1443,18 @@ int odp_crypto_auth_capability(odp_auth_alg_t auth, src = auth_capa_sha256_hmac; num = sizeof(auth_capa_sha256_hmac) / 
size; break; + case ODP_AUTH_ALG_SHA384_HMAC: + src = auth_capa_sha384_hmac; + num = sizeof(auth_capa_sha384_hmac) / size; + break; case ODP_AUTH_ALG_SHA512_HMAC: src = auth_capa_sha512_hmac; num = sizeof(auth_capa_sha512_hmac) / size; break; + case ODP_AUTH_ALG_AES_XCBC_MAC: + src = auth_capa_aes_xcbc; + num = sizeof(auth_capa_aes_xcbc) / size; + break; case ODP_AUTH_ALG_AES_GCM: src = auth_capa_aes_gcm; num = sizeof(auth_capa_aes_gcm) / size; @@ -1481,9 +1664,15 @@ odp_crypto_session_create(odp_crypto_session_param_t *param, case ODP_AUTH_ALG_SHA256_HMAC: rc = process_auth_hmac_param(session, EVP_sha256()); break; + case ODP_AUTH_ALG_SHA384_HMAC: + rc = process_auth_hmac_param(session, EVP_sha384()); + break; case ODP_AUTH_ALG_SHA512_HMAC: rc = process_auth_hmac_param(session, EVP_sha512()); break; + case ODP_AUTH_ALG_AES_XCBC_MAC: + rc = process_aesxcbc_param(session, EVP_aes_128_ecb()); + break; #if ODP_DEPRECATED_API case ODP_AUTH_ALG_AES128_GCM: if (param->cipher_alg == ODP_CIPHER_ALG_AES128_GCM) diff --git a/platform/linux-generic/odp_ipsec_sad.c b/platform/linux-generic/odp_ipsec_sad.c index 8dab489cc..c21269694 100644 --- a/platform/linux-generic/odp_ipsec_sad.c +++ b/platform/linux-generic/odp_ipsec_sad.c @@ -246,8 +246,12 @@ uint32_t _odp_ipsec_auth_digest_len(odp_auth_alg_t auth) #endif case ODP_AUTH_ALG_SHA256_HMAC: return 16; + case ODP_AUTH_ALG_SHA384_HMAC: + return 24; case ODP_AUTH_ALG_SHA512_HMAC: return 32; + case ODP_AUTH_ALG_AES_XCBC_MAC: + return 12; #if ODP_DEPRECATED_API case ODP_AUTH_ALG_AES128_GCM: #endif -- cgit v1.2.3 From 290decaf3259b036c3a402be428b04ba100f0f81 Mon Sep 17 00:00:00 2001 From: yhe Date: Wed, 28 Mar 2018 08:51:20 +0800 Subject: validation:crypto:implement AES-XCBC-MAC and SHA384-HMAC Add AES-XCBC-MAC and SHA384-HMAC into the test case Signed-off-by: Tom He Reviewed-by: Maxim Uvarov Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- test/validation/api/crypto/odp_crypto_test_inp.c | 91 ++++++++++ 
test/validation/api/crypto/test_vectors.h | 209 +++++++++++++++++++++++ test/validation/api/crypto/test_vectors_len.h | 10 ++ test/validation/api/ipsec/ipsec.c | 8 + 4 files changed, 318 insertions(+) diff --git a/test/validation/api/crypto/odp_crypto_test_inp.c b/test/validation/api/crypto/odp_crypto_test_inp.c index 39ef33505..aa053dd50 100644 --- a/test/validation/api/crypto/odp_crypto_test_inp.c +++ b/test/validation/api/crypto/odp_crypto_test_inp.c @@ -47,8 +47,12 @@ static const char *auth_alg_name(odp_auth_alg_t auth) return "ODP_AUTH_ALG_SHA1_HMAC"; case ODP_AUTH_ALG_SHA256_HMAC: return "ODP_AUTH_ALG_SHA256_HMAC"; + case ODP_AUTH_ALG_SHA384_HMAC: + return "ODP_AUTH_ALG_SHA384_HMAC"; case ODP_AUTH_ALG_SHA512_HMAC: return "ODP_AUTH_ALG_SHA512_HMAC"; + case ODP_AUTH_ALG_AES_XCBC_MAC: + return "ODP_AUTH_ALG_AES_XCBC_MAC"; case ODP_AUTH_ALG_AES_GCM: return "ODP_AUTH_ALG_AES_GCM"; case ODP_AUTH_ALG_AES_GMAC: @@ -527,9 +531,15 @@ static void check_alg(odp_crypto_op_t op, if (auth_alg == ODP_AUTH_ALG_SHA256_HMAC && !(capa.auths.bit.sha256_hmac)) rc = -1; + if (auth_alg == ODP_AUTH_ALG_SHA384_HMAC && + !(capa.auths.bit.sha384_hmac)) + rc = -1; if (auth_alg == ODP_AUTH_ALG_SHA512_HMAC && !(capa.auths.bit.sha512_hmac)) rc = -1; + if (auth_alg == ODP_AUTH_ALG_AES_XCBC_MAC && + !(capa.auths.bit.aes_xcbc_mac)) + rc = -1; CU_ASSERT(!rc); CU_ASSERT((~capa.auths.all_bits & capa.hw_auths.all_bits) == 0); @@ -711,10 +721,18 @@ static int check_alg_support(odp_cipher_alg_t cipher, odp_auth_alg_t auth) if (!capability.auths.bit.sha256_hmac) return ODP_TEST_INACTIVE; break; + case ODP_AUTH_ALG_SHA384_HMAC: + if (!capability.auths.bit.sha384_hmac) + return ODP_TEST_INACTIVE; + break; case ODP_AUTH_ALG_SHA512_HMAC: if (!capability.auths.bit.sha512_hmac) return ODP_TEST_INACTIVE; break; + case ODP_AUTH_ALG_AES_XCBC_MAC: + if (!capability.auths.bit.aes_xcbc_mac) + return ODP_TEST_INACTIVE; + break; case ODP_AUTH_ALG_AES_GCM: if (!capability.auths.bit.aes_gcm) return 
ODP_TEST_INACTIVE; @@ -1205,6 +1223,38 @@ static void crypto_test_check_alg_hmac_sha256(void) false); } +static int check_alg_hmac_sha384(void) +{ + return check_alg_support(ODP_CIPHER_ALG_NULL, ODP_AUTH_ALG_SHA384_HMAC); +} + +/* This test verifies the correctness of HMAC_SHA384 digest operation. + * The output check length is truncated to 24 bytes (192 bits) as + * returned by the crypto operation API call. + * Note that hash digest is a one-way operation. + * In addition the test verifies if the implementation can use the + * packet buffer as completion event buffer. + * */ +static void crypto_test_gen_alg_hmac_sha384(void) +{ + check_alg(ODP_CRYPTO_OP_ENCODE, + ODP_CIPHER_ALG_NULL, + ODP_AUTH_ALG_SHA384_HMAC, + hmac_sha384_reference, + ARRAY_SIZE(hmac_sha384_reference), + false); +} + +static void crypto_test_check_alg_hmac_sha384(void) +{ + check_alg(ODP_CRYPTO_OP_DECODE, + ODP_CIPHER_ALG_NULL, + ODP_AUTH_ALG_SHA384_HMAC, + hmac_sha384_reference, + ARRAY_SIZE(hmac_sha384_reference), + false); +} + static int check_alg_hmac_sha512(void) { return check_alg_support(ODP_CIPHER_ALG_NULL, ODP_AUTH_ALG_SHA512_HMAC); @@ -1237,6 +1287,39 @@ static void crypto_test_check_alg_hmac_sha512(void) false); } +static int check_alg_aes_xcbc(void) +{ + return check_alg_support(ODP_CIPHER_ALG_NULL, + ODP_AUTH_ALG_AES_XCBC_MAC); +} + +/* This test verifies the correctness of AES_XCBC_MAC digest operation. + * The output check length is truncated to 16 bytes (128 bits) as + * returned by the crypto operation API call. + * Note that hash digest is a one-way operation. + * In addition the test verifies if the implementation can use the + * packet buffer as completion event buffer. 
+ * */ +static void crypto_test_gen_alg_aes_xcbc(void) +{ + check_alg(ODP_CRYPTO_OP_ENCODE, + ODP_CIPHER_ALG_NULL, + ODP_AUTH_ALG_AES_XCBC_MAC, + aes_xcbc_reference, + ARRAY_SIZE(aes_xcbc_reference), + false); +} + +static void crypto_test_check_alg_aes_xcbc(void) +{ + check_alg(ODP_CRYPTO_OP_DECODE, + ODP_CIPHER_ALG_NULL, + ODP_AUTH_ALG_AES_XCBC_MAC, + aes_xcbc_reference, + ARRAY_SIZE(aes_xcbc_reference), + false); +} + static int check_alg_aes_gmac(void) { return check_alg_support(ODP_CIPHER_ALG_NULL, ODP_AUTH_ALG_AES_GMAC); @@ -1424,10 +1507,18 @@ odp_testinfo_t crypto_suite[] = { check_alg_hmac_sha256), ODP_TEST_INFO_CONDITIONAL(crypto_test_check_alg_hmac_sha256, check_alg_hmac_sha256), + ODP_TEST_INFO_CONDITIONAL(crypto_test_gen_alg_hmac_sha384, + check_alg_hmac_sha384), + ODP_TEST_INFO_CONDITIONAL(crypto_test_check_alg_hmac_sha384, + check_alg_hmac_sha384), ODP_TEST_INFO_CONDITIONAL(crypto_test_gen_alg_hmac_sha512, check_alg_hmac_sha512), ODP_TEST_INFO_CONDITIONAL(crypto_test_check_alg_hmac_sha512, check_alg_hmac_sha512), + ODP_TEST_INFO_CONDITIONAL(crypto_test_gen_alg_aes_xcbc, + check_alg_aes_xcbc), + ODP_TEST_INFO_CONDITIONAL(crypto_test_check_alg_aes_xcbc, + check_alg_aes_xcbc), ODP_TEST_INFO_CONDITIONAL(crypto_test_gen_alg_aes_gmac, check_alg_aes_gmac), ODP_TEST_INFO_CONDITIONAL(crypto_test_gen_alg_aes_gmac_ovr_iv, diff --git a/test/validation/api/crypto/test_vectors.h b/test/validation/api/crypto/test_vectors.h index 23ed95251..9adb43324 100644 --- a/test/validation/api/crypto/test_vectors.h +++ b/test/validation/api/crypto/test_vectors.h @@ -1188,6 +1188,134 @@ static crypto_test_reference_t hmac_sha256_reference[] = { } }; +static crypto_test_reference_t hmac_sha384_reference[] = { + { + .auth_key_length = HMAC_SHA384_KEY_LEN, + .auth_key = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b }, + .length = 8, + /* "Hi There" */ + .plaintext = { 0x48, 0x69, 0x20, 0x54, 0x68, 0x65, 
0x72, 0x65}, + .ciphertext = { 0x48, 0x69, 0x20, 0x54, 0x68, 0x65, 0x72, 0x65}, + .digest_length = HMAC_SHA384_192_CHECK_LEN, + .digest = { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62, + 0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f, + 0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6 } + }, + { + .auth_key_length = HMAC_SHA384_KEY_LEN, + /* "Jefe" */ + .auth_key = { 0x4a, 0x65, 0x66, 0x65 }, + .length = 28, + /* what do ya want for nothing?*/ + .plaintext = { 0x77, 0x68, 0x61, 0x74, 0x20, 0x64, 0x6f, 0x20, + 0x79, 0x61, 0x20, 0x77, 0x61, 0x6e, 0x74, 0x20, + 0x66, 0x6f, 0x72, 0x20, 0x6e, 0x6f, 0x74, 0x68, + 0x69, 0x6e, 0x67, 0x3f }, + .ciphertext = { 0x77, 0x68, 0x61, 0x74, 0x20, 0x64, 0x6f, 0x20, + 0x79, 0x61, 0x20, 0x77, 0x61, 0x6e, 0x74, 0x20, + 0x66, 0x6f, 0x72, 0x20, 0x6e, 0x6f, 0x74, 0x68, + 0x69, 0x6e, 0x67, 0x3f }, + .digest_length = HMAC_SHA384_192_CHECK_LEN, + .digest = { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31, + 0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b, + 0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47 } + }, + { + .auth_key_length = HMAC_SHA384_KEY_LEN, + .auth_key = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }, + .length = 50, + .plaintext = { 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd }, + .ciphertext = { 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd }, + .digest_length = HMAC_SHA384_192_CHECK_LEN, + .digest = { 0x88, 0x06, 0x26, 
0x08, 0xd3, 0xe6, 0xad, 0x8a, + 0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f, + 0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb } + }, + { + .auth_key_length = HMAC_SHA384_KEY_LEN, + .auth_key = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b }, + .length = 8, + /* "Hi There" */ + .plaintext = { 0x48, 0x69, 0x20, 0x54, 0x68, 0x65, 0x72, 0x65}, + .ciphertext = { 0x48, 0x69, 0x20, 0x54, 0x68, 0x65, 0x72, 0x65}, + .digest_length = HMAC_SHA384_CHECK_LEN, + .digest = { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62, + 0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f, + 0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6, + 0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb, 0xc5, 0x9c, + 0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f, + 0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 } + }, + { + .auth_key_length = HMAC_SHA384_KEY_LEN, + /* "Jefe" */ + .auth_key = { 0x4a, 0x65, 0x66, 0x65 }, + .length = 28, + /* what do ya want for nothing?*/ + .plaintext = { 0x77, 0x68, 0x61, 0x74, 0x20, 0x64, 0x6f, 0x20, + 0x79, 0x61, 0x20, 0x77, 0x61, 0x6e, 0x74, 0x20, + 0x66, 0x6f, 0x72, 0x20, 0x6e, 0x6f, 0x74, 0x68, + 0x69, 0x6e, 0x67, 0x3f }, + .ciphertext = { 0x77, 0x68, 0x61, 0x74, 0x20, 0x64, 0x6f, 0x20, + 0x79, 0x61, 0x20, 0x77, 0x61, 0x6e, 0x74, 0x20, + 0x66, 0x6f, 0x72, 0x20, 0x6e, 0x6f, 0x74, 0x68, + 0x69, 0x6e, 0x67, 0x3f }, + .digest_length = HMAC_SHA384_CHECK_LEN, + .digest = { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31, + 0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b, + 0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47, + 0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22, 0x44, 0x5e, + 0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7, + 0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 } + }, + { + .auth_key_length = HMAC_SHA384_KEY_LEN, + .auth_key = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }, + .length = 50, + .plaintext = { 0xdd, 0xdd, 0xdd, 0xdd, 
0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd }, + .ciphertext = { 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd }, + .digest_length = HMAC_SHA384_CHECK_LEN, + .digest = {0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a, + 0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f, + 0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb, + 0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66, 0x14, 0x4b, + 0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9, + 0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 } + } +}; + static crypto_test_reference_t hmac_sha512_reference[] = { { .auth_key_length = HMAC_SHA512_KEY_LEN, @@ -1325,4 +1453,85 @@ static crypto_test_reference_t hmac_sha512_reference[] = { } }; +static crypto_test_reference_t aes_xcbc_reference[] = { + { + .auth_key_length = AES_XCBC_MAC_KEY_LEN, + .auth_key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .length = 3, + .plaintext = { 0x00, 0x01, 0x02 }, + .ciphertext = { 0x00, 0x01, 0x02 }, + .digest_length = AES_XCBC_MAC_96_CHECK_LEN, + .digest = { 0x5b, 0x37, 0x65, 0x80, 0xae, 0x2f, 0x19, 0xaf, + 0xe7, 0x21, 0x9c, 0xee } + }, + { + .auth_key_length = AES_XCBC_MAC_KEY_LEN, + .auth_key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .length = 16, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .ciphertext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .digest_length = AES_XCBC_MAC_96_CHECK_LEN, + .digest = { 0xd2, 0xa2, 0x46, 0xfa, 0x34, 0x9b, 0x68, 0xa7, + 0x99, 0x98, 0xa4, 0x39 } + }, + { + .auth_key_length = AES_XCBC_MAC_KEY_LEN, + .auth_key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .length = 20, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13 }, + .ciphertext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13 }, + .digest_length = AES_XCBC_MAC_96_CHECK_LEN, + .digest = { 0x47, 0xf5, 0x1b, 0x45, 0x64, 0x96, 0x62, 0x15, + 0xb8, 0x98, 0x5c, 0x63 } + }, + { + .auth_key_length = AES_XCBC_MAC_KEY_LEN, + .auth_key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .length = 3, + .plaintext = { 0x00, 0x01, 0x02 }, + .ciphertext = { 0x00, 0x01, 0x02 }, + .digest_length = AES_XCBC_MAC_CHECK_LEN, + .digest = { 0x5b, 0x37, 0x65, 0x80, 0xae, 0x2f, 0x19, 0xaf, + 0xe7, 0x21, 0x9c, 0xee, 0xf1, 0x72, 0x75, 0x6f } + }, + { + .auth_key_length = AES_XCBC_MAC_KEY_LEN, + .auth_key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .length = 16, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .ciphertext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .digest_length = AES_XCBC_MAC_CHECK_LEN, + .digest = { 0xd2, 0xa2, 0x46, 0xfa, 0x34, 0x9b, 0x68, 0xa7, + 0x99, 0x98, 0xa4, 0x39, 0x4f, 0xf7, 0xa2, 0x63 } + }, + { + .auth_key_length = AES_XCBC_MAC_KEY_LEN, + .auth_key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .length = 20, + .plaintext = { 0x00, 0x01, 0x02, 0x03, 0x04, 
0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13 }, + .ciphertext = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13 }, + .digest_length = AES_XCBC_MAC_CHECK_LEN, + .digest = { 0x47, 0xf5, 0x1b, 0x45, 0x64, 0x96, 0x62, 0x15, + 0xb8, 0x98, 0x5c, 0x63, 0x05, 0x5e, 0xd3, 0x08 } + } +}; + #endif diff --git a/test/validation/api/crypto/test_vectors_len.h b/test/validation/api/crypto/test_vectors_len.h index 860840cfe..95d202b62 100644 --- a/test/validation/api/crypto/test_vectors_len.h +++ b/test/validation/api/crypto/test_vectors_len.h @@ -50,6 +50,11 @@ #define HMAC_SHA1_96_CHECK_LEN 12 #define HMAC_SHA1_CHECK_LEN 20 +/* HMAC-SHA384 */ +#define HMAC_SHA384_KEY_LEN 48 +#define HMAC_SHA384_192_CHECK_LEN 24 +#define HMAC_SHA384_CHECK_LEN 48 + /* HMAC-SHA512 */ #define HMAC_SHA512_KEY_LEN 64 #define HMAC_SHA512_256_CHECK_LEN 32 @@ -60,4 +65,9 @@ #define CHACHA20_POLY1305_IV_LEN 12 #define CHACHA20_POLY1305_CHECK_LEN 16 +/* AES-XCBC-MAC */ +#define AES_XCBC_MAC_KEY_LEN 16 +#define AES_XCBC_MAC_96_CHECK_LEN 12 +#define AES_XCBC_MAC_CHECK_LEN 16 + #endif diff --git a/test/validation/api/ipsec/ipsec.c b/test/validation/api/ipsec/ipsec.c index 3e2e743d2..7c82d85a4 100644 --- a/test/validation/api/ipsec/ipsec.c +++ b/test/validation/api/ipsec/ipsec.c @@ -204,10 +204,18 @@ int ipsec_check(odp_bool_t ah, if (!capa.auths.bit.sha256_hmac) return ODP_TEST_INACTIVE; break; + case ODP_AUTH_ALG_SHA384_HMAC: + if (!capa.auths.bit.sha384_hmac) + return ODP_TEST_INACTIVE; + break; case ODP_AUTH_ALG_SHA512_HMAC: if (!capa.auths.bit.sha512_hmac) return ODP_TEST_INACTIVE; break; + case ODP_AUTH_ALG_AES_XCBC_MAC: + if (!capa.auths.bit.aes_xcbc_mac) + return ODP_TEST_INACTIVE; + break; case ODP_AUTH_ALG_AES_GCM: if (!capa.auths.bit.aes_gcm) return ODP_TEST_INACTIVE; -- cgit v1.2.3 From 009dab38672eaf8ab6eddc551da41a84e86915a5 Mon Sep 17 00:00:00 2001 From: Bill Fischofer 
Date: Wed, 4 Apr 2018 16:16:11 -0500 Subject: doc: userguide: typo corrections Signed-off-by: Bill Fischofer Signed-off-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide-cls.adoc | 2 +- doc/users-guide/users-guide-crypto.adoc | 2 +- doc/users-guide/users-guide-ipsec.adoc | 4 ++-- doc/users-guide/users-guide-pktio.adoc | 4 ++-- doc/users-guide/users-guide-tm.adoc | 8 ++++---- doc/users-guide/users-guide.adoc | 16 ++++++++-------- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/users-guide/users-guide-cls.adoc b/doc/users-guide/users-guide-cls.adoc index a689826c7..359d225d8 100644 --- a/doc/users-guide/users-guide-cls.adoc +++ b/doc/users-guide/users-guide-cls.adoc @@ -7,7 +7,7 @@ prioritization, classification and scheduling of each packet, so that the software application can run faster, scale better and adhere to QoS requirements. -The following API abstraction are not modelled after any existing product +The following API abstraction are not modeled after any existing product implementation, but is instead defined in terms of what a typical data-plane application may require from such a platform, without sacrificing simplicity and avoiding ambiguity. Certain terms that are being used within the context of diff --git a/doc/users-guide/users-guide-crypto.adoc b/doc/users-guide/users-guide-crypto.adoc index 029f47b17..b44402adf 100644 --- a/doc/users-guide/users-guide-crypto.adoc +++ b/doc/users-guide/users-guide-crypto.adoc @@ -175,7 +175,7 @@ any software generated pseudo-random data. May not be available on all platforms. These form a hierarchy with BASIC being the lowest kind of random and TRUE -behing the highest. The main API for accessing random data is: +being the highest. 
The main API for accessing random data is: [source,c] ----- diff --git a/doc/users-guide/users-guide-ipsec.adoc b/doc/users-guide/users-guide-ipsec.adoc index ac4eae85d..6af676620 100644 --- a/doc/users-guide/users-guide-ipsec.adoc +++ b/doc/users-guide/users-guide-ipsec.adoc @@ -381,7 +381,7 @@ any further application involvement. Only if a problem arises will the packet be returned to the application with an `odp_ipsec_packet_result_t` indicating the nature of the problem. -Note that while operating in inline mode, asychronous lookaside operations are +Note that while operating in inline mode, asynchronous lookaside operations are also permitted. This provide the application with additional flexibility if, for example, some packets need additional handling that cannot be supported directly with inline IPsec processing. @@ -449,7 +449,7 @@ the application continues to receive and process IPsec events as normal. Disable completion is indicated by the application seeing an event of type `ODP_EVENT_IPSEC_STATUS` for this SA that contains an `odp_ipsec_status_id_t` of `ODP_IPSEC_STATUS_SA_DISABLE`. For inbound SAs, receipt of this event means -that the application has seen all IPsec packets associatd with this SA that +that the application has seen all IPsec packets associated with this SA that were pending at the time of the disable call. For outbound SAs, receipt of this event means that the application has seen all result events associated with packets sent via this SA. diff --git a/doc/users-guide/users-guide-pktio.adoc b/doc/users-guide/users-guide-pktio.adoc index 80a58d2fb..ef5cced66 100644 --- a/doc/users-guide/users-guide-pktio.adoc +++ b/doc/users-guide/users-guide-pktio.adoc @@ -114,8 +114,8 @@ typedef struct odp_pktio_param_t { ODP defines *"loop"* as a reserved name to indicate that this PktIO represents a loopback interface. 
Loopback interfaces are useful as a means of recycling packets back for reclassification after decryption or decapsulation, as well as -for diagnostic or testing purposes. For example, when receiving IPSEC traffic, -the classifier is able to recognize that the traffic is IPSEC, however until +for diagnostic or testing purposes. For example, when receiving IPsec traffic, +the classifier is able to recognize that the traffic is IPsec, however until the traffic is decrypted it is unable to say what that traffic contains. So following decryption, sending the decrypted packet back to a loopback interface allows the classifier to take a "second look" at the packet and diff --git a/doc/users-guide/users-guide-tm.adoc b/doc/users-guide/users-guide-tm.adoc index 251297335..55efb1b21 100644 --- a/doc/users-guide/users-guide-tm.adoc +++ b/doc/users-guide/users-guide-tm.adoc @@ -10,7 +10,7 @@ A given platform supporting this TM API could support one or more pure hardware based packet scheduling systems, one or more pure software based systems or one or more hybrid systems - where because of hardware constraints some of the packet scheduling is done in hardware and some is done in software. In -addition, there may also be additional API's beyond those described here for: +addition, there may also be additional APIs beyond those described here for: - controlling advanced capabilities supported by specific hardware, software or hybrid subsystems @@ -84,7 +84,7 @@ traffic, while allowing for less idle outputs. ==== Weighted Fair Queuing -Weighted Fair Queuing (WFQ) is used to arbitrate amongst multiple input +Weighted Fair Queuing (WFQ) is used to arbitrate among multiple input packets with the same priority. Each input can be assigned a weight in the range MIN_WFQ_WEIGHT..MAX_WFQ_WEIGHT (nominally 1..255) that affects the way the algorithm chooses the next packet. 
If all of the weights are equal AND all @@ -158,7 +158,7 @@ final scheduling decision is controlled by equal priority schedulers, strict priority multiplexers, bandwidth shapers - at multiple levels - all forming a tree rooted at a single egress object. In other words, all tm_queues and tm_nodes have the property that their logical "output" feeds -into one fan-in of a subsequent tm_node or egresss object - forming a proper +into one fan-in of a subsequent tm_node or egress object - forming a proper tree. .Hierarchical Scheduling @@ -178,7 +178,7 @@ choice" of what packet/tm_queue should next be serviced. Tm_nodes are the main "entity"/object that a TM system is composed of. Each tm_node is a mini-TM subsystem of its own, but the interconnection and interplay of a multi-level "tree" of tm_nodes can allow the user to specify -some very sophisticated behaviours. Each tm_node can contain a set of scheduler +some very sophisticated behaviors. Each tm_node can contain a set of scheduler (one per strict priority level), a strict priority multiplexer, a bandwidth shaper and a WRED component - or a subset of these. diff --git a/doc/users-guide/users-guide.adoc b/doc/users-guide/users-guide.adoc index 7f2ad69e2..d0687f97b 100644 --- a/doc/users-guide/users-guide.adoc +++ b/doc/users-guide/users-guide.adoc @@ -151,7 +151,7 @@ of the specification or other minor changes that do not affect either the syntax or semantics of the specification. Such changes in the API specification are expected to be rare. Increments to the minor level represent the introduction of new APIs or functional capabilities, or changes -to he specified syntax or functional behavior of APIs and thus may require +to the specified syntax or functional behavior of APIs and thus may require application source code changes. Such changes are well documented in the release notes for each revision of the specification. 
Finally, increments to the major level represent significant structural changes that most likely @@ -247,14 +247,14 @@ polled by ODP _Threads_, or can pass through the _Classifier_ and sorted into Queues that represent individual flows. These queues can then be dispatched to application threads via the _Scheduler_. -Threads, in term can invoke various ODP APIs to manipulate packet contents +Threads, in turn can invoke various ODP APIs to manipulate packet contents prior to disposing of them. For output processing, packets make by directly queued to a PktIO output queue or else they may be handed to the _Traffic Manager_ for programmatic _Quality of Service (QoS)_ processing before winding up being transmitted (TX). Note that output interfaces may operate in _loopback_ mode, in which case packets sent to them are re-routed back to the -input lines for "second pass" processing. For example, an incoming IPSec packet -cannot be properly classified (beyond being IPSec traffic) until it is +input lines for "second pass" processing. For example, an incoming IPsec packet +cannot be properly classified (beyond being IPsec traffic) until it is decrypted. Once decrypted and its actual contents made visible, it can then be classified into its real flow. @@ -576,7 +576,7 @@ values. Calling `odp_init_global()` establishes the ODP API framework and MUST be called before any other ODP API may be called. Note that it is only called once per application. A successful call to `odp_init_global()` returns rc = 0 -and sets the `instance` variable supplied as input to the call to an handle +and sets the `instance` variable supplied as input to the call to a handle representing this unique ODP instance. The `odp_init_t` parameter is used to specify various customizations to the @@ -661,7 +661,7 @@ area and how best to use ODP to achieve these goals. 
=== Portability and Coexistence Because ODP offers a programming _framework_ rather than a programming _environment_, it is designed to be able to work alongside APIs offered by -other frameworks with minimual interference. Therefore when we speak of +other frameworks with minimal interference. Therefore when we speak of portability in an ODP context, we of necessity speak of portability of those portions of the application that make use of ODP APIs. If an application uses non-ODP APIs then those must be taken into consideration as well when @@ -756,10 +756,10 @@ Architecture (ISA), such as x86-64 or AArch64. Binaries cannot directly port between ISAs--that requires a recompilation. Each ODP implementation will identify which ABI definition it supports, if any. -When compiling against an ODP implementation in ABI compabitilty mode, the +When compiling against an ODP implementation in ABI compatibility mode, the resulting binary is automatically binary compatible with all other ODP implementations that share this ABI. For example, for the x86-64 ISA, both -the `odp-linux` and `odp-dpdk` implemtations are a common ABI. +the `odp-linux` and `odp-dpdk` implementations are a common ABI. == Shared memory === Allocating shared memory -- cgit v1.2.3 From 5555878edbda765134fbe50f3181f409963037ea Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Wed, 4 Apr 2018 16:18:11 -0500 Subject: doc: userguide: change pool queue to plain queue Queues fall into types PLAIN and SCHED. Correct terminology from the old POLL queue type to the current PLAIN queue type. 
Signed-off-by: Bill Fischofer Reviewed-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide.adoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/users-guide/users-guide.adoc b/doc/users-guide/users-guide.adoc index d0687f97b..e480c69d5 100644 --- a/doc/users-guide/users-guide.adoc +++ b/doc/users-guide/users-guide.adoc @@ -312,7 +312,7 @@ appropriate type represented by the event. A queue is a message passing channel that holds events. Events can be added to a queue via enqueue operations or removed from a queue via dequeue operations. The endpoints of a queue will vary depending on how it is used. -Queues come in two major types: polled and scheduled, which will be +Queues come in two major types: plain and scheduled, which will be discussed in more detail when the event model is introduced. Queues may also have an associated context, which represents a persistent state for all events that make use of it. These states are what permit threads to perform @@ -964,23 +964,23 @@ Queues are the fundamental event sequencing mechanism provided by ODP and all ODP applications make use of them either explicitly or implicitly. Queues are created via the 'odp_queue_create()' API that returns a handle of type `odp_queue_t` that is used to refer to this queue in all subsequent APIs that -reference it. Queues have one of two ODP-defined _types_, POLL, and SCHED that -determine how they are used. POLL queues directly managed by the ODP +reference it. Queues have one of two ODP-defined _types_, PLAIN, and SCHED that +determine how they are used. PLAIN queues directly managed by the ODP application while SCHED queues make use of the *ODP scheduler* to provide automatic scalable dispatching and synchronization services. 
-.Operations on POLL queues +.Operations on PLAIN queues [source,c] ---- -odp_queue_t poll_q1 = odp_queue_create("poll queue 1", ODP_QUEUE_TYPE_POLL, NULL); -odp_queue_t poll_q2 = odp_queue_create("poll queue 2", ODP_QUEUE_TYPE_POLL, NULL); +odp_queue_t plain_q1 = odp_queue_create("poll queue 1", ODP_QUEUE_TYPE_PLAIN, NULL); +odp_queue_t plain_q2 = odp_queue_create("poll queue 2", ODP_QUEUE_TYPE_PLAIN, NULL); ... -odp_event_t ev = odp_queue_deq(poll_q1); +odp_event_t ev = odp_queue_deq(plain_q1); ...do something -int rc = odp_queue_enq(poll_q2, ev); +int rc = odp_queue_enq(plain_q2, ev); ---- -The key distinction is that dequeueing events from POLL queues is an +The key distinction is that dequeueing events from PLAIN queues is an application responsibility while dequeueing events from SCHED queues is the responsibility of the ODP scheduler. -- cgit v1.2.3 From bd0377ae95427e80f04b0dd5f3a8dcb0b1ab89e3 Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Wed, 4 Apr 2018 16:19:27 -0500 Subject: doc: userguide: shm corrections Clarify the use of the ODP_SHM_SINGLE_VA flag on shm operations. Signed-off-by: Bill Fischofer Reviewed-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide.adoc | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/doc/users-guide/users-guide.adoc b/doc/users-guide/users-guide.adoc index e480c69d5..9c1307b4b 100644 --- a/doc/users-guide/users-guide.adoc +++ b/doc/users-guide/users-guide.adoc @@ -798,16 +798,16 @@ shared_data_t *shared_data; shared_data = odp_shm_addr(shm); ---- -The address returned by `odp_shm_addr()` is valid only in the calling ODP -thread space: odp_shm_t handles can be shared between ODP threads and remain -valid within any threads, whereas the address returned by `odp_shm_addr(shm)` -may differ from ODP threads to ODP threads (for the same 'shm' block), and -should therefore not be shared between ODP threads.
-For instance, it would be correct to send a shm handle using IPC between two -ODP threads and let each of these thread do their own `odp_shm_addr()` to -get the block address. Directly sending the address returned by -`odp_shm_addr()` from one ODP thread to another would however possibly fail -(the address may have no sense in the receiver address space). +The address returned by `odp_shm_addr()` is normally valid only in the calling +ODP thread space: odp_shm_t handles can be shared between ODP threads and +remain valid within any threads, whereas the address returned by +`odp_shm_addr(shm)` may differ from ODP threads to ODP threads (for the same +'shm' block), and should therefore not be shared between ODP threads. For +instance, it would be correct to send a shm handle using IPC between two ODP +threads and let each of these thread do their own `odp_shm_addr()` to get the +block address. Directly sending the address returned by `odp_shm_addr()` from +one ODP thread to another would however possibly fail (the address may make no +sense in the receiver address space). The address returned by `odp_shm_addr()` is nevertheless guaranteed to be aligned according to the alignment requirements provided at block creation @@ -819,7 +819,13 @@ All shared memory blocks are contiguous in any ODP thread addressing space: as provided in the `odp_shm_reserve()` call) is read and writeable and mapping the shared memory block. There is no fragmentation. -=== Memory behaviour +The exception to this rule is if the `odp_shm_t` is created with the +`ODP_SHM_SINGLE_VA` flag. This requests that `odp_shm_addr()` return the same +virtual address for all ODP threads in this instance. Note that there may be a +performance cost or shm size limit associated with providing this function in +some implementations. 
+ +=== Memory behavior By default ODP threads are assumed to behave as cache coherent systems: Any change performed on a shared memory block is guaranteed to eventually become visible to other ODP threads sharing this memory block. -- cgit v1.2.3 From b0b07f403ffc22349ac8dd75f47238bea40891be Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Wed, 4 Apr 2018 16:20:47 -0500 Subject: doc: userguide: change sched type none to parallel Replace old ODP_SCHED_SYNC_NONE with current ODP_SCHED_SYNC_PARALLEL when describing queue scheduling options. Signed-off-by: Bill Fischofer Reviewed-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/users-guide/users-guide.adoc b/doc/users-guide/users-guide.adoc index 9c1307b4b..7914459e6 100644 --- a/doc/users-guide/users-guide.adoc +++ b/doc/users-guide/users-guide.adoc @@ -998,7 +998,7 @@ odp_queue_param_init(&qp); odp_schedule_prio_t prio = ...; odp_schedule_group_t sched_group = ...; qp.sched.prio = prio; -qp.sched.sync = ODP_SCHED_SYNC_[NONE|ATOMIC|ORDERED]; +qp.sched.sync = ODP_SCHED_SYNC_[PARALLEL|ATOMIC|ORDERED]; qp.sched.group = sched_group; qp.lock_count = n; /* Only relevant for ordered queues */ odp_queue_t sched_q1 = odp_queue_create("sched queue 1", ODP_QUEUE_TYPE_SCHED, &qp); @@ -1054,8 +1054,8 @@ Three types of queue scheduler synchronization area supported: Parallel, Atomic, and Ordered. ==== Parallel Queues -SCHED queues that specify a sync mode of ODP_SCHED_SYNC_NONE are unrestricted -in how events are processed. +SCHED queues that specify a sync mode of ODP_SCHED_SYNC_PARALLEL are +unrestricted in how events are processed. 
.Parallel Queue Scheduling image::parallel_queue.svg[align="center"] -- cgit v1.2.3 From 0fa9977e21010f963892f59a75f03e3ba800bc74 Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Thu, 5 Apr 2018 17:04:13 -0500 Subject: doc: userguide: refresh pktio code examples Update pktio code examples to reflect latest API definitions. Signed-off-by: Bill Fischofer Reviewed-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide-pktio.adoc | 72 +++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/doc/users-guide/users-guide-pktio.adoc b/doc/users-guide/users-guide-pktio.adoc index ef5cced66..79d6b1a20 100644 --- a/doc/users-guide/users-guide-pktio.adoc +++ b/doc/users-guide/users-guide-pktio.adoc @@ -38,6 +38,9 @@ PktIO objects begin life by being _opened_ via the call: * errno set. Use odp_pktio_lookup() to obtain a handle to an already open * device. Packet IO parameters provide interface level configuration options. * + * Use odp_pktio_param_init() to initialize packet IO parameters into their + * default values. Default values are also used when 'param' pointer is NULL. + * * Packet input queue configuration must be setup with * odp_pktin_queue_config() before odp_pktio_start() is called. When packet * input mode is ODP_PKTIN_MODE_DISABLED, odp_pktin_queue_config() call is @@ -66,7 +69,7 @@ PktIO objects begin life by being _opened_ via the call: * @param name Packet IO device name * @param pool Default pool from which to allocate storage for packets * received over this interface, must be of type ODP_POOL_PACKET - * @param param Packet IO parameters + * @param param Packet IO parameters. Uses defaults when NULL. 
* * @return Packet IO handle * @retval ODP_PKTIO_INVALID on failure @@ -85,7 +88,7 @@ PktIO objects begin life by being _opened_ via the call: * @see odp_pktio_start(), odp_pktio_stop(), odp_pktio_close() */ odp_pktio_t odp_pktio_open(const char *name, odp_pool_t pool, - const odp_pktio_param_t *param); + const odp_pktio_param_t *param); ----- `odp_pktio_open()` takes three arguments: a *name*, which is an implementation-defined string that identifies the logical interface to be @@ -97,18 +100,20 @@ I/O options to be associated with this PktIO instance. /** * Packet IO parameters * - * In minimum, user must select input and output modes. Use 0 for defaults. - * Initialize entire struct with zero to maintain API compatibility. + * Packet IO interface level parameters. Use odp_pktio_param_init() to + * initialize the structure with default values. */ typedef struct odp_pktio_param_t { /** Packet input mode * * The default value is ODP_PKTIN_MODE_DIRECT. */ odp_pktin_mode_t in_mode; + /** Packet output mode * * The default value is ODP_PKTOUT_MODE_DIRECT. */ odp_pktout_mode_t out_mode; + } odp_pktio_param_t; ----- ODP defines *"loop"* as a reserved name to indicate that this PktIO represents @@ -209,25 +214,48 @@ typedef struct odp_pktin_queue_param_t { * applicable. */ odp_pktio_op_mode_t op_mode; + /** Enable classifier + * + * * 0: Classifier is disabled (default) + * * 1: Classifier is enabled. Use classifier to direct incoming + * packets into pktin event queues. Classifier can be enabled + * only in ODP_PKTIN_MODE_SCHED and ODP_PKTIN_MODE_QUEUE modes. + * Both classifier and hashing cannot be enabled simultaneously + * ('hash_enable' must be 0). */ + odp_bool_t classifier_enable; + /** Enable flow hashing - * 0: Do not hash flows - * 1: Hash flows to input queues */ + * + * * 0: Do not hash flows (default) + * * 1: Enable flow hashing. Use flow hashing to spread incoming + * packets into input queues. Hashing can be enabled in all + * modes. 
Both classifier and hashing cannot be enabled + * simultaneously ('classifier_enable' must be 0). */ odp_bool_t hash_enable; - /** Protocol field selection for hashing. Multiple protocols can be - * selected. */ + /** Protocol field selection for hashing + * + * Multiple protocols can be selected. Ignored when 'hash_enable' is + * zero. The default value is all bits zero. */ odp_pktin_hash_proto_t hash_proto; - /** Number of input queues to be created. More than one input queue - * require input hashing or classifier setup. Hash_proto is ignored - * when hash_enable is zero or num_queues is one. This value must be - * between 1 and interface capability. Queue type is defined by the - * input mode. The default value is 1. */ + /** Number of input queues to be created + * + * When classifier is enabled in odp_pktin_queue_config() this + * value is ignored, otherwise at least one queue is required. + * More than one input queues require flow hashing configured. + * The maximum value is defined by pktio capability 'max_input_queues'. + * Queue type is defined by the input mode. The default value is 1. */ unsigned num_queues; - /** Queue parameters for creating input queues in ODP_PKTIN_MODE_QUEUE + /** Queue parameters + * + * These are used for input queue creation in ODP_PKTIN_MODE_QUEUE * or ODP_PKTIN_MODE_SCHED modes. Scheduler parameters are considered - * only in ODP_PKTIN_MODE_SCHED mode. */ + * only in ODP_PKTIN_MODE_SCHED mode. Default values are defined in + * odp_queue_param_t documentation. + * When classifier is enabled in odp_pktin_queue_config() this + * value is ignored. */ odp_queue_param_t queue_param; } odp_pktin_queue_param_t; @@ -347,8 +375,10 @@ Once started, the PktIn queue handles are used as arguments to /** * Receive packets directly from an interface input queue * - * Receives up to 'num' packets from the pktio interface input queue. 
When - * input queue parameter 'op_mode' has been set to ODP_PKTIO_OP_MT_UNSAFE, + * Receives up to 'num' packets from the pktio interface input queue. Returns + * the number of packets received. + * + * When input queue parameter 'op_mode' has been set to ODP_PKTIO_OP_MT_UNSAFE, * the operation is optimized for single thread operation per queue and the same * queue must not be accessed simultaneously from multiple threads. * @@ -478,6 +508,11 @@ Once the PktIO has been configured for output and started via * is less than 'num', the remaining packets at the end of packets[] array * are not consumed, and the caller has to take care of them. * + * Entire packet data is sent out (odp_packet_len() bytes of data, starting from + * odp_packet_data()). All other packet metadata is ignored unless otherwise + * specified e.g. for protocol offload purposes. Link protocol specific frame + * checksum and padding are added to frames before transmission. + * * @param queue Packet output queue handle for sending packets * @param packets[] Array of packets to send * @param num Number of packets to send @@ -485,7 +520,8 @@ Once the PktIO has been configured for output and started via * @return Number of packets sent * @retval <0 on failure */ -int odp_pktout_send(odp_pktout_queue_t queue, odp_packet_t packets[], int num); +int odp_pktout_send(odp_pktout_queue_t queue, const odp_packet_t packets[], + int num); ----- Note that the argument to this call specifies the PktOut queue that the packet is to be added to rather than the PktIO itself. This permits multiple -- cgit v1.2.3 From 183ce9babe20e161caef6b55eccfd043b334f748 Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Sun, 8 Apr 2018 16:35:21 -0500 Subject: doc: userguide: add packet checksum and parsing info Update packet documentation to include information on packet parsing and checksum override handling.
Signed-off-by: Bill Fischofer Reviewed-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide-packet.adoc | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/doc/users-guide/users-guide-packet.adoc b/doc/users-guide/users-guide-packet.adoc index 6d9e98a5a..c38c5c866 100644 --- a/doc/users-guide/users-guide-packet.adoc +++ b/doc/users-guide/users-guide-packet.adoc @@ -499,3 +499,38 @@ Note also that a packet may not reference itself, nor may circular reference relationships be formed, _e.g.,_ packet A is used as a header for a reference to packet B and B is used as a header for a reference to packet A. Results are undefined if such circular references are attempted. + +=== Packet Parsing, Checksum Processing, and Overrides +Packet parsing is normally triggered automatically as part of packet RX +processing. However, the application can trigger parsing explicitly via the +API: +[source,c] +----- +int odp_packet_parse(odp_packet_t pkt, uint32_t offset, + const odp_packet_parse_param_t *param); +----- +This is typically done following packet decapsulation or other preprocessing +that would prevent RX parsing from "seeing" the relevant portion of the +packet. The `odp_packet_parse_param_t` struct is passed to control the +depth of the desired parse, as well as whether checksum validation should be +performed as part of the parse, and if so which checksums require this +processing. + +Packets containing Layer 3 (IPv4) and Layer 4 (TCP, UDP, SCTP) checksums +can have these validated (on RX) and generated (on TX) automatically. +This is normally controlled by the settings on the PktIOs that +receive/transmit them, however they can also be controlled on an +individual packet basis. + +Packets have associated `odp_packet_chksum_status_t` metadata that indicates +the state of any checksums contained in that packet.
These can be queried via +the APIs `odp_packet_l3_chksum_status()` and `odp_packet_l4_chksum_status()`, +respectively. Checksums can either be known good, known bad, or unknown, where +unknown means that checksum validation processing has not occurred or the +attempt to validate the checksum failed. + +Similarly, the `odp_packet_l3_chksum_insert()` and +`odp_packet_l4_chksum_insert()` APIs may be used to override default checksum +processing for individual packets prior to transmission. If no explicit +checksum processing is specified for a packet, then any checksum generation +is controlled by the PktIO configuration of the interface used to transmit it. -- cgit v1.2.3 From cabee48a54a68f333569ee1904bc04d53da7c9c6 Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Sun, 8 Apr 2018 16:36:23 -0500 Subject: doc: userguide: add pktio capability and config info Update PktIO section of Users Guide to cover pktio capability and input/output/parser configuration features. Signed-off-by: Bill Fischofer Reviewed-by: Josep Puigdemont Signed-off-by: Maxim Uvarov --- doc/users-guide/users-guide-pktio.adoc | 298 +++++++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) diff --git a/doc/users-guide/users-guide-pktio.adoc b/doc/users-guide/users-guide-pktio.adoc index 79d6b1a20..73d6e0485 100644 --- a/doc/users-guide/users-guide-pktio.adoc +++ b/doc/users-guide/users-guide-pktio.adoc @@ -132,6 +132,304 @@ use for packet allocation if not overridden by the classifier due to a specific or default Class-of-Service (CoS) match on the packet. The *param* struct, in turn, specifies the input and output *modes* of the PktIO. +=== PktIO Capabilities and PktIn/PktOut Configuration +Associated with each PktIO is a set of _capabilities_ that provide information +such as the maximum number of input/output queues it supports, its configuration +options, and the operations it supports.
These are aggregated into +the struct: +[source,c] +----- +/** + * Packet IO capabilities + */ +typedef struct odp_pktio_capability_t { + /** Maximum number of input queues */ + unsigned max_input_queues; + + /** Maximum number of output queues */ + unsigned max_output_queues; + + /** Supported pktio configuration options */ + odp_pktio_config_t config; + + /** Supported set operations + * + * A bit set to one indicates a supported operation. All other bits are + * set to zero. */ + odp_pktio_set_op_t set_op; + + /** @deprecated Use enable_loop inside odp_pktin_config_t */ + odp_bool_t ODP_DEPRECATE(loop_supported); +} odp_pktio_capability_t; +----- +That is returned by the `odp_pktio_capability()` API. This returns the +limits and default values for these capabilities which can in turn be set +via the `odp_pktio_config()` API, which takes as input the struct: +[source,c] +----- +/** + * Packet IO configuration options + * + * Packet IO interface level configuration options. Use odp_pktio_capability() + * to see which options are supported by the implementation. + * Use odp_pktio_config_init() to initialize the structure with default values. + */ +typedef struct odp_pktio_config_t { + /** Packet input configuration options bit field + * + * Default value for all bits is zero. */ + odp_pktin_config_opt_t pktin; + + /** Packet output configuration options bit field + * + * Default value for all bits is zero. */ + odp_pktout_config_opt_t pktout; + + /** Packet input parser configuration */ + odp_pktio_parser_config_t parser; + + /** Interface loopback mode + * + * In this mode the packets sent out through the interface is + * looped back to input of the same interface. Supporting loopback mode + * is an optional feature per interface and should be queried in the + * interface capability before enabling the same. */ + odp_bool_t enable_loop; + + /** Inbound IPSEC inlined with packet input + * + * Enable/disable inline inbound IPSEC operation. 
When enabled packet + * input directs all IPSEC packets automatically to IPSEC inbound + * processing. IPSEC configuration is done through the IPSEC API. + * Packets that are not (recognized as) IPSEC are processed + * according to the packet input configuration. + * + * 0: Disable inbound IPSEC inline operation (default) + * 1: Enable inbound IPSEC inline operation + * + * @see odp_ipsec_config(), odp_ipsec_sa_create() + */ + odp_bool_t inbound_ipsec; + + /** Outbound IPSEC inlined with packet output + * + * Enable/disable inline outbound IPSEC operation. When enabled IPSEC + * outbound processing can send outgoing IPSEC packets directly + * to the pktio interface for output. IPSEC configuration is done + * through the IPSEC API. + * + * Outbound IPSEC inline operation cannot be combined with traffic + * manager (ODP_PKTOUT_MODE_TM). + * + * 0: Disable outbound IPSEC inline operation (default) + * 1: Enable outbound IPSEC inline operation + * + * @see odp_ipsec_config(), odp_ipsec_sa_create() + */ + odp_bool_t outbound_ipsec; + +} odp_pktio_config_t; +----- +The IPsec related configurations will be discussed later in the IPsec chapter, +but for now we'll focus on the PktIn/PktOut configuration and the +parser configuration. + +==== PktIn Configuration +For PktIOs that will receive packets, the `odp_pktin_config_opt_t` struct +controls RX processing to be performed on these packets as they are received: +[source,c] +----- +/** + * Packet input configuration options bit field + * + * Packet input configuration options listed in a bit field structure. Packet + * input timestamping may be enabled for all packets or at least for those that + * belong to time synchronization protocol (PTP). + * + * Packet input checksum checking may be enabled or disabled. When it is + * enabled, implementation will attempt to verify checksum correctness on + * incoming packets and depending on drop configuration either deliver erroneous + * packets with appropriate flags set (e.g. 
odp_packet_has_l3_error(), + * odp_packet_l3_chksum_status()) or drop those. When packet dropping is + * enabled, application will never receive a packet with the specified error + * and may avoid to check the error flag. + * + * If checksum checking is enabled, IPv4 header checksum checking is always + * done for packets that do not have IP options and L4 checksum checking + * is done for unfragmented packets that do not have IPv4 options or IPv6 + * extension headers. In other cases checksum checking may or may not + * be done. For example, L4 checksum of fragmented packets is typically + * not checked. + * + * IPv4 checksum checking may be enabled only when parsing level is + * ODP_PROTO_LAYER_L3 or higher. Similarly, L4 level checksum checking + * may be enabled only with parsing level ODP_PROTO_LAYER_L4 or higher. + * + * Whether checksum checking was done and whether a checksum was correct + * can be queried for each received packet with odp_packet_l3_chksum_status() + * and odp_packet_l4_chksum_status(). 
+ */ +typedef union odp_pktin_config_opt_t { + /** Option flags */ + struct { + /** Timestamp all packets on packet input */ + uint64_t ts_all : 1; + + /** Timestamp (at least) IEEE1588 / PTP packets + * on packet input */ + uint64_t ts_ptp : 1; + + /** Check IPv4 header checksum on packet input */ + uint64_t ipv4_chksum : 1; + + /** Check UDP checksum on packet input */ + uint64_t udp_chksum : 1; + + /** Check TCP checksum on packet input */ + uint64_t tcp_chksum : 1; + + /** Check SCTP checksum on packet input */ + uint64_t sctp_chksum : 1; + + /** Drop packets with an IPv4 error on packet input */ + uint64_t drop_ipv4_err : 1; + + /** Drop packets with an IPv6 error on packet input */ + uint64_t drop_ipv6_err : 1; + + /** Drop packets with a UDP error on packet input */ + uint64_t drop_udp_err : 1; + + /** Drop packets with a TCP error on packet input */ + uint64_t drop_tcp_err : 1; + + /** Drop packets with a SCTP error on packet input */ + uint64_t drop_sctp_err : 1; + + } bit; + + /** All bits of the bit field structure + * + * This field can be used to set/clear all flags, or bitwise + * operations over the entire structure. */ + uint64_t all_bits; +} odp_pktin_config_opt_t; +----- +These are used to control packet timestamping as well as default packet checksum +verification processing. + +==== PktIO Parsing Configuration +For RX processing, packets may also be parsed automatically as part of +receipt as controlled by the `odp_pktio_parser_config_t` struct: +[source,c] +----- +/** + * Parser configuration + */ +typedef struct odp_pktio_parser_config_t { + /** Protocol parsing level in packet input + * + * Application requires that protocol headers in a packet are checked + * up to this layer during packet input. Use ODP_PROTO_LAYER_ALL for + * all layers. Packet metadata for this and all preceding layers are + * set. In addition, offset (and pointer) to the next layer is set. + * Other layer/protocol specific metadata have undefined values. 
+ * + * The default value is ODP_PROTO_LAYER_ALL. */ + odp_proto_layer_t layer; + +} odp_pktio_parser_config_t; +----- +Note that parsing is automatically done whenever classification is enabled +for an RX interface (see below). + +==== PktOut Configuration +For PktIOs that will transmit packets, the `odp_pktout_config_opt_t` struct +controls TX processing to be performed on these packets as they are +transmitted: +[source,c] +----- +/** + * Packet output configuration options bit field + * + * Packet output configuration options listed in a bit field structure. Packet + * output checksum insertion may be enabled or disabled (e.g. ipv4_chksum_ena): + * + * 0: Disable checksum insertion. Application will not request checksum + * insertion for any packet. This is the default value for xxx_chksum_ena + * bits. + * 1: Enable checksum insertion. Application will request checksum insertion + * for some packets. + * + * When checksum insertion is enabled, application may use configuration options + * to set the default behaviour on packet output (e.g. ipv4_chksum): + * + * 0: Do not insert checksum by default. This is the default value for + * xxx_chksum bits. + * 1: Calculate and insert checksum by default. + * + * These defaults may be overridden on per packet basis using e.g. + * odp_packet_l4_chksum_insert(). + * + * For correct operation, packet metadata must provide valid offsets for the + * appropriate protocols. For example, UDP checksum calculation needs both L3 + * and L4 offsets (to access IP and UDP headers). When application + * (e.g. a switch) does not modify L3/L4 data and thus checksum does not need + * to be updated, checksum insertion should be disabled for optimal performance. + * + * Packet flags (odp_packet_has_*()) are ignored for the purpose of checksum + * insertion in packet output. + * + * UDP, TCP and SCTP checksum insertion must not be requested for IP fragments. 
+ * Use checksum override function (odp_packet_l4_chksum_insert()) to disable + * checksumming when sending a fragment through a packet IO interface that has + * the relevant L4 checksum insertion enabled. + * + * Result of checksum insertion at packet output is undefined if the protocol + * headers required for checksum calculation are not well formed. Packet must + * contain at least as many data bytes after L3/L4 offsets as the headers + * indicate. Other data bytes of the packet are ignored for the checksum + * insertion. + */ +typedef union odp_pktout_config_opt_t { + /** Option flags for packet output */ + struct { + /** Enable IPv4 header checksum insertion. */ + uint64_t ipv4_chksum_ena : 1; + + /** Enable UDP checksum insertion */ + uint64_t udp_chksum_ena : 1; + + /** Enable TCP checksum insertion */ + uint64_t tcp_chksum_ena : 1; + + /** Enable SCTP checksum insertion */ + uint64_t sctp_chksum_ena : 1; + + /** Insert IPv4 header checksum by default */ + uint64_t ipv4_chksum : 1; + + /** Insert UDP checksum on packet by default */ + uint64_t udp_chksum : 1; + + /** Insert TCP checksum on packet by default */ + uint64_t tcp_chksum : 1; + + /** Insert SCTP checksum on packet by default */ + uint64_t sctp_chksum : 1; + + } bit; + + /** All bits of the bit field structure + * + * This field can be used to set/clear all flags, or bitwise + * operations over the entire structure. */ + uint64_t all_bits; +} odp_pktout_config_opt_t; +----- +These are used to control default checksum generation processing for +transmitted packets. + === PktIO Input and Output Modes PktIO objects support four different Input and Output modes, that may be specified independently at *open* time. -- cgit v1.2.3 From c9d31f30ba4fd7818b4ba2ffd7bbaea74761a2d9 Mon Sep 17 00:00:00 2001 From: Juha-Matti Tilli Date: Mon, 9 Apr 2018 19:59:46 +0300 Subject: linux-gen: netmap: ring configuration for VALE Configure ring for VALE. 
On my test laptop, this may increase odp_l2fwd performance from 1.9 MPPS to 2.3 MPPS, so it gives over 20% more performance. Signed-off-by: Juha-Matti Tilli Reviewed-and-tested-by: Matias Elo Signed-off-by: Maxim Uvarov --- config/odp-linux-generic.conf | 9 ++++ platform/linux-generic/include/odp_packet_netmap.h | 7 +++ platform/linux-generic/pktio/netmap.c | 63 ++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/config/odp-linux-generic.conf b/config/odp-linux-generic.conf index 0034c64ba..f5f21b45f 100644 --- a/config/odp-linux-generic.conf +++ b/config/odp-linux-generic.conf @@ -31,6 +31,15 @@ pktio_dpdk: { } } +# netmap pktio options +pktio_netmap: { + # Interface specific options + virt: { + nr_rx_slots = 0 + nr_tx_slots = 0 + } +} + queue_basic: { # Maximum queue size. Value must be a power of two. max_queue_size = 8192 diff --git a/platform/linux-generic/include/odp_packet_netmap.h b/platform/linux-generic/include/odp_packet_netmap.h index bb81f5f85..bd3efdcf5 100644 --- a/platform/linux-generic/include/odp_packet_netmap.h +++ b/platform/linux-generic/include/odp_packet_netmap.h @@ -19,6 +19,12 @@ #define NM_MAX_DESC 64 +/** netmap runtime configuration options */ +typedef struct { + int nr_rx_slots; + int nr_tx_slots; +} netmap_opt_t; + /** Ring for mapping pktin/pktout queues to netmap descriptors */ struct netmap_ring_t { unsigned first; /**< Index of first netmap descriptor */ @@ -61,6 +67,7 @@ typedef struct { netmap_ring_t rx_desc_ring[PKTIO_MAX_QUEUES]; /** mapping of pktout queues to netmap tx descriptors */ netmap_ring_t tx_desc_ring[PKTIO_MAX_QUEUES]; + netmap_opt_t opt; /**< options */ } pkt_netmap_t; #endif diff --git a/platform/linux-generic/pktio/netmap.c b/platform/linux-generic/pktio/netmap.c index 4276d4bc1..835d81ebb 100644 --- a/platform/linux-generic/pktio/netmap.c +++ b/platform/linux-generic/pktio/netmap.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include @@ -46,6 +48,49 @@ static int disable_pktio; /** 
!0 this pktio disabled, 0 enabled */ static int netmap_stats_reset(pktio_entry_t *pktio_entry); +static int lookup_opt(const char *opt_name, const char *drv_name, int *val) +{ + const char *base = "pktio_netmap"; + int ret; + + ret = _odp_libconfig_lookup_ext_int(base, drv_name, opt_name, val); + if (ret == 0) + ODP_ERR("Unable to find netmap configuration option: %s\n", + opt_name); + + return ret; +} + +static int init_options(pktio_entry_t *pktio_entry) +{ + netmap_opt_t *opt = &pktio_entry->s.pkt_nm.opt; + + if (!lookup_opt("nr_rx_slots", "virt", + &opt->nr_rx_slots)) + return -1; + if (opt->nr_rx_slots < 0 || + opt->nr_rx_slots > 4096) { + ODP_ERR("Invalid number of RX slots\n"); + return -1; + } + + if (!lookup_opt("nr_tx_slots", "virt", + &opt->nr_tx_slots)) + return -1; + if (opt->nr_tx_slots < 0 || + opt->nr_tx_slots > 4096) { + ODP_ERR("Invalid number of TX slots\n"); + return -1; + } + + ODP_PRINT("netmap interface: %s\n", + pktio_entry->s.pkt_nm.if_name); + ODP_PRINT(" num_rx_desc: %d\n", opt->nr_rx_slots); + ODP_PRINT(" num_tx_desc: %d\n", opt->nr_tx_slots); + + return 0; +} + static int netmap_do_ioctl(pktio_entry_t *pktio_entry, unsigned long cmd, int subcmd) { @@ -363,6 +408,12 @@ static int netmap_open(odp_pktio_t id ODP_UNUSED, pktio_entry_t *pktio_entry, netdev); snprintf(pkt_nm->if_name, sizeof(pkt_nm->if_name), "%s", netdev); + /* Initialize runtime options */ + if (init_options(pktio_entry)) { + ODP_ERR("Initializing runtime options failed\n"); + return -1; + } + /* Dummy open here to check if netmap module is available and to read * capability info. 
*/ desc = nm_open(pkt_nm->nm_name, NULL, 0, NULL); @@ -526,6 +577,12 @@ static int netmap_start(pktio_entry_t *pktio_entry) base_desc.self = &base_desc; base_desc.mem = NULL; + if (pktio_entry->s.pkt_nm.is_virtual) { + base_desc.req.nr_rx_slots = + pktio_entry->s.pkt_nm.opt.nr_rx_slots; + base_desc.req.nr_tx_slots = + pktio_entry->s.pkt_nm.opt.nr_tx_slots; + } base_desc.req.nr_ringid = 0; if ((base_desc.req.nr_flags & NR_REG_MASK) == NR_REG_ALL_NIC || (base_desc.req.nr_flags & NR_REG_MASK) == NR_REG_ONE_NIC) { @@ -539,6 +596,8 @@ static int netmap_start(pktio_entry_t *pktio_entry) /* Only the first rx descriptor does mmap */ desc_ring = pkt_nm->rx_desc_ring; flags = NM_OPEN_IFNAME | NETMAP_NO_TX_POLL; + if (pktio_entry->s.pkt_nm.is_virtual) + flags |= NM_OPEN_RING_CFG; desc_ring[0].s.desc[0] = nm_open(pkt_nm->nm_name, NULL, flags, &base_desc); if (desc_ring[0].s.desc[0] == NULL) { @@ -547,6 +606,8 @@ static int netmap_start(pktio_entry_t *pktio_entry) } /* Open rest of the rx descriptors (one per netmap ring) */ flags = NM_OPEN_IFNAME | NETMAP_NO_TX_POLL | NM_OPEN_NO_MMAP; + if (pktio_entry->s.pkt_nm.is_virtual) + flags |= NM_OPEN_RING_CFG; for (i = 0; i < pktio_entry->s.num_in_queue; i++) { for (j = desc_ring[i].s.first; j <= desc_ring[i].s.last; j++) { if (i == 0 && j == 0) { /* First already opened */ @@ -568,6 +629,8 @@ static int netmap_start(pktio_entry_t *pktio_entry) /* Open tx descriptors */ desc_ring = pkt_nm->tx_desc_ring; flags = NM_OPEN_IFNAME | NM_OPEN_NO_MMAP; + if (pktio_entry->s.pkt_nm.is_virtual) + flags |= NM_OPEN_RING_CFG; if ((base_desc.req.nr_flags & NR_REG_MASK) == NR_REG_ALL_NIC) { base_desc.req.nr_flags &= ~NR_REG_ALL_NIC; -- cgit v1.2.3 From 1e925d0ccc7340575ddcfa07ab7bd372d751e006 Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Thu, 12 Apr 2018 13:53:48 +0200 Subject: linux-gen: shm: fill data used in do_map before the call do_map() checks the block's external_fd field to find out whether the file descriptor provided was allocated 
externally or not, before closing it in case an error occurred. However, this field was only updated _after_ calling the do_map() function. Fix this. Signed-off-by: Josep Puigdemont Signed-off-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_ishm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c index 5f5e5eefb..182556783 100644 --- a/platform/linux-generic/odp_ishm.c +++ b/platform/linux-generic/odp_ishm.c @@ -839,6 +839,7 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd, return -1; } len = statbuf.st_size; + new_block->external_fd = 1; /* note that the huge page flag is meningless here as huge * page is determined by the provided file descriptor: */ addr = do_map(new_index, len, align, flags, EXTERNAL, &fd); @@ -849,7 +850,6 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd, return -1; } new_block->huge = EXTERNAL; - new_block->external_fd = 1; } else { new_block->external_fd = 0; } -- cgit v1.2.3 From f96a21992e13cf401a3494f2caeec9d42f56f0d9 Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Thu, 12 Apr 2018 13:49:00 +0200 Subject: linux-gen: shm: do not close provided file descriptors If we provide a file descriptor to _odp_ishm_reserve, this function should not close them. It's the responsibility of the caller to decide what to do with the file descriptor if the return value indicates an error. 
Signed-off-by: Josep Puigdemont Signed-off-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_ishm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c index 182556783..282c39e81 100644 --- a/platform/linux-generic/odp_ishm.c +++ b/platform/linux-generic/odp_ishm.c @@ -831,7 +831,6 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd, /* If a file descriptor is provided, get the real size and map: */ if (fd >= 0) { if (fstat(fd, &statbuf) < 0) { - close(fd); odp_spinlock_unlock(&ishm_tbl->lock); ODP_ERR("_ishm_reserve failed (fstat failed: %s).\n", strerror(errno)); @@ -844,7 +843,6 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd, * page is determined by the provided file descriptor: */ addr = do_map(new_index, len, align, flags, EXTERNAL, &fd); if (addr == NULL) { - close(fd); odp_spinlock_unlock(&ishm_tbl->lock); ODP_ERR("_ishm_reserve failed.\n"); return -1; -- cgit v1.2.3 From b1100fdba29d8f514236dbcfd6f17e7941c0256a Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Thu, 12 Apr 2018 14:07:30 +0200 Subject: linux-gen: shm: be consistent with rest of the code We should be consistent and use new_block as in the rest of the function, instead of accessing the array of blocks using an index. 
Signed-off-by: Josep Puigdemont Signed-off-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_ishm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c index 282c39e81..a1a904248 100644 --- a/platform/linux-generic/odp_ishm.c +++ b/platform/linux-generic/odp_ishm.c @@ -566,7 +566,7 @@ static void *do_map(int block_index, uint64_t len, uint32_t align, } return NULL; } - ishm_tbl->block[block_index].fragment = fragment; + new_block->fragment = fragment; } /* try to mmap: */ -- cgit v1.2.3 From 48f8503baf847315dda6609abc01f5befe6e0284 Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Mon, 9 Apr 2018 13:45:25 +0200 Subject: fdserver: change session ID after fork This is to avoid the fdserver process from handling signals sent to the process group. This patch partly fixes: https://bugs.linaro.org/show_bug.cgi?id=3690 Signed-off-by: Josep Puigdemont Suggested-by: Janne Peltonen Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_fdserver.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/platform/linux-generic/odp_fdserver.c b/platform/linux-generic/odp_fdserver.c index a06fb5081..9562ea0c9 100644 --- a/platform/linux-generic/odp_fdserver.c +++ b/platform/linux-generic/odp_fdserver.c @@ -654,6 +654,12 @@ int _odp_fdserver_init_global(void) /* orphans being "adopted" by the init process... 
*/ prctl(PR_SET_PDEATHSIG, SIGTERM); + res = setsid(); + if (res == -1) { + ODP_ERR("Could not setsid()"); + exit(1); + } + /* allocate the space for the file descriptor<->key table: */ fd_table = malloc(FDSERVER_MAX_ENTRIES * sizeof(fdentry_t)); if (!fd_table) { -- cgit v1.2.3 From 813cd2df67ae0591b0f792eff82854c5c9c642ca Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Mon, 9 Apr 2018 09:49:56 -0500 Subject: example: l2fwd: update readme regarding ordered queue usage Update the README file to clarify the effect of using ordered queues with direct I/O or output interfaces supporting multiple queues. Signed-off-by: Bill Fischofer Reviewed-by: Janne Peltonen Signed-off-by: Maxim Uvarov --- example/l2fwd/README | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/example/l2fwd/README b/example/l2fwd/README index f6fdc01ca..091d046ea 100644 --- a/example/l2fwd/README +++ b/example/l2fwd/README @@ -5,3 +5,10 @@ Source code and Makefiles placed under test/performance/ directory. This L2 forwarding application can be used as example reference as well as performance test for different odp modes (direct, queue or scheduler with parallel, atomic or ordered queues). + +Note that this example is tuned for performance. As a result, when using +scheduled mode with direct packet I/O output or queued output with multiple +output queues, packet order is not guaranteed. To achieve guaranteed order, +use a single worker thread or output interfaces with single output +queues. Other examples of scalable processing using ordered queues that +preserve order can be seen in the odp_pktio_ordered performance test. -- cgit v1.2.3 From a5340c53957b3174c89d03ecf2c61e9ba98ac57c Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Thu, 12 Apr 2018 13:12:12 +0300 Subject: linux-gen: dpdk: allocate huge page memory for all numa nodes Allocate DPDK memory for all numa nodes in the system. 
Fixes bug: https://bugs.linaro.org/show_bug.cgi?id=3657 Signed-off-by: Matias Elo Reviewed-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/pktio/dpdk.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/pktio/dpdk.c index 7bccab873..0550416b0 100644 --- a/platform/linux-generic/pktio/dpdk.c +++ b/platform/linux-generic/pktio/dpdk.c @@ -49,6 +49,13 @@ #include #include +/* NUMA is not supported on all platforms */ +#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES +#include +#else +#define numa_num_configured_nodes() 1 +#endif + #if RTE_VERSION < RTE_VERSION_NUM(17, 5, 0, 0) #define rte_log_set_global_level rte_set_log_level #endif @@ -1086,6 +1093,7 @@ static int dpdk_pktio_init(void) int32_t masklen; int mem_str_len; int cmd_len; + int numa_nodes; cpu_set_t original_cpuset; struct rte_config *cfg; @@ -1120,21 +1128,29 @@ static int dpdk_pktio_init(void) return -1; } - mem_str_len = snprintf(NULL, 0, "%d", DPDK_MEMORY_MB); + mem_str_len = snprintf(NULL, 0, "%d,", DPDK_MEMORY_MB); + numa_nodes = numa_num_configured_nodes(); + + char mem_str[mem_str_len * numa_nodes]; + + for (i = 0; i < numa_nodes; i++) + sprintf(&mem_str[i * mem_str_len], "%d,", DPDK_MEMORY_MB); + mem_str[mem_str_len * numa_nodes - 1] = '\0'; cmdline = getenv("ODP_PKTIO_DPDK_PARAMS"); if (cmdline == NULL) cmdline = ""; /* masklen includes the terminating null as well */ - cmd_len = strlen("odpdpdk -c -m ") + masklen + mem_str_len + - strlen(cmdline) + strlen(" "); + cmd_len = strlen("odpdpdk -c --socket-mem ") + masklen + + strlen(mem_str) + strlen(cmdline) + strlen(" "); char full_cmd[cmd_len]; /* first argument is facility log, simply bind it to odpdpdk for now.*/ - cmd_len = snprintf(full_cmd, cmd_len, "odpdpdk -c %s -m %d %s", - mask_str, DPDK_MEMORY_MB, cmdline); + cmd_len = snprintf(full_cmd, cmd_len, + "odpdpdk -c %s --socket-mem %s %s", 
mask_str, + mem_str, cmdline); for (i = 0, dpdk_argc = 1; i < cmd_len; ++i) { if (isspace(full_cmd[i])) -- cgit v1.2.3 From a2cad95118b79f204bc13914acf43a8fc1dd2682 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Mon, 16 Apr 2018 21:50:11 +0300 Subject: linux-gen: fix clang on aarch64 build No code change. Just move array definition several lines up to make clang happy to generate valid code. Fixes: https://bugs.linaro.org/show_bug.cgi?id=3611 Suggested-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_pool.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c index 998fc649e..955e14f9b 100644 --- a/platform/linux-generic/odp_pool.c +++ b/platform/linux-generic/odp_pool.c @@ -718,12 +718,13 @@ int buffer_alloc_multi(pool_t *pool, odp_buffer_hdr_t *buf_hdr[], int max_num) buf_hdr[i] = buf_hdr_from_index(pool, cache->buf_index[j]); } + /* Declare variable here to fix clang compilation bug */ + uint32_t data[burst]; + /* If needed, get more from the global pool */ if (odp_unlikely(num_deq)) { - /* Temporary copy needed since odp_buffer_t is uintptr_t - * and not uint32_t. */ - uint32_t data[burst]; - + /* Temporary copy to data[] needed since odp_buffer_t is + * uintptr_t and not uint32_t. 
*/ ring = &pool->ring->hdr; mask = pool->ring_mask; burst = ring_deq_multi(ring, mask, data, burst); -- cgit v1.2.3 From b7ee13d765e5d8da1ef68f0c0777cb5e40fffbf7 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Mon, 2 Apr 2018 13:57:40 +0300 Subject: shippable: reenable clang with disable abi compat options Signed-off-by: Maxim Uvarov Reviewed-by: Bill Fischofer Reviewed-by: Dmitry Eremin-Solenikov Reviewed-by: Petri Savolainen --- .shippable.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.shippable.yml b/.shippable.yml index 6ef5cab7c..53e094bcb 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -15,11 +15,6 @@ env: # - CROSS_ARCH="powerpc" # - CROSS_ARCH="i386" -matrix: - exclude: - - compiler: clang - env: CONF="--disable-abi-compat --disable-test-perf --disable-test-perf-proc" - build: pre_ci: # use Dockerfile to install additional CI dependencies -- cgit v1.2.3 From 52ea8d51d522d7013f746748bf84bfc2e9bc042c Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Wed, 11 Apr 2018 10:35:22 +0200 Subject: linux-gen: fdserver: mask signals we don't need Make fdserver block all signals except those we are interested in, or those that can't be masked, and set default handlers for those left, thus preventing fdserver from executing any signal handlers that the application may have installed prior to forking this process. 
Signed-off-by: Josep Puigdemont Reviewed-by: Janne Peltonen Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_fdserver.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/platform/linux-generic/odp_fdserver.c b/platform/linux-generic/odp_fdserver.c index 9562ea0c9..c88c71cdc 100644 --- a/platform/linux-generic/odp_fdserver.c +++ b/platform/linux-generic/odp_fdserver.c @@ -647,6 +647,34 @@ int _odp_fdserver_init_global(void) } if (server_pid == 0) { /*child */ + sigset_t sigset; + struct sigaction action; + + sigfillset(&sigset); + /* undefined if these are ignored, as per POSIX */ + sigdelset(&sigset, SIGFPE); + sigdelset(&sigset, SIGILL); + sigdelset(&sigset, SIGSEGV); + /* can not be masked */ + sigdelset(&sigset, SIGKILL); + sigdelset(&sigset, SIGSTOP); + /* these we want to handle */ + sigdelset(&sigset, SIGTERM); + if (sigprocmask(SIG_SETMASK, &sigset, NULL) == -1) { + ODP_ERR("Could not set signal mask"); + exit(1); + } + + /* set default handlers for those signals we can handle */ + memset(&action, 0, sizeof(action)); + action.sa_handler = SIG_DFL; + sigemptyset(&action.sa_mask); + action.sa_flags = 0; + sigaction(SIGFPE, &action, NULL); + sigaction(SIGILL, &action, NULL); + sigaction(SIGSEGV, &action, NULL); + sigaction(SIGTERM, &action, NULL); + /* TODO: pin the server on appropriate service cpu mask */ /* when (if) we can agree on the usage of service mask */ -- cgit v1.2.3 From 2e96afe0b7c4e7e41fb03bcda605ab704f538ca1 Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Fri, 13 Apr 2018 13:50:21 +0200 Subject: linux-gen: shm: check return value when registering fds fdserver functions such as that for registering a new fd may fail. In this case we need to check the return value and do a cleanup of any allocated blocks and fragments. If it fails on deregistering, just return the error code. 
Signed-off-by: Josep Puigdemont Reviewed-by: Bill Fischofer Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_ishm.c | 112 ++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/platform/linux-generic/odp_ishm.c b/platform/linux-generic/odp_ishm.c index a1a904248..babf51b91 100644 --- a/platform/linux-generic/odp_ishm.c +++ b/platform/linux-generic/odp_ishm.c @@ -767,6 +767,65 @@ static void procsync(void) ishm_proctable->nb_entries = last; } +/* + * Free a block as described in block_free(), but + * considering whether to close the file descriptor or not, and + * whether to deregister from the fdserver. + */ +static int block_free_internal(int block_index, int close_fd, int deregister) +{ + int proc_index; + ishm_block_t *block; /* entry in the main block table*/ + int last; + int ret = 0; + + if ((block_index < 0) || + (block_index >= ISHM_MAX_NB_BLOCKS) || + (ishm_tbl->block[block_index].len == 0)) { + ODP_ERR("Request to free an invalid block\n"); + return -1; + } + + block = &ishm_tbl->block[block_index]; + + proc_index = procfind_block(block_index); + if (proc_index >= 0) { + /* remove the mapping and possible fragment */ + do_unmap(ishm_proctable->entry[proc_index].start, + block->len, + ishm_proctable->entry[proc_index].flags, + block_index); + + /* close the related fd */ + if (close_fd) + close(ishm_proctable->entry[proc_index].fd); + + /* remove entry from process local table: */ + last = ishm_proctable->nb_entries - 1; + ishm_proctable->entry[proc_index] = ishm_proctable->entry[last]; + ishm_proctable->nb_entries = last; + } else { + /* just possibly free the fragment as no mapping exist here: */ + do_unmap(NULL, 0, block->flags, block_index); + } + + /* remove all files related to this block: */ + if (close_fd) + delete_file(block); + + /* deregister the file descriptor from the file descriptor server. 
*/ + if (deregister) + ret = _odp_fdserver_deregister_fd(FD_SRV_CTX_ISHM, block_index); + + /* mark the block as free in the main block table: */ + block->len = 0; + + /* mark the change so other processes see this entry as obsolete: */ + block->seq++; + + return ret; +} + /* * Allocate and map internal shared memory, or other objects: * If a name is given, check that this name is not already in use. @@ -928,7 +987,10 @@ int _odp_ishm_reserve(const char *name, uint64_t size, int fd, ishm_proctable->entry[new_proc_entry].fd = fd; /* register the file descriptor to the file descriptor server. */ - _odp_fdserver_register_fd(FD_SRV_CTX_ISHM, new_index, fd); + if (_odp_fdserver_register_fd(FD_SRV_CTX_ISHM, new_index, fd) == -1) { + block_free_internal(new_index, !new_block->external_fd, 0); + new_index = -1; + } odp_spinlock_unlock(&ishm_tbl->lock); return new_index; @@ -1034,53 +1096,7 @@ error_exp_file: */ static int block_free(int block_index) { - int proc_index; - ishm_block_t *block; /* entry in the main block table*/ - int last; - - if ((block_index < 0) || - (block_index >= ISHM_MAX_NB_BLOCKS) || - (ishm_tbl->block[block_index].len == 0)) { - ODP_ERR("Request to free an invalid block\n"); - return -1; - } - - block = &ishm_tbl->block[block_index]; - - proc_index = procfind_block(block_index); - if (proc_index >= 0) { - /* close the related fd */ - close(ishm_proctable->entry[proc_index].fd); - - /* remove the mapping and possible fragment */ - do_unmap(ishm_proctable->entry[proc_index].start, - block->len, - ishm_proctable->entry[proc_index].flags, - block_index); - - /* remove entry from process local table: */ - last = ishm_proctable->nb_entries - 1; - ishm_proctable->entry[proc_index] = - ishm_proctable->entry[last]; - ishm_proctable->nb_entries = last; - } else { - /* just possibly free the fragment as no mapping exist here: */ - do_unmap(NULL, 0, block->flags, block_index); - } - - /* remove all files related to this block: */ - delete_file(block); - - /* 
deregister the file descriptor from the file descriptor server. */ - _odp_fdserver_deregister_fd(FD_SRV_CTX_ISHM, block_index); - - /* mark the block as free in the main block table: */ - block->len = 0; - - /* mark the change so other processes see this entry as obsolete: */ - block->seq++; - - return 0; + return block_free_internal(block_index, 1, 1); } /* -- cgit v1.2.3 From 88ba35773c76c871a222fe48a0dac05765c39936 Mon Sep 17 00:00:00 2001 From: Josep Puigdemont Date: Wed, 11 Apr 2018 14:40:53 +0200 Subject: fdserver: handle signal interruption in connect() Signed-off-by: Josep Puigdemont Reviewed-by: Janne Peltonen Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_fdserver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/platform/linux-generic/odp_fdserver.c b/platform/linux-generic/odp_fdserver.c index c88c71cdc..0e9fb0e4d 100644 --- a/platform/linux-generic/odp_fdserver.c +++ b/platform/linux-generic/odp_fdserver.c @@ -265,7 +265,9 @@ static int get_socket(void) remote.sun_family = AF_UNIX; strcpy(remote.sun_path, sockpath); len = strlen(remote.sun_path) + sizeof(remote.sun_family); - if (connect(s_sock, (struct sockaddr *)&remote, len) == -1) { + while (connect(s_sock, (struct sockaddr *)&remote, len) == -1) { + if (errno == EINTR) + continue; ODP_ERR("cannot connect to server: %s\n", strerror(errno)); close(s_sock); return -1; -- cgit v1.2.3 From 2c99a9dc1978c9178e711d264aaaca2bc4702352 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 2 Apr 2018 22:52:11 +0300 Subject: linux-gen: packet: IPv4 checksum insertion Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- .../linux-generic/include/odp_packet_internal.h | 2 + platform/linux-generic/odp_ipsec.c | 65 ++-------------------- platform/linux-generic/odp_packet.c | 58 +++++++++++++++++++ 3 files changed, 64 insertions(+), 61 deletions(-) diff --git 
a/platform/linux-generic/include/odp_packet_internal.h b/platform/linux-generic/include/odp_packet_internal.h index 75b4ce9e5..cb1c3849b 100644 --- a/platform/linux-generic/include/odp_packet_internal.h +++ b/platform/linux-generic/include/odp_packet_internal.h @@ -303,6 +303,8 @@ int _odp_packet_set_data(odp_packet_t pkt, uint32_t offset, int _odp_packet_cmp_data(odp_packet_t pkt, uint32_t offset, const void *s, uint32_t len); +int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt); + #ifdef __cplusplus } #endif diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index 1e90cea0f..65f7361b9 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -162,67 +162,10 @@ static odp_ipsec_packet_result_t *ipsec_pkt_result(odp_packet_t packet) return &packet_hdr(packet)->ipsec_ctx; } -static inline int _odp_ipv4_csum(odp_packet_t pkt, - uint32_t offset, - _odp_ipv4hdr_t *ip, - odp_u16sum_t *chksum) -{ - unsigned nleft = _ODP_IPV4HDR_IHL(ip->ver_ihl) * 4; - uint16_t buf[nleft / 2]; - int res; - - if (odp_unlikely(nleft < sizeof(*ip))) - return -1; - ip->chksum = 0; - memcpy(buf, ip, sizeof(*ip)); - res = odp_packet_copy_to_mem(pkt, offset + sizeof(*ip), - nleft - sizeof(*ip), - buf + sizeof(*ip) / 2); - if (odp_unlikely(res < 0)) - return res; - - *chksum = ~odp_chksum_ones_comp16(buf, nleft); - - return 0; -} - -#define _ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) #define _ODP_IPV4HDR_PROTO_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, proto) #define _ODP_IPV6HDR_NHDR_OFFSET ODP_OFFSETOF(_odp_ipv6hdr_t, next_hdr) #define _ODP_IPV6HDREXT_NHDR_OFFSET ODP_OFFSETOF(_odp_ipv6hdr_ext_t, next_hdr) -/** - * Calculate and fill in IPv4 checksum - * - * @param pkt ODP packet - * - * @retval 0 on success - * @retval <0 on failure - */ -static inline int _odp_ipv4_csum_update(odp_packet_t pkt) -{ - uint32_t offset; - _odp_ipv4hdr_t ip; - odp_u16sum_t chksum; - int res; - - offset = odp_packet_l3_offset(pkt); - if 
(offset == ODP_PACKET_OFFSET_INVALID) - return -1; - - res = odp_packet_copy_to_mem(pkt, offset, sizeof(ip), &ip); - if (odp_unlikely(res < 0)) - return res; - - res = _odp_ipv4_csum(pkt, offset, &ip, &chksum); - if (odp_unlikely(res < 0)) - return res; - - return odp_packet_copy_from_mem(pkt, - offset + _ODP_IPV4HDR_CSUM_OFFSET, - 2, &chksum); -} - #define ipv4_hdr_len(ip) (_ODP_IPV4HDR_IHL((ip)->ver_ihl) * 4) static const uint8_t ipsec_padding[255] = { @@ -815,7 +758,7 @@ static ipsec_sa_t *ipsec_in_single(odp_packet_t pkt, ipv4hdr->tot_len = _odp_cpu_to_be_16(state.ip_tot_len); else ipv4hdr->ttl -= ipsec_sa->dec_ttl; - _odp_ipv4_csum_update(pkt); + _odp_packet_ipv4_chksum_insert(pkt); } else if (state.is_ipv6 && odp_packet_len(pkt) > _ODP_IPV6HDR_LEN) { _odp_ipv6hdr_t *ipv6hdr = odp_packet_l3_ptr(pkt, NULL); @@ -1238,7 +1181,7 @@ static int ipsec_out_esp(odp_packet_t *pkt, static void ipsec_out_esp_post(ipsec_state_t *state, odp_packet_t pkt) { if (state->is_ipv4) - _odp_ipv4_csum_update(pkt); + _odp_packet_ipv4_chksum_insert(pkt); } static int ipsec_out_ah(odp_packet_t *pkt, @@ -1343,7 +1286,7 @@ static void ipsec_out_ah_post(ipsec_state_t *state, odp_packet_t pkt) ipv4hdr->tos = state->ah_ipv4.tos; ipv4hdr->frag_offset = state->ah_ipv4.frag_offset; - _odp_ipv4_csum_update(pkt); + _odp_packet_ipv4_chksum_insert(pkt); } else { _odp_ipv6hdr_t *ipv6hdr = odp_packet_l3_ptr(pkt, NULL); @@ -1500,7 +1443,7 @@ static ipsec_sa_t *ipsec_out_single(odp_packet_t pkt, else if (ODP_IPSEC_AH == ipsec_sa->proto) ipsec_out_ah_post(&state, pkt); - _odp_ipv4_csum_update(pkt); + _odp_packet_ipv4_chksum_insert(pkt); *pkt_out = pkt; return ipsec_sa; diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c index 462c8a4c7..8170c8125 100644 --- a/platform/linux-generic/odp_packet.c +++ b/platform/linux-generic/odp_packet.c @@ -2266,6 +2266,64 @@ int packet_parse_common(packet_parser_t *prs, const uint8_t *ptr, seg_len, layer, ethtype); } +static inline 
int packet_ipv4_chksum(odp_packet_t pkt, + uint32_t offset, + _odp_ipv4hdr_t *ip, + odp_u16sum_t *chksum) +{ + unsigned int nleft = _ODP_IPV4HDR_IHL(ip->ver_ihl) * 4; + uint16_t buf[nleft / 2]; + int res; + + if (odp_unlikely(nleft < sizeof(*ip))) + return -1; + ip->chksum = 0; + memcpy(buf, ip, sizeof(*ip)); + res = odp_packet_copy_to_mem(pkt, offset + sizeof(*ip), + nleft - sizeof(*ip), + buf + sizeof(*ip) / 2); + if (odp_unlikely(res < 0)) + return res; + + *chksum = ~odp_chksum_ones_comp16(buf, nleft); + + return 0; +} + +#define _ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) + +/** + * Calculate and fill in IPv4 checksum + * + * @param pkt ODP packet + * + * @retval 0 on success + * @retval <0 on failure + */ +int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt) +{ + uint32_t offset; + _odp_ipv4hdr_t ip; + odp_u16sum_t chksum; + int res; + + offset = odp_packet_l3_offset(pkt); + if (offset == ODP_PACKET_OFFSET_INVALID) + return -1; + + res = odp_packet_copy_to_mem(pkt, offset, sizeof(ip), &ip); + if (odp_unlikely(res < 0)) + return res; + + res = packet_ipv4_chksum(pkt, offset, &ip, &chksum); + if (odp_unlikely(res < 0)) + return res; + + return odp_packet_copy_from_mem(pkt, + offset + _ODP_IPV4HDR_CSUM_OFFSET, + 2, &chksum); +} + /** * Simple packet parser */ -- cgit v1.2.3 From 79f5c103310c36dc213a552b9534f68ce8df1132 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 9 Apr 2018 15:45:55 +0300 Subject: linux-gen: packet: l4 checksum insertion support Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- .../linux-generic/include/odp_packet_internal.h | 2 + platform/linux-generic/odp_packet.c | 181 +++++++++++++++++++++ 2 files changed, 183 insertions(+) diff --git a/platform/linux-generic/include/odp_packet_internal.h b/platform/linux-generic/include/odp_packet_internal.h index cb1c3849b..be17cac86 100644 --- 
a/platform/linux-generic/include/odp_packet_internal.h +++ b/platform/linux-generic/include/odp_packet_internal.h @@ -304,6 +304,8 @@ int _odp_packet_cmp_data(odp_packet_t pkt, uint32_t offset, const void *s, uint32_t len); int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt); +int _odp_packet_tcp_chksum_insert(odp_packet_t pkt); +int _odp_packet_udp_chksum_insert(odp_packet_t pkt); #ifdef __cplusplus } diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c index 8170c8125..d4b19feeb 100644 --- a/platform/linux-generic/odp_packet.c +++ b/platform/linux-generic/odp_packet.c @@ -1910,6 +1910,92 @@ int _odp_packet_copy_md_to_packet(odp_packet_t srcpkt, odp_packet_t dstpkt) return dst_uarea_size < src_uarea_size; } +/* Simple implementation of ones complement sum. + * Based on RFC1071 and its errata. + */ +typedef union { + uint16_t w; + uint8_t b[2]; +} swap_buf_t; + +static uint32_t segment_sum16_32(const uint8_t *p, + uint32_t len, + uint32_t offset) + +{ + uint32_t sum = 0; + + /* Include second part of 16-bit short word split between segments */ + if (len > 0 && (offset % 2)) { + swap_buf_t sw; + + sw.b[0] = 0; + sw.b[1] = *p++; + sum = sw.w; + len--; + } + + /* + * If pointer is 16-bit aligned, we can do fast path calculation. + * If it is not, we sum hi and lo bytes separately and then sum them. 
+ */ + if ((uintptr_t)p % 2) { + uint32_t sum1 = 0, sum2 = 0; + + while (len > 1) { + sum1 += *p++; + sum2 += *p++; + len -= 2; + } +#if (ODP_BYTE_ORDER == ODP_BIG_ENDIAN) + sum += sum2 + (sum1 << 8); +#else + sum += sum1 + (sum2 << 8); +#endif + } else { + while (len > 1) { + sum += *(const uint16_t *)(uintptr_t)p; + p += 2; + len -= 2; + } + } + + /* Add left-over byte, if any */ + if (len > 0) { + swap_buf_t sw; + + sw.b[0] = *p; + sw.b[1] = 0; + sum += sw.w; + } + + return sum; +} + +static uint32_t packet_sum16_32(odp_packet_hdr_t *pkt_hdr, + uint32_t offset, + uint32_t len) +{ + uint32_t sum = 0; + + if (offset + len > pkt_hdr->frame_len) + return 0; + + while (len > 0) { + uint32_t seglen = 0; /* GCC */ + void *mapaddr = packet_map(pkt_hdr, offset, &seglen, NULL); + + if (seglen > len) + seglen = len; + + sum += segment_sum16_32(mapaddr, seglen, offset); + len -= seglen; + offset += seglen; + } + + return sum; +} + /** Parser helper function for Ethernet packets */ static inline uint16_t parse_eth(packet_parser_t *prs, const uint8_t **parseptr, uint32_t *offset, uint32_t frame_len) @@ -2291,6 +2377,11 @@ static inline int packet_ipv4_chksum(odp_packet_t pkt, } #define _ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) +#define _ODP_IPV4ADDR_OFFSSET ODP_OFFSETOF(_odp_ipv4hdr_t, src_addr) +#define _ODP_IPV6ADDR_OFFSSET ODP_OFFSETOF(_odp_ipv6hdr_t, src_addr) +#define _ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) +#define _ODP_UDP_LEN_OFFSET ODP_OFFSETOF(_odp_udphdr_t, length) +#define _ODP_UDP_CSUM_OFFSET ODP_OFFSETOF(_odp_udphdr_t, chksum) /** * Calculate and fill in IPv4 checksum @@ -2324,6 +2415,96 @@ int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt) 2, &chksum); } +static int _odp_packet_tcp_udp_chksum_insert(odp_packet_t pkt, uint16_t proto) +{ + odp_packet_hdr_t *pkt_hdr = packet_hdr(pkt); + uint32_t zero = 0; + uint32_t sum; + uint16_t l3_ver; + uint16_t chksum; + uint32_t chksum_offset; + + if (pkt_hdr->p.l3_offset == 
ODP_PACKET_OFFSET_INVALID) + return -1; + if (pkt_hdr->p.l4_offset == ODP_PACKET_OFFSET_INVALID) + return -1; + + odp_packet_copy_to_mem(pkt, pkt_hdr->p.l3_offset, 2, &l3_ver); + + if (_ODP_IPV4HDR_VER(l3_ver) == _ODP_IPV4) + sum = packet_sum16_32(pkt_hdr, + pkt_hdr->p.l3_offset + + _ODP_IPV4ADDR_OFFSSET, + 2 * _ODP_IPV4ADDR_LEN); + else + sum = packet_sum16_32(pkt_hdr, + pkt_hdr->p.l3_offset + + _ODP_IPV6ADDR_OFFSSET, + 2 * _ODP_IPV6ADDR_LEN); +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN + sum += proto; +#else + sum += proto << 8; +#endif + + if (proto == _ODP_IPPROTO_TCP) { + sum += _odp_cpu_to_be_16(pkt_hdr->frame_len - + pkt_hdr->p.l4_offset); + chksum_offset = pkt_hdr->p.l4_offset + _ODP_UDP_CSUM_OFFSET; + } else { + sum += packet_sum16_32(pkt_hdr, + pkt_hdr->p.l4_offset + + _ODP_UDP_LEN_OFFSET, + 2); + chksum_offset = pkt_hdr->p.l4_offset + _ODP_UDP_CSUM_OFFSET; + } + odp_packet_copy_from_mem(pkt, chksum_offset, 2, &zero); + + sum += packet_sum16_32(pkt_hdr, + pkt_hdr->p.l4_offset, + pkt_hdr->frame_len - + pkt_hdr->p.l4_offset); + + /* Not more than two additions */ + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + + chksum = ~sum; + + if (proto == _ODP_IPPROTO_UDP && chksum == 0) + chksum = 0xffff; + + return odp_packet_copy_from_mem(pkt, + chksum_offset, + 2, &chksum); +} + +/** + * Calculate and fill in TCP checksum + * + * @param pkt ODP packet + * + * @retval 0 on success + * @retval <0 on failure + */ +int _odp_packet_tcp_chksum_insert(odp_packet_t pkt) +{ + return _odp_packet_tcp_udp_chksum_insert(pkt, _ODP_IPPROTO_TCP); +} + +/** + * Calculate and fill in UDP checksum + * + * @param pkt ODP packet + * + * @retval 0 on success + * @retval <0 on failure + */ +int _odp_packet_udp_chksum_insert(odp_packet_t pkt) +{ + return _odp_packet_tcp_udp_chksum_insert(pkt, _ODP_IPPROTO_UDP); +} + /** * Simple packet parser */ -- cgit v1.2.3 From b2632ac253a3b9f5ba7a4174acf6abe295ec3550 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: 
Fri, 6 Apr 2018 15:23:37 +0300 Subject: linux-gen: pktio: loop: support IPv4/TCP/UDP checksum generation Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- platform/linux-generic/pktio/loop.c | 110 +++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 2 deletions(-) diff --git a/platform/linux-generic/pktio/loop.c b/platform/linux-generic/pktio/loop.c index 7fc0fd61c..fc80e8019 100644 --- a/platform/linux-generic/pktio/loop.c +++ b/platform/linux-generic/pktio/loop.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ static const char pktio_loop_mac[] = {0x02, 0xe9, 0x34, 0x80, 0x73, 0x01}; static int loopback_stats_reset(pktio_entry_t *pktio_entry); +static int loopback_init_capability(pktio_entry_t *pktio_entry); static int loopback_open(odp_pktio_t id, pktio_entry_t *pktio_entry, const char *devname, odp_pool_t pool ODP_UNUSED) @@ -61,6 +63,7 @@ static int loopback_open(odp_pktio_t id, pktio_entry_t *pktio_entry, return -1; loopback_stats_reset(pktio_entry); + loopback_init_capability(pktio_entry); return 0; } @@ -172,6 +175,85 @@ static int loopback_recv(pktio_entry_t *pktio_entry, int index ODP_UNUSED, return num_rx; } +#define OL_TX_CHKSUM_PKT(_cfg, _capa, _proto, _ovr_set, _ovr) \ + (_capa && _proto && (_ovr_set ? 
_ovr : _cfg)) + +static inline int check_proto(void *l3_hdr, + uint32_t l3_len, + odp_bool_t *l3_proto_v4, + uint8_t *l4_proto) +{ + uint8_t l3_proto_ver = _ODP_IPV4HDR_VER(*(uint8_t *)l3_hdr); + + if (l3_proto_ver == _ODP_IPV4 && l3_len >= _ODP_IPV4HDR_LEN) { + _odp_ipv4hdr_t *ip = l3_hdr; + uint16_t frag_offset = _odp_be_to_cpu_16(ip->frag_offset); + + *l3_proto_v4 = 1; + if (!_ODP_IPV4HDR_IS_FRAGMENT(frag_offset)) + *l4_proto = ip->proto; + else + *l4_proto = 255; + + return 0; + } else if (l3_proto_ver == _ODP_IPV6 && l3_len >= _ODP_IPV6HDR_LEN) { + _odp_ipv6hdr_t *ipv6 = l3_hdr; + + *l3_proto_v4 = 0; + *l4_proto = ipv6->next_hdr; + + /* FIXME: check that packet is not a fragment !!! + * Might require parsing headers spanning several segments, so + * not implemented yet. */ + return 0; + } + + return -1; +} + +static inline void loopback_fix_checksums(odp_packet_t pkt, + odp_pktout_config_opt_t *pktout_cfg, + odp_pktout_config_opt_t *pktout_capa) +{ + odp_bool_t l3_proto_v4 = false; + uint8_t l4_proto; + void *l3_hdr; + uint32_t l3_len; + odp_bool_t ipv4_chksum_pkt, udp_chksum_pkt, tcp_chksum_pkt; + odp_packet_hdr_t *pkt_hdr = packet_hdr(pkt); + + l3_hdr = odp_packet_l3_ptr(pkt, &l3_len); + + if (l3_hdr == NULL || + check_proto(l3_hdr, l3_len, &l3_proto_v4, &l4_proto)) + return; + + ipv4_chksum_pkt = OL_TX_CHKSUM_PKT(pktout_cfg->bit.ipv4_chksum, + pktout_capa->bit.ipv4_chksum, + l3_proto_v4, + pkt_hdr->p.flags.l3_chksum_set, + pkt_hdr->p.flags.l3_chksum); + udp_chksum_pkt = OL_TX_CHKSUM_PKT(pktout_cfg->bit.udp_chksum, + pktout_capa->bit.udp_chksum, + l4_proto == _ODP_IPPROTO_UDP, + pkt_hdr->p.flags.l4_chksum_set, + pkt_hdr->p.flags.l4_chksum); + tcp_chksum_pkt = OL_TX_CHKSUM_PKT(pktout_cfg->bit.tcp_chksum, + pktout_capa->bit.tcp_chksum, + l4_proto == _ODP_IPPROTO_TCP, + pkt_hdr->p.flags.l4_chksum_set, + pkt_hdr->p.flags.l4_chksum); + + if (ipv4_chksum_pkt) + _odp_packet_ipv4_chksum_insert(pkt); + + if (tcp_chksum_pkt) + _odp_packet_tcp_chksum_insert(pkt); + + 
if (udp_chksum_pkt) + _odp_packet_udp_chksum_insert(pkt); +} + static int loopback_send(pktio_entry_t *pktio_entry, int index ODP_UNUSED, const odp_packet_t pkt_tbl[], int num) { @@ -182,6 +264,9 @@ static int loopback_send(pktio_entry_t *pktio_entry, int index ODP_UNUSED, int nb_tx = 0; uint32_t bytes = 0; uint32_t out_octets_tbl[num]; + odp_pktout_config_opt_t *pktout_cfg = &pktio_entry->s.config.pktout; + odp_pktout_config_opt_t *pktout_capa = + &pktio_entry->s.capa.config.pktout; if (odp_unlikely(num > QUEUE_MULTI_MAX)) num = QUEUE_MULTI_MAX; @@ -218,6 +303,9 @@ static int loopback_send(pktio_entry_t *pktio_entry, int index ODP_UNUSED, packet_subtype_set(pkt_tbl[i], ODP_EVENT_PACKET_BASIC); } + for (i = 0; i < nb_tx; ++i) + loopback_fix_checksums(pkt_tbl[i], pktout_cfg, pktout_capa); + odp_ticketlock_lock(&pktio_entry->s.txl); queue = queue_fn->from_ext(pktio_entry->s.pkt_loop.loopq); @@ -255,9 +343,10 @@ static int loopback_link_status(pktio_entry_t *pktio_entry ODP_UNUSED) return 1; } -static int loopback_capability(pktio_entry_t *pktio_entry ODP_UNUSED, - odp_pktio_capability_t *capa) +static int loopback_init_capability(pktio_entry_t *pktio_entry) { + odp_pktio_capability_t *capa = &pktio_entry->s.capa; + memset(capa, 0, sizeof(odp_pktio_capability_t)); capa->max_input_queues = 1; @@ -267,9 +356,26 @@ static int loopback_capability(pktio_entry_t *pktio_entry ODP_UNUSED, odp_pktio_config_init(&capa->config); capa->config.pktin.bit.ts_all = 1; capa->config.pktin.bit.ts_ptp = 1; + capa->config.pktout.bit.ipv4_chksum = 1; + capa->config.pktout.bit.tcp_chksum = 1; + capa->config.pktout.bit.udp_chksum = 1; capa->config.inbound_ipsec = 1; capa->config.outbound_ipsec = 1; + capa->config.pktout.bit.ipv4_chksum_ena = + capa->config.pktout.bit.ipv4_chksum; + capa->config.pktout.bit.udp_chksum_ena = + capa->config.pktout.bit.udp_chksum; + capa->config.pktout.bit.tcp_chksum_ena = + capa->config.pktout.bit.tcp_chksum; + + return 0; +} + +static int 
loopback_capability(pktio_entry_t *pktio_entry ODP_UNUSED, + odp_pktio_capability_t *capa) +{ + *capa = pktio_entry->s.capa; return 0; } -- cgit v1.2.3 From 60777ba4309911df0780bd8de6d96524bc2ca03c Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 6 Apr 2018 15:24:46 +0300 Subject: validation: verify IPv4 and UDP checksum validation and generation Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- test/validation/api/pktio/pktio.c | 465 +++++++++++++++++++++++++++++++++++++- 1 file changed, 459 insertions(+), 6 deletions(-) diff --git a/test/validation/api/pktio/pktio.c b/test/validation/api/pktio/pktio.c index e82a96839..09367d4ea 100644 --- a/test/validation/api/pktio/pktio.c +++ b/test/validation/api/pktio/pktio.c @@ -296,7 +296,7 @@ static uint32_t pktio_init_packet(odp_packet_t pkt) return pktio_pkt_set_seq(pkt); } -static int pktio_fixup_checksums(odp_packet_t pkt) +static int pktio_zero_checksums(odp_packet_t pkt) { odph_ipv4hdr_t *ip; odph_udphdr_t *udp; @@ -312,8 +312,20 @@ static int pktio_fixup_checksums(odp_packet_t pkt) udp = (odph_udphdr_t *)odp_packet_l4_ptr(pkt, &len); ip->chksum = 0; - odph_ipv4_csum_update(pkt); udp->chksum = 0; + + return 0; +} + +static int pktio_fixup_checksums(odp_packet_t pkt) +{ + odph_udphdr_t *udp; + + pktio_zero_checksums(pkt); + + udp = (odph_udphdr_t *)odp_packet_l4_ptr(pkt, NULL); + + odph_ipv4_csum_update(pkt); udp->chksum = odph_ipv4_udp_chksum(pkt); return 0; @@ -407,10 +419,14 @@ static int flush_input_queue(odp_pktio_t pktio, odp_pktin_mode_t imode) return 0; } -static int create_packets(odp_packet_t pkt_tbl[], uint32_t pkt_seq[], int num, - odp_pktio_t pktio_src, odp_pktio_t pktio_dst) +static int create_packets_cs(odp_packet_t pkt_tbl[], + uint32_t pkt_seq[], + int num, + odp_pktio_t pktio_src, + odp_pktio_t pktio_dst, + odp_bool_t fix_cs) { - int i; + int i, ret; for (i = 0; i < num; i++) { pkt_tbl[i] = 
odp_packet_alloc(default_pkt_pool, packet_len); @@ -425,7 +441,11 @@ static int create_packets(odp_packet_t pkt_tbl[], uint32_t pkt_seq[], int num, pktio_pkt_set_macs(pkt_tbl[i], pktio_src, pktio_dst); - if (pktio_fixup_checksums(pkt_tbl[i]) != 0) { + if (fix_cs) + ret = pktio_fixup_checksums(pkt_tbl[i]); + else + ret = pktio_zero_checksums(pkt_tbl[i]); + if (ret != 0) { odp_packet_free(pkt_tbl[i]); break; } @@ -434,6 +454,13 @@ static int create_packets(odp_packet_t pkt_tbl[], uint32_t pkt_seq[], int num, return i; } +static int create_packets(odp_packet_t pkt_tbl[], uint32_t pkt_seq[], int num, + odp_pktio_t pktio_src, odp_pktio_t pktio_dst) +{ + return create_packets_cs(pkt_tbl, pkt_seq, num, pktio_src, pktio_dst, + true); +} + static int get_packets(pktio_info_t *pktio_rx, odp_packet_t pkt_tbl[], int num, txrx_mode_e mode) { @@ -1894,6 +1921,416 @@ static void pktio_test_pktin_ts(void) } } +static void pktio_test_chksum(void (*config_fn)(odp_pktio_t, odp_pktio_t), + void (*prep_fn)(odp_packet_t pkt), + void (*test_fn)(odp_packet_t pkt)) +{ + odp_pktio_t pktio_tx, pktio_rx; + odp_pktio_t pktio[MAX_NUM_IFACES] = {ODP_PKTIO_INVALID}; + pktio_info_t pktio_rx_info; + odp_pktout_queue_t pktout_queue; + odp_packet_t pkt_tbl[TX_BATCH_LEN]; + uint32_t pkt_seq[TX_BATCH_LEN]; + int ret; + int i, num_rx; + + CU_ASSERT_FATAL(num_ifaces >= 1); + + /* Open and configure interfaces */ + for (i = 0; i < num_ifaces; ++i) { + pktio[i] = create_pktio(i, ODP_PKTIN_MODE_DIRECT, + ODP_PKTOUT_MODE_DIRECT); + CU_ASSERT_FATAL(pktio[i] != ODP_PKTIO_INVALID); + } + + pktio_tx = pktio[0]; + pktio_rx = (num_ifaces > 1) ? 
pktio[1] : pktio_tx; + pktio_rx_info.id = pktio_rx; + pktio_rx_info.inq = ODP_QUEUE_INVALID; + pktio_rx_info.in_mode = ODP_PKTIN_MODE_DIRECT; + + config_fn(pktio_tx, pktio_rx); + + for (i = 0; i < num_ifaces; ++i) { + CU_ASSERT_FATAL(odp_pktio_start(pktio[i]) == 0); + _pktio_wait_linkup(pktio[i]); + } + + ret = create_packets_cs(pkt_tbl, pkt_seq, TX_BATCH_LEN, pktio_tx, + pktio_rx, false); + CU_ASSERT_FATAL(ret == TX_BATCH_LEN); + + ret = odp_pktout_queue(pktio_tx, &pktout_queue, 1); + CU_ASSERT_FATAL(ret > 0); + + for (i = 0; i < TX_BATCH_LEN; i++) + if (prep_fn) + prep_fn(pkt_tbl[i]); + + send_packets(pktout_queue, pkt_tbl, TX_BATCH_LEN); + num_rx = wait_for_packets(&pktio_rx_info, pkt_tbl, pkt_seq, + TX_BATCH_LEN, TXRX_MODE_MULTI, + ODP_TIME_SEC_IN_NS); + CU_ASSERT(num_rx == TX_BATCH_LEN); + for (i = 0; i < num_rx; i++) { + test_fn(pkt_tbl[i]); + odp_packet_free(pkt_tbl[i]); + } + + for (i = 0; i < num_ifaces; i++) { + CU_ASSERT_FATAL(odp_pktio_stop(pktio[i]) == 0); + CU_ASSERT_FATAL(odp_pktio_close(pktio[i]) == 0); + } +} + +static int pktio_check_chksum_in_ipv4(void) +{ + odp_pktio_t pktio; + odp_pktio_capability_t capa; + odp_pktio_param_t pktio_param; + int idx = (num_ifaces == 1) ? 
0 : 1; + int ret; + + odp_pktio_param_init(&pktio_param); + pktio_param.in_mode = ODP_PKTIN_MODE_DIRECT; + + pktio = odp_pktio_open(iface_name[idx], pool[idx], &pktio_param); + if (pktio == ODP_PKTIO_INVALID) + return ODP_TEST_INACTIVE; + + ret = odp_pktio_capability(pktio, &capa); + (void)odp_pktio_close(pktio); + + if (ret < 0 || + !capa.config.pktin.bit.ipv4_chksum) + return ODP_TEST_INACTIVE; + + return ODP_TEST_ACTIVE; +} + +static void pktio_test_chksum_in_ipv4_config(odp_pktio_t pktio_tx ODP_UNUSED, + odp_pktio_t pktio_rx) +{ + odp_pktio_capability_t capa; + odp_pktio_config_t config; + + CU_ASSERT_FATAL(odp_pktio_capability(pktio_rx, &capa) == 0); + CU_ASSERT_FATAL(capa.config.pktin.bit.ipv4_chksum); + + odp_pktio_config_init(&config); + config.pktin.bit.ipv4_chksum = 1; + CU_ASSERT_FATAL(odp_pktio_config(pktio_rx, &config) == 0); +} + +static void pktio_test_chksum_in_ipv4_prep(odp_packet_t pkt) +{ + odph_ipv4_csum_update(pkt); +} + +static void pktio_test_chksum_in_ipv4_test(odp_packet_t pkt) +{ + CU_ASSERT(odp_packet_l3_chksum_status(pkt) == ODP_PACKET_CHKSUM_OK); +} + +static void pktio_test_chksum_in_ipv4(void) +{ + pktio_test_chksum(pktio_test_chksum_in_ipv4_config, + pktio_test_chksum_in_ipv4_prep, + pktio_test_chksum_in_ipv4_test); +} + +static int pktio_check_chksum_in_udp(void) +{ + odp_pktio_t pktio; + odp_pktio_capability_t capa; + odp_pktio_param_t pktio_param; + int idx = (num_ifaces == 1) ? 
0 : 1; + int ret; + + odp_pktio_param_init(&pktio_param); + pktio_param.in_mode = ODP_PKTIN_MODE_DIRECT; + + pktio = odp_pktio_open(iface_name[idx], pool[idx], &pktio_param); + if (pktio == ODP_PKTIO_INVALID) + return ODP_TEST_INACTIVE; + + ret = odp_pktio_capability(pktio, &capa); + (void)odp_pktio_close(pktio); + + if (ret < 0 || + !capa.config.pktin.bit.udp_chksum) + return ODP_TEST_INACTIVE; + + return ODP_TEST_ACTIVE; +} + +static void pktio_test_chksum_in_udp_config(odp_pktio_t pktio_tx ODP_UNUSED, + odp_pktio_t pktio_rx) +{ + odp_pktio_capability_t capa; + odp_pktio_config_t config; + + CU_ASSERT_FATAL(odp_pktio_capability(pktio_rx, &capa) == 0); + CU_ASSERT_FATAL(capa.config.pktin.bit.udp_chksum); + + odp_pktio_config_init(&config); + config.pktin.bit.udp_chksum = 1; + CU_ASSERT_FATAL(odp_pktio_config(pktio_rx, &config) == 0); +} + +static void pktio_test_chksum_in_udp_prep(odp_packet_t pkt) +{ + odp_packet_has_ipv4_set(pkt, 1); + odp_packet_has_udp_set(pkt, 1); + odph_ipv4_csum_update(pkt); + odph_udp_chksum_set(pkt); +} + +static void pktio_test_chksum_in_udp_test(odp_packet_t pkt) +{ + CU_ASSERT(odp_packet_l4_chksum_status(pkt) == ODP_PACKET_CHKSUM_OK); +} + +static void pktio_test_chksum_in_udp(void) +{ + pktio_test_chksum(pktio_test_chksum_in_udp_config, + pktio_test_chksum_in_udp_prep, + pktio_test_chksum_in_udp_test); +} + +static int pktio_check_chksum_out_ipv4(void) +{ + odp_pktio_t pktio; + odp_pktio_capability_t capa; + odp_pktio_param_t pktio_param; + int ret; + + odp_pktio_param_init(&pktio_param); + pktio_param.in_mode = ODP_PKTIN_MODE_DIRECT; + + pktio = odp_pktio_open(iface_name[0], pool[0], &pktio_param); + if (pktio == ODP_PKTIO_INVALID) + return ODP_TEST_INACTIVE; + + ret = odp_pktio_capability(pktio, &capa); + (void)odp_pktio_close(pktio); + + if (ret < 0 || + !capa.config.pktout.bit.ipv4_chksum_ena || + !capa.config.pktout.bit.ipv4_chksum) + return ODP_TEST_INACTIVE; + + return ODP_TEST_ACTIVE; +} + +static void 
pktio_test_chksum_out_ipv4_config(odp_pktio_t pktio_tx, + odp_pktio_t pktio_rx ODP_UNUSED) +{ + odp_pktio_capability_t capa; + odp_pktio_config_t config; + + CU_ASSERT_FATAL(odp_pktio_capability(pktio_tx, &capa) == 0); + CU_ASSERT_FATAL(capa.config.pktout.bit.ipv4_chksum_ena); + CU_ASSERT_FATAL(capa.config.pktout.bit.ipv4_chksum); + + odp_pktio_config_init(&config); + config.pktout.bit.ipv4_chksum_ena = 1; + CU_ASSERT_FATAL(odp_pktio_config(pktio_tx, &config) == 0); +} + +static void pktio_test_chksum_out_ipv4_test(odp_packet_t pkt) +{ + odph_ipv4hdr_t *ip = odp_packet_l3_ptr(pkt, NULL); + + CU_ASSERT(ip != NULL); + if (ip != NULL) + CU_ASSERT(ip->chksum != 0); +} + +static void pktio_test_chksum_out_ipv4_no_ovr_prep(odp_packet_t pkt) +{ + odp_packet_l3_chksum_insert(pkt, false); +} + +static void pktio_test_chksum_out_ipv4_no_ovr_test(odp_packet_t pkt) +{ + odph_ipv4hdr_t *ip = odp_packet_l3_ptr(pkt, NULL); + + CU_ASSERT(ip != NULL); + if (ip != NULL) + CU_ASSERT(ip->chksum == 0); +} + +static void pktio_test_chksum_out_ipv4_no_ovr(void) +{ + pktio_test_chksum(pktio_test_chksum_out_ipv4_config, + pktio_test_chksum_out_ipv4_no_ovr_prep, + pktio_test_chksum_out_ipv4_no_ovr_test); +} + +static void pktio_test_chksum_out_ipv4_ovr_prep(odp_packet_t pkt) +{ + odp_packet_l3_chksum_insert(pkt, true); +} + +static void pktio_test_chksum_out_ipv4_ovr_test(odp_packet_t pkt) +{ + odph_ipv4hdr_t *ip = odp_packet_l3_ptr(pkt, NULL); + + CU_ASSERT(ip != NULL); + if (ip != NULL) + CU_ASSERT(ip->chksum != 0); +} + +static void pktio_test_chksum_out_ipv4_ovr(void) +{ + pktio_test_chksum(pktio_test_chksum_out_ipv4_config, + pktio_test_chksum_out_ipv4_ovr_prep, + pktio_test_chksum_out_ipv4_ovr_test); +} + +static void pktio_test_chksum_out_ipv4_pktio_config(odp_pktio_t pktio_tx, + odp_pktio_t pktio_rx + ODP_UNUSED) +{ + odp_pktio_capability_t capa; + odp_pktio_config_t config; + + CU_ASSERT_FATAL(odp_pktio_capability(pktio_tx, &capa) == 0); + 
CU_ASSERT_FATAL(capa.config.pktout.bit.ipv4_chksum_ena); + CU_ASSERT_FATAL(capa.config.pktout.bit.ipv4_chksum); + + odp_pktio_config_init(&config); + config.pktout.bit.ipv4_chksum_ena = 1; + config.pktout.bit.ipv4_chksum = 1; + CU_ASSERT_FATAL(odp_pktio_config(pktio_tx, &config) == 0); +} + +static void pktio_test_chksum_out_ipv4_pktio(void) +{ + pktio_test_chksum(pktio_test_chksum_out_ipv4_pktio_config, + NULL, + pktio_test_chksum_out_ipv4_test); +} + +static int pktio_check_chksum_out_udp(void) +{ + odp_pktio_t pktio; + odp_pktio_capability_t capa; + odp_pktio_param_t pktio_param; + int ret; + + odp_pktio_param_init(&pktio_param); + pktio_param.in_mode = ODP_PKTIN_MODE_DIRECT; + + pktio = odp_pktio_open(iface_name[0], pool[0], &pktio_param); + if (pktio == ODP_PKTIO_INVALID) + return ODP_TEST_INACTIVE; + + ret = odp_pktio_capability(pktio, &capa); + (void)odp_pktio_close(pktio); + + if (ret < 0 || + !capa.config.pktout.bit.udp_chksum_ena || + !capa.config.pktout.bit.udp_chksum) + return ODP_TEST_INACTIVE; + + return ODP_TEST_ACTIVE; +} + +static void pktio_test_chksum_out_udp_config(odp_pktio_t pktio_tx, + odp_pktio_t pktio_rx ODP_UNUSED) +{ + odp_pktio_capability_t capa; + odp_pktio_config_t config; + + CU_ASSERT_FATAL(odp_pktio_capability(pktio_tx, &capa) == 0); + CU_ASSERT_FATAL(capa.config.pktout.bit.udp_chksum_ena); + CU_ASSERT_FATAL(capa.config.pktout.bit.udp_chksum); + + odp_pktio_config_init(&config); + config.pktout.bit.udp_chksum_ena = 1; + CU_ASSERT_FATAL(odp_pktio_config(pktio_tx, &config) == 0); +} + +static void pktio_test_chksum_out_udp_test(odp_packet_t pkt) +{ + odph_udphdr_t *udp = odp_packet_l4_ptr(pkt, NULL); + + CU_ASSERT(udp != NULL); + if (udp != NULL) { + CU_ASSERT(udp->chksum != 0); + CU_ASSERT(!odph_udp_chksum_verify(pkt)); + } +} + +static void pktio_test_chksum_out_udp_no_ovr_prep(odp_packet_t pkt) +{ + odph_ipv4_csum_update(pkt); + odp_packet_l4_chksum_insert(pkt, false); +} + +static void 
pktio_test_chksum_out_udp_no_ovr_test(odp_packet_t pkt) +{ + odph_udphdr_t *udp = odp_packet_l4_ptr(pkt, NULL); + + CU_ASSERT(udp != NULL); + if (udp != NULL) + CU_ASSERT(udp->chksum == 0); +} + +static void pktio_test_chksum_out_udp_no_ovr(void) +{ + pktio_test_chksum(pktio_test_chksum_out_udp_config, + pktio_test_chksum_out_udp_no_ovr_prep, + pktio_test_chksum_out_udp_no_ovr_test); +} + +static void pktio_test_chksum_out_udp_ovr_prep(odp_packet_t pkt) +{ + odp_packet_l4_chksum_insert(pkt, true); +} + +static void pktio_test_chksum_out_udp_ovr_test(odp_packet_t pkt) +{ + odph_udphdr_t *udp = odp_packet_l4_ptr(pkt, NULL); + + CU_ASSERT(udp != NULL); + if (udp != NULL) + CU_ASSERT(udp->chksum != 0); +} + +static void pktio_test_chksum_out_udp_ovr(void) +{ + pktio_test_chksum(pktio_test_chksum_out_udp_config, + pktio_test_chksum_out_udp_ovr_prep, + pktio_test_chksum_out_udp_ovr_test); +} + +static void pktio_test_chksum_out_udp_pktio_config(odp_pktio_t pktio_tx, + odp_pktio_t pktio_rx + ODP_UNUSED) +{ + odp_pktio_capability_t capa; + odp_pktio_config_t config; + + CU_ASSERT_FATAL(odp_pktio_capability(pktio_tx, &capa) == 0); + CU_ASSERT_FATAL(capa.config.pktout.bit.udp_chksum_ena); + CU_ASSERT_FATAL(capa.config.pktout.bit.udp_chksum); + + odp_pktio_config_init(&config); + config.pktout.bit.udp_chksum_ena = 1; + config.pktout.bit.udp_chksum = 1; + CU_ASSERT_FATAL(odp_pktio_config(pktio_tx, &config) == 0); +} + +static void pktio_test_chksum_out_udp_pktio(void) +{ + pktio_test_chksum(pktio_test_chksum_out_udp_pktio_config, + NULL, + pktio_test_chksum_out_udp_test); +} + static int create_pool(const char *iface, int num) { char pool_name[ODP_POOL_NAME_LEN]; @@ -2034,6 +2471,22 @@ odp_testinfo_t pktio_suite_unsegmented[] = { pktio_check_statistics_counters), ODP_TEST_INFO_CONDITIONAL(pktio_test_pktin_ts, pktio_check_pktin_ts), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_in_ipv4, + pktio_check_chksum_in_ipv4), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_in_udp, + 
pktio_check_chksum_in_udp), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_out_ipv4_no_ovr, + pktio_check_chksum_out_ipv4), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_out_ipv4_pktio, + pktio_check_chksum_out_ipv4), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_out_ipv4_ovr, + pktio_check_chksum_out_ipv4), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_out_udp_no_ovr, + pktio_check_chksum_out_udp), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_out_udp_pktio, + pktio_check_chksum_out_udp), + ODP_TEST_INFO_CONDITIONAL(pktio_test_chksum_out_udp_ovr, + pktio_check_chksum_out_udp), ODP_TEST_INFO_NULL }; -- cgit v1.2.3 From 4142652b8eb397963bec48610bdb9ab6599e65fb Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sat, 13 Jan 2018 05:13:03 +0300 Subject: validation: ipsec: fix packet checksums Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- test/validation/api/ipsec/ipsec.c | 1 + test/validation/api/ipsec/test_vectors.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/test/validation/api/ipsec/ipsec.c b/test/validation/api/ipsec/ipsec.c index 7c82d85a4..91a66d0be 100644 --- a/test/validation/api/ipsec/ipsec.c +++ b/test/validation/api/ipsec/ipsec.c @@ -940,6 +940,7 @@ int ipsec_config(odp_instance_t ODP_UNUSED inst) ipsec_config.outbound_mode = suite_context.outbound_op_mode; ipsec_config.inbound.default_queue = suite_context.queue; ipsec_config.inbound.parse_level = ODP_PROTO_LAYER_ALL; + ipsec_config.inbound.chksums.all_chksum = ~0; if (ODP_IPSEC_OK != odp_ipsec_config(&ipsec_config)) return -1; diff --git a/test/validation/api/ipsec/test_vectors.h b/test/validation/api/ipsec/test_vectors.h index 5b357a160..0f133f016 100644 --- a/test/validation/api/ipsec/test_vectors.h +++ b/test/validation/api/ipsec/test_vectors.h @@ -269,7 +269,7 @@ static const ODP_UNUSED ipsec_test_packet pkt_ipv4_icmp_0_ah_sha256_1_bad1 = { /* IP */ 0x45, 0x00, 0x00, 0x9a, 
0x00, 0x00, 0x00, 0x00, - 0x40, 0x33, 0xab, 0xd9, 0xc0, 0xa8, 0x6f, 0x02, + 0x40, 0x33, 0xab, 0xdb, 0xc0, 0xa8, 0x6f, 0x02, 0xc0, 0xa8, 0xde, 0x02, /* AH */ @@ -669,7 +669,7 @@ static const ODP_UNUSED ipsec_test_packet pkt_ipv4_icmp_0_esp_aes_cbc_null_1 = { /* IP */ 0x45, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x00, - 0x40, 0x32, 0xab, 0xca, 0xc0, 0xa8, 0x6f, 0x02, + 0x40, 0x32, 0xab, 0xda, 0xc0, 0xa8, 0x6f, 0x02, 0xc0, 0xa8, 0xde, 0x02, /* ESP */ @@ -1499,7 +1499,7 @@ static const ipsec_test_packet pkt_mcgrew_gcm_test_2_esp = { /* IP - not a part of RFC, added for simplicity */ 0x45, 0x00, 0x00, 0x74, 0x69, 0x8f, 0x00, 0x00, - 0x80, 0x32, 0x4d, 0x76, 0xc0, 0xa8, 0x01, 0x02, + 0x80, 0x32, 0x4d, 0x75, 0xc0, 0xa8, 0x01, 0x02, 0xc0, 0xa8, 0x01, 0x01, /* ESP */ @@ -1557,7 +1557,7 @@ static const ipsec_test_packet pkt_mcgrew_gcm_test_3_esp = { /* IP - not a part of RFC, added for simplicity */ 0x45, 0x00, 0x00, 0x68, 0x69, 0x8f, 0x00, 0x00, - 0x80, 0x32, 0x4d, 0x82, 0xc0, 0xa8, 0x01, 0x02, + 0x80, 0x32, 0x4d, 0x81, 0xc0, 0xa8, 0x01, 0x02, 0xc0, 0xa8, 0x01, 0x01, /* ESP */ @@ -1615,7 +1615,7 @@ static const ipsec_test_packet pkt_mcgrew_gcm_test_4_esp = { /* IP - not a part of RFC, added for simplicity */ 0x45, 0x00, 0x00, 0x74, 0x69, 0x8f, 0x00, 0x00, - 0x80, 0x32, 0x4d, 0x76, 0xc0, 0xa8, 0x01, 0x02, + 0x80, 0x32, 0x4d, 0x75, 0xc0, 0xa8, 0x01, 0x02, 0xc0, 0xa8, 0x01, 0x01, /* ESP */ @@ -1730,7 +1730,7 @@ static const ipsec_test_packet pkt_mcgrew_gcm_test_15_esp = { /* IP - not a part of RFC, added for simplicity */ 0x45, 0x00, 0x00, 0x68, 0x69, 0x8f, 0x00, 0x00, - 0x80, 0x32, 0x4d, 0xb2, 0xc0, 0xa8, 0x01, 0x02, + 0x80, 0x32, 0x4d, 0x81, 0xc0, 0xa8, 0x01, 0x02, 0xc0, 0xa8, 0x01, 0x01, /* ESP */ -- cgit v1.2.3 From 4b6787c84e020c350a03ec47d12e1ef68ddb6300 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 11 Apr 2018 17:28:46 +0300 Subject: linux-gen: ipsec: implement outbound checksumming support Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill 
Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- platform/linux-generic/odp_ipsec.c | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/platform/linux-generic/odp_ipsec.c b/platform/linux-generic/odp_ipsec.c index 65f7361b9..782cbf45c 100644 --- a/platform/linux-generic/odp_ipsec.c +++ b/platform/linux-generic/odp_ipsec.c @@ -826,6 +826,7 @@ static int ipsec_out_tunnel_parse_ipv4(ipsec_state_t *state, state->out_tunnel.ip_tos = ipv4hdr->tos; state->out_tunnel.ip_df = _ODP_IPV4HDR_FLAGS_DONT_FRAG(flags); state->out_tunnel.ip_flabel = 0; + state->ip_next_hdr = ipv4hdr->proto; return 0; } @@ -1295,6 +1296,41 @@ static void ipsec_out_ah_post(ipsec_state_t *state, odp_packet_t pkt) } } +#define OL_TX_CHKSUM_PKT(_cfg, _proto, _ovr_set, _ovr) \ + (_proto && (_ovr_set ? _ovr : _cfg)) + +static void ipsec_out_checksums(odp_packet_t pkt, + ipsec_state_t *state) +{ + odp_bool_t ipv4_chksum_pkt, udp_chksum_pkt, tcp_chksum_pkt; + odp_packet_hdr_t *pkt_hdr = packet_hdr(pkt); + odp_ipsec_outbound_config_t outbound = ipsec_config.outbound; + + ipv4_chksum_pkt = OL_TX_CHKSUM_PKT(outbound.chksum.inner_ipv4, + state->is_ipv4, + pkt_hdr->p.flags.l3_chksum_set, + pkt_hdr->p.flags.l3_chksum); + udp_chksum_pkt = OL_TX_CHKSUM_PKT(outbound.chksum.inner_udp, + state->ip_next_hdr == + _ODP_IPPROTO_UDP, + pkt_hdr->p.flags.l4_chksum_set, + pkt_hdr->p.flags.l4_chksum); + tcp_chksum_pkt = OL_TX_CHKSUM_PKT(outbound.chksum.inner_tcp, + state->ip_next_hdr == + _ODP_IPPROTO_TCP, + pkt_hdr->p.flags.l4_chksum_set, + pkt_hdr->p.flags.l4_chksum); + + if (ipv4_chksum_pkt) + _odp_packet_ipv4_chksum_insert(pkt); + + if (tcp_chksum_pkt) + _odp_packet_tcp_chksum_insert(pkt); + + if (udp_chksum_pkt) + _odp_packet_udp_chksum_insert(pkt); +} + static ipsec_sa_t *ipsec_out_single(odp_packet_t pkt, odp_ipsec_sa_t sa, odp_packet_t *pkt_out, @@ -1354,6 +1390,9 @@ static ipsec_sa_t *ipsec_out_single(odp_packet_t pkt, if (state.ip_tot_len + state.ip_offset 
!= odp_packet_len(pkt)) rc = -1; + + if (rc == 0) + ipsec_out_checksums(pkt, &state); } else { if (state.is_ipv4) rc = ipsec_out_tunnel_parse_ipv4(&state, ipsec_sa); @@ -1371,6 +1410,8 @@ static ipsec_sa_t *ipsec_out_single(odp_packet_t pkt, goto err; } + ipsec_out_checksums(pkt, &state); + if (ipsec_sa->tun_ipv4) rc = ipsec_out_tunnel_ipv4(&pkt, &state, ipsec_sa, opt->flag.ip_param ? -- cgit v1.2.3 From 8caae505c35444706cff8815c41821d80e791403 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 11 Apr 2018 17:29:19 +0300 Subject: validation: ipsec: validate outbound checksumming support Signed-off-by: Dmitry Eremin-Solenikov Reviewed-by: Bill Fischofer Reviewed-by: Petri Savolainen Signed-off-by: Maxim Uvarov --- test/validation/api/ipsec/ipsec.c | 1 + test/validation/api/ipsec/ipsec_test_out.c | 32 ++++++++++++++++ test/validation/api/ipsec/test_vectors.h | 59 ++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/test/validation/api/ipsec/ipsec.c b/test/validation/api/ipsec/ipsec.c index 91a66d0be..31a6f9b53 100644 --- a/test/validation/api/ipsec/ipsec.c +++ b/test/validation/api/ipsec/ipsec.c @@ -938,6 +938,7 @@ int ipsec_config(odp_instance_t ODP_UNUSED inst) odp_ipsec_config_init(&ipsec_config); ipsec_config.inbound_mode = suite_context.inbound_op_mode; ipsec_config.outbound_mode = suite_context.outbound_op_mode; + ipsec_config.outbound.all_chksum = ~0; ipsec_config.inbound.default_queue = suite_context.queue; ipsec_config.inbound.parse_level = ODP_PROTO_LAYER_ALL; ipsec_config.inbound.chksums.all_chksum = ~0; diff --git a/test/validation/api/ipsec/ipsec_test_out.c b/test/validation/api/ipsec/ipsec_test_out.c index 5089dfa79..59c631b58 100644 --- a/test/validation/api/ipsec/ipsec_test_out.c +++ b/test/validation/api/ipsec/ipsec_test_out.c @@ -1155,6 +1155,36 @@ static void test_out_dummy_esp_null_sha256_tun_ipv6(void) ipsec_sa_destroy(sa); } +static void test_out_ipv4_udp_esp_null_sha256(void) +{ + odp_ipsec_sa_param_t 
param; + odp_ipsec_sa_t sa; + + ipsec_sa_param_fill(&param, + false, false, 123, NULL, + ODP_CIPHER_ALG_NULL, NULL, + ODP_AUTH_ALG_SHA256_HMAC, &key_5a_256, + NULL); + + sa = odp_ipsec_sa_create(&param); + + CU_ASSERT_NOT_EQUAL_FATAL(ODP_IPSEC_SA_INVALID, sa); + + ipsec_test_part test = { + .pkt_in = &pkt_ipv4_udp, + .out_pkt = 1, + .out = { + { .status.warn.all = 0, + .status.error.all = 0, + .pkt_out = &pkt_ipv4_udp_esp_null_sha256 }, + }, + }; + + ipsec_check_out_one(&test, sa); + + ipsec_sa_destroy(sa); +} + static void ipsec_test_capability(void) { odp_ipsec_capability_t capa; @@ -1218,5 +1248,7 @@ odp_testinfo_t ipsec_out_suite[] = { ipsec_check_esp_null_sha256), ODP_TEST_INFO_CONDITIONAL(test_out_dummy_esp_null_sha256_tun_ipv6, ipsec_check_esp_null_sha256), + ODP_TEST_INFO_CONDITIONAL(test_out_ipv4_udp_esp_null_sha256, + ipsec_check_esp_null_sha256), ODP_TEST_INFO_NULL, }; diff --git a/test/validation/api/ipsec/test_vectors.h b/test/validation/api/ipsec/test_vectors.h index 0f133f016..289b8008c 100644 --- a/test/validation/api/ipsec/test_vectors.h +++ b/test/validation/api/ipsec/test_vectors.h @@ -1883,4 +1883,63 @@ static const ipsec_test_packet pkt_test_nodata = { 0x0a, 0x0b, 0x0c, 0x0d, }, }; + +static const ipsec_test_packet pkt_ipv4_udp = { + .len = 76, + .l2_offset = 0, + .l3_offset = 14, + .l4_offset = 34, + .data = { + /* ETH - not a part of RFC, added for simplicity */ + 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, + 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00, + + /* IP */ + 0x45, 0x00, 0x00, 0x3e, 0x69, 0x8f, 0x00, 0x00, + 0x80, 0x11, 0x00, 0x00, 0xc0, 0xa8, 0x01, 0x02, + 0xc0, 0xa8, 0x01, 0x01, + + /* UDP */ + 0x0a, 0x98, 0x00, 0x35, 0x00, 0x2a, 0x00, 0x00, + 0xb2, 0xd0, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x73, 0x69, 0x70, + 0x09, 0x63, 0x79, 0x62, 0x65, 0x72, 0x63, 0x69, + 0x74, 0x79, 0x02, 0x64, 0x6b, 0x00, 0x00, 0x01, + 0x00, 0x01, + }, +}; + +static const ipsec_test_packet pkt_ipv4_udp_esp_null_sha256 = { + .len = 102, + 
.l2_offset = 0, + .l3_offset = 14, + .l4_offset = 34, + .data = { + /* ETH - not a part of RFC, added for simplicity */ + 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, + 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0x08, 0x00, + + /* IP */ + 0x45, 0x00, 0x00, 0x58, 0x69, 0x8f, 0x00, 0x00, + 0x80, 0x32, 0x4d, 0x91, 0xc0, 0xa8, 0x01, 0x02, + 0xc0, 0xa8, 0x01, 0x01, + + /* ESP */ + 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x01, + + /* UDP */ + 0x0a, 0x98, 0x00, 0x35, 0x00, 0x2a, 0x23, 0x43, + 0xb2, 0xd0, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x73, 0x69, 0x70, + 0x09, 0x63, 0x79, 0x62, 0x65, 0x72, 0x63, 0x69, + 0x74, 0x79, 0x02, 0x64, 0x6b, 0x00, 0x00, 0x01, + 0x00, 0x01, + + /* ESP TRL */ + 0x00, 0x11, 0x2d, 0x4a, 0x06, 0x9f, 0x97, 0xcf, + 0xa3, 0x05, 0xea, 0x90, 0x7a, 0xf6, 0x6b, 0x0a, + 0x3f, 0xc7, + }, +}; + #endif -- cgit v1.2.3 From 1e0ac11c75eee10959d1fa674a05e746476271b3 Mon Sep 17 00:00:00 2001 From: Bill Fischofer Date: Sun, 8 Apr 2018 20:30:18 -0500 Subject: changelog: updates for odp v1.19.0.0 Add updates for ODP v1.19.0.0 (Tiger Moth Final Release) Signed-off-by: Bill Fischofer Reviewed-by: Bogdan Pricope Reviewed-by: Dmitry Eremin-Solenikov Signed-off-by: Maxim Uvarov --- CHANGELOG | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 1dfdd0e7b..5af1777c6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,108 @@ +== OpenDataPlane (1.19.0.0) +=== Summary of Changes +ODP v1.19.0.0 is the official Tiger Moth final release. It incorporates final +implementation changes and bug fixes and completes the Tiger Moth ODP +development cycle. + +==== APIs +No functional changes for this release. The Tiger Moth API was frozen in ODP +v1.18.0.0. + +===== API Documentation Update +The specification for the `odp_packet_l4_chksum_status()` API has been +clarified to reflect that in IPv4 UDP checksums are optional. 
As a result, a +zero (nonexistent) checksum will be reported as `ODP_PACKET_CHKSUM_OK`. + +==== C++ Test Improvements +The {cpp} test included in the ODP validation suite now uses `cout` instead +of `printf()` to ensure that {cpp} is being used to compile it. + +==== Queue and Scheduler Configuration +For the ODP Reference Implementation, the `config/odp-linux-generic.conf` file +is extended with sections to control the default and maximum sizes for basic +queues, and the priority spread used by the scheduler for scheduled queues. + +The configuration file is a template named `platform/odp-$platform.conf` so +this can be easily inherited by other ODP implementations. + +==== Runtime Default `config` File Read Order Improvements +For the ODP Reference Implementation, the default values of the +application-provided `config` file (if used) override the values provided by +the built-in `config/odp-linux-generic.conf` file. + +=== Implementation Improvements +The `odp-linux` reference implementation is improved in a number of areas: + +==== Netmap Ring Configuration for VALE +PktIO netmap support now uses the ODP config file to allow rings used for VALE +processing to be specified. The supplied defaults provide optimal performance +in typical settings. + +==== AES-XCBC-MAC and SHA384-HMAC +These crypto/authentication algorithms are now implemented. + +==== Packet Checksum Validation and Insertion +Proper packet checksum validation and insertion, in conformance with the +relevant ODP APIs, is now provided. + +=== Dependency Changes + +==== DPDK 17.11 Support +The Tiger Moth LTS release is synchronized with the most recent DPDK LTS +release for DPDK pktio support. + +==== Removal of Dependency on `xxd` Package +This dependency is removed. The Reference Implementation build tools now use +the standard `od` tool rather than the optional `xxd` package. 
+ +=== Performance Tests + +==== `odp_sched_pktio` +A new test has been added to test the performance of PktIO operations in +scheduled mode. Scheduled PktIO is inherently more scalable and simpler from +an application standpoint than direct (polled) I/O, but depending on the +efficiency of the scheduler implementation can incur additional levels of +overhead. This test can give insight into a given platform's scheduler +efficiency. For the `odp-linux` reference implementation, this test has shown +scheduled I/O to be within 10% of rates achievable via direct I/O, meaning +that for many applications the simplicity and scalability of the event model +is preferable. + +==== `odp_ipsec` +A new test has been added that measures outbound (TX) IPsec performance with +a variety of cipher and authentication algorithms. + +=== Example Changes + +==== `l2fwd` Example +The `README` file associated with this example has been clarified to explain +that this example is a throughput test and as a result does not preserve +packet order under all conditions. + +=== Bug Fixes +==== https://bugs.linaro.org/show_bug.cgi?id=3611[Bug 3611] +ODP linux-generic fails on AArch64 in non-ABI-compat mode. 
+ +==== https://bugs.linaro.org/show_bug.cgi?id=3657[Bug 3657] +PktIO does not work with Mellanox Interfaces + +==== https://bugs.linaro.org/show_bug.cgi?id=3685[Bug 3685] +RX UDP checksum offload drops valid UDP packets with Niantic + +==== https://bugs.linaro.org/show_bug.cgi?id=3686[Bug 3686] +IP header checksum not inserted if L4 offset not set + +==== https://bugs.linaro.org/show_bug.cgi?id=3690[Bug 3690] +fdserver process interferes with signal handling + +==== https://bugs.linaro.org/show_bug.cgi?id=3736[Bug 3736] +return value not checked for some fdserver interface functions + +=== Known Issues + +==== https://bugs.linaro.org/show_bug.cgi?id=2988[Bug 2988] +ODP exposes symbols outside of odp*/_odp* namespace + == OpenDataPlane (1.18.0.1) === Summary of Changes ODP v1.18.0.1 is a fix level for Tiger Moth Release Candidate 2 (RC 2). -- cgit v1.2.3 From e828b4d6f503ff94c40e30b1d8babf0dcbecde91 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Wed, 18 Apr 2018 17:50:47 +0300 Subject: configure.ac: update version to v1.19.0.0 Signed-off-by: Maxim Uvarov Reviewed-by: Bill Fischofer Reviewed-by: Dmitry Eremin-Solenikov --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 904e81972..d3f026651 100644 --- a/configure.ac +++ b/configure.ac @@ -3,9 +3,9 @@ AC_PREREQ([2.5]) # Set correct API version ########################################################################## m4_define([odpapi_generation_version], [1]) -m4_define([odpapi_major_version], [18]) +m4_define([odpapi_major_version], [19]) m4_define([odpapi_minor_version], [0]) -m4_define([odpapi_point_version], [1]) +m4_define([odpapi_point_version], [0]) m4_define([odpapi_version], [odpapi_generation_version.odpapi_major_version.odpapi_minor_version.odpapi_point_version]) AC_INIT([OpenDataPlane],[odpapi_version],[lng-odp@lists.linaro.org]) -- cgit v1.2.3 From e6070ca9eaef867e3aa991eea73fa7e1ebeb21c1 Mon Sep 17 00:00:00 2001 From: Matias Elo 
Date: Wed, 4 Apr 2018 14:18:38 +0300 Subject: Port 535ad26 "linux-gen: queue: configurable default size" Signed-off-by: Matias Elo --- config/odp-linux-dpdk.conf | 5 +++ platform/linux-dpdk/odp_queue_basic.c | 61 ++++++++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/config/odp-linux-dpdk.conf b/config/odp-linux-dpdk.conf index 341dba0c5..09d8f5fb2 100644 --- a/config/odp-linux-dpdk.conf +++ b/config/odp-linux-dpdk.conf @@ -30,3 +30,8 @@ pktio_dpdk: { rx_drop_en = 1 } } + +queue_basic: { + # Default queue size. Value must be a power of two. + default_queue_size = 4096 +} diff --git a/platform/linux-dpdk/odp_queue_basic.c b/platform/linux-dpdk/odp_queue_basic.c index 5df48ecc2..948eceed8 100644 --- a/platform/linux-dpdk/odp_queue_basic.c +++ b/platform/linux-dpdk/odp_queue_basic.c @@ -25,6 +25,7 @@ #include #include #include +#include #define NUM_INTERNAL_QUEUES 64 @@ -36,6 +37,9 @@ #include #include +#define MIN_QUEUE_SIZE 8 +#define MAX_QUEUE_SIZE CONFIG_QUEUE_SIZE + static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param); @@ -60,11 +64,11 @@ static int queue_capa(odp_queue_capability_t *capa, int sched) /* Reserve some queues for internal use */ capa->max_queues = ODP_CONFIG_QUEUES - NUM_INTERNAL_QUEUES; capa->plain.max_num = capa->max_queues; - capa->plain.max_size = CONFIG_QUEUE_SIZE - 1; + capa->plain.max_size = MAX_QUEUE_SIZE - 1; capa->plain.lockfree.max_num = queue_glb->queue_lf_num; capa->plain.lockfree.max_size = queue_glb->queue_lf_size; capa->sched.max_num = capa->max_queues; - capa->sched.max_size = CONFIG_QUEUE_SIZE - 1; + capa->sched.max_size = MAX_QUEUE_SIZE - 1; if (sched) { capa->max_ordered_locks = sched_fn->max_ordered_locks(); @@ -75,6 +79,34 @@ static int queue_capa(odp_queue_capability_t *capa, int sched) return 0; } +static int read_config_file(queue_global_t *queue_glb) +{ + const char *str; + uint32_t val_u32; + int val = 0; + + ODP_PRINT("Queue config:\n"); 
+ + str = "queue_basic.default_queue_size"; + if (!_odp_libconfig_lookup_int(str, &val)) { + ODP_ERR("Config option '%s' not found.\n", str); + return -1; + } + + val_u32 = val; + + if (val_u32 > MAX_QUEUE_SIZE || val_u32 < MIN_QUEUE_SIZE || + !CHECK_IS_POWER2(val_u32)) { + ODP_ERR("Bad value %s = %u\n", str, val_u32); + return -1; + } + + queue_glb->config.default_queue_size = val_u32; + ODP_PRINT(" %s: %u\n\n", str, val_u32); + + return 0; +} + static int queue_init_global(void) { uint32_t i; @@ -105,6 +137,11 @@ static int queue_init_global(void) queue->s.handle = queue_from_index(i); } + if (read_config_file(queue_glb)) { + odp_shm_free(shm); + return -1; + } + lf_func = &queue_glb->queue_lf_func; queue_glb->queue_lf_num = queue_lf_init_global(&lf_size, lf_func); queue_glb->queue_lf_size = lf_size; @@ -208,7 +245,7 @@ static odp_queue_t queue_create(const char *name, } if (param->nonblocking == ODP_BLOCKING) { - if (param->size > CONFIG_QUEUE_SIZE) + if (param->size > MAX_QUEUE_SIZE) return ODP_QUEUE_INVALID; } else if (param->nonblocking == ODP_NONBLOCKING_LF) { /* Only plain type lock-free queues supported */ @@ -558,6 +595,8 @@ static odp_event_t queue_deq(odp_queue_t handle) static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param) { + uint32_t queue_size; + if (name == NULL) { queue->s.name[0] = 0; } else { @@ -581,7 +620,21 @@ static int queue_init(queue_entry_t *queue, const char *name, queue->s.pktin = PKTIN_INVALID; queue->s.pktout = PKTOUT_INVALID; - queue->s.ring_st = ring_st_create(queue->s.name, CONFIG_QUEUE_SIZE); + /* Use default size for all small queues to guarantee performance + * level. 
*/ + queue_size = queue_glb->config.default_queue_size; + if (param->size > queue_glb->config.default_queue_size) + queue_size = param->size; + + /* Round up if not already a power of two */ + queue_size = ROUNDUP_POWER2_U32(queue_size); + + if (queue_size > MAX_QUEUE_SIZE) { + ODP_ERR("Too large queue size %u\n", queue_size); + return -1; + } + + queue->s.ring_st = ring_st_create(queue->s.name, queue_size); if (queue->s.ring_st == NULL) return -1; -- cgit v1.2.3 From 9f124353fdd88e0a4fdb4a14234b7699dfc273db Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 4 Apr 2018 14:28:20 +0300 Subject: Port c57da8d "linux-gen: queue: configurable max size" Signed-off-by: Matias Elo --- config/odp-linux-dpdk.conf | 3 +++ platform/linux-dpdk/odp_queue_basic.c | 48 +++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/config/odp-linux-dpdk.conf b/config/odp-linux-dpdk.conf index 09d8f5fb2..0c4c5781f 100644 --- a/config/odp-linux-dpdk.conf +++ b/config/odp-linux-dpdk.conf @@ -32,6 +32,9 @@ pktio_dpdk: { } queue_basic: { + # Maximum queue size. Value must be a power of two. + max_queue_size = 8192 + # Default queue size. Value must be a power of two. 
default_queue_size = 4096 } diff --git a/platform/linux-dpdk/odp_queue_basic.c b/platform/linux-dpdk/odp_queue_basic.c index 948eceed8..67303c185 100644 --- a/platform/linux-dpdk/odp_queue_basic.c +++ b/platform/linux-dpdk/odp_queue_basic.c @@ -38,7 +38,7 @@ #include #define MIN_QUEUE_SIZE 8 -#define MAX_QUEUE_SIZE CONFIG_QUEUE_SIZE +#define MAX_QUEUE_SIZE (1 * 1024 * 1024) static int queue_init(queue_entry_t *queue, const char *name, const odp_queue_param_t *param); @@ -64,11 +64,11 @@ static int queue_capa(odp_queue_capability_t *capa, int sched) /* Reserve some queues for internal use */ capa->max_queues = ODP_CONFIG_QUEUES - NUM_INTERNAL_QUEUES; capa->plain.max_num = capa->max_queues; - capa->plain.max_size = MAX_QUEUE_SIZE - 1; + capa->plain.max_size = queue_glb->config.max_queue_size - 1; capa->plain.lockfree.max_num = queue_glb->queue_lf_num; capa->plain.lockfree.max_size = queue_glb->queue_lf_size; capa->sched.max_num = capa->max_queues; - capa->sched.max_size = MAX_QUEUE_SIZE - 1; + capa->sched.max_size = queue_glb->config.max_queue_size - 1; if (sched) { capa->max_ordered_locks = sched_fn->max_ordered_locks(); @@ -87,7 +87,7 @@ static int read_config_file(queue_global_t *queue_glb) ODP_PRINT("Queue config:\n"); - str = "queue_basic.default_queue_size"; + str = "queue_basic.max_queue_size"; if (!_odp_libconfig_lookup_int(str, &val)) { ODP_ERR("Config option '%s' not found.\n", str); return -1; @@ -101,6 +101,24 @@ static int read_config_file(queue_global_t *queue_glb) return -1; } + queue_glb->config.max_queue_size = val_u32; + ODP_PRINT(" %s: %u\n", str, val_u32); + + str = "queue_basic.default_queue_size"; + if (!_odp_libconfig_lookup_int(str, &val)) { + ODP_ERR("Config option '%s' not found.\n", str); + return -1; + } + + val_u32 = val; + + if (val_u32 > queue_glb->config.max_queue_size || + val_u32 < MIN_QUEUE_SIZE || + !CHECK_IS_POWER2(val_u32)) { + ODP_ERR("Bad value %s = %u\n", str, val_u32); + return -1; + } + queue_glb->config.default_queue_size = 
val_u32; ODP_PRINT(" %s: %u\n\n", str, val_u32); @@ -117,7 +135,7 @@ static int queue_init_global(void) ODP_DBG("Starts...\n"); - shm = odp_shm_reserve("odp_queues", + shm = odp_shm_reserve("_odp_queue_gbl", sizeof(queue_global_t), sizeof(queue_entry_t), 0); @@ -142,6 +160,10 @@ static int queue_init_global(void) return -1; } + queue_glb->queue_gbl_shm = shm; + queue_glb->queue_ring_shm = ODP_SHM_INVALID; + queue_glb->ring_data = NULL; + lf_func = &queue_glb->queue_lf_func; queue_glb->queue_lf_num = queue_lf_init_global(&lf_size, lf_func); queue_glb->queue_lf_size = lf_size; @@ -171,7 +193,6 @@ static int queue_term_local(void) static int queue_term_global(void) { int ret = 0; - int rc = 0; queue_entry_t *queue; int i; @@ -180,20 +201,19 @@ static int queue_term_global(void) LOCK(queue); if (queue->s.status != QUEUE_STATUS_FREE) { ODP_ERR("Not destroyed queue: %s\n", queue->s.name); - rc = -1; + ret = -1; } UNLOCK(queue); } queue_lf_term_global(); - ret = odp_shm_free(odp_shm_lookup("odp_queues")); - if (ret < 0) { - ODP_ERR("shm free failed for odp_queues"); - rc = -1; + if (odp_shm_free(queue_glb->queue_gbl_shm)) { + ODP_ERR("shm free failed"); + ret = -1; } - return rc; + return ret; } static int queue_capability(odp_queue_capability_t *capa) @@ -245,7 +265,7 @@ static odp_queue_t queue_create(const char *name, } if (param->nonblocking == ODP_BLOCKING) { - if (param->size > MAX_QUEUE_SIZE) + if (param->size > queue_glb->config.max_queue_size) return ODP_QUEUE_INVALID; } else if (param->nonblocking == ODP_NONBLOCKING_LF) { /* Only plain type lock-free queues supported */ @@ -629,7 +649,7 @@ static int queue_init(queue_entry_t *queue, const char *name, /* Round up if not already a power of two */ queue_size = ROUNDUP_POWER2_U32(queue_size); - if (queue_size > MAX_QUEUE_SIZE) { + if (queue_size > queue_glb->config.max_queue_size) { ODP_ERR("Too large queue size %u\n", queue_size); return -1; } -- cgit v1.2.3 From 94ef2a5f39d136822e05c7b2d43a4baf55210042 Mon Sep 17 
00:00:00 2001 From: Matias Elo Date: Wed, 4 Apr 2018 14:30:11 +0300 Subject: Port bcd246e "linux-gen: sched: configurable priority spread" Signed-off-by: Matias Elo --- config/odp-linux-dpdk.conf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config/odp-linux-dpdk.conf b/config/odp-linux-dpdk.conf index 0c4c5781f..33296080c 100644 --- a/config/odp-linux-dpdk.conf +++ b/config/odp-linux-dpdk.conf @@ -38,3 +38,12 @@ queue_basic: { # Default queue size. Value must be a power of two. default_queue_size = 4096 } + +sched_basic: { + # Priority level spread. Each priority level is spread into multiple + # scheduler internal queues. A higher spread value typically improves + # parallelism and thus is better for high thread counts, but causes + # uneven service level for low thread counts. Typically, optimal + # value is the number of threads using the scheduler. + prio_spread = 4 +} -- cgit v1.2.3 From 51423916ceb46bbe75247fe805af023c0da5e5f5 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 4 Apr 2018 14:35:36 +0300 Subject: Port 72febba "linux-gen: pktio: dpdk: accept UDPv4 packets with all-zero csum" Signed-off-by: Matias Elo --- platform/linux-dpdk/include/odp_packet_internal.h | 5 +++++ platform/linux-dpdk/odp_packet_dpdk.c | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/platform/linux-dpdk/include/odp_packet_internal.h b/platform/linux-dpdk/include/odp_packet_internal.h index 328b499ff..20ce64119 100644 --- a/platform/linux-dpdk/include/odp_packet_internal.h +++ b/platform/linux-dpdk/include/odp_packet_internal.h @@ -142,6 +142,11 @@ static inline odp_packet_hdr_t *packet_hdr(odp_packet_t pkt) return (odp_packet_hdr_t *)(uintptr_t)pkt; } +static inline odp_packet_t packet_handle(odp_packet_hdr_t *pkt_hdr) +{ + return (odp_packet_t)pkt_hdr; +} + static inline struct rte_mbuf *pkt_to_mbuf(odp_packet_t pkt) { return (struct rte_mbuf *)(uintptr_t)pkt; diff --git a/platform/linux-dpdk/odp_packet_dpdk.c b/platform/linux-dpdk/odp_packet_dpdk.c 
index 1c85577fa..c738cfbfc 100644 --- a/platform/linux-dpdk/odp_packet_dpdk.c +++ b/platform/linux-dpdk/odp_packet_dpdk.c @@ -33,8 +33,11 @@ #include #include #include +#include #include +#include + /* DPDK poll mode drivers requiring minimum RX burst size DPDK_MIN_RX_BURST */ #define IXGBE_DRV_NAME "net_ixgbe" #define I40E_DRV_NAME "net_i40e" @@ -590,6 +593,7 @@ static void _odp_pktio_send_completion(pktio_entry_t *pktio_entry) #define IP4_CSUM_RESULT(m) (m->ol_flags & PKT_RX_IP_CKSUM_MASK) #define L4_CSUM_RESULT(m) (m->ol_flags & PKT_RX_L4_CKSUM_MASK) #define HAS_L4_PROTO(m, proto) ((m->packet_type & RTE_PTYPE_L4_MASK) == proto) +#define UDP4_CSUM(_p) (((_odp_udphdr_t *)_odp_packet_l4_ptr(_p, NULL))->chksum) #define PKTIN_CSUM_BITS 0x1C @@ -622,6 +626,13 @@ static inline int pkt_set_ol_rx(odp_pktin_config_opt_t *pktin_cfg, if (packet_csum_result == PKT_RX_L4_CKSUM_GOOD) { pkt_hdr->p.input_flags.l4_chksum_done = 1; } else if (packet_csum_result != PKT_RX_L4_CKSUM_UNKNOWN) { + if (pkt_hdr->p.input_flags.ipv4 && + pkt_hdr->p.input_flags.udp && + !UDP4_CSUM(packet_handle(pkt_hdr))) { + pkt_hdr->p.input_flags.l4_chksum_done = 1; + return 0; + } + if (pktin_cfg->bit.drop_udp_err) return -1; -- cgit v1.2.3 From d669c09dd79019c1e2754fd779da6ef2df576805 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 4 Apr 2018 14:39:08 +0300 Subject: Port 0225a8a "linux-gen: pktio: dpdk: fix IPv4 csum calculation when l4 offset is not set" Signed-off-by: Matias Elo --- platform/linux-dpdk/odp_packet_dpdk.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/platform/linux-dpdk/odp_packet_dpdk.c b/platform/linux-dpdk/odp_packet_dpdk.c index c738cfbfc..d85b4f787 100644 --- a/platform/linux-dpdk/odp_packet_dpdk.c +++ b/platform/linux-dpdk/odp_packet_dpdk.c @@ -871,11 +871,7 @@ static inline void pkt_set_ol_tx(odp_pktout_config_opt_t *pktout_cfg, if (!ipv4_chksum_pkt && !udp_chksum_pkt && !tcp_chksum_pkt) return; - if (pkt_p->l4_offset == 
ODP_PACKET_OFFSET_INVALID) - return; - mbuf->l2_len = pkt_p->l3_offset - pkt_p->l2_offset; - mbuf->l3_len = pkt_p->l4_offset - pkt_p->l3_offset; if (l3_proto_v4) mbuf->ol_flags = PKT_TX_IPV4; @@ -886,8 +882,14 @@ static inline void pkt_set_ol_tx(odp_pktout_config_opt_t *pktout_cfg, mbuf->ol_flags |= PKT_TX_IP_CKSUM; ((struct ipv4_hdr *)l3_hdr)->hdr_checksum = 0; + mbuf->l3_len = _ODP_IPV4HDR_IHL(*(uint8_t *)l3_hdr) * 4; } + if (pkt_p->l4_offset == ODP_PACKET_OFFSET_INVALID) + return; + + mbuf->l3_len = pkt_p->l4_offset - pkt_p->l3_offset; + l4_hdr = (void *)(mbuf_data + pkt_p->l4_offset); if (udp_chksum_pkt) { -- cgit v1.2.3 From 9b3fceee7317641cc29e78c641b8b175b7ad92bd Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 4 Apr 2018 14:43:57 +0300 Subject: Port 6312314 "build: don't use xxd to hexdump config file" Signed-off-by: Matias Elo --- platform/linux-dpdk/Makefile.am | 2 +- platform/linux-dpdk/odp_libconfig.c | 99 ------------------------------------- 2 files changed, 1 insertion(+), 100 deletions(-) delete mode 100644 platform/linux-dpdk/odp_libconfig.c diff --git a/platform/linux-dpdk/Makefile.am b/platform/linux-dpdk/Makefile.am index 0ce6ccc03..7a5d79352 100644 --- a/platform/linux-dpdk/Makefile.am +++ b/platform/linux-dpdk/Makefile.am @@ -152,7 +152,7 @@ __LIB__libodp_linux_la_SOURCES = \ ../linux-generic/odp_ipsec_events.c \ ../linux-generic/odp_ipsec_sad.c \ ../linux-generic/odp_name_table.c \ - odp_libconfig.c \ + ../linux-generic/odp_libconfig.c \ odp_packet.c \ odp_packet_dpdk.c \ odp_packet_flags.c \ diff --git a/platform/linux-dpdk/odp_libconfig.c b/platform/linux-dpdk/odp_libconfig.c deleted file mode 100644 index 6d5ee524e..000000000 --- a/platform/linux-dpdk/odp_libconfig.c +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include "config.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define CONF_STR_NAME ((const char *)odp_linux_dpdk_conf) - -extern struct odp_global_data_s odp_global_data; - -int _odp_libconfig_init_global(void) -{ - const char *filename; - const char *vers; - const char *vers_rt; - const char *ipml; - const char *ipml_rt; - config_t *config = &odp_global_data.libconfig_default; - config_t *config_rt = &odp_global_data.libconfig_runtime; - - config_init(config); - config_init(config_rt); - - if (!config_read_string(config, CONF_STR_NAME)) { - ODP_ERR("Failed to read default config: %s(%d): %s\n", - config_error_file(config), config_error_line(config), - config_error_text(config)); - goto fail; - } - - filename = getenv("ODP_CONFIG_FILE"); - if (filename == NULL) - return 0; - - if (!config_read_file(config_rt, filename)) { - ODP_ERR("Failed to read config file: %s(%d): %s\n", - config_error_file(config_rt), - config_error_line(config_rt), - config_error_text(config_rt)); - goto fail; - } - - /* Check runtime configuration's implementation name and version */ - if (!config_lookup_string(config, "odp_implementation", &ipml) || - !config_lookup_string(config_rt, "odp_implementation", &ipml_rt)) { - ODP_ERR("Configuration missing 'odp_implementation' field\n"); - goto fail; - } - if (!config_lookup_string(config, "config_file_version", &vers) || - !config_lookup_string(config_rt, "config_file_version", &vers_rt)) { - ODP_ERR("Configuration missing 'config_file_version' field\n"); - goto fail; - } - if (strcmp(vers, vers_rt) || strcmp(ipml, ipml_rt)) { - ODP_ERR("Runtime configuration mismatch\n"); - goto fail; - } - - return 0; -fail: - config_destroy(config); - config_destroy(config_rt); - return -1; -} - -int _odp_libconfig_term_global(void) -{ - config_destroy(&odp_global_data.libconfig_default); - config_destroy(&odp_global_data.libconfig_runtime); - - return 0; -} - 
-int _odp_libconfig_lookup_int(const char *path, int *value) -{ - int ret_def = CONFIG_FALSE; - int ret_rt = CONFIG_FALSE; - - ret_def = config_lookup_int(&odp_global_data.libconfig_default, path, - value); - - /* Runtime option overrides default value */ - ret_rt = config_lookup_int(&odp_global_data.libconfig_runtime, path, - value); - - return (ret_def == CONFIG_TRUE || ret_rt == CONFIG_TRUE) ? 1 : 0; -} -- cgit v1.2.3 From 272b58540d8df75aef589ff73f82d6d1ddaffd52 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 4 Apr 2018 14:46:50 +0300 Subject: Port 57df98e "linux-gen: dpdk: fix runtime/default config read order" Signed-off-by: Matias Elo --- platform/linux-dpdk/odp_packet_dpdk.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/platform/linux-dpdk/odp_packet_dpdk.c b/platform/linux-dpdk/odp_packet_dpdk.c index d85b4f787..7f1b93a68 100644 --- a/platform/linux-dpdk/odp_packet_dpdk.c +++ b/platform/linux-dpdk/odp_packet_dpdk.c @@ -60,22 +60,16 @@ extern void *pktio_entry_ptr[ODP_CONFIG_PKTIO_ENTRIES]; static uint32_t mtu_get_pkt_dpdk(pktio_entry_t *pktio_entry); -static int lookup_opt(const char *path, const char *drv_name, int *val) +static int lookup_opt(const char *opt_name, const char *drv_name, int *val) { const char *base = "pktio_dpdk"; - char opt_path[256]; - int ret = 0; - - /* Default option */ - snprintf(opt_path, sizeof(opt_path), "%s.%s", base, path); - ret += _odp_libconfig_lookup_int(opt_path, val); - - /* Driver specific option overrides default option */ - snprintf(opt_path, sizeof(opt_path), "%s.%s.%s", base, drv_name, path); - ret += _odp_libconfig_lookup_int(opt_path, val); + int ret; + ret = _odp_libconfig_lookup_ext_int(base, drv_name, opt_name, val); if (ret == 0) - ODP_ERR("Unable to find DPDK configuration option: %s\n", path); + ODP_ERR("Unable to find DPDK configuration option: %s\n", + opt_name); + return ret; } -- cgit v1.2.3 From 0eaa26a5ea7391d79617f1a5933ccaaedf03257d Mon Sep 17 00:00:00 2001 
From: Matias Elo Date: Wed, 25 Apr 2018 10:26:14 +0300 Subject: Port 2c99a9d "linux-gen: packet: IPv4 checksum insertion" Signed-off-by: Matias Elo --- platform/linux-dpdk/include/odp_packet_internal.h | 2 + platform/linux-dpdk/odp_packet.c | 58 +++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/platform/linux-dpdk/include/odp_packet_internal.h b/platform/linux-dpdk/include/odp_packet_internal.h index 20ce64119..5e2569754 100644 --- a/platform/linux-dpdk/include/odp_packet_internal.h +++ b/platform/linux-dpdk/include/odp_packet_internal.h @@ -261,6 +261,8 @@ int _odp_packet_set_data(odp_packet_t pkt, uint32_t offset, int _odp_packet_cmp_data(odp_packet_t pkt, uint32_t offset, const void *s, uint32_t len); +int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt); + /* We can't enforce tailroom reservation for received packets */ ODP_STATIC_ASSERT(CONFIG_PACKET_TAILROOM == 0, "ERROR: Tailroom has to be 0, DPDK doesn't support this"); diff --git a/platform/linux-dpdk/odp_packet.c b/platform/linux-dpdk/odp_packet.c index e0d565dc2..a811e048d 100644 --- a/platform/linux-dpdk/odp_packet.c +++ b/platform/linux-dpdk/odp_packet.c @@ -1496,6 +1496,64 @@ int packet_parse_common(packet_parser_t *prs, const uint8_t *ptr, seg_len, layer, ethtype); } +static inline int packet_ipv4_chksum(odp_packet_t pkt, + uint32_t offset, + _odp_ipv4hdr_t *ip, + odp_u16sum_t *chksum) +{ + unsigned int nleft = _ODP_IPV4HDR_IHL(ip->ver_ihl) * 4; + uint16_t buf[nleft / 2]; + int res; + + if (odp_unlikely(nleft < sizeof(*ip))) + return -1; + ip->chksum = 0; + memcpy(buf, ip, sizeof(*ip)); + res = odp_packet_copy_to_mem(pkt, offset + sizeof(*ip), + nleft - sizeof(*ip), + buf + sizeof(*ip) / 2); + if (odp_unlikely(res < 0)) + return res; + + *chksum = ~odp_chksum_ones_comp16(buf, nleft); + + return 0; +} + +#define _ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) + +/** + * Calculate and fill in IPv4 checksum + * + * @param pkt ODP packet + * + * @retval 0 on success + * 
@retval <0 on failure + */ +int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt) +{ + uint32_t offset; + _odp_ipv4hdr_t ip; + odp_u16sum_t chksum; + int res; + + offset = odp_packet_l3_offset(pkt); + if (offset == ODP_PACKET_OFFSET_INVALID) + return -1; + + res = odp_packet_copy_to_mem(pkt, offset, sizeof(ip), &ip); + if (odp_unlikely(res < 0)) + return res; + + res = packet_ipv4_chksum(pkt, offset, &ip, &chksum); + if (odp_unlikely(res < 0)) + return res; + + return odp_packet_copy_from_mem(pkt, + offset + _ODP_IPV4HDR_CSUM_OFFSET, + 2, &chksum); +} + /** * Simple packet parser */ -- cgit v1.2.3 From 6be04bb3f78acb1ff29b2d84ee195f0031c53ebc Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 25 Apr 2018 10:30:31 +0300 Subject: Port 79f5c10 "linux-gen: packet: l4 checksum insertion support" Signed-off-by: Matias Elo --- platform/linux-dpdk/include/odp_packet_internal.h | 2 + platform/linux-dpdk/odp_packet.c | 181 ++++++++++++++++++++++ 2 files changed, 183 insertions(+) diff --git a/platform/linux-dpdk/include/odp_packet_internal.h b/platform/linux-dpdk/include/odp_packet_internal.h index 5e2569754..2268833f8 100644 --- a/platform/linux-dpdk/include/odp_packet_internal.h +++ b/platform/linux-dpdk/include/odp_packet_internal.h @@ -262,6 +262,8 @@ int _odp_packet_cmp_data(odp_packet_t pkt, uint32_t offset, const void *s, uint32_t len); int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt); +int _odp_packet_tcp_chksum_insert(odp_packet_t pkt); +int _odp_packet_udp_chksum_insert(odp_packet_t pkt); /* We can't enforce tailroom reservation for received packets */ ODP_STATIC_ASSERT(CONFIG_PACKET_TAILROOM == 0, diff --git a/platform/linux-dpdk/odp_packet.c b/platform/linux-dpdk/odp_packet.c index a811e048d..2f3bea21b 100644 --- a/platform/linux-dpdk/odp_packet.c +++ b/platform/linux-dpdk/odp_packet.c @@ -1140,6 +1140,93 @@ int _odp_packet_copy_md_to_packet(odp_packet_t srcpkt, odp_packet_t dstpkt) return dst_size < src_size; } +/* Simple implementation of ones complement 
sum. + * Based on RFC1071 and its errata. + */ +typedef union { + uint16_t w; + uint8_t b[2]; +} swap_buf_t; + +static uint32_t segment_sum16_32(const uint8_t *p, + uint32_t len, + uint32_t offset) + +{ + uint32_t sum = 0; + + /* Include second part of 16-bit short word split between segments */ + if (len > 0 && (offset % 2)) { + swap_buf_t sw; + + sw.b[0] = 0; + sw.b[1] = *p++; + sum = sw.w; + len--; + } + + /* + * If pointer is 16-bit aligned, we can do fast path calculation. + * If it is not, we sum hi and lo bytes separately and then sum them. + */ + if ((uintptr_t)p % 2) { + uint32_t sum1 = 0, sum2 = 0; + + while (len > 1) { + sum1 += *p++; + sum2 += *p++; + len -= 2; + } +#if (ODP_BYTE_ORDER == ODP_BIG_ENDIAN) + sum += sum2 + (sum1 << 8); +#else + sum += sum1 + (sum2 << 8); +#endif + } else { + while (len > 1) { + sum += *(const uint16_t *)(uintptr_t)p; + p += 2; + len -= 2; + } + } + + /* Add left-over byte, if any */ + if (len > 0) { + swap_buf_t sw; + + sw.b[0] = *p; + sw.b[1] = 0; + sum += sw.w; + } + + return sum; +} + +static uint32_t packet_sum16_32(odp_packet_hdr_t *pkt_hdr, + uint32_t offset, + uint32_t len) +{ + uint32_t sum = 0; + + if (offset + len > packet_len(pkt_hdr)) + return 0; + + while (len > 0) { + uint32_t seglen = 0; /* GCC */ + void *mapaddr = _odp_packet_offset(packet_handle(pkt_hdr), + offset, &seglen, NULL); + + if (seglen > len) + seglen = len; + + sum += segment_sum16_32(mapaddr, seglen, offset); + len -= seglen; + offset += seglen; + } + + return sum; +} + /** Parser helper function for Ethernet packets */ static inline uint16_t parse_eth(packet_parser_t *prs, const uint8_t **parseptr, uint32_t *offset, uint32_t frame_len) @@ -1521,6 +1608,11 @@ static inline int packet_ipv4_chksum(odp_packet_t pkt, } #define _ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) +#define _ODP_IPV4ADDR_OFFSSET ODP_OFFSETOF(_odp_ipv4hdr_t, src_addr) +#define _ODP_IPV6ADDR_OFFSSET ODP_OFFSETOF(_odp_ipv6hdr_t, src_addr) +#define 
_ODP_IPV4HDR_CSUM_OFFSET ODP_OFFSETOF(_odp_ipv4hdr_t, chksum) +#define _ODP_UDP_LEN_OFFSET ODP_OFFSETOF(_odp_udphdr_t, length) +#define _ODP_UDP_CSUM_OFFSET ODP_OFFSETOF(_odp_udphdr_t, chksum) /** * Calculate and fill in IPv4 checksum @@ -1554,6 +1646,95 @@ int _odp_packet_ipv4_chksum_insert(odp_packet_t pkt) 2, &chksum); } +static int _odp_packet_tcp_udp_chksum_insert(odp_packet_t pkt, uint16_t proto) +{ + odp_packet_hdr_t *pkt_hdr = packet_hdr(pkt); + uint32_t zero = 0; + uint32_t sum; + uint16_t l3_ver; + uint16_t chksum; + uint32_t chksum_offset; + uint32_t frame_len = packet_len(pkt_hdr); + + if (pkt_hdr->p.l3_offset == ODP_PACKET_OFFSET_INVALID) + return -1; + if (pkt_hdr->p.l4_offset == ODP_PACKET_OFFSET_INVALID) + return -1; + + odp_packet_copy_to_mem(pkt, pkt_hdr->p.l3_offset, 2, &l3_ver); + + if (_ODP_IPV4HDR_VER(l3_ver) == _ODP_IPV4) + sum = packet_sum16_32(pkt_hdr, + pkt_hdr->p.l3_offset + + _ODP_IPV4ADDR_OFFSSET, + 2 * _ODP_IPV4ADDR_LEN); + else + sum = packet_sum16_32(pkt_hdr, + pkt_hdr->p.l3_offset + + _ODP_IPV6ADDR_OFFSSET, + 2 * _ODP_IPV6ADDR_LEN); +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN + sum += proto; +#else + sum += proto << 8; +#endif + + if (proto == _ODP_IPPROTO_TCP) { + sum += _odp_cpu_to_be_16(frame_len - pkt_hdr->p.l4_offset); + chksum_offset = pkt_hdr->p.l4_offset + _ODP_UDP_CSUM_OFFSET; + } else { + sum += packet_sum16_32(pkt_hdr, + pkt_hdr->p.l4_offset + + _ODP_UDP_LEN_OFFSET, + 2); + chksum_offset = pkt_hdr->p.l4_offset + _ODP_UDP_CSUM_OFFSET; + } + odp_packet_copy_from_mem(pkt, chksum_offset, 2, &zero); + + sum += packet_sum16_32(pkt_hdr, + pkt_hdr->p.l4_offset, + frame_len - pkt_hdr->p.l4_offset); + + /* Not more than two additions */ + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + + chksum = ~sum; + + if (proto == _ODP_IPPROTO_UDP && chksum == 0) + chksum = 0xffff; + + return odp_packet_copy_from_mem(pkt, + chksum_offset, + 2, &chksum); +} + +/** + * Calculate and fill in TCP checksum + * + * @param pkt 
ODP packet + * + * @retval 0 on success + * @retval <0 on failure + */ +int _odp_packet_tcp_chksum_insert(odp_packet_t pkt) +{ + return _odp_packet_tcp_udp_chksum_insert(pkt, _ODP_IPPROTO_TCP); +} + +/** + * Calculate and fill in UDP checksum + * + * @param pkt ODP packet + * + * @retval 0 on success + * @retval <0 on failure + */ +int _odp_packet_udp_chksum_insert(odp_packet_t pkt) +{ + return _odp_packet_tcp_udp_chksum_insert(pkt, _ODP_IPPROTO_UDP); +} + /** * Simple packet parser */ -- cgit v1.2.3 From 948e842052488c30432003653956fdbc94626a98 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Wed, 18 Apr 2018 16:52:04 +0300 Subject: linux-dpdk: pktio: fix crash when trying to send packets after calling stop Calling odp_pktout_send() after the pktio device had been stopped with odp_pktio_stop() caused a crash. Fix this by moving rte_eth_dev_stop() to odp_pktio_close() and only stopping the input/output queues. Signed-off-by: Matias Elo --- platform/linux-dpdk/odp_packet_dpdk.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/platform/linux-dpdk/odp_packet_dpdk.c b/platform/linux-dpdk/odp_packet_dpdk.c index 7f1b93a68..4b16a5424 100644 --- a/platform/linux-dpdk/odp_packet_dpdk.c +++ b/platform/linux-dpdk/odp_packet_dpdk.c @@ -382,8 +382,9 @@ static int setup_pkt_dpdk(odp_pktio_t pktio ODP_UNUSED, return 0; } -static int close_pkt_dpdk(pktio_entry_t *pktio_entry ODP_UNUSED) +static int close_pkt_dpdk(pktio_entry_t *pktio_entry) { + rte_eth_dev_stop(pktio_entry->s.pkt_dpdk.port_id); return 0; } @@ -438,6 +439,10 @@ static int start_pkt_dpdk(pktio_entry_t *pktio_entry) struct rte_eth_txconf *txconf = NULL; uint32_t txq_flags = 0; + if (pktio_entry->s.state == PKTIO_STATE_STOPPED || + pktio_entry->s.state == PKTIO_STATE_STOP_PENDING) + rte_eth_dev_stop(pkt_dpdk->port_id); + /* DPDK doesn't support nb_rx_q/nb_tx_q being 0 */ if (!pktio_entry->s.num_in_queue) pktio_entry->s.num_in_queue = 1; @@ -545,7 +550,14 @@ static int 
start_pkt_dpdk(pktio_entry_t *pktio_entry) static int stop_pkt_dpdk(pktio_entry_t *pktio_entry) { - rte_eth_dev_stop(pktio_entry->s.pkt_dpdk.port_id); + unsigned int i; + uint16_t port_id = pktio_entry->s.pkt_dpdk.port_id; + + for (i = 0; i < pktio_entry->s.num_in_queue; i++) + rte_eth_dev_rx_queue_stop(port_id, i); + for (i = 0; i < pktio_entry->s.num_out_queue; i++) + rte_eth_dev_tx_queue_stop(port_id, i); + return 0; } -- cgit v1.2.3 From 06ad81cd20ab9bd4e7b70268f0cf560b2c9899d3 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Tue, 17 Apr 2018 08:45:18 +0300 Subject: linux-dpdk: enable running example and performance tests Use 2MB huge pages to minimize memory footprint when running multiple application instances at the same time. Signed-off-by: Matias Elo --- example/generator/generator_null_test.sh | 3 ++- example/l2fwd_simple/Makefile.am | 2 +- example/l2fwd_simple/l2fwd_simple_run.sh | 6 +++++- example/l3fwd/Makefile.am | 4 ++-- example/l3fwd/empty.pcap | Bin 0 -> 24 bytes example/l3fwd/odp_l3fwd_run.sh | 9 +++++++-- example/packet/Makefile.am | 2 +- example/packet/pktio_run.sh | 11 +++++++---- example/switch/Makefile.am | 4 ++-- example/switch/empty.pcap | Bin 0 -> 24 bytes example/switch/switch_run.sh | 10 ++++++++-- m4/odp_dpdk.m4 | 7 +++++++ platform/linux-dpdk/test/wrapper-script.sh | 19 ++++++------------- test/performance/Makefile.am | 2 +- test/performance/odp_l2fwd_run.sh | 12 ++++++++++-- test/performance/odp_pktio_ordered_run.sh | 11 +++++++---- test/performance/odp_sched_pktio_run.sh | 12 ++++++++++-- 17 files changed, 76 insertions(+), 38 deletions(-) create mode 100644 example/l3fwd/empty.pcap create mode 100644 example/switch/empty.pcap diff --git a/example/generator/generator_null_test.sh b/example/generator/generator_null_test.sh index a598ffd92..3c37a99d2 100755 --- a/example/generator/generator_null_test.sh +++ b/example/generator/generator_null_test.sh @@ -6,7 +6,8 @@ # SPDX-License-Identifier: BSD-3-Clause # -if [ -n "${ODP_PLATFORM}" -a 
"x${ODP_PLATFORM}" != "xlinux-generic" ] +if [ -n "${ODP_PLATFORM}" -a "x${ODP_PLATFORM}" != "xlinux-generic" ] && + [ -n "${ODP_PLATFORM}" -a "x${ODP_PLATFORM}" != "xlinux-dpdk" ] then echo "null pktio might be unsupported on this platform, skipping" exit 77 diff --git a/example/l2fwd_simple/Makefile.am b/example/l2fwd_simple/Makefile.am index f082335ef..7a6d1f68c 100644 --- a/example/l2fwd_simple/Makefile.am +++ b/example/l2fwd_simple/Makefile.am @@ -5,7 +5,7 @@ bin_PROGRAMS = odp_l2fwd_simple odp_l2fwd_simple_SOURCES = odp_l2fwd_simple.c if test_example -if HAVE_PCAP +if HAVE_PMD_PCAP TESTS = l2fwd_simple_run.sh endif endif diff --git a/example/l2fwd_simple/l2fwd_simple_run.sh b/example/l2fwd_simple/l2fwd_simple_run.sh index 10f4e6dc6..6ebfb9803 100755 --- a/example/l2fwd_simple/l2fwd_simple_run.sh +++ b/example/l2fwd_simple/l2fwd_simple_run.sh @@ -9,7 +9,11 @@ PCAP_IN=`find . ${TEST_DIR} $(dirname $0) -name udp64.pcap -print -quit` echo "using PCAP_IN = ${PCAP_IN}" -./odp_l2fwd_simple${EXEEXT} pcap:in=${PCAP_IN} pcap:out=pcapout.pcap \ +export ODP_PLATFORM_PARAMS="--no-pci \ +--vdev net_pcap0,rx_pcap=${PCAP_IN},tx_pcap=pcapout.pcap \ +--vdev net_pcap1,rx_pcap=${PCAP_IN},tx_pcap=pcapout.pcap" + +./odp_l2fwd_simple${EXEEXT} 0 1 \ 02:00:00:00:00:01 02:00:00:00:00:02 & sleep 1 diff --git a/example/l3fwd/Makefile.am b/example/l3fwd/Makefile.am index 9a48ea173..96530b3d4 100644 --- a/example/l3fwd/Makefile.am +++ b/example/l3fwd/Makefile.am @@ -11,9 +11,9 @@ odp_l3fwd_SOURCES = \ if test_example -if HAVE_PCAP +if HAVE_PMD_PCAP TESTS = odp_l3fwd_run.sh endif endif -EXTRA_DIST = odp_l3fwd_run.sh udp64.pcap +EXTRA_DIST = odp_l3fwd_run.sh udp64.pcap empty.pcap diff --git a/example/l3fwd/empty.pcap b/example/l3fwd/empty.pcap new file mode 100644 index 000000000..4f9600e90 Binary files /dev/null and b/example/l3fwd/empty.pcap differ diff --git a/example/l3fwd/odp_l3fwd_run.sh b/example/l3fwd/odp_l3fwd_run.sh index acffb8431..ffe32aa2f 100755 --- 
a/example/l3fwd/odp_l3fwd_run.sh +++ b/example/l3fwd/odp_l3fwd_run.sh @@ -8,11 +8,16 @@ PCAP_IN=`find . ${TEST_DIR} $(dirname $0) -name udp64.pcap -print -quit` PCAP_OUT="pcapout.pcap" +PCAP_EMPTY="empty.pcap" PCAP_IN_SIZE=`stat -c %s ${PCAP_IN}` echo "using PCAP_IN = ${PCAP_IN}, PCAP_OUT = ${PCAP_OUT}" -./odp_l3fwd${EXEEXT} -i pcap:in=${PCAP_IN},pcap:out=${PCAP_OUT} \ - -r "10.0.0.0/24,pcap:out=${PCAP_OUT}" -d 30 +export ODP_PLATFORM_PARAMS="--no-pci \ +--vdev net_pcap0,rx_pcap=${PCAP_IN},tx_pcap=/dev/null \ +--vdev net_pcap1,rx_pcap=${PCAP_EMPTY},tx_pcap=${PCAP_OUT}" + +./odp_l3fwd${EXEEXT} -i 0,1 \ + -r "10.0.0.0/24,1" -d 30 STATUS=$? PCAP_OUT_SIZE=`stat -c %s ${PCAP_OUT}` diff --git a/example/packet/Makefile.am b/example/packet/Makefile.am index 228c3506d..eea26d3e5 100644 --- a/example/packet/Makefile.am +++ b/example/packet/Makefile.am @@ -5,7 +5,7 @@ bin_PROGRAMS = odp_pktio odp_pktio_SOURCES = odp_pktio.c if test_example -if HAVE_PCAP +if HAVE_PMD_PCAP TESTS = pktio_run.sh endif endif diff --git a/example/packet/pktio_run.sh b/example/packet/pktio_run.sh index 6abaec16d..caf8649a7 100755 --- a/example/packet/pktio_run.sh +++ b/example/packet/pktio_run.sh @@ -11,8 +11,11 @@ PCAP_OUT="pcapout.pcap" PCAP_IN_SIZE=`stat -c %s ${PCAP_IN}` echo "using PCAP in=${PCAP_IN}:out=${PCAP_OUT} size %${PCAP_IN_SIZE}" +export ODP_PLATFORM_PARAMS="--no-pci \ +--vdev net_pcap0,rx_pcap=${PCAP_IN},tx_pcap=${PCAP_OUT}" + # burst mode -./odp_pktio${EXEEXT} -ipcap:in=${PCAP_IN}:out=${PCAP_OUT} -t 5 -m 0 +./odp_pktio${EXEEXT} -i 0 -t 5 -m 0 STATUS=$? PCAP_OUT_SIZE=`stat -c %s ${PCAP_OUT}` rm -f ${PCAP_OUT} @@ -24,7 +27,7 @@ fi echo "Pass -m 0: status ${STATUS}, in:${PCAP_IN_SIZE} out:${PCAP_OUT_SIZE}" # queue mode -./odp_pktio${EXEEXT} -ipcap:in=${PCAP_IN}:out=${PCAP_OUT} -t 5 -m 1 +./odp_pktio${EXEEXT} -i 0 -t 5 -m 1 STATUS=$? 
PCAP_OUT_SIZE=`stat -c %s ${PCAP_OUT}` rm -f ${PCAP_OUT} @@ -36,7 +39,7 @@ fi echo "Pass -m 1: status ${STATUS}, in:${PCAP_IN_SIZE} out:${PCAP_OUT_SIZE}" # sched/queue mode -./odp_pktio${EXEEXT} -ipcap:in=${PCAP_IN}:out=${PCAP_OUT} -t 5 -m 2 +./odp_pktio${EXEEXT} -i 0 -t 5 -m 2 STATUS=$? PCAP_OUT_SIZE=`stat -c %s ${PCAP_OUT}` rm -f ${PCAP_OUT} @@ -48,7 +51,7 @@ fi echo "Pass -m 2: status ${STATUS}, in:${PCAP_IN_SIZE} out:${PCAP_OUT_SIZE}" # cpu number option test 1 -./odp_pktio${EXEEXT} -ipcap:in=${PCAP_IN}:out=${PCAP_OUT} -t 5 -m 0 -c 1 +./odp_pktio${EXEEXT} -i 0 -t 5 -m 0 -c 1 STATUS=$? PCAP_OUT_SIZE=`stat -c %s ${PCAP_OUT}` rm -f ${PCAP_OUT} diff --git a/example/switch/Makefile.am b/example/switch/Makefile.am index 2fb21bc41..cd74cd509 100644 --- a/example/switch/Makefile.am +++ b/example/switch/Makefile.am @@ -5,8 +5,8 @@ bin_PROGRAMS = odp_switch odp_switch_SOURCES = odp_switch.c if test_example -if HAVE_PCAP +if HAVE_PMD_PCAP TESTS = switch_run.sh endif endif -EXTRA_DIST = switch_run.sh udp64.pcap +EXTRA_DIST = switch_run.sh udp64.pcap empty.pcap diff --git a/example/switch/empty.pcap b/example/switch/empty.pcap new file mode 100644 index 000000000..4f9600e90 Binary files /dev/null and b/example/switch/empty.pcap differ diff --git a/example/switch/switch_run.sh b/example/switch/switch_run.sh index 5fa1ae1cd..c14dcb22f 100755 --- a/example/switch/switch_run.sh +++ b/example/switch/switch_run.sh @@ -10,16 +10,22 @@ NUM_RX_PORT=3 RETVAL=0 PCAP_IN=`find . 
${TEST_DIR} $(dirname $0) -name udp64.pcap -print -quit` +PCAP_EMPTY="empty.pcap" echo "Switch test using PCAP_IN = ${PCAP_IN}" RX_PORTS="" +RX_VDEVS="" for i in `seq 1 $NUM_RX_PORT`; do - RX_PORTS="${RX_PORTS},pcap:out=pcapout${i}.pcap" + RX_PORTS="${RX_PORTS},${i}" + RX_VDEVS="${RX_VDEVS} --vdev net_pcap${i},rx_pcap=${PCAP_EMPTY},tx_pcap=pcapout${i}.pcap" done -./odp_switch${EXEEXT} -i pcap:in=${PCAP_IN}${RX_PORTS} -t 1 +export ODP_PLATFORM_PARAMS="--no-pci \ +--vdev net_pcap0,rx_pcap=${PCAP_IN},tx_pcap=/dev/null ${RX_VDEVS}" + +./odp_switch${EXEEXT} -i 0${RX_PORTS} -t 1 STATUS=$? if [ "$STATUS" -ne 0 ]; then echo "Error: status was: $STATUS, expected 0" diff --git a/m4/odp_dpdk.m4 b/m4/odp_dpdk.m4 index 2ef5253c8..7425fb910 100644 --- a/m4/odp_dpdk.m4 +++ b/m4/odp_dpdk.m4 @@ -21,6 +21,13 @@ AS_CASE([$cur_driver], [rte_pmd_openssl], [AS_VAR_APPEND([DPDK_LIBS], [" -lcrypto"])]) done AS_VAR_APPEND([DPDK_PMDS], [--no-whole-archive]) +have_pmd_pcap=no +if [[ -f "$1"/librte_pmd_pcap.a ]]; then + have_pmd_pcap=yes +fi +AC_CONFIG_COMMANDS_PRE([dnl +AM_CONDITIONAL([HAVE_PMD_PCAP], [test x$have_pmd_pcap = xyes]) +]) ]) # _ODP_DPDK_SET_LIBS diff --git a/platform/linux-dpdk/test/wrapper-script.sh b/platform/linux-dpdk/test/wrapper-script.sh index 23ebec202..b095b7621 100755 --- a/platform/linux-dpdk/test/wrapper-script.sh +++ b/platform/linux-dpdk/test/wrapper-script.sh @@ -1,6 +1,6 @@ #!/bin/bash -export ODP_PLATFORM_PARAMS=${ODP_PLATFORM_PARAMS:--n 4 --vdev "crypto_openssl" --vdev crypto_null} +export ODP_PLATFORM_PARAMS=${ODP_PLATFORM_PARAMS:--n 4 --vdev="crypto_openssl" --vdev="crypto_null"} # where to mount huge pages export HUGEPAGEDIR=${HUGEPAGEDIR:-/mnt/huge} # exit codes expected by automake for skipped tests @@ -52,21 +52,14 @@ if [ "$(id -u)" != "0" ]; then fi echo "Mounting hugetlbfs" -export SIZE=1G -export SIZE_KB=1048576 -export RESERVE=1 +export SIZE=2MB +export SIZE_KB=2048 +export RESERVE=512 mount_and_reserve res=$? 
if [ $res -ne 0 ]; then - export SIZE=2MB - export SIZE_KB=2048 - export RESERVE=1024 - mount_and_reserve - res=$? - if [ $res -ne 0 ]; then - echo "ERROR: can't mount hugepages with any size" - exit $res - fi + echo "ERROR: can't mount hugepages" + exit $res fi echo "running $1!" $1 diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index 811cc1c0b..a1c6c98a7 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -18,7 +18,7 @@ TESTSCRIPTS = odp_l2fwd_run.sh \ odp_sched_pktio_run.sh \ odp_scheduling_run.sh -if HAVE_PCAP +if HAVE_PMD_PCAP TESTSCRIPTS += odp_pktio_ordered_run.sh endif diff --git a/test/performance/odp_l2fwd_run.sh b/test/performance/odp_l2fwd_run.sh index 6166c8b27..af8000607 100755 --- a/test/performance/odp_l2fwd_run.sh +++ b/test/performance/odp_l2fwd_run.sh @@ -67,8 +67,12 @@ run_l2fwd() exit 1 fi + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="gen" \ +--proc-type auto --no-pci \ +--vdev net_pcap0,iface=$IF0" + # Run generator with one worker - (odp_generator${EXEEXT} --interval $FLOOD_MODE -I $IF0 \ + (odp_generator${EXEEXT} --interval $FLOOD_MODE -I 0 \ --srcip 192.168.0.1 --dstip 192.168.0.2 \ -m u -w 1 2>&1 > /dev/null) \ 2>&1 > /dev/null & @@ -84,8 +88,12 @@ run_l2fwd() fi LOG=odp_l2fwd_tmp.log + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="l2fwd" \ +--proc-type auto --no-pci --vdev net_pcap1,iface=$IF1 \ +--vdev net_pcap2,iface=$IF2" + # Max 2 workers - $STDBUF odp_l2fwd${EXEEXT} -i $IF1,$IF2 -m 0 -t 30 -c 2 | tee $LOG + $STDBUF odp_l2fwd${EXEEXT} -i 0,1 -m 0 -t 30 -c 2 | tee $LOG ret=$? kill ${GEN_PID} diff --git a/test/performance/odp_pktio_ordered_run.sh b/test/performance/odp_pktio_ordered_run.sh index d7f238120..a4a7cb347 100755 --- a/test/performance/odp_pktio_ordered_run.sh +++ b/test/performance/odp_pktio_ordered_run.sh @@ -11,7 +11,7 @@ TEST_DIR="${TEST_DIR:-$(dirname $0)}" DURATION=5 LOG=odp_pktio_ordered.log LOOPS=100000000 -PASS_PPS=5000 +PASS_PPS=350 PCAP_IN=`find . 
${TEST_SRC_DIR} $(dirname $0) -name udp64.pcap -print -quit` PCAP_OUT=/dev/null @@ -28,11 +28,14 @@ else STDBUF= fi +export ODP_PLATFORM_PARAMS="--no-pci \ +--vdev net_pcap0,rx_pcap=${PCAP_IN},tx_pcap=${PCAP_OUT} \ +--vdev net_pcap1,rx_pcap=${PCAP_IN},tx_pcap=${PCAP_OUT}" + $STDBUF ${TEST_DIR}/odp_pktio_ordered${EXEEXT} \ - -i pcap:in=${PCAP_IN}:loops=$LOOPS,pcap:out=${PCAP_OUT} \ + -i 0,1 \ -t $DURATION | tee $LOG - -ret=${PIPESTATUS[0]} +ret=$? if [ $ret -ne 0 ]; then echo "FAIL: no odp_pktio_ordered${EXEEXT}" diff --git a/test/performance/odp_sched_pktio_run.sh b/test/performance/odp_sched_pktio_run.sh index db14fb598..566636126 100755 --- a/test/performance/odp_sched_pktio_run.sh +++ b/test/performance/odp_sched_pktio_run.sh @@ -55,14 +55,22 @@ run_sched_pktio() fi # 1 worker - odp_sched_pktio${EXEEXT} -i $IF1,$IF2 -c 1 -s & + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="sched" \ +--proc-type auto --no-pci --vdev net_pcap1,iface=$IF1 \ +--vdev net_pcap2,iface=$IF2" + + odp_sched_pktio${EXEEXT} -i 0,1 -c 1 -s & TEST_PID=$! sleep 1 # Run generator with one worker - (odp_generator${EXEEXT} --interval $FLOOD_MODE -I $IF0 \ + export ODP_PLATFORM_PARAMS="-m 512 --file-prefix="gen" \ +--proc-type auto --no-pci \ +--vdev net_pcap0,iface=$IF0" + + (odp_generator${EXEEXT} --interval $FLOOD_MODE -I 0 \ --srcip 192.168.0.1 --dstip 192.168.0.2 \ -m u -w 1 2>&1 > /dev/null) \ 2>&1 > /dev/null & -- cgit v1.2.3 From 6aab1afe53f2f337d87c1ad6f713be29afa3531e Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Fri, 27 Apr 2018 12:17:10 +0300 Subject: README.DPDK: minor updates The errata is no longer valid and the multi-buffer crypto library has been moved to GitHub. 
Signed-off-by: Matias Elo --- platform/linux-dpdk/README | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/platform/linux-dpdk/README b/platform/linux-dpdk/README index 71cced161..7678525f6 100644 --- a/platform/linux-dpdk/README +++ b/platform/linux-dpdk/README @@ -4,13 +4,6 @@ All rights reserved. SPDX-License-Identifier: BSD-3-Clause -ERRATA: -- DPDK 16.07 and earlier supports pool names with RTE_MEMZONE_NAMESIZE - characters (including terminating NULL), which is 6 characters less than - ODP_POOL_NAME_LEN. Names reaching into this interval might collide if the - first 25 characters are not unique. - - 1. Rationale ================================================= @@ -270,8 +263,8 @@ http://dpdk.org/doc/guides/cryptodevs/index.html. To build odp-dpdk with dpdk virtual crypto devices, we need to build supporting intel multi-buffer library prior to dpdk build. -get the Intel multi-buffer crypto from, -https://downloadcenter.intel.com/download/22972 +Get the Intel multi-buffer crypto library from, +https://github.com/intel/intel-ipsec-mb and follow the README from the repo on how to build the library. building dpdk: -- cgit v1.2.3 From 0aadbcb71229bdd1c90db53e283555a70599a5bd Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Fri, 27 Apr 2018 13:54:44 +0300 Subject: linux-dpdk: make usage of ODP_PLATFORM_PARAMS optional The number of memory channels is no longer a mandatory DPDK parameter, so make use of ODP_PLATFORM_PARAMS optional. Signed-off-by: Matias Elo --- .travis.yml | 4 ++-- platform/linux-dpdk/README | 15 +++++++------- platform/linux-dpdk/odp_init.c | 23 ++-------------------- .../test/validation/api/pktio/pktio_run.sh | 2 +- platform/linux-dpdk/test/wrapper-script.sh | 2 +- 5 files changed, 13 insertions(+), 33 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1be920d6a..5bc119644 100644 --- a/.travis.yml +++ b/.travis.yml @@ -257,7 +257,7 @@ script: - echo "Dynamic link.." 
- ${CC} ${CFLAGS} ${OLDPWD}/example/hello/odp_hello.c -o odp_hello_inst_dynamic `PKG_CONFIG_PATH=${HOME}/odp-install/lib/pkgconfig:${PKG_CONFIG_PATH} pkg-config --cflags --libs libodp-linux` - if [ -z "$CROSS_ARCH" ] ; then - sudo ODP_PLATFORM_PARAMS="-n 2" LD_LIBRARY_PATH="${HOME}/odp-install/lib:$LD_LIBRARY_PATH" ./odp_hello_inst_dynamic ; + sudo LD_LIBRARY_PATH="${HOME}/odp-install/lib:$LD_LIBRARY_PATH" ./odp_hello_inst_dynamic ; fi - | # it is not possible to do static linking if we only have shared DPDK library. Compiler complains about missing -ldpdk @@ -265,7 +265,7 @@ script: echo "Static link.." ${CC} ${CFLAGS} ${OLDPWD}/example/hello/odp_hello.c -o odp_hello_inst_static `PKG_CONFIG_PATH=${HOME}/odp-install/lib/pkgconfig:${PKG_CONFIG_PATH} pkg-config --cflags --libs libodp-linux --static` -static || exit 1 if [ -z "$CROSS_ARCH" ] ; then - sudo ODP_PLATFORM_PARAMS="-n 2" ./odp_hello_inst_static; + sudo ./odp_hello_inst_static; fi fi - popd diff --git a/platform/linux-dpdk/README b/platform/linux-dpdk/README index 7678525f6..c9e7af78e 100644 --- a/platform/linux-dpdk/README +++ b/platform/linux-dpdk/README @@ -173,16 +173,15 @@ To restore the NIC's back to kernel use something like this: 5. Running ODP apps ================================================= -ODP-DPDK applications need to be run as root. You also need to supply the DPDK -command line parameters either as a null-terminated array of char's to -odp_global_init()'s platform_params parameter: +ODP-DPDK applications need to be run as root. You may also need to +supply DPDK command line parameters either as a null-terminated array of +char's to odp_global_init()'s platform_params parameter: - odp_global_init([params], "-n 4"); + odp_global_init([params], "--no-huge"); Or, if it's NULL the platform tries to read the ODP_PLATFORM_PARAMS environment variable. -You need to pass at least "-n [1..4]" to specify the number of memory channels. 
The coremask (-c) is calculated by ODP-DPDK based on the process affinity at startup. You can influence that with 'taskset'. DPDK init changes the affinity of the calling thread, so after it returns the original affinity is restored. @@ -194,7 +193,7 @@ like proper DPDK threads. Exaple how to run an ODP-DPDK L2 forwarding application: - sudo ODP_PLATFORM_PARAMS="-n 4" ./odp_l2fwd -i 0,1 -c 2 + sudo ./odp_l2fwd -i 0,1 -c 2 -i 0,1 - interface numbers -c 2 - number of worker cpus @@ -227,7 +226,7 @@ CONFIG_RTE_LIBRTE_PMD_PCAP=y mount -t hugetlbfs none /mnt/huge Finally give l2fwd fake devices: - ./l2fwd -c '0xf' -n 4 --vdev "eth_pcap0,iface=veth2-1" --vdev="eth_pcap1,iface=veth2-3" -- -p 3 + ./l2fwd -c '0xf' --vdev "eth_pcap0,iface=veth2-1" --vdev="eth_pcap1,iface=veth2-3" -- -p 3 7. Upgrading ODP-DPDK to newer ODP API level ================================================= @@ -287,5 +286,5 @@ AESNI_MULTI_BUFFER_LIB_PATH=/path-to/Intel-multi-buffer-crypto/ \ when building odp-dpdk application, add the multi-buffer crypto library path to make file. Before running the application, export ODP_PLATFORM_PARAMS with corresponding crypto vdev's. 
-ex: ODP_PLATFORM_PARAMS="-n 4 --vdev cryptodev_aesni_mb_pmd,max_nb_sessions=32 \ +ex: ODP_PLATFORM_PARAMS="--vdev cryptodev_aesni_mb_pmd,max_nb_sessions=32 \ --vdev cryptodev_null_pmd,max_nb_sessions=32" diff --git a/platform/linux-dpdk/odp_init.c b/platform/linux-dpdk/odp_init.c index 7a51ff23b..a7bc0a843 100644 --- a/platform/linux-dpdk/odp_init.c +++ b/platform/linux-dpdk/odp_init.c @@ -50,23 +50,6 @@ void refer_constructors(void) } #endif -static void print_dpdk_env_help(void) -{ - char prgname[] = "odpdpdk"; - char help_str[] = "--help"; - char *dpdk_argv[] = {prgname, help_str}; - int dpdk_argc = 2; - - ODP_ERR("Neither (char *)platform_params were provided to " - "odp_init_global(),\n"); - ODP_ERR("nor ODP_PLATFORM_PARAMS environment variable were " - "specified.\n"); - ODP_ERR("A string of DPDK command line arguments should be provided"); - ODP_ERR("Example: export ODP_PLATFORM_PARAMS=\"-n 4 --no-huge\"\n"); - ODP_ERR("Note: -c argument substitutes automatically from odp coremask\n"); - rte_eal_init(dpdk_argc, dpdk_argv); -} - static int odp_init_dpdk(const char *cmdline) { char **dpdk_argv; @@ -80,10 +63,8 @@ static int odp_init_dpdk(const char *cmdline) if (cmdline == NULL) { cmdline = getenv("ODP_PLATFORM_PARAMS"); - if (cmdline == NULL) { - print_dpdk_env_help(); - return -1; - } + if (cmdline == NULL) + cmdline = ""; } CPU_ZERO(&original_cpuset); diff --git a/platform/linux-dpdk/test/validation/api/pktio/pktio_run.sh b/platform/linux-dpdk/test/validation/api/pktio/pktio_run.sh index 6007195e1..d051fa801 100755 --- a/platform/linux-dpdk/test/validation/api/pktio/pktio_run.sh +++ b/platform/linux-dpdk/test/validation/api/pktio/pktio_run.sh @@ -85,7 +85,7 @@ run() echo "Failed to setup test environment, skipping test." 
exit $TEST_SKIPPED fi - export ODP_PLATFORM_PARAMS="-n 4 --no-pci --vdev net_pcap0,iface=$IF0 --vdev net_pcap1,iface=$IF1" + export ODP_PLATFORM_PARAMS="--no-pci --vdev net_pcap0,iface=$IF0 --vdev net_pcap1,iface=$IF1" export ODP_PKTIO_IF0=0 export ODP_PKTIO_IF1=1 fi diff --git a/platform/linux-dpdk/test/wrapper-script.sh b/platform/linux-dpdk/test/wrapper-script.sh index b095b7621..b092e1064 100755 --- a/platform/linux-dpdk/test/wrapper-script.sh +++ b/platform/linux-dpdk/test/wrapper-script.sh @@ -1,6 +1,6 @@ #!/bin/bash -export ODP_PLATFORM_PARAMS=${ODP_PLATFORM_PARAMS:--n 4 --vdev="crypto_openssl" --vdev="crypto_null"} +export ODP_PLATFORM_PARAMS=${ODP_PLATFORM_PARAMS:---vdev="crypto_openssl" --vdev="crypto_null"} # where to mount huge pages export HUGEPAGEDIR=${HUGEPAGEDIR:-/mnt/huge} # exit codes expected by automake for skipped tests -- cgit v1.2.3 From 19c1fb8b49e61de90ae4bab82d89e1dbcc3fa577 Mon Sep 17 00:00:00 2001 From: Matias Elo Date: Fri, 27 Apr 2018 14:21:38 +0300 Subject: linux-dpdk: clean header includes Remove odp_packet_dpdk.h and include only the required dpdk headers. Also, make sure rte_config.h is included before other dpdk headers. 
Signed-off-by: Matias Elo --- platform/linux-dpdk/Makefile.am | 1 - .../include/odp/api/plat/std_clib_inlines.h | 2 + platform/linux-dpdk/include/odp_packet_dpdk.h | 50 ---------------------- .../linux-dpdk/include/odp_packet_io_internal.h | 1 - platform/linux-dpdk/odp_crypto.c | 2 + platform/linux-dpdk/odp_init.c | 6 ++- platform/linux-dpdk/odp_packet_dpdk.c | 7 ++- platform/linux-dpdk/odp_pool.c | 3 +- platform/linux-dpdk/odp_time.c | 4 +- 9 files changed, 19 insertions(+), 57 deletions(-) delete mode 100644 platform/linux-dpdk/include/odp_packet_dpdk.h diff --git a/platform/linux-dpdk/Makefile.am b/platform/linux-dpdk/Makefile.am index 7a5d79352..5e9f40de5 100644 --- a/platform/linux-dpdk/Makefile.am +++ b/platform/linux-dpdk/Makefile.am @@ -98,7 +98,6 @@ noinst_HEADERS = \ ${top_srcdir}/platform/linux-generic/include/odp_libconfig_internal.h \ ${top_srcdir}/platform/linux-generic/include/odp_llqueue.h \ ${top_srcdir}/platform/linux-generic/include/odp_macros_internal.h \ - include/odp_packet_dpdk.h \ include/odp_packet_internal.h \ ${top_srcdir}/platform/linux-generic/include/odp_name_table_internal.h \ include/odp_packet_io_internal.h \ diff --git a/platform/linux-dpdk/include/odp/api/plat/std_clib_inlines.h b/platform/linux-dpdk/include/odp/api/plat/std_clib_inlines.h index cf556e042..14072ba52 100644 --- a/platform/linux-dpdk/include/odp/api/plat/std_clib_inlines.h +++ b/platform/linux-dpdk/include/odp/api/plat/std_clib_inlines.h @@ -12,6 +12,8 @@ extern "C" { #endif #include + +#include #include _ODP_INLINE void *odp_memcpy(void *dst, const void *src, size_t num) diff --git a/platform/linux-dpdk/include/odp_packet_dpdk.h b/platform/linux-dpdk/include/odp_packet_dpdk.h deleted file mode 100644 index a2c9132b9..000000000 --- a/platform/linux-dpdk/include/odp_packet_dpdk.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2013-2018, Linaro Limited - * All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - */ - -#ifndef ODP_PACKET_DPDK_H -#define ODP_PACKET_DPDK_H - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#endif diff --git a/platform/linux-dpdk/include/odp_packet_io_internal.h b/platform/linux-dpdk/include/odp_packet_io_internal.h index 963ea7ff9..6160cfb66 100644 --- a/platform/linux-dpdk/include/odp_packet_io_internal.h +++ b/platform/linux-dpdk/include/odp_packet_io_internal.h @@ -31,7 +31,6 @@ extern "C" { #define PKTIO_MAX_QUEUES 64 #include -#include #include #define PKTIO_NAME_LEN 256 diff --git a/platform/linux-dpdk/odp_crypto.c b/platform/linux-dpdk/odp_crypto.c index 6b323b5e2..8be6bc146 100644 --- a/platform/linux-dpdk/odp_crypto.c +++ b/platform/linux-dpdk/odp_crypto.c @@ -21,6 +21,8 @@ #include #include #include + +#include #include #include diff --git a/platform/linux-dpdk/odp_init.c b/platform/linux-dpdk/odp_init.c index a7bc0a843..2729d6a6f 100644 --- a/platform/linux-dpdk/odp_init.c +++ b/platform/linux-dpdk/odp_init.c @@ -7,7 +7,6 @@ #include "config.h" #include -#include #include #include #include @@ -25,6 +24,11 @@ #include #include #include +#include + +#include +#include +#include #define MEMPOOL_OPS(hdl) extern void mp_hdlr_init_##hdl(void) MEMPOOL_OPS(ops_mp_mc); diff --git a/platform/linux-dpdk/odp_packet_dpdk.c b/platform/linux-dpdk/odp_packet_dpdk.c index 4b16a5424..eae2da1dc 100644 --- a/platform/linux-dpdk/odp_packet_dpdk.c +++ b/platform/linux-dpdk/odp_packet_dpdk.c @@ -32,12 +32,17 @@ #include #include #include -#include #include #include #include +#include +#include +#include +#include +#include + /* DPDK poll mode drivers requiring minimum RX burst size 
DPDK_MIN_RX_BURST */ #define IXGBE_DRV_NAME "net_ixgbe" #define I40E_DRV_NAME "net_i40e" diff --git a/platform/linux-dpdk/odp_pool.c b/platform/linux-dpdk/odp_pool.c index 463d7aedb..d5b3195a9 100644 --- a/platform/linux-dpdk/odp_pool.c +++ b/platform/linux-dpdk/odp_pool.c @@ -29,8 +29,7 @@ #include -/* for DPDK */ -#include +#include #include #ifdef POOL_USE_TICKETLOCK diff --git a/platform/linux-dpdk/odp_time.c b/platform/linux-dpdk/odp_time.c index 3cbb771d8..e4c82b756 100644 --- a/platform/linux-dpdk/odp_time.c +++ b/platform/linux-dpdk/odp_time.c @@ -11,10 +11,12 @@ #include #include #include -#include #include #include +#include +#include + typedef uint64_t (*time_to_ns_fn) (odp_time_t time); typedef odp_time_t (*time_cur_fn)(void); typedef odp_time_t (*time_from_ns_fn) (uint64_t ns); -- cgit v1.2.3