diff --git a/app/boards/intel_adsp_ace30_ptl.conf b/app/boards/intel_adsp_ace30_ptl.conf index 12aa78162c06..8f88f4ea82f1 100644 --- a/app/boards/intel_adsp_ace30_ptl.conf +++ b/app/boards/intel_adsp_ace30_ptl.conf @@ -74,4 +74,4 @@ CONFIG_DYNAMIC_THREAD_ALLOC=y CONFIG_DYNAMIC_THREAD_PREFER_ALLOC=y CONFIG_SOF_STACK_SIZE=8192 CONFIG_SOF_USERSPACE_PROXY=y -CONFIG_MAX_THREAD_BYTES=3 +CONFIG_MAX_THREAD_BYTES=4 diff --git a/app/debug_overlay.conf b/app/debug_overlay.conf index 914210d4184d..976430b90ed4 100644 --- a/app/debug_overlay.conf +++ b/app/debug_overlay.conf @@ -3,6 +3,7 @@ CONFIG_ASSERT=y CONFIG_ZTEST_NO_YIELD=n CONFIG_ZTEST_SUMMARY=n +CONFIG_ZTEST_TEST_DELAY_MS=1 CONFIG_SOF_BOOT_TEST_ALLOWED=y CONFIG_TEST_EXTRA_STACK_SIZE=7168 diff --git a/app/overlays/ptl/userspace_overlay.conf b/app/overlays/ptl/userspace_overlay.conf index 331e2b4280d6..555e27ba7189 100644 --- a/app/overlays/ptl/userspace_overlay.conf +++ b/app/overlays/ptl/userspace_overlay.conf @@ -4,7 +4,8 @@ CONFIG_MAX_THREAD_BYTES=4 CONFIG_INIT_STACKS=n CONFIG_THREAD_STACK_INFO=n -CONFIG_DYNAMIC_THREAD_PREFER_ALLOC=y +CONFIG_DYNAMIC_THREAD_PREFER_POOL=y +CONFIG_DYNAMIC_THREAD_PREFER_ALLOC=n CONFIG_DYNAMIC_THREAD=y CONFIG_DYNAMIC_THREAD_POOL_SIZE=4 CONFIG_DYNAMIC_THREAD_ALLOC=n diff --git a/zephyr/test/CMakeLists.txt b/zephyr/test/CMakeLists.txt index c5b66c83bbaa..5e4c06da864e 100644 --- a/zephyr/test/CMakeLists.txt +++ b/zephyr/test/CMakeLists.txt @@ -4,6 +4,7 @@ if(CONFIG_SOF_BOOT_TEST) ) zephyr_library_sources_ifdef(CONFIG_USERSPACE userspace/ksem.c + userspace/test_perf.c ) endif() diff --git a/zephyr/test/userspace/test_perf.c b/zephyr/test/userspace/test_perf.c new file mode 100644 index 000000000000..7b174d7ba125 --- /dev/null +++ b/zephyr/test/userspace/test_perf.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright(c) 2026 Intel Corporation. */ + +/* Test kernel vs. user-space performance. 
*/
+
+#include
+#include
+
+#include
+#include
+#include
+
+LOG_MODULE_DECLARE(sof_boot_test, LOG_LEVEL_DBG);
+
+/* Iteration counts of the two loops in load_add() */
+#define N_ADD (1000 * 1000 * 100)
+#define N_DIV 10000
+
+/*
+ * Scalar integer workload: N_ADD additions followed by N_DIV
+ * divide/multiply steps on an unsigned accumulator.
+ *
+ * Returns the accumulator truncated to int. The value carries no meaning,
+ * it is only returned (and logged by the caller) so that the computation
+ * has an observable result.
+ */
+static int load_add(void)
+{
+	unsigned long r = 0;
+
+	for (unsigned int i = 0; i < N_ADD; i++)
+		r += i;
+
+	/* The divisor is i % 10 + 1, i.e. always in 1..10, never zero */
+	for (unsigned int i = 1; i <= N_DIV; i++)
+		r = r / (i % 10 + 1) * (i % 10 + 3);
+
+	return (int)r;
+}
+
+#ifdef __XCC__
+#include
+
+/*
+ * Compute the dot product of two vectors using HiFi4 SIMD instructions.
+ *
+ * a, b:   input vectors of 16-bit samples; they are read 64 bits at a time
+ *         with AE_L16X4_IP, so callers must pass 8-byte aligned buffers
+ * length: number of elements; only complete groups of four are processed
+ *         (length / 4 iterations), a remainder is ignored
+ *
+ * Returns the 64-bit accumulator converted to 32 bits.
+ */
+static int32_t dot_product_hifi4(const int16_t *a, const int16_t *b, int length)
+{
+	ae_int64 acc = AE_ZERO64();		/* 1. Initialize accumulator to zero */
+	ae_int16x4 *pa = (ae_int16x4 *)a;	/* Pointer to vector a */
+	ae_int16x4 *pb = (ae_int16x4 *)b;	/* Pointer to vector b */
+
+	for (int i = 0; i < length / 4; i++) {
+		ae_int16x4 va, vb;
+
+		AE_L16X4_IP(va, pa, 8);		/* 2. Load 4x 16-bit values from a */
+		AE_L16X4_IP(vb, pb, 8);		/* 3. Load 4x 16-bit values from b */
+		AE_MULAAAAQ16(acc, va, vb);	/* 4. Multiply-accumulate (4 MACs in parallel) */
+	}
+
+	return AE_TRUNCA32F64S(acc, 0);		/* 5. Convert 64-bit result to 32-bit */
+}
+
+#define VECTOR_LENGTH 100
+
+/*
+ * SIMD workload: fill two VECTOR_LENGTH-element vectors with a pattern that
+ * depends on the round number and accumulate their dot product, 1000 times.
+ *
+ * Returns the wrapped sum of all dot products. As with load_add() the value
+ * is only there to give the computation an observable result.
+ */
+static int load_hifi4(void)
+{
+	/*
+	 * Signed 16-bit samples to match dot_product_hifi4()'s parameters,
+	 * 8-byte aligned for its 64-bit vector loads.
+	 */
+	int16_t a[VECTOR_LENGTH] __aligned(8);
+	int16_t b[VECTOR_LENGTH] __aligned(8);
+	/* Unsigned accumulator: wrap-around is well defined, signed overflow is not */
+	uint32_t sum = 0;
+
+	for (unsigned int j = 0; j < 1000; j++) {
+		for (unsigned int i = 0; i < VECTOR_LENGTH; i++) {
+			a[i] = (int16_t)(i * 3 - 47 * j);
+			b[i] = (int16_t)(411 * j - i * 5);
+		}
+
+		sum += (uint32_t)dot_product_hifi4(a, b, VECTOR_LENGTH);
+	}
+
+	return (int)sum;
+}
+#endif /* __XCC__ */
+
+/* A workload: takes no arguments, returns a value derived from its computation */
+typedef int (*load_fn_t)(void);
+
+/* Workloads that are timed in both a user-space and a kernel thread */
+static const load_fn_t load_fn[] = {
+	load_add,
+#ifdef __XCC__
+	load_hifi4,
+#endif
+};
+
+/*
+ * Run one workload in a worker thread and measure how long it takes.
+ *
+ * fn:    workload to run; its address is handed to the worker as the event bits
+ * event: event object the worker thread is waiting on
+ * sem:   semaphore the worker gives once the workload has finished
+ *
+ * Fails the test if the worker does not signal completion within 200 ms.
+ * Returns the elapsed time in k_uptime_ticks() units, truncated to
+ * unsigned int.
+ */
+static unsigned int test_perf(load_fn_t fn, struct k_event *event,
+			      struct k_sem *sem)
+{
+	uint64_t start = k_uptime_ticks();
+
+	k_event_set(event, (uint32_t)(uintptr_t)fn);
+
+	int ret = k_sem_take(sem, K_MSEC(200));
+	uint64_t end = k_uptime_ticks();
+
+	zassert_ok(ret);
+
+	return (unsigned int)(end - start);
+}
+
+/*
+ * Worker thread entry, shared by the kernel and the user-space thread.
+ *
+ * p1: struct k_event * on which workload addresses are posted
+ * p2: struct k_sem * given after each workload has run
+ * p3: unused
+ *
+ * Never returns; the test aborts the thread when it is done.
+ */
+static void thread_fn(void *p1, void *p2, void *p3)
+{
+	struct k_event *event = p1;
+	struct k_sem *sem = p2;
+	bool first = true;
+
+	for (;;) {
+		/*
+		 * Do not reset the event on the very first wait: it may have
+		 * been posted before this thread got to run. Later waits reset
+		 * it to drop the bits of the previous request.
+		 */
+		uint32_t bits = k_event_wait(event, 0xffffffff, !first, K_FOREVER);
+		load_fn_t fn = (load_fn_t)(uintptr_t)bits;
+		int ret;
+
+		first = false;
+
+		/*
+		 * Run the workload outside of the log call, so that it is
+		 * still executed if the log message is compiled out.
+		 */
+		ret = fn();
+		LOG_INF("fn %p ret %d", (void *)fn, ret);
+
+		k_sem_give(sem);
+	}
+}
+
+#define STACK_SIZE 4096
+
+/*
+ * Time every entry of load_fn[] once in a user-space thread and once in a
+ * kernel thread and log both results. Both threads run thread_fn() and share
+ * one completion semaphore, each has its own event object.
+ */
+ZTEST(sof_boot, test_perf)
+{
+	/* Synchronization objects allocated on original uncached heap */
+	struct k_event *u_event = k_object_alloc(K_OBJ_EVENT);
+	struct k_event *k_event = k_object_alloc(K_OBJ_EVENT);
+
+	zassert_not_null(u_event);
+	zassert_not_null(k_event);
+
+	k_event_init(u_event);
+	k_event_init(k_event);
+
+	struct k_sem *sem = k_object_alloc(K_OBJ_SEM);
+
+	zassert_not_null(sem);
+	k_sem_init(sem, 0, 1);
+
+	/* Allocate kernel stack and thread and start it */
+	struct k_thread *k_thread = k_object_alloc(K_OBJ_THREAD);
+
+	zassert_not_null(k_thread);
+	/* Important: Xtensa thread initialization code checks certain fields for 0 */
+	memset(&k_thread->arch, 0, sizeof(k_thread->arch));
+
+	k_thread_stack_t *k_stack = k_thread_stack_alloc(STACK_SIZE, 0);
+
+	zassert_not_null(k_stack);
+
+	struct k_thread *pk_thread = k_thread_create(k_thread, k_stack, STACK_SIZE, thread_fn,
+						     k_event, sem, NULL, 0, 0, K_FOREVER);
+
+	zassert_not_null(pk_thread);
+	k_thread_start(pk_thread);
+
+	/* Allocate userspace stack and thread and start it */
+	struct k_thread *u_thread = k_object_alloc(K_OBJ_THREAD);
+
+	zassert_not_null(u_thread);
+	memset(&u_thread->arch, 0, sizeof(u_thread->arch));
+
+	k_thread_stack_t *u_stack = k_thread_stack_alloc(STACK_SIZE, K_USER);
+
+	zassert_not_null(u_stack);
+
+	struct k_thread *pu_thread = k_thread_create(u_thread, u_stack, STACK_SIZE, thread_fn,
+						     u_event, sem, NULL, 0, K_USER, K_FOREVER);
+
+	zassert_not_null(pu_thread);
+	/* The user thread needs explicit access to the objects it waits on and gives */
+	k_thread_access_grant(pu_thread, u_event, sem);
+	k_thread_start(pu_thread);
+
+	for (unsigned int i = 0; i < ARRAY_SIZE(load_fn); i++) {
+		unsigned int ticks;
+
+		/*
+		 * Measure outside of the log calls, so that the workloads and
+		 * the assertion in test_perf() still run if the log messages
+		 * are compiled out.
+		 */
+		ticks = test_perf(load_fn[i], u_event, sem);
+		LOG_INF("user: fn %p took %u", (void *)load_fn[i], ticks);
+
+		ticks = test_perf(load_fn[i], k_event, sem);
+		LOG_INF("kernel: fn %p took %u", (void *)load_fn[i], ticks);
+	}
+
+	/*
+	 * Stop the workers first, then release their stacks and thread
+	 * objects; the synchronization objects go last.
+	 */
+	k_thread_abort(pu_thread);
+	k_thread_stack_free(u_stack);
+	k_object_free(u_thread);
+	k_thread_abort(pk_thread);
+	k_thread_stack_free(k_stack);
+	k_object_free(k_thread);
+	k_object_free(sem);
+	k_object_free(u_event);
+	k_object_free(k_event);
+}