Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/boards/intel_adsp_ace30_ptl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,4 @@ CONFIG_DYNAMIC_THREAD_ALLOC=y
CONFIG_DYNAMIC_THREAD_PREFER_ALLOC=y
CONFIG_SOF_STACK_SIZE=8192
CONFIG_SOF_USERSPACE_PROXY=y
CONFIG_MAX_THREAD_BYTES=3
CONFIG_MAX_THREAD_BYTES=4
1 change: 1 addition & 0 deletions app/debug_overlay.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ CONFIG_ASSERT=y

CONFIG_ZTEST_NO_YIELD=n
CONFIG_ZTEST_SUMMARY=n
CONFIG_ZTEST_TEST_DELAY_MS=1
CONFIG_SOF_BOOT_TEST_ALLOWED=y
CONFIG_TEST_EXTRA_STACK_SIZE=7168

Expand Down
3 changes: 2 additions & 1 deletion app/overlays/ptl/userspace_overlay.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ CONFIG_MAX_THREAD_BYTES=4

CONFIG_INIT_STACKS=n
CONFIG_THREAD_STACK_INFO=n
CONFIG_DYNAMIC_THREAD_PREFER_ALLOC=y
CONFIG_DYNAMIC_THREAD_PREFER_POOL=y
CONFIG_DYNAMIC_THREAD_PREFER_ALLOC=n
CONFIG_DYNAMIC_THREAD=y
CONFIG_DYNAMIC_THREAD_POOL_SIZE=4
CONFIG_DYNAMIC_THREAD_ALLOC=n
Expand Down
1 change: 1 addition & 0 deletions zephyr/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ if(CONFIG_SOF_BOOT_TEST)
)
zephyr_library_sources_ifdef(CONFIG_USERSPACE
userspace/ksem.c
userspace/test_perf.c
)
endif()

Expand Down
172 changes: 172 additions & 0 deletions zephyr/test/userspace/test_perf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// SPDX-License-Identifier: BSD-3-Clause
/* Copyright(c) 2026 Intel Corporation. */

/* Test kernel vs. user-space performance. */

#include <sof/boot_test.h>
#include <rtos/alloc.h>

#include <zephyr/kernel.h>
#include <zephyr/ztest.h>
#include <zephyr/logging/log.h>

LOG_MODULE_DECLARE(sof_boot_test, LOG_LEVEL_DBG);

/* Iteration counts of the two phases of load_add() */
#define N_ADD (1000 * 1000 * 100)
#define N_DIV 10000

/*
 * Scalar integer load.
 *
 * Phase one adds every index below N_ADD to an accumulator, phase two runs
 * N_DIV rounds of a divide followed by a multiply, with the divisor and the
 * factor derived from the last decimal digit of the round number.
 *
 * Returns the accumulator truncated to int, so that the caller can consume
 * the result of the computation.
 */
static int load_add(void)
{
	unsigned long acc = 0;
	unsigned int k;

	for (k = 0; k < N_ADD; k++) {
		acc += k;
	}

	for (k = 1; k <= N_DIV; k++) {
		const unsigned int digit = k % 10;

		/* Divide first, then multiply: evaluation order as in a / b * c */
		acc = acc / (digit + 1) * (digit + 3);
	}

	return (int)acc;
}

#ifdef __XCC__
#include <xtensa/tie/xt_hifi4.h>

/* Compute dot product of two vectors using HiFi4 SIMD instructions */
static int32_t dot_product_hifi4(const int16_t *a, const int16_t *b, int length)
{
ae_int64 acc = AE_ZERO64(); /* 1. Initialize accumulator to zero */
ae_int16x4 *pa = (ae_int16x4 *)a; /* Pointer to vector a */
ae_int16x4 *pb = (ae_int16x4 *)b; /* Pointer to vector b */

for (int i = 0; i < length / 4; i++) {
ae_int16x4 va, vb;

AE_L16X4_IP(va, pa, 8); /* 2. Load 4x 16-bit values from a */
AE_L16X4_IP(vb, pb, 8); /* 3. Load 4x 16-bit values from b */
AE_MULAAAAQ16(acc, va, vb); /* 4. Multiply-accumulate (4 MACs in parallel) */
}

return AE_TRUNCA32F64S(acc, 0); /* 5. Convert 64-bit result to 32-bit */
}

#define VECTOR_LENGTH 100

/*
 * HiFi4 SIMD load: 1000 rounds of filling two VECTOR_LENGTH-element vectors
 * with round-dependent values and computing their dot product.
 *
 * Returns the sum of all dot products, wrapped to 32 bits, so that the
 * caller can consume the result of the computation.
 */
static int load_hifi4(void)
{
	/*
	 * dot_product_hifi4() reads both vectors through ae_int16x4 pointers
	 * with 64-bit AE_L16X4_IP loads, so the buffers need 8-byte alignment,
	 * which a plain uint16_t array on the stack does not guarantee.
	 */
	uint16_t a[VECTOR_LENGTH] __attribute__((aligned(8)));
	uint16_t b[VECTOR_LENGTH] __attribute__((aligned(8)));
	/* Unsigned, so that the running sum wraps instead of overflowing an int */
	uint32_t sum = 0;

	for (unsigned int j = 0; j < 1000; j++) {
		for (unsigned int i = 0; i < VECTOR_LENGTH; i++) {
			/* Unsigned arithmetic, deliberately truncated to 16 bits */
			a[i] = (uint16_t)(i * 3 - 47 * j);
			b[i] = (uint16_t)(411 * j - i * 5);
		}

		/* The bit patterns are handed over as signed 16-bit samples */
		sum += (uint32_t)dot_product_hifi4((const int16_t *)a, (const int16_t *)b,
						   VECTOR_LENGTH);
	}

	return (int)sum;
}
#endif /* __XCC__ */

/* Signature shared by all load functions: no arguments, int result */
typedef int (*load_fn_t)(void);

/*
 * Load functions to be measured. The table is only used inside this file and
 * never modified, hence static and const. The HiFi4 entry is only present
 * when building with a toolchain that defines __XCC__.
 */
static const load_fn_t load_fn[] = {
	load_add,
#ifdef __XCC__
	load_hifi4,
#endif
};

/*
 * Run one load function in a worker thread and measure how long it takes.
 *
 * fn:    load function to run; its address is posted as the event bit mask
 * event: event object the selected worker thread is waiting on
 * sem:   semaphore the worker gives once the load function has returned
 *
 * Asserts that the semaphore is obtained within 200 ms.
 *
 * Returns the elapsed time in kernel ticks, truncated to unsigned int.
 */
static unsigned int test_perf(load_fn_t fn, struct k_event *event,
			      struct k_sem *sem)
{
	const uint64_t t_begin = k_uptime_ticks();
	int err;

	k_event_set(event, (uint32_t)fn);

	err = k_sem_take(sem, K_MSEC(200));
	zassert_ok(err);

	const uint64_t t_end = k_uptime_ticks();

	return (unsigned int)(t_end - t_begin);
}

/*
 * Worker thread entry point, used for both the kernel-mode and the user-mode
 * measurement thread.
 *
 * p1: struct k_event through which the next load function is handed over;
 *     the static test_perf() helper posts the function address itself as the
 *     event bit mask
 * p2: struct k_sem given after each load function has returned
 * p3: unused
 *
 * Every loop pass waits without timeout until any of the 32 event bits is
 * posted, casts the value returned by k_event_wait() back to a load_fn_t,
 * logs the function address together with the value the function returns and
 * then gives the semaphore. The loop has no exit condition.
 *
 * NOTE(review): the load function is called inside the LOG_INF() argument
 * list, so the measured work presumably only runs if that log statement is
 * compiled in - confirm against the logging configuration.
 */
static void thread_fn(void *p1, void *p2, void *p3)
{
struct k_event *event = p1;
struct k_sem *sem = p2;
/* Passed negated as third k_event_wait() argument: false on the first pass only */
bool first = true;

for (;;) {
/*
 * NOTE(review): the third argument is presumably the "reset" flag of
 * k_event_wait(), i.e. previously posted bits are cleared before every
 * wait except the first - confirm against the Zephyr API documentation.
 */
load_fn_t fn = (load_fn_t)k_event_wait(event, 0xffffffff, !first, K_FOREVER);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is some serious event overuse to pass a value here 😆. You might also try k_poll_signal_raise(), it can pass a value too. The only catch is that we don't have enabled k_poll, so it won't help. I guess this is just a quick draft for tests, right?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@softwarecki well, I do think it would be good to have it merged. "overuse" - well, maybe indeed this isn't how events are intended to be used, but I'd call it a "creative use" ;-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be the perfect place to use RTIO! Just kidding 😄 I'd suggest using something actually meant for passing data, maybe a FIFO or a queue? https://docs.zephyrproject.org/latest/kernel/services/index.html#data-passing


first = false;
LOG_INF("fn %p ret %d", (void *)fn, fn());

k_sem_give(sem);
}
}

#define STACK_SIZE 4096

/*
 * Compare the run time of every load_fn[] entry in a user-mode thread with
 * the run time of the same function in a kernel-mode thread.
 *
 * Both worker threads run thread_fn(). Each has its own event object for
 * receiving work and both share one semaphore for signalling completion.
 * All kernel objects and stacks are allocated dynamically and released again
 * at the end of the test.
 */
ZTEST(sof_boot, test_perf)
{
	/* Synchronization objects allocated on original uncached heap */
	struct k_event *u_event = k_object_alloc(K_OBJ_EVENT);
	struct k_event *k_event = k_object_alloc(K_OBJ_EVENT);

	zassert_not_null(u_event);
	zassert_not_null(k_event);

	k_event_init(u_event);
	k_event_init(k_event);

	struct k_sem *sem = k_object_alloc(K_OBJ_SEM);

	zassert_not_null(sem);
	k_sem_init(sem, 0, 1);

	/* Allocate kernel stack and thread and start it */
	struct k_thread *k_thread = k_object_alloc(K_OBJ_THREAD);

	zassert_not_null(k_thread);
	/* Important: Xtensa thread initialization code checks certain fields for 0 */
	memset(&k_thread->arch, 0, sizeof(k_thread->arch));

	k_thread_stack_t *k_stack = k_thread_stack_alloc(STACK_SIZE, 0);

	zassert_not_null(k_stack);

	struct k_thread *pk_thread = k_thread_create(k_thread, k_stack, STACK_SIZE, thread_fn,
						     k_event, sem, NULL, 0, 0, K_FOREVER);

	zassert_not_null(pk_thread);
	k_thread_start(pk_thread);

	/* Allocate userspace stack and thread and start it */
	struct k_thread *u_thread = k_object_alloc(K_OBJ_THREAD);

	zassert_not_null(u_thread);
	/* Same zero-initialization requirement as for the kernel thread above */
	memset(&u_thread->arch, 0, sizeof(u_thread->arch));

	k_thread_stack_t *u_stack = k_thread_stack_alloc(STACK_SIZE, K_USER);

	zassert_not_null(u_stack);

	struct k_thread *pu_thread = k_thread_create(u_thread, u_stack, STACK_SIZE, thread_fn,
						     u_event, sem, NULL, 0, K_USER, K_FOREVER);

	zassert_not_null(pu_thread);
	/* The user-mode thread needs explicit permission on the objects it uses */
	k_thread_access_grant(pu_thread, u_event, sem);
	k_thread_start(pu_thread);

	for (unsigned int i = 0; i < ARRAY_SIZE(load_fn); i++) {
		unsigned int ticks;

		/*
		 * Run each measurement as a statement of its own instead of as
		 * a log macro argument, so that it does not depend on how the
		 * logging macros treat their arguments.
		 */
		ticks = test_perf(load_fn[i], u_event, sem);
		LOG_INF("user: fn %p took %u", (void *)load_fn[i], ticks);

		ticks = test_perf(load_fn[i], k_event, sem);
		LOG_INF("kernel: fn %p took %u", (void *)load_fn[i], ticks);
	}

	/*
	 * Stop the workers, then release everything that was allocated above,
	 * including the thread objects themselves.
	 */
	k_thread_abort(pu_thread);
	k_thread_stack_free(u_stack);
	k_object_free(u_thread);
	k_thread_abort(pk_thread);
	k_thread_stack_free(k_stack);
	k_object_free(k_thread);
	k_object_free(sem);
	k_object_free(u_event);
	k_object_free(k_event);
}