/*
 * Copyright (c) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <zephyr/sys/printk.h>
#include <zephyr/wait_q.h>
#include <ksched.h>

#include "app_threads.h"
#include "user.h"

#define MAIN_PRIO 8
#define THREADS_PRIO 9

enum {
	MEAS_START,
	MEAS_END,
	NUM_STAMP_STATES
};

uint32_t stamps[NUM_STAMP_STATES];

static inline int stamp(int state)
{
	uint32_t t;

	/* In theory the TSC has much lower overhead and higher
	 * precision. In practice it's VERY jittery in recent qemu
	 * versions and frankly too noisy to trust.
	 */
#ifdef CONFIG_X86
	__asm__ volatile("rdtsc" : "=a"(t) : : "edx");
#else
	t = k_cycle_get_32();
#endif

	stamps[state] = t;
	return t;
}

static int yielder_status;

/* Supervisor-mode entry point: give the thread its own memory domain,
 * then drop to user mode and run the yield loop.
 */
void yielder_entry(void *_thread, void *_tid, void *_nb_threads)
{
	struct k_app_thread *thread = (struct k_app_thread *)_thread;
	int ret;

	struct k_mem_partition *parts[] = {
		thread->partition,
	};

	ret = k_mem_domain_init(&thread->domain, ARRAY_SIZE(parts), parts);
	if (ret != 0) {
		printk("k_mem_domain_init failed %d\n", ret);
		yielder_status = 1;
		return;
	}

	k_mem_domain_add_thread(&thread->domain, k_current_get());

	k_thread_user_mode_enter(context_switch_yield, _nb_threads, NULL, NULL);
}

static k_tid_t threads[MAX_NB_THREADS];

/* Create, start, and join nb_threads user threads, then report the
 * average context-switch cost over NB_YIELDS rounds.
 */
static int exec_test(uint8_t nb_threads)
{
	if (nb_threads > MAX_NB_THREADS) {
		printk("Too many threads\n");
		return 1;
	}

	yielder_status = 0;

	for (size_t tid = 0; tid < nb_threads; tid++) {
		app_threads[tid].partition = app_partitions[tid];
		app_threads[tid].stack = &app_thread_stacks[tid];

		void *_tid = (void *)(uintptr_t)tid;

		threads[tid] = k_thread_create(&app_threads[tid].thread,
					       app_thread_stacks[tid],
					       APP_STACKSIZE,
					       yielder_entry,
					       &app_threads[tid], _tid,
					       (void *)(uintptr_t)nb_threads,
					       THREADS_PRIO, 0, K_FOREVER);
	}

	/* Make sure the main thread has a higher priority, so the user
	 * threads all start together (lower number --> higher prio).
	 */
	k_thread_priority_set(k_current_get(), MAIN_PRIO);

	stamp(MEAS_START);
	for (size_t tid = 0; tid < nb_threads; tid++) {
		k_thread_start(threads[tid]);
	}

	for (size_t tid = 0; tid < nb_threads; tid++) {
		k_thread_join(threads[tid], K_FOREVER);
	}
	stamp(MEAS_END);

	uint32_t full_time = stamps[MEAS_END] - stamps[MEAS_START];
	uint64_t time_ns = k_cyc_to_ns_near64(full_time) / NB_YIELDS;

	printk("Swapping %2u threads: %8" PRIu32 " cyc & %6" PRIu32
	       " rounds -> %6" PRIu64 " ns per ctx\n",
	       nb_threads, full_time, NB_YIELDS, time_ns);

	return yielder_status;
}

int main(void)
{
	int ret;

	printk("Userspace scheduling benchmark started on board %s\n", CONFIG_BOARD);

	/* Thread counts to benchmark; the trailing 0 terminates the list. */
	size_t nb_threads_list[] = {2, 8, 16, 32, 0};

	printk("============================\n");
	printk("user/user^n swapping (yield)\n");

	for (size_t i = 0; nb_threads_list[i] > 0; i++) {
		ret = exec_test(nb_threads_list[i]);
		if (ret != 0) {
			printk("FAIL\n");
			return 0;
		}
	}

	printk("SUCCESS\n");

	return 0;
}
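
/*
 * Reference only: context_switch_yield() lives in user.c (declared in
 * user.h), not in this file. A plausible sketch of the user-mode
 * workload -- each thread spinning on k_yield() so the measured
 * interval is dominated by context-switch cost -- is given below,
 * disabled so it cannot clash with the real definition. The division
 * by nb_threads is an assumption that would keep the total number of
 * yields across all threads near NB_YIELDS, matching the per-context
 * average computed in exec_test().
 */
#if 0
void context_switch_yield(void *_nb_threads, void *unused2, void *unused3)
{
	uint32_t nb_threads = (uint32_t)(uintptr_t)_nb_threads;

	for (uint32_t i = 0; i < NB_YIELDS / nb_threads; i++) {
		k_yield(); /* hand the CPU to the next same-priority yielder */
	}
}
#endif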
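
/*
 * Reference only: this file leans on app_threads.h for the thread
 * bookkeeping. The shapes below are inferred from the usages above
 * (thread->partition, thread->domain, app_threads[tid].stack, ...);
 * the real header is authoritative and the sizes here are guesses,
 * so the sketch is disabled.
 */
#if 0
#define MAX_NB_THREADS 32     /* assumption: bound checked by exec_test() */
#define APP_STACKSIZE  1024   /* assumption: per-thread stack size */
#define NB_YIELDS      10000  /* assumption: total yield rounds measured */

struct k_app_thread {
	struct k_thread thread;            /* kernel thread object */
	struct k_mem_domain domain;        /* per-thread memory domain */
	struct k_mem_partition *partition; /* partition placed in the domain */
	void *stack;                       /* backing stack for the thread */
};

extern struct k_app_thread app_threads[MAX_NB_THREADS];
extern struct k_mem_partition *app_partitions[MAX_NB_THREADS];
/* app_thread_stacks would be defined with K_THREAD_STACK_ARRAY_DEFINE() */
#endif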