/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <zephyr/kernel.h>
#include "msgdev.h"
#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(main, LOG_LEVEL_INF);

#define STACK_SIZE 2048

/* How many messages can be queued for a single thread */
#define QUEUE_DEPTH MAX_EVENTS

/* Array of worker threads, and their stacks */
static struct thread_rec {
	struct k_thread thread;
	struct k_msgq msgq;
	struct msg msgq_buf[QUEUE_DEPTH];
} threads[NUM_THREADS];

K_THREAD_STACK_ARRAY_DEFINE(thread_stacks, NUM_THREADS, STACK_SIZE);

/* The static metairq thread we'll use for dispatch */
static void metairq_fn(void *p1, void *p2, void *p3);
K_THREAD_DEFINE(metairq_thread, STACK_SIZE, metairq_fn,
		NULL, NULL, NULL, K_HIGHEST_THREAD_PRIO, 0, 0);

/* Accumulated list of latencies, for a naive variance computation at
 * the end.
 */
struct {
	atomic_t num_mirq;
	uint32_t mirq_latencies[MAX_EVENTS];
	struct {
		uint32_t nevt;
		uint32_t latencies[MAX_EVENTS * 2 / NUM_THREADS];
	} threads[NUM_THREADS];
} stats;

/* A semaphore with an initial count, used to allow only one thread to
 * log the final report.
 */
K_SEM_DEFINE(report_cookie, 1, 1);

static void metairq_fn(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	while (true) {
		/* Receive a message, immediately check a timestamp
		 * and compute a latency value, then dispatch it to
		 * the queue for its target thread
		 */
		struct msg m;

		message_dev_fetch(&m);
		m.metairq_latency = k_cycle_get_32() - m.timestamp;

		int ret = k_msgq_put(&threads[m.target].msgq, &m, K_NO_WAIT);

		if (ret) {
			LOG_INF("Thread %d queue full, message %d dropped",
				m.target, m.seq);
		}
	}
}

/* Simple recursive implementation of an integer square root, cribbed
 * from wikipedia
 */
static uint32_t isqrt(uint64_t n)
{
	if (n > 1) {
		uint64_t lo = isqrt(n >> 2) << 1;
		uint64_t hi = lo + 1;

		return (uint32_t)(((hi * hi) > n) ? lo : hi);
	}
	return (uint32_t) n;
}

static void calc_stats(const uint32_t *array, uint32_t n,
		       uint32_t *lo, uint32_t *hi,
		       uint32_t *mean, uint32_t *stdev)
{
	uint64_t tot = 0, totsq = 0;

	*lo = INT_MAX;
	*hi = 0;
	for (int i = 0; i < n; i++) {
		*lo = MIN(*lo, array[i]);
		*hi = MAX(*hi, array[i]);
		tot += array[i];
	}
	*mean = (uint32_t)((tot + (n / 2)) / n);

	for (int i = 0; i < n; i++) {
		int64_t d = (int32_t) (array[i] - *mean);

		totsq += d * d;
	}
	*stdev = isqrt((totsq + (n / 2)) / n);
}
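/* Illustrative sanity check of the rounded mean/stdev arithmetic
 * above, using assumed sample values that do not come from this
 * benchmark: for the eight samples {2, 4, 4, 4, 5, 5, 7, 9},
 * tot = 40, so *mean = (40 + 4) / 8 = 5; the squared deviations
 * from 5 sum to 32, so the rounded population variance is
 * (32 + 4) / 8 = 4 and *stdev = isqrt(4) = 2.
 */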
static void record_latencies(struct msg *m, uint32_t latency)
{
	/* Workaround: qemu emulation shows an erroneously high
	 * metairq latency of 7-8us for the very first event.  Maybe
	 * it needs to fault in our code pages on the host?
	 */
	if (IS_ENABLED(CONFIG_QEMU_TARGET) && m->seq == 0) {
		return;
	}

	/* Guard against a potential race in this subroutine: check
	 * whether the message sequence has reached MAX_EVENTS first,
	 * so a late-arriving message cannot index past the end of
	 * the stats arrays.
	 */
	if (m->seq >= MAX_EVENTS) {
		return;
	}

	int t = m->target;
	int lidx = stats.threads[t].nevt++;

	if (lidx < ARRAY_SIZE(stats.threads[t].latencies)) {
		stats.threads[t].latencies[lidx] = latency;
	}

	stats.mirq_latencies[atomic_inc(&stats.num_mirq)] = m->metairq_latency;

	/* Once we've logged our final event, print a report.  We use
	 * a semaphore with an initial count of 1 to ensure that only
	 * one thread gets to do this.  Also events can be processed
	 * out of order, so add a small sleep to let the queues
	 * finish.
	 */
	if (m->seq == MAX_EVENTS - 1) {
		uint32_t hi, lo, mean, stdev, ret;

		ret = k_sem_take(&report_cookie, K_FOREVER);
		__ASSERT_NO_MSG(ret == 0);
		k_msleep(100);

		calc_stats(stats.mirq_latencies, stats.num_mirq,
			   &lo, &hi, &mean, &stdev);

		LOG_INF("        ---------- Latency (cyc) ----------");
		LOG_INF("            Best    Worst     Mean    Stdev");
		LOG_INF("MetaIRQ %8d %8d %8d %8d", lo, hi, mean, stdev);

		for (int i = 0; i < NUM_THREADS; i++) {
			if (stats.threads[i].nevt == 0) {
				LOG_WRN("No events for thread %d", i);
				continue;
			}

			calc_stats(stats.threads[i].latencies,
				   stats.threads[i].nevt,
				   &lo, &hi, &mean, &stdev);

			LOG_INF("Thread%d %8d %8d %8d %8d",
				i, lo, hi, mean, stdev);
		}

		LOG_INF("MetaIRQ Test Complete");
	}
}

static void thread_fn(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);
	int id = (long)p1;
	struct msg m;

	LOG_INF("Starting Thread%d at priority %d", id,
		k_thread_priority_get(k_current_get()));

	while (true) {
		int ret = k_msgq_get(&threads[id].msgq, &m, K_FOREVER);
		uint32_t start = k_cycle_get_32();

		/* Validate the receive before consuming the message
		 * contents below.
		 */
		__ASSERT_NO_MSG(ret == 0);

		/* Spin on the CPU for the requested number of cycles
		 * doing the "work" required to "process" the event.
		 * Note the inner loop: hammering on k_cycle_get_32()
		 * on some platforms requires locking around the timer
		 * driver internals and can affect interrupt latency.
		 * Obviously we may be preempted as new events arrive
		 * and get queued.
		 */
		while (k_cycle_get_32() - start < m.proc_cyc) {
			for (volatile int i = 0; i < 100; i++) {
			}
		}

		uint32_t dur = k_cycle_get_32() - start;

#ifdef LOG_EVERY_EVENT
		/* Log the message, its thread, and the following cycle values:
		 * 1. Receive it from the driver in the MetaIRQ thread
		 * 2. Begin processing it out of the queue in the worker thread
		 * 3. The requested processing time in the message
		 * 4. The actual time taken to process the message
		 *    (may be higher if the thread was preempted)
		 */
		LOG_INF("M%d T%d mirq %d disp %d proc %d real %d",
			m.seq, id, m.metairq_latency, start - m.timestamp,
			m.proc_cyc, dur);
#endif

		/* Collect the latency values in a big statistics array */
		record_latencies(&m, start - m.timestamp);
	}
}

int main(void)
{
	for (long i = 0; i < NUM_THREADS; i++) {
		/* Each thread gets a different priority.  Half should
		 * be at (negative) cooperative priorities.  Lower
		 * thread numbers have numerically lower values, thus
		 * higher priority; e.g. thread 0 will be preempted
		 * only by the metairq.
		 */
		int prio = (-NUM_THREADS / 2) + i;

		k_msgq_init(&threads[i].msgq, (char *)threads[i].msgq_buf,
			    sizeof(struct msg), QUEUE_DEPTH);

		k_thread_create(&threads[i].thread, thread_stacks[i],
				STACK_SIZE, thread_fn, (void *)i,
				NULL, NULL, prio, 0, K_NO_WAIT);
	}

	message_dev_init();
	return 0;
}