/*
 * Copyright (c) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <device.h>
#include <init.h>
#include <kernel.h>
#include <kernel_structs.h>
#include <toolchain.h>
#include <sys/__assert.h>
#include <sys/sys_io.h>

#include <xtensa/config/core-isa.h>

#include <logging/log.h>
LOG_MODULE_REGISTER(soc_mp, CONFIG_SOC_LOG_LEVEL);

#include <cavs-idc.h>
#include <soc.h>
#include <arch/xtensa/cache.h>
#include <adsp/io.h>

#include <soc/shim.h>

#include <drivers/ipm.h>
#include <ipm/ipm_cavs_idc.h>

extern void z_sched_ipi(void);
extern void z_smp_start_cpu(int id);
extern void z_reinit_idle_thread(int i);

/* ROM wake version parsed by ROM during core wake up. */
#define IDC_ROM_WAKE_VERSION	0x2

/* IDC message type. */
#define IDC_TYPE_SHIFT		24
#define IDC_TYPE_MASK		0x7f
#define IDC_TYPE(x)		(((x) & IDC_TYPE_MASK) << IDC_TYPE_SHIFT)

/* IDC message header. */
#define IDC_HEADER_MASK		0xffffff
#define IDC_HEADER(x)		((x) & IDC_HEADER_MASK)

/* IDC message extension. */
#define IDC_EXTENSION_MASK	0x3fffffff
#define IDC_EXTENSION(x)	((x) & IDC_EXTENSION_MASK)

/* IDC power up message. */
#define IDC_MSG_POWER_UP	\
	(IDC_TYPE(0x1) | IDC_HEADER(IDC_ROM_WAKE_VERSION))

#define IDC_MSG_POWER_UP_EXT(x)	IDC_EXTENSION((x) >> 2)
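
/*
 * Worked example (illustrative only): IDC_MSG_POWER_UP expands to
 * IDC_TYPE(0x1) | IDC_HEADER(0x2) == (0x1 << 24) | 0x2 == 0x01000002.
 * The extension word carries the entry address shifted right by two,
 * so for a hypothetical entry point at 0xbe800000:
 *
 *	IDC_MSG_POWER_UP_EXT(0xbe800000) == 0x2fa00000
 *
 * This is also why the entry point must be at least 4-byte aligned:
 * the low two address bits are dropped in transit.
 */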

struct cpustart_rec {
	uint32_t		cpu;

	arch_cpustart_t	fn;
	void		*arg;
	uint32_t		vecbase;

	uint32_t		alive;
};

static struct k_spinlock mplock;

char *z_mp_stack_top;

#ifdef CONFIG_KERNEL_COHERENCE
/* Coherence guarantees that normal .data will be coherent and that it
 * won't overlap any cached memory.
 */
static struct {
	struct cpustart_rec cpustart;
} cpustart_mem;
#else
/* If .data RAM is by default incoherent, then the start record goes
 * into its own dedicated cache line(s)
 */
static __aligned(XCHAL_DCACHE_LINESIZE) union {
	struct cpustart_rec cpustart;
	char pad[XCHAL_DCACHE_LINESIZE];
} cpustart_mem;
#endif

#define start_rec \
	(*((volatile struct cpustart_rec *) \
	   z_soc_uncached_ptr(&cpustart_mem.cpustart)))
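
/* The start record is the only cross-CPU handshake used during
 * bring-up: arch_start_cpu() fills it in and spins on .alive, and the
 * newly started CPU sets .alive from z_mp_entry().  Accessing it via
 * the uncached alias keeps the handshake independent of the secondary
 * core's (not yet configured) data cache.
 */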

/* Simple array of CPUs that are active and available for an IPI.  The
 * IDC interrupt is ALSO used to bring a CPU out of reset, so we need
 * to be absolutely sure we don't try to IPI a CPU that isn't ready to
 * start, or else we'll launch it into garbage and crash the DSP.
 */
static bool cpus_active[CONFIG_MP_NUM_CPUS];

/* Tiny assembly stub for calling z_mp_entry() on the auxiliary CPUs.
 * Mask interrupts, clear the register window state and set the stack
 * pointer.  This represents the minimum work required to run C code
 * safely.
 *
 * Note that alignment is absolutely required: the IDC protocol passes
 * only the upper 30 bits of the address to the second CPU.
 */
void z_soc_mp_asm_entry(void);
__asm__(".align 4                   \n\t"
	".global z_soc_mp_asm_entry \n\t"
	"z_soc_mp_asm_entry:        \n\t"
	"  movi  a0, 0x40025        \n\t" /* WOE | UM | INTLEVEL(5) */
	"  wsr   a0, PS             \n\t"
	"  movi  a0, 0              \n\t"
	"  wsr   a0, WINDOWBASE     \n\t"
	"  movi  a0, 1              \n\t"
	"  wsr   a0, WINDOWSTART    \n\t"
	"  rsync                    \n\t"
	"  movi  a1, z_mp_stack_top \n\t"
	"  l32i  a1, a1, 0          \n\t"
	"  call4 z_mp_entry         \n\t");
BUILD_ASSERT(XCHAL_EXCM_LEVEL == 5);
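
/* For reference, the PS value written above decomposes as
 * WOE (bit 18) | UM (bit 5) | INTLEVEL = 5, i.e.
 * 0x40000 | 0x20 | 0x5 == 0x40025, matching the BUILD_ASSERT that the
 * EXCM level on this core is 5.
 */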

int cavs_idc_smp_init(const struct device *dev);

#define CxL1CCAP (*(volatile uint32_t *)0x9F080080)
#define CxL1CCFG (*(volatile uint32_t *)0x9F080084)
#define CxL1PCFG (*(volatile uint32_t *)0x9F080088)

/* "Data/Instruction Cache Memory Way Count" fields */
#define CxL1CCAP_DCMWC ((CxL1CCAP >> 16) & 7)
#define CxL1CCAP_ICMWC ((CxL1CCAP >> 20) & 7)

static ALWAYS_INLINE void enable_l1_cache(void)
{
	uint32_t reg;

#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* First, on cAVS 2.5 we need to power the cache SRAM banks
	 * on!  Write a bit for each cache way in the bottom half of
	 * the L1CCFG register and poll the top half for them to turn
	 * on.
	 */
	uint32_t dmask = BIT(CxL1CCAP_DCMWC) - 1;
	uint32_t imask = BIT(CxL1CCAP_ICMWC) - 1;
	uint32_t waymask = (imask << 8) | dmask;
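	/* For example, with 4 data ways and 4 instruction ways reported
	 * (a hypothetical configuration), dmask == 0x0f, imask == 0x0f
	 * and waymask == 0x0f0f; the poll below then waits until the
	 * matching status bits in the top half of L1CCFG are all set.
	 */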

	CxL1CCFG = waymask;
	while (((CxL1CCFG >> 16) & waymask) != waymask) {
	}

	/* Prefetcher also power gates, same interface */
	CxL1PCFG = 1;
	while ((CxL1PCFG & 0x10000) == 0) {
	}
#endif

	/* Now set up the Xtensa CPU to enable the cache logic.  The
	 * details of the fields are somewhat complicated, but per the
	 * ISA ref: "Turning on caches at power-up usually consists of
	 * writing a constant with bits[31:8] all 1’s to MEMCTL.".
	 * Also set bit 0 to enable the LOOP extension instruction
	 * fetch buffer.
	 */
#ifdef XCHAL_HAVE_ICACHE_DYN_ENABLE
	reg = 0xffffff01;
	__asm__ volatile("wsr %0, MEMCTL; rsync" :: "r"(reg));
#endif

	/* Likewise enable prefetching.  Sadly these values are not
	 * architecturally defined by Xtensa (they're just documented
	 * as priority hints), so this constant is just copied from
	 * SOF for now.  If we care about prefetch priority tuning
	 * we're supposed to ask Cadence I guess.
	 */
	reg = IS_ENABLED(CONFIG_SOC_SERIES_INTEL_CAVS_V25) ? 0x1038 : 0;
	__asm__ volatile("wsr %0, PREFCTL; rsync" :: "r"(reg));

	/* Finally we need to enable the cache in the Region
	 * Protection Option "TLB" entries.  The hardware defaults
	 * have this set to RW/uncached (2) everywhere.  We want
	 * writeback caching (4) in the sixth mapping (the second of
	 * two RAM mappings) and to mark all unused regions
	 * inaccessible (15) for safety.  Note that there is a HAL
	 * routine that does this (by emulating the older "cacheattr"
	 * hardware register), but it generates significantly larger
	 * code.
	 */
#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* Already set up by the ROM on older hardware. */
	const uint8_t attribs[] = { 2, 15, 15, 15, 2, 4, 15, 15 };
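	/* Laid out explicitly, each 512MB region and its attribute:
	 *
	 *	0x00000000: 2 (uncached)    0x80000000: 2 (uncached)
	 *	0x20000000: 15 (no access)  0xA0000000: 4 (writeback)
	 *	0x40000000: 15 (no access)  0xC0000000: 15 (no access)
	 *	0x60000000: 15 (no access)  0xE0000000: 15 (no access)
	 */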

	for (int region = 0; region < 8; region++) {
		reg = 0x20000000 * region;
		__asm__ volatile("wdtlb %0, %1" :: "r"(attribs[region]), "r"(reg));
	}
#endif
}

void z_mp_entry(void)
{
	volatile int ie;
	uint32_t reg;

	enable_l1_cache();

	/* Fix ATOMCTL to match CPU0.  Hardware defaults for S32C1I
	 * use internal operations (and are thus presumably atomic
	 * only WRT the local CPU!).  We need external transactions on
	 * the shared bus.
	 */
	reg = 0x15;
	__asm__ volatile("wsr %0, ATOMCTL" :: "r"(reg));

	/* We don't know what the boot ROM (on pre-2.5 DSPs) might
	 * have touched and we don't care.  Make sure it's not in our
	 * local cache to be flushed accidentally later.
	 *
	 * Note that technically this is dropping our own (cached)
	 * stack memory, which we don't have a guarantee the compiler
	 * isn't using yet.  Manual inspection of generated code says
	 * we're safe, but really we need a better solution here.
	 */
#ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	z_xtensa_cache_flush_inv_all();
#endif

	/* Copy over VECBASE from the main CPU for an initial value
	 * (will need to revisit this if we ever allow a user API to
	 * change interrupt vectors at runtime).
	 */
	ie = 0;
	__asm__ volatile("wsr.INTENABLE %0" : : "r"(ie));
	__asm__ volatile("wsr.VECBASE %0" : : "r"(start_rec.vecbase));
	__asm__ volatile("rsync");

	/* Set up the CPU pointer. */
	_cpu_t *cpu = &_kernel.cpus[start_rec.cpu];

	__asm__ volatile(
		"wsr." CONFIG_XTENSA_KERNEL_CPU_PTR_SR " %0" : : "r"(cpu));

	/* We got here via an IDC interrupt.  Clear the TFC high bit
	 * (by writing a one!) to acknowledge and clear the latched
	 * hardware interrupt (so we don't have to service it as a
	 * spurious IPI when we enter user code).  Remember: this
	 * could have come from any core, clear all of them.
	 */
	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		IDC[start_rec.cpu].core[i].tfc = BIT(31);
	}

	/* The IDC interrupt must be enabled on this newly started core */
	irq_enable(DT_IRQN(DT_INST(0, intel_cavs_idc)));

#ifdef CONFIG_SMP_BOOT_DELAY
	cavs_idc_smp_init(NULL);
#endif

	cpus_active[start_rec.cpu] = true;
	start_rec.alive = 1;

	start_rec.fn(start_rec.arg);

#if CONFIG_MP_NUM_CPUS == 1
	/* CPU#1 can be under manual control running custom functions
	 * instead of participating in general thread execution.
	 * Put the CPU into idle after those functions return
	 * so this won't return.
	 */
	for (;;) {
		k_cpu_idle();
	}
#endif
}

bool arch_cpu_active(int cpu_num)
{
	return cpus_active[cpu_num];
}

static ALWAYS_INLINE uint32_t prid(void)
{
	uint32_t prid;

	__asm__ volatile("rsr %0, PRID" : "=r"(prid));
	return prid;
}

void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
		    arch_cpustart_t fn, void *arg)
{
	uint32_t vecbase, curr_cpu = prid();

#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* On cAVS v2.5, MP startup works differently.  The core has
	 * no ROM, and starts running immediately upon receipt of an
	 * IDC interrupt at the start of LPSRAM at 0xbe800000.  Note
	 * that means we don't need to bother constructing a "message"
	 * below, it will be ignored.  But it's left in place for
	 * simplicity and compatibility.
	 *
	 * All we need to do is place a single jump at that address to
	 * our existing MP entry point.  Unfortunately Xtensa makes
	 * this difficult, as the region is beyond the range of a
	 * relative jump instruction, so we need an immediate, which
	 * can only be backwards-referenced.  So we hand-assemble a
	 * tiny trampoline here ("jump over the immediate address,
	 * load it, jump to it").
	 *
	 * Long term we want to have this in linkable LP-SRAM memory
	 * such that the standard system bootstrap out of IMR can
	 * place it there.  But this is fine for now.
	 */
	void **lpsram = z_soc_uncached_ptr((void *)LP_SRAM_BASE);
	uint8_t tramp[] = {
		0x06, 0x01, 0x00, /* J <PC+8>  (jump to L32R) */
		0,                /* (padding to align entry_addr) */
		0, 0, 0, 0,       /* (entry_addr goes here) */
		0x01, 0xff, 0xff, /* L32R a0, <entry_addr> */
		0xa0, 0x00, 0x00, /* JX a0 */
	};

	memcpy(lpsram, tramp, ARRAY_SIZE(tramp));
	lpsram[1] = z_soc_mp_asm_entry;
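	/* The resulting LP-SRAM image (sketch): bytes 0-3 hold the
	 * initial jump plus padding, bytes 4-7 (lpsram[1]) hold the
	 * address of z_soc_mp_asm_entry just written, and bytes 8-13
	 * hold the L32R/JX pair that loads and jumps to it.
	 */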
#endif

	__asm__ volatile("rsr.VECBASE %0\n\t" : "=r"(vecbase));

	start_rec.cpu = cpu_num;
	start_rec.fn = fn;
	start_rec.arg = arg;
	start_rec.vecbase = vecbase;
	start_rec.alive = 0;

	z_mp_stack_top = Z_THREAD_STACK_BUFFER(stack) + sz;

	/* Pre-2.x cAVS delivers the IDC to ROM code, so unmask it */
	CAVS_INTCTRL[cpu_num].l2.clear = CAVS_L2_IDC;

	/* Disable automatic power and clock gating for that CPU, so
	 * it won't just go back to sleep.  Note that after startup,
	 * the cores are NOT power gated even if they're configured to
	 * be, so by default a core will launch successfully but then
	 * turn itself off when it gets to the WAITI instruction in
	 * the idle thread.
	 */
	volatile struct soc_dsp_shim_regs *shim = (void *)SOC_DSP_SHIM_REG_BASE;

	shim->pwrctl |= BIT(cpu_num);
	if (!IS_ENABLED(CONFIG_SOC_SERIES_INTEL_CAVS_V15)) {
		shim->clkctl |= BIT(16 + cpu_num);
	}

	/* Send power up message to the other core */
	uint32_t ietc = IDC_MSG_POWER_UP_EXT((long) z_soc_mp_asm_entry);

	IDC[curr_cpu].core[cpu_num].ietc = ietc;
	IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP | IPC_IDCITC_BUSY;

#ifndef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	/* Early DSPs have a ROM that actually receives the startup
	 * IDC as an interrupt, and we don't want that to be confused
	 * by IPIs sent by the OS elsewhere.  Mask the IDC interrupt
	 * on the other core so an IPI won't cause it to jump to the
	 * ISR before the core is fully initialized.
	 */
	uint32_t idc_reg = idc_read(IPC_IDCCTL, cpu_num);

	idc_reg &= ~IPC_IDCCTL_IDCTBIE(0);
	idc_write(IPC_IDCCTL, cpu_num, idc_reg);
	sys_set_bit(DT_REG_ADDR(DT_NODELABEL(cavs0)) + 0x00 +
		      CAVS_ICTL_INT_CPU_OFFSET(cpu_num), 8);

	k_busy_wait(100);

#ifdef CONFIG_SMP_BOOT_DELAY
	cavs_idc_smp_init(NULL);
#endif
#endif

	while (!start_rec.alive)
		;
}

void arch_sched_ipi(void)
{
#ifdef CONFIG_SOC_SERIES_INTEL_CAVS_V25
	uint32_t curr = prid();

	for (int c = 0; c < CONFIG_MP_NUM_CPUS; c++) {
		if (c != curr && cpus_active[c]) {
			IDC[curr].core[c].itc = BIT(31);
		}
	}
#else
	/* Legacy implementation for cavs15 based on the 2-core-only
	 * IPM driver.  To be replaced with the general one when
	 * validated.
	 */
	const struct device *idcdev =
		device_get_binding(DT_LABEL(DT_INST(0, intel_cavs_idc)));

	ipm_send(idcdev, 0, IPM_CAVS_IDC_MSG_SCHED_IPI_ID,
		 IPM_CAVS_IDC_MSG_SCHED_IPI_DATA, 0);
#endif
}

void idc_isr(void *param)
{
	ARG_UNUSED(param);

#ifdef CONFIG_SMP
	/* Right now this interrupt is only used for IPIs */
	z_sched_ipi();
#endif

	/* ACK the interrupt to all the possible sources.  This is a
	 * level-sensitive interrupt triggered by a logical OR of each
	 * of the ITC/TFC high bits, INCLUDING the one "from this
	 * CPU".
	 */
	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		IDC[prid()].core[i].tfc = BIT(31);
	}
}

#ifndef CONFIG_IPM_CAVS_IDC
/* Fallback stub for external SOF code */
int cavs_idc_smp_init(const struct device *dev)
{
	ARG_UNUSED(dev);
	return 0;
}
#endif

void soc_idc_init(void)
{
#ifndef CONFIG_IPM_CAVS_IDC
	IRQ_CONNECT(DT_IRQN(DT_NODELABEL(idc)), 0, idc_isr, NULL, 0);
#endif

	/* Every CPU should be able to receive an IDC interrupt from
	 * every other CPU, but not to be back-interrupted when the
	 * target core clears the busy bit.
	 */
	for (int core = 0; core < CONFIG_MP_NUM_CPUS; core++) {
		uint32_t coremask = BIT(CONFIG_MP_NUM_CPUS) - 1;

		IDC[core].busy_int |= coremask;
		IDC[core].done_int &= ~coremask;

		/* Also unmask the IDC interrupt for every core in the
		 * L2 mask register.
		 */
		CAVS_INTCTRL[core].l2.clear = CAVS_L2_IDC;
	}

	/* Clear out any IDC interrupts that might already be pending */
	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		for (int j = 0; j < CONFIG_MP_NUM_CPUS; j++) {
			IDC[i].core[j].tfc = BIT(31);
		}
	}

	cpus_active[0] = true;
}
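
/* Illustrative usage of the halt/relaunch pair below (a sketch only;
 * synchronization with the halted CPU's workload is the caller's
 * responsibility, and CPU 2 is just an example id):
 *
 *	if (soc_halt_cpu(2) == 0) {
 *		...			(CPU 2 is now power gated)
 *		soc_relaunch_cpu(2);	(bring it back online)
 *	}
 */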

/**
 * @brief Restart halted SMP CPU
 *
 * Relaunches a CPU that has entered an idle power state via
 * soc_halt_cpu().  Returns -EINVAL if the CPU is not in a power-gated
 * idle state.  Upon successful return, the CPU is online and
 * available to run any Zephyr thread.
 *
 * @param id CPU to start, in the range [1:CONFIG_MP_NUM_CPUS)
 */
int soc_relaunch_cpu(int id)
{
	volatile struct soc_dsp_shim_regs *shim = (void *)SOC_DSP_SHIM_REG_BASE;
	int ret = 0;
	k_spinlock_key_t k = k_spin_lock(&mplock);

	if (id < 1 || id >= CONFIG_MP_NUM_CPUS) {
		ret = -EINVAL;
		goto out;
	}

	if (shim->pwrsts & BIT(id)) {
		ret = -EINVAL;
		goto out;
	}

	CAVS_INTCTRL[id].l2.clear = CAVS_L2_IDC;
	z_reinit_idle_thread(id);
	z_smp_start_cpu(id);

 out:
	k_spin_unlock(&mplock, k);
	return ret;
}

/**
 * @brief Halts and offlines a running CPU
 *
 * Enables power gating on the specified CPU, which cannot be the
 * current CPU or CPU 0.  The CPU must be idle; no application threads
 * may be runnable on it when this function is called (or at least the
 * CPU must be guaranteed to reach idle in finite time without
 * deadlock).  Actual CPU shutdown can only happen in the context of
 * the idle thread, and synchronization is an application
 * responsibility.  This function will hang if the other CPU fails to
 * reach idle.
 *
 * @param id CPU to halt, not current cpu or cpu 0
 * @return 0 on success, -EINVAL on error
 */
int soc_halt_cpu(int id)
{
	volatile struct soc_dsp_shim_regs *shim = (void *)SOC_DSP_SHIM_REG_BASE;
	int ret = 0;
	k_spinlock_key_t k = k_spin_lock(&mplock);

	if (id == 0 || id == _current_cpu->id) {
		ret = -EINVAL;
		goto out;
	}

	/* Turn off the "prevent power/clock gating" bits, enabling
	 * low power idle, and mask off IDC interrupts so the CPU will
	 * not be woken back up by scheduler IPIs.
	 */
	CAVS_INTCTRL[id].l2.set = CAVS_L2_IDC;
	shim->pwrctl &= ~BIT(id);
	shim->clkctl &= ~BIT(16 + id);

	/* Wait for the CPU to reach an idle state before returning */
	while (shim->pwrsts & BIT(id)) {
	}

 out:
	k_spin_unlock(&mplock, k);
	return ret;
}