/*
 * Copyright (c) 2011-2014 Wind River Systems, Inc.
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <kernel.h>
#include <ia32/mmustructs.h>
#include <linker/linker-defs.h>
#include <kernel_internal.h>
#include <kernel_structs.h>
#include <init.h>
#include <ctype.h>
#include <string.h>

/* Despite our use of PAE page tables, we do not (and will never) actually
 * support PAE. Use a 64-bit x86 target if you have that much RAM.
 */
BUILD_ASSERT(DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024ULL) - 1ULL <=
				 (unsigned long long)UINTPTR_MAX);

/* Common regions for all x86 processors.
 * Peripheral I/O ranges are configured at the SoC level.
 */
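
/* For illustration only (the address and flags below are hypothetical, not
 * taken from any particular SoC): a SoC-level definition for an uncached,
 * non-executable MMIO window might look like:
 *
 *   MMU_BOOT_REGION(0xFEC00000, MMU_PAGE_SIZE,
 *                   MMU_ENTRY_WRITE | MMU_ENTRY_CACHING_DISABLE |
 *                   MMU_ENTRY_EXECUTE_DISABLE);
 */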

/* Mark text and rodata as read-only.
 * Userspace may read all text and rodata.
 */
MMU_BOOT_REGION((u32_t)&_image_text_start, (u32_t)&_image_text_size,
		MMU_ENTRY_READ | MMU_ENTRY_USER);

MMU_BOOT_REGION((u32_t)&_image_rodata_start, (u32_t)&_image_rodata_size,
		MMU_ENTRY_READ | MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE);

#ifdef CONFIG_USERSPACE
MMU_BOOT_REGION((u32_t)&_app_smem_start, (u32_t)&_app_smem_size,
		MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE);
#endif

#ifdef CONFIG_COVERAGE_GCOV
MMU_BOOT_REGION((u32_t)&__gcov_bss_start, (u32_t)&__gcov_bss_size,
		MMU_ENTRY_WRITE | MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE);
#endif

/* __kernel_ram_size includes all unused memory, which is used for heaps.
 * User threads cannot access this unless granted at runtime. This is done
 * automatically for stacks.
 */
MMU_BOOT_REGION((u32_t)&__kernel_ram_start, (u32_t)&__kernel_ram_size,
		MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE);

/* Works for PDPT, PD, and PT entries; the bits we check here are all the
 * same.
 *
 * Not trying to capture every flag, just the most interesting stuff:
 * present, write, XD, and user, in typically encountered combinations.
 */
static char get_entry_code(u64_t value)
{
	char ret;

	if ((value & MMU_ENTRY_PRESENT) == 0) {
		ret = '.';
	} else {
		if ((value & MMU_ENTRY_WRITE) != 0) {
			/* Writable page */
			if ((value & MMU_ENTRY_EXECUTE_DISABLE) != 0) {
				/* RW */
				ret = 'w';
			} else {
				/* RWX */
				ret = 'a';
			}
		} else {
			if ((value & MMU_ENTRY_EXECUTE_DISABLE) != 0) {
				/* R */
				ret = 'r';
			} else {
				/* RX */
				ret = 'x';
			}
		}

		if ((value & MMU_ENTRY_USER) != 0) {
			/* Uppercase indicates user mode access */
			ret = toupper(ret);
		}
	}

	return ret;
}
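
/* For reference, the characters emitted above map to:
 *
 *   '.'  not present
 *   'w'  writable, execute disabled        (kernel only)
 *   'a'  writable and executable           (kernel only)
 *   'r'  read-only, execute disabled       (kernel only)
 *   'x'  read-only, executable             (kernel only)
 *   'W'/'A'/'R'/'X'  as above, but also accessible from user mode
 */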

static void z_x86_dump_pt(struct x86_mmu_pt *pt, uintptr_t base, int index)
{
	int column = 0;

	printk("Page table %d for 0x%08lX - 0x%08lX at %p\n",
	       index, base, base + Z_X86_PT_AREA - 1, pt);

	for (int i = 0; i < Z_X86_NUM_PT_ENTRIES; i++) {
		printk("%c", get_entry_code(pt->entry[i].value));

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}
}

static void z_x86_dump_pd(struct x86_mmu_pd *pd, uintptr_t base, int index)
{
	int column = 0;

	printk("Page directory %d for 0x%08lX - 0x%08lX at %p\n",
	       index, base, base + Z_X86_PD_AREA - 1, pd);

	for (int i = 0; i < Z_X86_NUM_PD_ENTRIES; i++) {
		printk("%c", get_entry_code(pd->entry[i].pt.value));

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	for (int i = 0; i < Z_X86_NUM_PD_ENTRIES; i++) {
		struct x86_mmu_pt *pt;
		union x86_mmu_pde_pt *pde = &pd->entry[i].pt;

		if (pde->p == 0 || pde->ps == 1) {
			/* Skip non-present or 2MB directory entries;
			 * there's no page table to examine
			 */
			continue;
		}
		pt = (struct x86_mmu_pt *)(pde->pt << MMU_PAGE_SHIFT);

		z_x86_dump_pt(pt, base + (i * Z_X86_PT_AREA), i);
	}
}

static void z_x86_dump_pdpt(struct x86_mmu_pdpt *pdpt, uintptr_t base,
			    int index)
{
	printk("Page directory pointer table %d for 0x%08lX - 0x%08lX at %p\n",
	       index, base, base + Z_X86_PDPT_AREA - 1, pdpt);

	for (int i = 0; i < Z_X86_NUM_PDPT_ENTRIES; i++) {
		printk("%c", get_entry_code(pdpt->entry[i].value));
	}
	printk("\n");
	for (int i = 0; i < Z_X86_NUM_PDPT_ENTRIES; i++) {
		struct x86_mmu_pd *pd;

		if (pdpt->entry[i].p == 0) {
			continue;
		}
		pd = (struct x86_mmu_pd *)(pdpt->entry[i].pd << MMU_PAGE_SHIFT);

		z_x86_dump_pd(pd, base + (i * Z_X86_PD_AREA), i);
	}
}

void z_x86_dump_page_tables(struct x86_mmu_pdpt *pdpt)
{
	z_x86_dump_pdpt(pdpt, 0, 0);
}
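
/* Illustrative usage (for example from a debug hook or shell command
 * handler; the calling context here is hypothetical):
 *
 *   z_x86_dump_page_tables(&z_x86_kernel_pdpt);
 * #ifdef CONFIG_X86_KPTI
 *   z_x86_dump_page_tables(&z_x86_user_pdpt);
 * #endif
 */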

void z_x86_mmu_get_flags(struct x86_mmu_pdpt *pdpt, void *addr,
			x86_page_entry_data_t *pde_flags,
			x86_page_entry_data_t *pte_flags)
{
	*pde_flags =
		(x86_page_entry_data_t)(X86_MMU_GET_PDE(pdpt, addr)->value &
			~(x86_page_entry_data_t)MMU_PDE_PAGE_TABLE_MASK);

	if ((*pde_flags & MMU_ENTRY_PRESENT) != 0) {
		*pte_flags = (x86_page_entry_data_t)
			(X86_MMU_GET_PTE(pdpt, addr)->value &
			 ~(x86_page_entry_data_t)MMU_PTE_PAGE_MASK);
	} else {
		*pte_flags = 0;
	}
}
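
/* Illustrative usage ('addr' is a hypothetical address of interest):
 *
 *   x86_page_entry_data_t pde_flags, pte_flags;
 *
 *   z_x86_mmu_get_flags(&z_x86_kernel_pdpt, addr, &pde_flags, &pte_flags);
 *   if ((pte_flags & MMU_ENTRY_WRITE) != 0) {
 *           printk("%p is mapped writable\n", addr);
 *   }
 */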

int z_x86_mmu_validate(struct x86_mmu_pdpt *pdpt, void *addr, size_t size,
		       int write)
{
	u32_t start_pde_num;
	u32_t end_pde_num;
	u32_t starting_pte_num;
	u32_t ending_pte_num;
	u32_t pde;
	u32_t pte;
	union x86_mmu_pte pte_value;
	u32_t start_pdpte_num = MMU_PDPTE_NUM(addr);
	u32_t end_pdpte_num = MMU_PDPTE_NUM((char *)addr + size - 1);
	u32_t pdpte;
	struct x86_mmu_pt *pte_address;
	int ret = -EPERM;

	start_pde_num = MMU_PDE_NUM(addr);
	end_pde_num = MMU_PDE_NUM((char *)addr + size - 1);
	starting_pte_num = MMU_PAGE_NUM((char *)addr);

	for (pdpte = start_pdpte_num; pdpte <= end_pdpte_num; pdpte++) {
		if (pdpte != start_pdpte_num) {
			start_pde_num = 0U;
		}

		if (pdpte != end_pdpte_num) {
			/* Not the last PDPTE; check every PDE it covers */
			end_pde_num = Z_X86_NUM_PD_ENTRIES - 1;
		} else {
			end_pde_num = MMU_PDE_NUM((char *)addr + size - 1);
		}

		/* Ensure page directory pointer table entry is present */
		if (X86_MMU_GET_PDPTE_INDEX(pdpt, pdpte)->p == 0) {
			goto out;
		}

		struct x86_mmu_pd *pd_address =
			X86_MMU_GET_PD_ADDR_INDEX(pdpt, pdpte);

		/* Iterate over all the PDEs the buffer might span
		 * (this depends on the size of the buffer and its
		 * start address)
		 */
		for (pde = start_pde_num; pde <= end_pde_num; pde++) {
			union x86_mmu_pde_pt pde_value =
				pd_address->entry[pde].pt;

			if ((pde_value.p) == 0 ||
			    (pde_value.us) == 0 ||
			    ((write != 0) && (pde_value.rw == 0))) {
				goto out;
			}

			pte_address = (struct x86_mmu_pt *)
				(pde_value.pt << MMU_PAGE_SHIFT);

			/* Loop over all the possible page tables for the
			 * required size. If this PDE is not the last one,
			 * the last PTE is 511, so every PDE except the
			 * last uses all of its page table entries. For
			 * the last PDE, the ending PTE is calculated from
			 * the last memory address of the buffer.
			 */
			if (pde != end_pde_num) {
				ending_pte_num = 511U;
			} else {
				ending_pte_num =
					MMU_PAGE_NUM((char *)addr + size - 1);
			}

			/* Every PDE other than the starting one begins
			 * at PTE number zero.
			 */
			if (pde != start_pde_num) {
				starting_pte_num = 0U;
			}

			pte_value.value = 0xFFFFFFFFU;

			/* Bitwise AND all the PTE values together so that
			 * the permission check below only needs to be
			 * done once.
			 */
			for (pte = starting_pte_num;
			     pte <= ending_pte_num;
			     pte++) {
				pte_value.value &=
					pte_address->entry[pte].value;
			}

			if ((pte_value.p) == 0 ||
			    (pte_value.us) == 0 ||
			    ((write != 0) && (pte_value.rw == 0))) {
				goto out;
			}
		}
	}
	ret = 0;
out:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	__asm__ volatile ("lfence" : : : "memory");
#endif

	return ret;
}
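
/* Worked example of the PDE/PTE numbering used above (standard PAE layout:
 * each PTE maps 4 KB, each PDE maps 2 MB, each PDPTE maps 1 GB): an 8 KB
 * buffer at address 0x001FF000 starts at PDPTE 0, PDE 0, PTE 511 and ends
 * at PDPTE 0, PDE 1, PTE 0, so the loops check the last entry of the first
 * page table and the first entry of the second.
 */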

static inline void tlb_flush_page(void *addr)
{
	/* Invalidate TLB entries corresponding to the page containing the
	 * specified address
	 */
	char *page = (char *)addr;

	__asm__ ("invlpg %0" :: "m" (*page));
}

#define PDPTE_FLAGS_MASK	MMU_ENTRY_PRESENT

#define PDE_FLAGS_MASK		(MMU_ENTRY_WRITE | MMU_ENTRY_USER | \
				 PDPTE_FLAGS_MASK)

#define PTE_FLAGS_MASK		(PDE_FLAGS_MASK | MMU_ENTRY_EXECUTE_DISABLE | \
				 MMU_ENTRY_WRITE_THROUGH | \
				 MMU_ENTRY_CACHING_DISABLE)

void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
			 x86_page_entry_data_t flags,
			 x86_page_entry_data_t mask, bool flush)
{
	u32_t addr = (u32_t)ptr;

	__ASSERT((addr & MMU_PAGE_MASK) == 0U, "unaligned address provided");
	__ASSERT((size & MMU_PAGE_MASK) == 0U, "unaligned size provided");

	/* L1TF mitigation: non-present PTEs will have address fields
	 * zeroed. Expand the mask to include address bits if we are changing
	 * the present bit.
	 */
	if ((mask & MMU_PTE_P_MASK) != 0) {
		mask |= MMU_PTE_PAGE_MASK;
	}

	while (size != 0) {
		union x86_mmu_pte *pte;
		union x86_mmu_pde_pt *pde;
		union x86_mmu_pdpte *pdpte;
		x86_page_entry_data_t cur_flags = flags;

		pdpte = X86_MMU_GET_PDPTE(pdpt, addr);
		__ASSERT(pdpte->p == 1, "set flags on non-present PDPTE");
		pdpte->value |= (flags & PDPTE_FLAGS_MASK);

		pde = X86_MMU_GET_PDE(pdpt, addr);
		__ASSERT(pde->p == 1, "set flags on non-present PDE");
		pde->value |= (flags & PDE_FLAGS_MASK);
		/* If any flags enable execution, clear execute disable at the
		 * page directory level
		 */
		if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) {
			pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE;
		}

		pte = X86_MMU_GET_PTE(pdpt, addr);
		/* If we're setting the present bit, restore the address
		 * field. If we're clearing it, then the address field
		 * will be zeroed instead, mapping the PTE to the NULL page.
		 */
		if (((mask & MMU_PTE_P_MASK) != 0) &&
		    ((flags & MMU_ENTRY_PRESENT) != 0)) {
			cur_flags |= addr;
		}

		pte->value = (pte->value & ~mask) | cur_flags;
		if (flush) {
			tlb_flush_page((void *)addr);
		}

		size -= MMU_PAGE_SIZE;
		addr += MMU_PAGE_SIZE;
	}
}
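
/* Illustrative usage, patterned after the call in z_x86_thread_pt_init()
 * below; 'pdpt', 'region_start' and 'region_size' are hypothetical, with
 * 'region_start' assumed page-aligned:
 *
 *   z_x86_mmu_set_flags(pdpt, (void *)region_start,
 *                       ROUND_UP(region_size, MMU_PAGE_SIZE),
 *                       MMU_ENTRY_PRESENT | K_MEM_PARTITION_P_RW_U_RW,
 *                       MMU_PTE_P_MASK | K_MEM_PARTITION_PERM_MASK,
 *                       true);
 */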

static char __aligned(MMU_PAGE_SIZE)
	page_pool[MMU_PAGE_SIZE * CONFIG_X86_MMU_PAGE_POOL_PAGES];

static char *page_pos = page_pool + sizeof(page_pool);

static void *get_page(void)
{
	page_pos -= MMU_PAGE_SIZE;

	__ASSERT(page_pos >= page_pool, "out of MMU pages\n");

	return page_pos;
}

__aligned(0x20) struct x86_mmu_pdpt z_x86_kernel_pdpt;
#ifdef CONFIG_X86_KPTI
__aligned(0x20) struct x86_mmu_pdpt z_x86_user_pdpt;
#endif

extern char z_shared_kernel_page_start[];

static inline bool is_within_system_ram(uintptr_t addr)
{
	return (addr >= DT_PHYS_RAM_ADDR) &&
		(addr < (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U)));
}

static void add_mmu_region_page(struct x86_mmu_pdpt *pdpt, uintptr_t addr,
				u64_t flags, bool user_table)
{
	union x86_mmu_pdpte *pdpte;
	struct x86_mmu_pd *pd;
	union x86_mmu_pde_pt *pde;
	struct x86_mmu_pt *pt;
	union x86_mmu_pte *pte;

#ifdef CONFIG_X86_KPTI
	/* If we are generating a page table for user mode, and this address
	 * does not have the user flag set, and this address falls outside
	 * of system RAM, then don't bother generating any tables for it;
	 * we will never need them later, as memory domains are limited to
	 * regions within system RAM.
	 */
	if (user_table && (flags & MMU_ENTRY_USER) == 0 &&
	    !is_within_system_ram(addr)) {
		return;
	}
#endif

	/* Setup the PDPTE entry for the address, creating a page directory
	 * if one didn't exist
	 */
	pdpte = &pdpt->entry[MMU_PDPTE_NUM(addr)];
	if (pdpte->p == 0) {
		pd = get_page();
		pdpte->pd = ((uintptr_t)pd) >> MMU_PAGE_SHIFT;
	} else {
		pd = (struct x86_mmu_pd *)(pdpte->pd << MMU_PAGE_SHIFT);
	}
	pdpte->value |= (flags & PDPTE_FLAGS_MASK);

	/* Setup the PDE entry for the address, creating a page table
	 * if necessary
	 */
	pde = &pd->entry[MMU_PDE_NUM(addr)].pt;
	if (pde->p == 0) {
		pt = get_page();
		pde->pt = ((uintptr_t)pt) >> MMU_PAGE_SHIFT;
	} else {
		pt = (struct x86_mmu_pt *)(pde->pt << MMU_PAGE_SHIFT);
	}
	pde->value |= (flags & PDE_FLAGS_MASK);

	/* The execute disable bit needs special handling: we should only set
	 * it at the page directory level if ALL pages have XD set (instead
	 * of just one).
	 *
	 * Use the 'ignored2' field to store a marker of whether any
	 * configured region allows execution; the CPU never looks at
	 * or modifies this field.
	 */
	if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) {
		pde->ignored2 = 1;
		pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE;
	} else if (pde->ignored2 == 0) {
		pde->value |= MMU_ENTRY_EXECUTE_DISABLE;
	}

#ifdef CONFIG_X86_KPTI
	if (user_table && (flags & MMU_ENTRY_USER) == 0 &&
	    addr != (uintptr_t)(&z_shared_kernel_page_start)) {
		/* All non-user accessible pages except the shared page
		 * are marked non-present in the page table.
		 */
		return;
	}
#else
	ARG_UNUSED(user_table);
#endif

	/* Finally set up the page table entry */
	pte = &pt->entry[MMU_PAGE_NUM(addr)];
	pte->page = addr >> MMU_PAGE_SHIFT;
	pte->value |= (flags & PTE_FLAGS_MASK);
}

static void add_mmu_region(struct x86_mmu_pdpt *pdpt, struct mmu_region *rgn,
			   bool user_table)
{
	size_t size;
	u64_t flags;
	uintptr_t addr;

	__ASSERT((rgn->address & MMU_PAGE_MASK) == 0U,
		 "unaligned address provided");
	__ASSERT((rgn->size & MMU_PAGE_MASK) == 0U,
		 "unaligned size provided");

	addr = rgn->address;

	/* Add the present flag, and filter out 'runtime user' since this
	 * has no meaning to the actual MMU
	 */
	flags = rgn->flags | MMU_ENTRY_PRESENT;

	/* Iterate through the region a page at a time, creating entries as
	 * necessary.
	 */
	size = rgn->size;
	while (size > 0) {
		add_mmu_region_page(pdpt, addr, flags, user_table);

		size -= MMU_PAGE_SIZE;
		addr += MMU_PAGE_SIZE;
	}
}

extern struct mmu_region z_x86_mmulist_start[];
extern struct mmu_region z_x86_mmulist_end[];

/* Called from x86's kernel_arch_init() */
void z_x86_paging_init(void)
{
	size_t pages_free;

	for (struct mmu_region *rgn = z_x86_mmulist_start;
	     rgn < z_x86_mmulist_end; rgn++) {
		add_mmu_region(&z_x86_kernel_pdpt, rgn, false);
#ifdef CONFIG_X86_KPTI
		add_mmu_region(&z_x86_user_pdpt, rgn, true);
#endif
	}

	pages_free = (page_pos - page_pool) / MMU_PAGE_SIZE;

	if (pages_free != 0) {
		printk("Optimal CONFIG_X86_MMU_PAGE_POOL_PAGES %zu\n",
		       CONFIG_X86_MMU_PAGE_POOL_PAGES - pages_free);
	}

	z_x86_enable_paging();
}
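
/* The printout above can be used to right-size the page pool: if it
 * reports, say, an optimal value of 15, the application's prj.conf can be
 * trimmed accordingly (the value shown is purely illustrative):
 *
 *   CONFIG_X86_MMU_PAGE_POOL_PAGES=15
 */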

#ifdef CONFIG_X86_USERSPACE
int z_arch_buffer_validate(void *addr, size_t size, int write)
{
	return z_x86_mmu_validate(z_x86_pdpt_get(_current), addr, size, write);
}

static uintptr_t thread_pd_create(uintptr_t pages,
				  struct x86_mmu_pdpt *thread_pdpt,
				  struct x86_mmu_pdpt *master_pdpt)
{
	uintptr_t pos = pages, phys_addr = Z_X86_PD_START;

	for (int i = 0; i < Z_X86_NUM_PD; i++, phys_addr += Z_X86_PD_AREA) {
		union x86_mmu_pdpte *pdpte;
		struct x86_mmu_pd *master_pd, *dest_pd;

		/* Obtain PD in master tables for the address range and copy
		 * into the per-thread PD for this range
		 */
		master_pd = X86_MMU_GET_PD_ADDR(master_pdpt, phys_addr);
		dest_pd = (struct x86_mmu_pd *)pos;

		(void)memcpy(dest_pd, master_pd, sizeof(struct x86_mmu_pd));

		/* Update pointer in per-thread pdpt to point to the per-thread
		 * directory we just copied
		 */
		pdpte = X86_MMU_GET_PDPTE(thread_pdpt, phys_addr);
		pdpte->pd = pos >> MMU_PAGE_SHIFT;
		pos += MMU_PAGE_SIZE;
	}

	return pos;
}

/* thread_pdpt must be initialized, as well as all the page directories */
static uintptr_t thread_pt_create(uintptr_t pages,
				  struct x86_mmu_pdpt *thread_pdpt,
				  struct x86_mmu_pdpt *master_pdpt)
{
	uintptr_t pos = pages, phys_addr = Z_X86_PT_START;

	for (int i = 0; i < Z_X86_NUM_PT; i++, phys_addr += Z_X86_PT_AREA) {
		union x86_mmu_pde_pt *pde;
		struct x86_mmu_pt *master_pt, *dest_pt;

		/* Same as we did with the directories, obtain PT in master
		 * tables for the address range and copy into per-thread PT
		 * for this range
		 */
		master_pt = X86_MMU_GET_PT_ADDR(master_pdpt, phys_addr);
		dest_pt = (struct x86_mmu_pt *)pos;
		(void)memcpy(dest_pt, master_pt, sizeof(struct x86_mmu_pt));

		/* And then wire this up to the relevant per-thread
		 * page directory entry
		 */
		pde = X86_MMU_GET_PDE(thread_pdpt, phys_addr);
		pde->pt = pos >> MMU_PAGE_SHIFT;
		pos += MMU_PAGE_SIZE;
	}

	return pos;
}

/* Initialize the page tables for a thread. Once this is done, they will
 * contain the boot-time configuration for a user thread's page tables.
 * There are no pre-conditions on the existing state of the per-thread
 * tables.
 */
static void copy_page_tables(struct k_thread *thread,
			     struct x86_mmu_pdpt *master_pdpt)
{
	uintptr_t pos, start;
	struct x86_mmu_pdpt *thread_pdpt = z_x86_pdpt_get(thread);
	struct z_x86_thread_stack_header *header =
		(struct z_x86_thread_stack_header *)thread->stack_obj;

	__ASSERT(thread->stack_obj != NULL, "no stack object assigned");
	__ASSERT(z_x86_page_tables_get() != thread_pdpt, "PDPT is active");
	__ASSERT(((uintptr_t)thread_pdpt & 0x1f) == 0, "unaligned pdpt at %p",
		 thread_pdpt);

	(void)memcpy(thread_pdpt, master_pdpt, sizeof(struct x86_mmu_pdpt));

	/* pos represents the page we are working with in the reserved area
	 * in the stack buffer for per-thread tables. As we create tables in
	 * this area, pos is incremented to the next free page.
	 *
	 * The layout of the stack object, when this is done:
	 *
	 * +---------------------------+  <- thread->stack_obj
	 * | PDE(0)                    |
	 * +---------------------------+
	 * | ...                       |
	 * +---------------------------+
	 * | PDE(Z_X86_NUM_PD - 1)     |
	 * +---------------------------+
	 * | PTE(0)                    |
	 * +---------------------------+
	 * | ...                       |
	 * +---------------------------+
	 * | PTE(Z_X86_NUM_PT - 1)     |
	 * +---------------------------+ <- pos once this logic completes
	 * | Stack guard               |
	 * +---------------------------+
	 * | Privilege elevation stack |
	 * | PDPT                      |
	 * +---------------------------+ <- thread->stack_info.start
	 * | Thread stack              |
	 * | ...                       |
	 *
	 */
	start = (uintptr_t)(&header->page_tables);
	pos = thread_pd_create(start, thread_pdpt, master_pdpt);
	pos = thread_pt_create(pos, thread_pdpt, master_pdpt);

	__ASSERT(pos == (start + Z_X86_THREAD_PT_AREA),
		 "wrong amount of stack object memory used");
}

static void reset_mem_partition(struct x86_mmu_pdpt *thread_pdpt,
				struct k_mem_partition *partition)
{
	uintptr_t addr = partition->start;
	size_t size = partition->size;

	__ASSERT((addr & MMU_PAGE_MASK) == 0U, "unaligned address provided");
	__ASSERT((size & MMU_PAGE_MASK) == 0U, "unaligned size provided");

	while (size != 0) {
		union x86_mmu_pte *thread_pte, *master_pte;

		thread_pte = X86_MMU_GET_PTE(thread_pdpt, addr);
		master_pte = X86_MMU_GET_PTE(&USER_PDPT, addr);

		(void)memcpy(thread_pte, master_pte, sizeof(union x86_mmu_pte));

		size -= MMU_PAGE_SIZE;
		addr += MMU_PAGE_SIZE;
	}
}

static void apply_mem_partition(struct x86_mmu_pdpt *pdpt,
				struct k_mem_partition *partition)
{
	x86_page_entry_data_t x86_attr;
	x86_page_entry_data_t mask;

	if (IS_ENABLED(CONFIG_X86_KPTI)) {
		x86_attr = partition->attr | MMU_ENTRY_PRESENT;
		mask = K_MEM_PARTITION_PERM_MASK | MMU_PTE_P_MASK;
	} else {
		x86_attr = partition->attr;
		mask = K_MEM_PARTITION_PERM_MASK;
	}

	__ASSERT(partition->start >= DT_PHYS_RAM_ADDR,
		 "region at %08lx[%u] extends below system ram start 0x%08x",
		 partition->start, partition->size, DT_PHYS_RAM_ADDR);
	__ASSERT(((partition->start + partition->size) <=
		  (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U))),
		 "region at %08lx[%u] end at %08lx extends beyond system ram end 0x%08x",
		 partition->start, partition->size,
		 partition->start + partition->size,
		 (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U)));

	z_x86_mmu_set_flags(pdpt, (void *)partition->start, partition->size,
			    x86_attr, mask, false);
}

void z_x86_apply_mem_domain(struct x86_mmu_pdpt *pdpt,
			    struct k_mem_domain *mem_domain)
{
	for (int i = 0, pcount = 0; pcount < mem_domain->num_partitions; i++) {
		struct k_mem_partition *partition;

		partition = &mem_domain->partitions[i];
		if (partition->size == 0) {
			continue;
		}
		pcount++;

		apply_mem_partition(pdpt, partition);
	}
}

/* Called on creation of a user thread or when a supervisor thread drops to
 * user mode.
 *
 * Sets up the per-thread page tables, such that when they are activated on
 * context switch, everything is ready to go.
 */
void z_x86_thread_pt_init(struct k_thread *thread)
{
	struct x86_mmu_pdpt *pdpt = z_x86_pdpt_get(thread);

	/* USER_PDPT contains the page tables with the boot time memory
	 * policy. We use it as a template to set up the per-thread page
	 * tables.
	 *
	 * With KPTI, this is a distinct set of tables (z_x86_user_pdpt) from
	 * the kernel page tables in z_x86_kernel_pdpt; in it, every page that
	 * is not user-accessible, except the trampoline page, is marked
	 * non-present. Without KPTI, they are the same object.
	 */
	copy_page_tables(thread, &USER_PDPT);

	/* Enable access to the thread's own stack buffer */
	z_x86_mmu_set_flags(pdpt, (void *)thread->stack_info.start,
			    ROUND_UP(thread->stack_info.size, MMU_PAGE_SIZE),
			    MMU_ENTRY_PRESENT | K_MEM_PARTITION_P_RW_U_RW,
			    MMU_PTE_P_MASK | K_MEM_PARTITION_PERM_MASK,
			    false);
}

/*
 * Memory domain interface
 *
 * In all cases, if one of these APIs is called on a supervisor thread,
 * we don't need to do anything. If the thread later drops to user mode,
 * the per-thread page tables will be generated and the memory domain
 * configuration applied.
 */
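
/* For context, a minimal sketch of the application-level API that these
 * hooks back ('app_buf', 'app_part' and 'app_domain' are hypothetical;
 * the buffer is assumed page-aligned and page-sized, as the MMU requires):
 *
 *   K_MEM_PARTITION_DEFINE(app_part, app_buf, sizeof(app_buf),
 *                          K_MEM_PARTITION_P_RW_U_RW);
 *
 *   struct k_mem_partition *app_parts[] = { &app_part };
 *   struct k_mem_domain app_domain;
 *
 *   k_mem_domain_init(&app_domain, ARRAY_SIZE(app_parts), app_parts);
 *   k_mem_domain_add_thread(&app_domain, k_current_get());
 */
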
void z_arch_mem_domain_partition_remove(struct k_mem_domain *domain,
					u32_t partition_id)
{
	sys_dnode_t *node, *next_node;

	/* Removing a partition. Need to reset the relevant memory range
	 * to the defaults in USER_PDPT for each thread.
	 */
	SYS_DLIST_FOR_EACH_NODE_SAFE(&domain->mem_domain_q, node, next_node) {
		struct k_thread *thread =
			CONTAINER_OF(node, struct k_thread, mem_domain_info);

		if ((thread->base.user_options & K_USER) == 0) {
			continue;
		}

		reset_mem_partition(z_x86_pdpt_get(thread),
				    &domain->partitions[partition_id]);
	}
}

void z_arch_mem_domain_destroy(struct k_mem_domain *domain)
{
	for (int i = 0, pcount = 0; pcount < domain->num_partitions; i++) {
		struct k_mem_partition *partition;

		partition = &domain->partitions[i];
		if (partition->size == 0) {
			continue;
		}
		pcount++;

		z_arch_mem_domain_partition_remove(domain, i);
	}
}

void z_arch_mem_domain_thread_remove(struct k_thread *thread)
{
	struct k_mem_domain *domain = thread->mem_domain_info.mem_domain;

	/* Non-user threads don't have per-thread page tables set up */
	if ((thread->base.user_options & K_USER) == 0) {
		return;
	}

	for (int i = 0, pcount = 0; pcount < domain->num_partitions; i++) {
		struct k_mem_partition *partition;

		partition = &domain->partitions[i];
		if (partition->size == 0) {
			continue;
		}
		pcount++;

		reset_mem_partition(z_x86_pdpt_get(thread), partition);
	}
}

void z_arch_mem_domain_partition_add(struct k_mem_domain *domain,
				     u32_t partition_id)
{
	sys_dnode_t *node, *next_node;

	SYS_DLIST_FOR_EACH_NODE_SAFE(&domain->mem_domain_q, node, next_node) {
		struct k_thread *thread =
			CONTAINER_OF(node, struct k_thread, mem_domain_info);

		if ((thread->base.user_options & K_USER) == 0) {
			continue;
		}

		apply_mem_partition(z_x86_pdpt_get(thread),
				    &domain->partitions[partition_id]);
	}
}

void z_arch_mem_domain_thread_add(struct k_thread *thread)
{
	if ((thread->base.user_options & K_USER) == 0) {
		return;
	}

	z_x86_apply_mem_domain(z_x86_pdpt_get(thread),
			       thread->mem_domain_info.mem_domain);
}

int z_arch_mem_domain_max_partitions_get(void)
{
	return CONFIG_MAX_DOMAIN_PARTITIONS;
}
#endif	/* CONFIG_X86_USERSPACE*/