Boot Linux faster!

Check our new training course

Boot Linux faster!

Check our new training course
and Creative Commons CC-BY-SA
lecture and lab materials

Bootlin logo

Elixir Cross Referencer

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
#!/usr/bin/env python3
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Create the kernel's page tables for x86 CPUs.

For additional detail on paging and x86 memory management, please
consult the IA Architecture SW Developer Manual, volume 3a, chapter 4.

This script produces the initial page tables installed into the CPU
at early boot. These pages will have an identity mapping of the kernel
image. The script takes the 'zephyr_prebuilt.elf' as input to obtain region
sizes, certain memory addresses, and configuration values.

If CONFIG_SRAM_REGION_PERMISSIONS is not enabled, the kernel image will be
mapped with the Present and Write bits set. The linker scripts shouldn't
add page alignment padding between sections.

If CONFIG_SRAM_REGION_PERMISSIONS is enabled, the access permissions
vary:
  - By default, the Present, Write, and Execute Disable bits are
    set.
  - The _image_text region will have Present and User bits set
  - The _image_rodata region will have Present, User, and Execute
    Disable bits set
  - On x86_64, the _locore region will have Present set and
    the _lorodata region will have Present and Execute Disable set.

Because the set of page tables are linked together by physical address,
we must know a priori the physical address of each table. The linker
script must define a z_x86_pagetables_start symbol where the page
tables will be placed, and this memory address must not shift between
prebuilt and final ELF builds. This script will not work on systems
where the physical load address of the kernel is unknown at build time.

64-bit systems will always build IA-32e page tables. 32-bit systems
build PAE page tables if CONFIG_X86_PAE is set, otherwise standard
32-bit page tables are built.

The kernel will expect to find the top-level structure of the produced
page tables at the physical address corresponding to the symbol
z_x86_kernel_ptables. The linker script will need to set that symbol
to the end of the binary produced by this script, minus the size of the
top-level paging structure as it is written out last.
"""

import sys
import array
import argparse
import os
import struct
import elftools
from distutils.version import LooseVersion
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

# Refuse to run at import time if the installed pyelftools is older than 0.24.
# NOTE(review): LooseVersion comes from distutils, which is deprecated by
# PEP 632 and removed from the standard library in Python 3.12 -- the import
# above will fail there before this check is reached.
if LooseVersion(elftools.__version__) < LooseVersion('0.24'):
    sys.exit("pyelftools is out of date, need version 0.24 or later")


def bit(pos):
    """Return an integer with only bit number pos set (bit 0 is the least
    significant bit)"""
    return 1 << pos


# Page table entry flags. The bit positions follow the x86 paging entry
# formats (see the SDM chapter referenced in the module docstring).
FLAG_P = bit(0)    # Present
FLAG_RW = bit(1)   # Read/Write
FLAG_US = bit(2)   # User/Supervisor
FLAG_G = bit(8)    # Global
FLAG_XD = bit(63)  # Execute Disable; bit 63 only exists in 64-bit entries

# Bits 9-11 of an entry. Going by their names these are bits the MMU ignores,
# which leaves them free for software use.
FLAG_IGNORED0 = bit(9)
FLAG_IGNORED1 = bit(10)
FLAG_IGNORED2 = bit(11)

# Permission values as they are actually written into leaf entries: each
# permission bit is paired with one of the ignored bits above.
# NOTE(review): presumably the shadow bit lets the kernel recover the
# originally intended permission later -- confirm against the arch MMU code.
ENTRY_RW = FLAG_RW | FLAG_IGNORED0
ENTRY_US = FLAG_US | FLAG_IGNORED1
ENTRY_XD = FLAG_XD | FLAG_IGNORED2

def debug(text):
    """Write text to stdout, prefixed with this script's file name.

    Nothing is printed unless the parsed command-line options (the
    module-level ``args``) have ``verbose`` set.
    """
    if args.verbose:
        prefix = os.path.basename(sys.argv[0]) + ": "
        sys.stdout.write(prefix + text + "\n")


def error(text):
    """Abort the script via sys.exit() with text as the exit message,
    prefixed with this script's file name."""
    script_name = os.path.basename(sys.argv[0])
    sys.exit(script_name + ": " + text)


def align_check(base, size):
    """Abort via error() unless both base and size are multiples of the
    4096-byte page size. The base address is checked first."""
    page_size = 4096

    if base % page_size:
        error("unaligned base address %x" % base)
    if size % page_size:
        error("Unaligned region size %d for base %x" % (size, base))


def dump_flags(flags):
    """Return a human-readable, space-separated list of the flags set.

    Only P, RW, US, G and XD are reported, always in that order; any other
    bits in flags are not mentioned. Returns an empty string if none of
    them are set.
    """
    # Built inside the function so the FLAG_* names are looked up at call time
    known = (
        (FLAG_P, "P"),
        (FLAG_RW, "RW"),
        (FLAG_US, "US"),
        (FLAG_G, "G"),
        (FLAG_XD, "XD"),
    )

    return " ".join(label for mask, label in known if flags & mask)

# Hard-coded flags for entries at intermediate (non-leaf) paging levels.
# These are deliberately permissive (present, writable, user-accessible):
# access control and caching properties are only applied at the leaf level.
INT_FLAGS = FLAG_P | FLAG_RW | FLAG_US

class MMUTable:
    """A single table in a set of page tables, at any paging level.

    Concrete table types subclass this and define supported_flags,
    addr_shift, addr_mask, type_code and num_entries as plain class
    attributes, which replace the placeholder properties below.
    """

    def __init__(self):
        # All entries start out as zero, i.e. without the Present flag
        self.entries = array.array(self.type_code, [0]) * self.num_entries

    def get_binary(self):
        """Return this table serialized as a bytearray, one entry after
        another in index order"""
        # Always little-endian, whatever the byte order of the build host
        layout = "<%d%s" % (self.num_entries, self.type_code)
        return bytearray(struct.pack(layout, *self.entries))

    @property
    def supported_flags(self):
        """Mask of the flag bits that may be stored in entries of this
        table type. Must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def addr_shift(self):
        """Number of bits a virtual address is shifted right by to obtain
        its index in this table. Must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def addr_mask(self):
        """Mask selecting the physical address bits of an entry. Must be
        provided by subclasses."""
        raise NotImplementedError()

    @property
    def type_code(self):
        """Struct/array letter code of one entry: I for 32-bit entries,
        Q for PAE/IA-32e. Must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def num_entries(self):
        """How many entries the table holds; depends on table type and
        paging mode. Must be provided by subclasses."""
        raise NotImplementedError()

    def entry_index(self, virt_addr):
        """Return the index of the entry that covers the given virtual
        address"""
        # Masking with num_entries - 1 relies on num_entries being a
        # power of two
        index_mask = self.num_entries - 1
        return (virt_addr >> self.addr_shift) & index_mask

    def has_entry(self, virt_addr):
        """Return True if the entry covering the given virtual address has
        the Present flag set"""
        entry = self.entries[self.entry_index(virt_addr)]
        return bool(entry & FLAG_P)

    def lookup(self, virt_addr):
        """Return the physical address stored in the entry covering the
        given virtual address.

        For a leaf table this is the mapped physical address, otherwise it
        is the physical address of the next-level table."""
        entry = self.entries[self.entry_index(virt_addr)]
        return entry & self.addr_mask

    def map(self, virt_addr, phys_addr, entry_flags):
        """Overwrite the entry covering the given virtual address with the
        given physical address and flags.

        Address bits outside addr_mask and flags outside supported_flags
        are silently dropped. For a leaf table the physical address is the
        mapping itself, otherwise it is the address of the next-level
        table."""
        slot = self.entry_index(virt_addr)
        address_bits = phys_addr & self.addr_mask
        flag_bits = entry_flags & self.supported_flags

        self.entries[slot] = address_bits | flag_bits

    def set_perms(self, virt_addr, entry_flags):
        """Replace only the flags of the entry covering the given virtual
        address, keeping its physical address.

        All previous flags are discarded; flags outside supported_flags
        are silently dropped."""
        slot = self.entry_index(virt_addr)
        address_bits = self.entries[slot] & self.addr_mask
        flag_bits = entry_flags & self.supported_flags

        self.entries[slot] = address_bits | flag_bits


# Specific supported table types
class Pml4(MMUTable):
    """Page mapping level 4 for IA-32e"""
    # Indexed by virtual address bits 39..47 (shift of 39, 512 entries), so
    # each entry covers 1 << 39 bytes of virtual address space
    addr_shift = 39
    # Bits 12..62 of an entry hold the physical address of the next table
    addr_mask = 0x7FFFFFFFFFFFF000
    # 64-bit entries
    type_code = 'Q'
    num_entries = 512
    # Non-leaf level: only the permissive intermediate flags are stored
    supported_flags = INT_FLAGS

class Pdpt(MMUTable):
    """Page directory pointer table for IA-32e"""
    # Indexed by virtual address bits 30..38 (shift of 30, 512 entries), so
    # each entry covers 1 << 30 bytes of virtual address space
    addr_shift = 30
    # Bits 12..62 of an entry hold the physical address of the next table
    addr_mask = 0x7FFFFFFFFFFFF000
    # 64-bit entries
    type_code = 'Q'
    num_entries = 512
    # Non-leaf level: only the permissive intermediate flags are stored
    supported_flags = INT_FLAGS

class PdptPAE(Pdpt):
    """Page directory pointer table for PAE"""
    # Same layout as Pdpt, but with only 4 entries (virtual address bits
    # 30..31). At 8 bytes per entry this table is 32 bytes, not a full page.
    num_entries = 4

class Pd(MMUTable):
    """Page directory for 32-bit"""
    # Indexed by virtual address bits 22..31 (shift of 22, 1024 entries), so
    # each entry covers 1 << 22 bytes of virtual address space
    addr_shift = 22
    # Bits 12..31 of an entry hold the physical address of the page table
    addr_mask = 0xFFFFF000
    # 32-bit entries
    type_code = 'I'
    num_entries = 1024
    # Non-leaf level: only the permissive intermediate flags are stored
    supported_flags = INT_FLAGS

class PdXd(Pd):
    """Page directory for either PAE or IA-32e"""
    # Indexed by virtual address bits 21..29 (shift of 21, 512 entries), so
    # each entry covers 1 << 21 bytes of virtual address space
    addr_shift = 21
    # Bits 12..62 of an entry hold the physical address of the page table
    addr_mask = 0x7FFFFFFFFFFFF000
    num_entries = 512
    # 64-bit entries; supported_flags (INT_FLAGS) is inherited from Pd
    type_code = 'Q'

class Pt(MMUTable):
    """Page table for 32-bit"""
    # Leaf level. Indexed by virtual address bits 12..21 (shift of 12,
    # 1024 entries), so each entry maps one 4096-byte page
    addr_shift = 12
    # Bits 12..31 of an entry hold the physical address of the mapped page
    addr_mask = 0xFFFFF000
    # 32-bit entries
    type_code = 'I'
    num_entries = 1024
    # FLAG_XD is bit 63 and cannot be represented in a 32-bit entry, so it
    # is not supported here; FLAG_IGNORED2, which is paired with it in
    # ENTRY_XD, is left out as well
    supported_flags = (FLAG_P | FLAG_RW | FLAG_US | FLAG_G |
                       FLAG_IGNORED0 | FLAG_IGNORED1)

class PtXd(Pt):
    """Page table for either PAE or IA-32e"""
    # Leaf level. addr_shift (12) is inherited from Pt; with 512 entries the
    # table is indexed by virtual address bits 12..20.
    # Bits 12..58 of an entry hold the physical address of the mapped page.
    # NOTE(review): this mask is narrower than the 0x7FFFFFFFFFFFF000 used by
    # the other 64-bit table types (it also excludes bits 59..62) --
    # presumably intentional for leaf entries, confirm against the SDM.
    addr_mask = 0x07FFFFFFFFFFF000
    # 64-bit entries
    type_code = 'Q'
    num_entries = 512
    # 64-bit entries have room for Execute Disable and its shadow bit
    supported_flags = (FLAG_P | FLAG_RW | FLAG_US | FLAG_G | FLAG_XD |
                       FLAG_IGNORED0 | FLAG_IGNORED1 | FLAG_IGNORED2)


class PtableSet:
    """A complete set of page tables for one paging mode.

    Subclasses select the paging mode by defining ``levels`` as a plain
    class attribute.
    """

    def __init__(self, pages_start):
        """Create an empty set of page tables whose tables will be placed in
        the image starting at physical address pages_start"""
        # Physical address -> MMUTable for every table except the top-level
        # one, which is tracked separately in self.toplevel. Empty for now
        # since nothing has been mapped yet.
        self.tables = {}

        # Physical address that the next new table will be placed at
        self.page_pos = pages_start
        self.toplevel = self.levels[0]()

        debug("%s starting at physical address 0x%x" %
              (type(self).__name__, pages_start))

    def get_new_mmutable_addr(self):
        """Hand out the physical address for a new MMUTable and advance the
        allocation position by one 4096-byte page"""
        addr = self.page_pos
        self.page_pos = addr + 4096
        return addr

    @property
    def levels(self):
        """List of table classes, one per paging level: the top-level table
        class comes first and the last one is always a leaf page table
        class (Pt or PtXd). Must be provided by subclasses."""
        raise NotImplementedError()

    def new_child_table(self, table, virt_addr, depth):
        """Create a table for paging level depth, register it, and link it
        into the parent table at the entry covering virt_addr.

        Returns the new table."""
        child_addr = self.get_new_mmutable_addr()
        child_class = self.levels[depth]
        child = child_class()
        debug("new %s at physical addr 0x%x"
                      % (child_class.__name__, child_addr))

        self.tables[child_addr] = child
        # Intermediate entries always get the permissive INT_FLAGS
        table.map(virt_addr, child_addr, INT_FLAGS)

        return child

    def map_page(self, virt_addr, phys_addr, flags, reserve):
        """Map one page at virt_addr to phys_addr with the given flags,
        creating any missing intermediate tables along the way.

        If reserve is true, only the paging structures are created and the
        leaf entry itself is left untouched."""
        table = self.toplevel

        # Walk down from the top level to the leaf table, creating and
        # linking child tables where none exists yet
        for depth in range(1, len(self.levels)):
            if table.has_entry(virt_addr):
                table = self.tables[table.lookup(virt_addr)]
            else:
                table = self.new_child_table(table, virt_addr, depth)

        if reserve:
            return

        # table is now the leaf page table
        table.map(virt_addr, phys_addr, flags)

    def reserve(self, virt_base, size):
        """Instantiate all paging structures needed to cover size bytes of
        virtual address space starting at virt_base, without mapping any
        page.

        Exits via error() if virt_base or size is not page-aligned, or if
        virt_base is not aligned to the span of one leaf page table."""
        debug("Reserving paging structures 0x%x (%d)" %
              (virt_base, size))

        align_check(virt_base, size)

        # Amount of memory covered by a single leaf page table, i.e. by one
        # entry of the level directly above it
        leaf_span = 1 << self.levels[-2].addr_shift

        if virt_base % leaf_span != 0:
            error("misaligned virtual address space, 0x%x not a multiple of 0x%x" %
                  (virt_base, leaf_span))

        # Touching one address per leaf table is enough to create it
        for offset in range(0, size, leaf_span):
            self.map_page(virt_base + offset, 0, 0, True)

    def map(self, phys_base, size, flags):
        """Identity-map size bytes starting at phys_base, page by page, with
        the given access flags. The page at address 0 is always skipped.
        """
        debug("Identity-mapping 0x%x (%d): %s" %
              (phys_base, size, dump_flags(flags)))

        align_check(phys_base, size)
        for page in range(phys_base, phys_base + size, 4096):
            # Never map the NULL page
            if page != 0:
                self.map_page(page, page, flags, False)

    def set_region_perms(self, name, flags):
        """Replace the access flags of every page in a named region.

        The region bounds are taken from the symbols name + "_start" and
        name + "_size". Physical addresses stored in the entries are kept,
        so any double-mapping is left undisturbed. Exits via error() if the
        paging structures for some page of the region do not exist."""
        # NOTE(review): a page whose leaf table exists but whose entry was
        # never mapped is not detected here; it simply ends up with the new
        # flags and a physical address of 0.

        # It does not matter whether this is a virtual address: either
        # there is a dual mapping, or it equals the physical address
        base = syms[name + "_start"]
        size = syms[name + "_size"]

        debug("change flags for %s at 0x%x (%d): %s" %
              (name, base, size, dump_flags(flags)))
        align_check(base, size)

        try:
            for page in range(base, base + size, 4096):
                # Never map the NULL page
                if page == 0:
                    continue

                # Descend one table per level below the top one; a missing
                # table shows up as a KeyError
                leaf = self.toplevel
                for _level in self.levels[1:]:
                    leaf = self.tables[leaf.lookup(page)]
                leaf.set_perms(page, flags)
        except KeyError:
            error("no mapping for %s region 0x%x (size 0x%x)" %
                  (name, base, size))

    def write_output(self, filename):
        """Write all tables to filename in binary form: the tables from
        self.tables in ascending physical address order, followed by the
        top-level table"""
        with open(filename, "wb") as out:
            for addr in sorted(self.tables):
                out.write(self.tables[addr].get_binary())

            # The top-level table always goes last. In PAE the top-level
            # PDPT has only 4 entries and is smaller than a page, which is
            # why it is kept out of self.tables and special-cased here.
            # The address allocated for it is only used in the message.
            toplevel_addr = self.get_new_mmutable_addr()
            debug("top-level %s at physical addr 0x%x" %
                  (type(self.toplevel).__name__, toplevel_addr))
            out.write(self.toplevel.get_binary())

# Paging mode classes, we'll use one depending on configuration
class Ptables32bit(PtableSet):
    """Standard 32-bit paging: page directory, then page table"""
    levels = [Pd, Pt]

class PtablesPAE(PtableSet):
    """PAE paging: 4-entry page directory pointer table, then page
    directory, then page table"""
    levels = [PdptPAE, PdXd, PtXd]

class PtablesIA32e(PtableSet):
    """IA-32e paging: PML4, then page directory pointer table, then page
    directory, then page table"""
    levels = [Pml4, Pdpt, PdXd, PtXd]


def parse_args():
    """Parse the command line into the module-level ``args``.

    Verbose output is enabled by -v/--verbose, or by a VERBOSE variable
    being present in the environment (whatever its value).
    """
    global args

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    cli.add_argument("-k", "--kernel", required=True,
                     help="path to prebuilt kernel ELF binary")
    cli.add_argument("-o", "--output", required=True,
                     help="output file")
    cli.add_argument("-v", "--verbose", action="store_true",
                     help="Print extra debugging information")

    args = cli.parse_args()
    args.verbose = args.verbose or "VERBOSE" in os.environ


def get_symbols(obj):
    """Return a dict mapping symbol name to symbol value for the first
    symbol table section found in the ELF object.

    Raises LookupError if the object has no symbol table section."""
    symtab = next((sect for sect in obj.iter_sections()
                   if isinstance(sect, SymbolTableSection)), None)

    if symtab is None:
        raise LookupError("Could not find symbol table")

    return {sym.name: sym.entry.st_value for sym in symtab.iter_symbols()}

def isdef(sym_name):
    """Return True if sym_name is present in the kernel's symbol table
    (the module-level ``syms`` populated by main())"""
    return sym_name in syms

def main():
    """Build the initial page tables for the kernel given on the command
    line and write them to the output file.

    Configuration values, region bounds and the location of the page
    tables are all read from the kernel ELF's symbol table, which is
    stored in the module-level ``syms``.
    """
    global syms
    parse_args()

    # Only the symbol table is needed from the ELF file
    with open(args.kernel, "rb") as elf_fp:
        syms = get_symbols(ELFFile(elf_fp))

    is_64bit = isdef("CONFIG_X86_64")
    is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")

    # Paging mode: IA-32e on 64-bit, otherwise PAE if enabled, otherwise
    # standard 32-bit
    if is_64bit:
        pclass = PtablesIA32e
    elif isdef("CONFIG_X86_PAE"):
        pclass = PtablesPAE
    else:
        pclass = Ptables32bit

    debug("building %s" % pclass.__name__)

    vm_base = syms["CONFIG_KERNEL_VM_BASE"]
    vm_size = syms["CONFIG_KERNEL_VM_SIZE"]

    if isdef("CONFIG_ARCH_MAPS_ALL_RAM"):
        # Map all of RAM rather than just the image. The configured size
        # is scaled by 1024 (presumably it is expressed in KiB).
        image_base = syms["CONFIG_SRAM_BASE_ADDRESS"]
        image_size = 1024 * syms["CONFIG_SRAM_SIZE"]
    else:
        image_base, image_size = syms["z_mapped_start"], syms["z_mapped_size"]
    ptables_phys = syms["z_x86_pagetables_start"]

    debug("Address space: 0x%x - 0x%x size %x" %
          (vm_base, vm_base + vm_size, vm_size))

    debug("Zephyr image: 0x%x - 0x%x size %x" %
          (image_base, image_base + image_size, image_size))

    if image_size >= vm_size:
        error("VM size is too small (have 0x%x need more than 0x%x)" % (vm_size, image_size))

    # With region permissions enabled, no page is executable by default;
    # this is relaxed per region by the pt.set_region_perms() calls below
    map_flags = (FLAG_P | ENTRY_XD) if is_perm_regions else FLAG_P
    rw_flags = map_flags | ENTRY_RW

    pt = pclass(ptables_phys)
    # Instantiate all the paging structures for the address space
    pt.reserve(vm_base, vm_size)
    # Map the zephyr image
    pt.map(image_base, image_size, rw_flags)

    if is_64bit:
        # 64-bit has a special region in the first 64K to bootstrap other
        # CPUs from real mode
        locore_base = syms["_locore_start"]
        locore_size = syms["_lodata_end"] - locore_base
        debug("Base addresses: physical 0x%x size %d" % (locore_base,
                                                         locore_size))
        pt.map(locore_base, locore_size, rw_flags)

    if isdef("CONFIG_XIP"):
        # Additionally identity-map all ROM as read-only
        pt.map(syms["CONFIG_FLASH_BASE_ADDRESS"],
               syms["CONFIG_FLASH_SIZE"] * 1024, map_flags)

    # Adjust mapped region permissions if configured
    if is_perm_regions:
        # What has to be achieved here:
        # - Text regions get the XD flag cleared, and the RW flag removed
        #   unless built with gdbstub support
        # - Rodata regions get the RW flag cleared
        # - User mode gets access, since application text/rodata is
        #   currently not separated from kernel text/rodata
        text_flags = FLAG_P | ENTRY_US
        if isdef("CONFIG_GDBSTUB"):
            text_flags |= ENTRY_RW
        pt.set_region_perms("_image_text", text_flags)
        pt.set_region_perms("_image_rodata", FLAG_P | ENTRY_US | ENTRY_XD)

        if isdef("CONFIG_COVERAGE_GCOV") and isdef("CONFIG_USERSPACE"):
            # If GCOV is enabled, user mode must be able to write to its
            # common data area
            pt.set_region_perms("__gcov_bss",
                                FLAG_P | ENTRY_RW | ENTRY_US | ENTRY_XD)

        if is_64bit:
            # Set appropriate permissions for the locore areas, much like
            # for the main text/rodata regions above.
            #
            # With KPTI, the User bit is set on the read-only
            # locore/lorodata areas so that they get mapped into the User
            # page tables. They hold no sensitive data, and contain the
            # text/data needed to take an exception or interrupt.
            flag_user = ENTRY_US if isdef("CONFIG_X86_KPTI") else 0

            pt.set_region_perms("_locore", FLAG_P | flag_user)
            pt.set_region_perms("_lorodata", FLAG_P | ENTRY_XD | flag_user)

    pt.write_output(args.output)

# Only generate page tables when run as a script, not when imported
if __name__ == "__main__":
    main()