/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <kernel_structs.h>
#include <arch/x86/ia32/asm.h>
#include <arch/cpu.h>
#include <offsets_short.h>
#include <syscall.h>

/* Exports */
GTEXT(z_x86_syscall_entry_stub)
GTEXT(z_x86_userspace_enter)
GTEXT(z_arch_user_string_nlen)
GTEXT(z_arch_user_string_nlen_fault_start)
GTEXT(z_arch_user_string_nlen_fault_end)
GTEXT(z_arch_user_string_nlen_fixup)

/* Imports */
GDATA(_k_syscall_table)

#ifdef CONFIG_X86_KPTI
/* Switch from the shadow to the kernel page table, switch to the interrupted
 * thread's kernel stack, and copy all context from the trampoline stack.
 *
 * Assumes all registers are callee-saved since this gets called from other
 * ASM code. Assumes a particular stack layout which is correct for
 * _exception_enter and _interrupt_enter when invoked with a call instruction:
 *
 * 28 SS
 * 24 ESP
 * 20 EFLAGS
 * 16 CS
 * 12 EIP
 *  8 isr_param or exc code
 *  4 isr or exc handler
 *  0 return address
 */
SECTION_FUNC(TEXT, z_x86_trampoline_to_kernel)
	/* Check the interrupted code segment to see if we came from ring 3
	 * and hence are on the trampoline stack
	 */
	testb $3, 16(%esp) /* Offset of CS */
	jz 1f

	/* Stash these regs as we need to use them */
	pushl %esi
	pushl %edi

	/* Switch to the kernel page table */
	movl $z_x86_kernel_pdpt, %esi
	movl %esi, %cr3

	/* Save the old trampoline stack pointer in %edi */
	movl %esp, %edi

	/* %esp = _kernel->current->stack_info.start
	 *
	 * This is the lowest address of the user mode stack; the PDPT is
	 * immediately before it, and then the highest address of the kernel
	 * stack. We want to transplant context here.
	 */
	movl $_kernel, %esi
	movl _kernel_offset_to_current(%esi), %esi
	movl _thread_offset_to_stack_start(%esi), %esp
	subl $Z_X86_PDPT_SIZE, %esp

	/* Transplant the stack context and restore ESI/EDI, taking care to
	 * zero or put uninteresting values where we stashed ESI/EDI, since
	 * the trampoline page is insecure and there might be a context
	 * switch on the way out instead of returning to the original thread
	 * immediately.
	 */
	pushl 36(%edi)	/* SS */
	pushl 32(%edi)	/* ESP */
	pushl 28(%edi)	/* EFLAGS */
	pushl 24(%edi)	/* CS */
	pushl 20(%edi)	/* EIP */
	pushl 16(%edi)	/* error code or isr parameter */
	pushl 12(%edi)	/* exception/irq handler */
	pushl 8(%edi)	/* return address */
	movl 4(%edi), %esi	/* restore ESI */
	movl $0, 4(%edi)	/* Zero old ESI storage area */
	xchgl %edi, (%edi)	/* Exchange old EDI to restore it and put the
				 * old SP in the storage area */

	/* The trampoline stack should have nothing sensitive in it at this
	 * point */
1:
	ret
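/* Rough C-style sketch of the transplant above, for readability only (not
 * assembled code; 'tr' and 'ksp' are hypothetical names). With 'tr' the
 * saved trampoline SP in %edi viewed as a u32_t array, and 'ksp' the kernel
 * stack pointer just established in %esp:
 *
 *	for (int i = 9; i >= 2; i--)	// SS (tr[9]) down to the
 *		*--ksp = tr[i];		// return address (tr[2])
 *	esi = tr[1];			// restore stashed ESI
 *	tr[1] = 0;			// scrub the insecure page
 *	swap(edi, tr[0]);		// restore EDI, park old SP there
 */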
/* Copy the interrupt return stack context to the trampoline stack, switch
 * back to the user page table, and only then 'iret'. We jump to this instead
 * of calling 'iret' if KPTI is turned on.
 *
 * The stack layout is expected to be as follows:
 *
 * 16 SS
 * 12 ESP
 *  8 EFLAGS
 *  4 CS
 *  0 EIP
 *
 * This function is conditionally macroed to KPTI_IRET/KPTI_IRET_USER
 */
SECTION_FUNC(TEXT, z_x86_trampoline_to_user)
	/* Check the interrupted code segment to see if we came from ring 3
	 * and hence are on the trampoline stack
	 */
	testb $3, 4(%esp) /* Offset of CS */
	jz 1f

	/* Otherwise, fall through ... */

SECTION_FUNC(TEXT, z_x86_trampoline_to_user_always)
	/* Stash EDI, we need a free register */
	pushl %edi

	/* Store the old stack pointer and switch to the trampoline stack */
	movl %esp, %edi
	movl $z_trampoline_stack_end, %esp

	/* Lock IRQs until we get out; we don't want anyone else using the
	 * trampoline stack
	 */
	cli

	/* Copy context */
	pushl 20(%edi)	/* SS */
	pushl 16(%edi)	/* ESP */
	pushl 12(%edi)	/* EFLAGS */
	pushl 8(%edi)	/* CS */
	pushl 4(%edi)	/* EIP */
	xchgl %edi, (%edi)	/* Exchange old EDI to restore it and put the
				 * trampoline stack address in its old
				 * storage area */

	/* Switch to the user page table. The per-thread user page table is
	 * located at the highest addresses of the privilege mode elevation
	 * stack, immediately below the thread stack buffer.
	 */
	pushl %eax
	movl $_kernel, %eax
	movl _kernel_offset_to_current(%eax), %eax
	movl _thread_offset_to_stack_start(%eax), %eax
	subl $Z_X86_PDPT_SIZE, %eax
	movl %eax, %cr3
	popl %eax
	movl $0, -4(%esp)	/* Delete stashed EAX data */

	/* The trampoline stack should have nothing sensitive in it at this
	 * point */
1:
	iret
#endif /* CONFIG_X86_KPTI */

/* Landing site for the syscall SW IRQ. Marshal arguments and call the C
 * function for further processing. We're on the kernel stack for the
 * invoking thread, unless KPTI is enabled, in which case we're on the
 * trampoline stack and need to get off it before enabling interrupts.
 */
SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
#ifdef CONFIG_X86_KPTI
	/* Stash these regs as we need to use them */
	pushl %esi
	pushl %edi

	/* Switch to the kernel page table */
	movl $z_x86_kernel_pdpt, %esi
	movl %esi, %cr3

	/* Save the old trampoline stack pointer in %edi */
	movl %esp, %edi

	/* %esp = _kernel->current->stack_info.start
	 *
	 * This is the lowest address of the user mode stack; the PDPT is
	 * immediately before it, and then the highest address of the kernel
	 * stack. We want to transplant context here.
	 */
	movl $_kernel, %esi
	movl _kernel_offset_to_current(%esi), %esi
	movl _thread_offset_to_stack_start(%esi), %esp
	subl $Z_X86_PDPT_SIZE, %esp

	/* Transplant context according to the layout above. Variant of the
	 * logic in z_x86_trampoline_to_kernel.
	 */
	pushl 24(%edi)	/* SS */
	pushl 20(%edi)	/* ESP */
	pushl 16(%edi)	/* EFLAGS */
	pushl 12(%edi)	/* CS */
	pushl 8(%edi)	/* EIP */
	movl 4(%edi), %esi	/* restore ESI */
	movl $0, 4(%edi)	/* Zero old ESI storage area */
	xchgl %edi, (%edi)	/* Exchange old EDI to restore it and put the
				 * old SP in the storage area */

	/* The trampoline stack should have nothing sensitive in it at this
	 * point */
#endif /* CONFIG_X86_KPTI */
	sti	/* re-enable interrupts */
	cld	/* clear direction flag, restored on 'iret' */

	/* call_id is in ESI; bounds-check it, it must be less than
	 * K_SYSCALL_LIMIT
	 */
	cmp $K_SYSCALL_LIMIT, %esi
	jae _bad_syscall

_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif
	/* Marshal arguments per the calling convention to match what is
	 * expected for _k_syscall_handler_t functions
	 */
	push %esp	/* ssf */
	push %ebp	/* arg6 */
	push %edi	/* arg5 */
	push %ebx	/* arg4 */
#ifndef CONFIG_X86_IAMCU
	push %ecx	/* arg3 */
	push %edx	/* arg2 */
	push %eax	/* arg1 */
#endif
	/* From the call ID in ESI, load EBX with the actual function pointer
	 * to call by looking it up in the system call dispatch table
	 */
	xor %edi, %edi
	mov _k_syscall_table(%edi, %esi, 4), %ebx

	/* Run the handler, which is some entry in _k_syscall_table */
	INDIRECT_CALL(%ebx)

	/* EAX now contains the return value. Pop or xor everything else to
	 * prevent an information leak from kernel mode.
	 */
#ifndef CONFIG_X86_IAMCU
	pop %edx	/* old arg1 value, discard it */
	pop %edx
	pop %ecx
#endif
	pop %ebx
	pop %edi
#ifndef CONFIG_X86_IAMCU
	/* Discard ssf and arg6 */
	add $8, %esp
#else
	pop %ecx	/* Clean ECX and get arg6 off the stack */
	pop %edx	/* Clean EDX and get ssf off the stack */
#endif
	KPTI_IRET_USER

_bad_syscall:
	/* ESI had a bogus syscall value in it; replace it with the bad
	 * syscall handler's ID and pass the bad ID as its first argument.
	 * This clobbers ESI, but the bad syscall handler never returns
	 * anyway: it is going to generate a kernel oops
	 */
	mov %esi, %eax
	mov $K_SYSCALL_BAD, %esi
	jmp _id_ok
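/* Descriptive note (a sketch of the handler's view, not an ABI definition):
 * at the INDIRECT_CALL above, the pushes form a conventional cdecl frame,
 * so the handler runs as if declared
 *
 *	u32_t handler(u32_t arg1, u32_t arg2, u32_t arg3,
 *		      u32_t arg4, u32_t arg5, u32_t arg6, void *ssf);
 *
 * arg1..arg3 come from EAX/EDX/ECX (pushed here except on IAMCU, whose ABI
 * already passes the first three arguments in those registers), arg4..arg6
 * from EBX/EDI/EBP, and ssf points at the syscall stack frame, used if the
 * handler needs to generate a kernel oops. The handler's return value comes
 * back to the caller in EAX.
 */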
/*
 * size_t z_arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 */
SECTION_FUNC(TEXT, z_arch_user_string_nlen)
	push %ebp
	mov %esp, %ebp

	/* The error value, set to -1 initially. This location is -4(%ebp) */
	push $-1

	/* Do the strlen operation, based on disassembly of minimal libc */
	xor %eax, %eax		/* EAX = 0, length count */
	mov 0x8(%ebp), %edx	/* EDX = base of string */

	/* This code might page fault */
strlen_loop:
z_arch_user_string_nlen_fault_start:
	cmpb $0x0, (%edx, %eax, 1)	/* *(EDX + EAX) == 0? Could fault. */

z_arch_user_string_nlen_fault_end:
	je strlen_done
	cmp 0xc(%ebp), %eax	/* Max length reached? */
	je strlen_done
	inc %eax		/* EAX++ and loop again */
	jmp strlen_loop

strlen_done:
	/* Set the error value to 0 since we succeeded */
	movl $0, -4(%ebp)

z_arch_user_string_nlen_fixup:
	/* Write the error value to the err pointer parameter */
	movl 0x10(%ebp), %ecx
	pop %edx
	movl %edx, (%ecx)

	pop %ebp
	ret
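/* Descriptive note on the fixup path: if the cmpb in strlen_loop faults
 * because 's' points to an unmapped or forbidden page, the page fault
 * handler is expected to recognize a faulting EIP between
 * z_arch_user_string_nlen_fault_start and z_arch_user_string_nlen_fault_end
 * and resume execution at z_arch_user_string_nlen_fixup. In that case the
 * -1 pushed at function entry is still at -4(%ebp), so the caller observes
 * *err_arg == -1; on the success path that slot has already been
 * overwritten with 0.
 */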
/* FUNC_NORETURN void z_x86_userspace_enter(k_thread_entry_t user_entry,
 *					    void *p1, void *p2, void *p3,
 *					    u32_t stack_end,
 *					    u32_t stack_start)
 *
 * A one-way trip to userspace.
 */
SECTION_FUNC(TEXT, z_x86_userspace_enter)
	pop %esi	/* Discard return address on stack */

	/* Fetch parameters on the stack */
#ifndef CONFIG_X86_IAMCU
	pop %eax	/* user_entry */
	pop %edx	/* p1 */
	pop %ecx	/* p2 */
#endif
	pop %esi	/* p3 */
	pop %ebx	/* stack_end (high address) */
	pop %edi	/* stack_start (low address) */

	/* Move to the kernel stack for this thread, so we can erase the
	 * user stack. The kernel stack is the page immediately before
	 * the user stack.
	 *
	 * For security reasons, we must erase the entire user stack.
	 * We don't know in what previous contexts it was used and do not
	 * want to leak any information.
	 */
	mov %edi, %esp
	subl $Z_X86_PDPT_SIZE, %esp

	/* Stash some registers we are going to need to erase the user
	 * stack.
	 */
	push %ecx
	push %edi
	push %eax

	/* Compute the size of the user stack in 4-byte chunks and put it
	 * in ECX
	 */
	mov %ebx, %ecx
	sub %edi, %ecx
	shr $2, %ecx	/* Divide by 4 */

#ifdef CONFIG_INIT_STACKS
	mov $0xAAAAAAAA, %eax
#else
	xor %eax, %eax
#endif
	/* Store 4 bytes of memory at a time, starting at ES:EDI, with
	 * whatever is in EAX. Repeat this ECX times. Stack sizes are always
	 * at least 4-byte aligned.
	 */
	cld
	rep stosl

	/* Restore registers */
	pop %eax
	pop %edi
	pop %ecx

	/* Now set the stack pointer to the base of the user stack. Once
	 * this is set we won't need EBX any more.
	 */
	mov %ebx, %esp

	/* Set segment registers (except CS and SS, which are set in
	 * a special way by 'iret' below)
	 */
	mov $USER_DATA_SEG, %bx
	mov %bx, %ds
	mov %bx, %es

	/* Push arguments to z_thread_entry() */
	push %esi	/* p3 */
#ifndef CONFIG_X86_IAMCU
	push %ecx	/* p2 */
	push %edx	/* p1 */
	push %eax	/* user_entry */
#endif
	/* NULL return address */
	push $0

	/* Save the stack pointer at this position; this is where it will be
	 * when we land in z_thread_entry()
	 */
	mov %esp, %edi

	/* An inter-privilege 'iret' pops all of these. We need to fake an
	 * interrupt return to enter user mode, as far calls cannot change
	 * the privilege level.
	 */
	push $USER_DATA_SEG	/* SS */
	push %edi		/* ESP */
	pushfl			/* EFLAGS */
	push $USER_CODE_SEG	/* CS */
	push $z_thread_entry	/* EIP */

#ifdef CONFIG_EXECUTION_BENCHMARKING
	/* Save the EAX and EDX registers before reading the timestamp;
	 * once done, pop the values back.
	 */
	push %eax
	push %edx
	rdtsc
	mov %eax, __end_drop_to_usermode_time
	mov %edx, __end_drop_to_usermode_time+4
	pop %edx
	pop %eax
#endif
	/* We will land in z_thread_entry() in user mode after this */
	KPTI_IRET_USER
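/* For reference (a sketch, not additional code): the fake interrupt frame
 * built above, read from the top of the stack down at the final 'iret', is:
 *
 *	EIP    = z_thread_entry
 *	CS     = USER_CODE_SEG	(ring 3)
 *	EFLAGS
 *	ESP    = user stack, pointing at the NULL return address
 *	SS     = USER_DATA_SEG	(ring 3)
 *
 * The CPU pops all five, drops to ring 3, and z_thread_entry(user_entry,
 * p1, p2, p3) begins executing on the freshly scrubbed user stack.
 */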