Boot Linux faster!

Check our new training course

Boot Linux faster!

Check our new training course
and Creative Commons CC-BY-SA
lecture and lab materials

Bootlin logo

Elixir Cross Referencer

/*
 * Copyright (c) 2010-2015 Wind River Systems, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file
 * @brief Nanokernel swapper code for IA-32
 *
 * This module implements the _Swap() routine for the IA-32 architecture.
 *
 * Note that the file include/nanokernel/x86/swapstk.h defines
 * a representation of the save stack frame generated by _Swap() in order
 * to generate offsets (in the form of absolute symbols) for consumption by
 * host tools.  Please update swapstk.h if changing the structure of the
 * save frame on the stack.
 */

#define _ASMLANGUAGE

#include <nano_private.h>
#include <arch/x86/asm.h>
#include <offsets.h>	/* nanokernel structure offset definitions */

	/* exports (internal APIs) */

	GTEXT(_Swap)

	/* externs */


/**
 *
 * @brief Initiate a cooperative context switch
 *
 * The _Swap() routine is invoked by various nanokernel services to effect
 * a cooperative context switch.  Prior to invoking _Swap(), the
 * caller disables interrupts (via irq_lock) and the return 'key'
 * is passed as a parameter to _Swap().  The 'key' actually represents
 * the EFLAGS register prior to disabling interrupts via a 'cli' instruction.
 *
 * Given that _Swap() is called to effect a cooperative context switch,
 * only the non-volatile integer registers need to be saved in the TCS of the
 * outgoing thread.  The restoration of the integer registers of the incoming
 * thread depends on whether that thread was preemptively context switched
 * out.  The INT_ACTIVE and EXC_ACTIVE bits in the tTCS->flags field will signify
 * that the thread was preemptively context switched out, and thus both the
 * volatile and non-volatile integer registers need to be restored.
 *
 * The volatile registers need to be scrubbed to ensure they contain no
 * sensitive information that could compromise system security.  This is to
 * make sure that information will not be leaked from one application to
 * another via these volatile registers.
 *
 * Here, the integer registers (EAX, ECX, EDX) have been scrubbed.  Any changes
 * to this routine that alter the values of these registers MUST be reviewed
 * for potential security impacts.
 *
 * Floating point registers are handled using a lazy save/restore
 * mechanism since it's expected relatively few threads will be created
 * with the USE_FP or USE_SSE option bits.  The nanokernel data structure
 * maintains a 'current_fp' field to keep track of the thread that "owns"
 * the floating point registers.  Floating point registers consist of
 * ST0->ST7 (x87 FPU and MMX registers) and XMM0 -> XMM7.
 *
 * All floating point registers are considered 'volatile' thus they will
 * only be saved/restored when a preemptive context switch occurs.
 *
 * Floating point registers are currently NOT scrubbed, and are subject to
 * potential security leaks.
 *
 * The scheduling algorithm is simple: schedule the head of the runnable fiber
 * list (_nanokernel.fiber).  If there are no runnable fibers, then schedule
 * the task (_nanokernel.task).  The _nanokernel.task field will never be NULL.
 *
 * @return may contain a return value setup by a call to fiberRtnValueSet()
 *
 * C function prototype:
 *
 * unsigned int _Swap (unsigned int eflags);
 *
 */

SECTION_FUNC(TEXT, _Swap)
	/*
	 * Register roles used throughout this routine:
	 *
	 *   %eax - &_nanokernel, until the return value slot is popped from
	 *          the incoming thread's stack
	 *   %ecx - tTCS of the outgoing thread while its %esp is recorded,
	 *          then tTCS of the incoming thread
	 *   %ebx - scratch (its caller value is pushed before it is reused)
	 *   %edx - scratch
	 */
#ifdef CONFIG_X86_IAMCU
	/*
	 * In this configuration the 'eflags' argument arrives in %eax rather
	 * than on the stack.  Duplicate the return address and store the
	 * argument in the slot right above the new copy, so that the stack
	 * looks just as SYSV would have laid it out: return address at
	 * 0(%esp) and 'eflags' at 4(%esp).
	 */
	pushl	0(%esp)
	movl	%eax, 4(%esp)
#endif
	movl	$_nanokernel, %eax

	/*
	 * Push all non-volatile registers onto the stack; do not copy
	 * any of these registers into the tTCS.  Only the 'esp' register
	 * (after all the pushes have been performed) will be stored in the
	 * tTCS.
	 */

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	/*
	 * Leave slot for eax register when _Swap() needs to return a value;
	 * pre-populate slot with ebx's value in case _Swap() does not return
	 * a value.
	 */

	pushl	%ebx


	/* save esp into the tTCS structure of the outgoing thread */

	movl	__tNANO_current_OFFSET (%eax), %ecx
	movl	%esp, __tTCS_coopReg_OFFSET + __tCoopReg_esp_OFFSET (%ecx)

#ifdef CONFIG_KERNEL_EVENT_LOGGER_CONTEXT_SWITCH
	/*
	 * Preserve %eax across the call: it holds &_nanokernel, which the
	 * scheduling code below still relies on.  %ecx does not need to be
	 * preserved since it is reloaded right after.
	 */
	pushl	%eax
	/* Register the context switch */
	call	_sys_k_event_logger_context_switch
	/* restore &_nanokernel */
	popl	%eax
#endif

	/*
	 * Determine what thread needs to be swapped in.
	 * Note that the %eax still contains &_nanokernel.
	 */

	movl	__tNANO_fiber_OFFSET (%eax), %ecx
	testl	%ecx, %ecx
	jz	swapTask	/* Jump if no ready fibers */

	/*
	 * Remove the head 'TCS *' from the runnable fiber list: the link
	 * field of the head becomes the new head of the list.
	 */

	movl	__tTCS_link_OFFSET (%ecx), %ebx
	movl	%ebx, __tNANO_fiber_OFFSET (%eax)
	jmp	restoreContext


	/*
	 * There are no fibers in the run queue, thus swap in the task
	 * (_nanokernel.task).  The 'task' field will _never_ be NULL.
	 */

BRANCH_LABEL(swapTask)
	movl	__tNANO_task_OFFSET (%eax), %ecx

	/* fall through to 'restoreContext' */


	/*
	 * At this point, the %ecx register contains the 'tTCS *' of
	 * the TASK or FIBER to be swapped in, and %eax still
	 * contains &_nanokernel.
	 */

BRANCH_LABEL(restoreContext)

#ifdef CONFIG_FP_SHARING
	/*
	 * Clear the CR0[TS] bit (in the event the current thread
	 * doesn't have floating point enabled) to prevent the "device not
	 * available" exception when executing the subsequent fxsave/fnsave
	 * and/or fxrstor/frstor instructions.
	 *
	 * Indeed, it's possible that none of the aforementioned instructions
	 * need to be executed, for example, the incoming thread doesn't
	 * utilize floating point operations.  However, the code responsible
	 * for setting the CR0[TS] bit appropriately for the incoming thread
	 * (just after the 'restoreContext_NoFloatSwap' label) will leverage
	 * the fact that the following 'clts' was performed already.
	 */

	clts


	/*
	 * Determine whether the incoming thread utilizes non-integer
	 * capabilities _and_ whether the thread was context switched
	 * out preemptively.
	 */

	testl	$USE_FP, __tTCS_flags_OFFSET (%ecx)
	je	restoreContext_NoFloatSwap


	/*
	 * The incoming thread uses non-integer capabilities (x87 FPU and/or
	 * XMM regs): Was it the last thread to use non-integer capabilities?
	 * If so, there is no need to restore the non-integer context.
	 */

	movl	__tNANO_current_fp_OFFSET (%eax), %ebx
	cmpl	%ebx, %ecx
	je	restoreContext_NoFloatSwap


	/*
	 * The incoming thread uses non-integer capabilities (x87 FPU and/or
	 * XMM regs) and it was _not_ the last thread to use the non-integer
	 * capabilities: Check whether the current FP context actually needs
	 * to be saved before swapping in the context of the incoming thread.
	 * A NULL 'current_fp' means there is no FP context to save.
	 */

	testl	%ebx, %ebx
	jz	restoreContext_NoFloatSave


	/*
	 * The incoming thread uses non-integer capabilities (x87 FPU and/or
	 * XMM regs) and it was _not_ the last thread to use the non-integer
	 * capabilities _and_ the current FP context needs to be saved.
	 *
	 * Given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are all
	 * 'volatile', only save the registers if the "current FP context"
	 * was preemptively context switched.
	 */

	testl	$INT_OR_EXC_MASK, __tTCS_flags_OFFSET (%ebx)
	je	restoreContext_NoFloatSave


#ifdef CONFIG_SSE
	testl	$USE_SSE, __tTCS_flags_OFFSET (%ebx)
	je	x87FloatSave

	/*
	 * 'fxsave' does NOT perform an implicit 'fninit', therefore issue an
	 * 'fninit' to ensure a "clean" FPU state for the incoming thread
	 * (for the case when the fxrstor is not executed).
	 */

	fxsave	__tTCS_preempFloatReg_OFFSET (%ebx)
	fninit
	jmp	floatSaveDone

BRANCH_LABEL(x87FloatSave)
#endif /* CONFIG_SSE */

	/* 'fnsave' performs an implicit 'fninit' after saving state! */

	fnsave	__tTCS_preempFloatReg_OFFSET (%ebx)

	/* fall through to 'floatSaveDone' */

BRANCH_LABEL(floatSaveDone)
BRANCH_LABEL(restoreContext_NoFloatSave)

	/*********************************************************
	 * Restore floating point context of the incoming thread.
	 *********************************************************/

	/*
	 * Again, given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are
	 * all 'volatile', only restore the registers if the incoming
	 * thread was previously preemptively context switched out.
	 */

	testl	$INT_OR_EXC_MASK, __tTCS_flags_OFFSET (%ecx)
	je	restoreContext_NoFloatRestore

#ifdef CONFIG_SSE
	testl	$USE_SSE, __tTCS_flags_OFFSET (%ecx)
	je	x87FloatRestore

	fxrstor	__tTCS_preempFloatReg_OFFSET (%ecx)
	jmp	floatRestoreDone

BRANCH_LABEL(x87FloatRestore)

#endif /* CONFIG_SSE */

	frstor	__tTCS_preempFloatReg_OFFSET (%ecx)

	/* fall through to 'floatRestoreDone' */

BRANCH_LABEL(floatRestoreDone)
BRANCH_LABEL(restoreContext_NoFloatRestore)

	/* record that the incoming thread "owns" the non-integer registers */

	movl	%ecx, __tNANO_current_fp_OFFSET (%eax)


	/*
	 * Branch point when none of the non-integer registers need to be
	 * swapped either due to a) the incoming thread does not
	 * USE_FP | USE_SSE, or b) the incoming thread is the same as
	 * the last thread that utilized the non-integer registers.
	 */

BRANCH_LABEL(restoreContext_NoFloatSwap)

	/*
	 * Leave CR0[TS] clear if incoming thread utilizes "floating point"
	 * instructions
	 */

	testl	$USE_FP, __tTCS_flags_OFFSET (%ecx)
	jne	CROHandlingDone

	/*
	 * The incoming thread does NOT currently utilize "floating point"
	 * instructions, so set CR0[TS] to ensure the "device not available"
	 * exception occurs on the first attempt to access a x87 FPU, MMX,
	 * or XMM register.
	 */

	movl	%cr0, %edx
	orl	$0x8, %edx	/* 0x8 == CR0[TS] (bit 3) */
	movl	%edx, %cr0

BRANCH_LABEL(CROHandlingDone)

#endif /* CONFIG_FP_SHARING */




	/* update _nanokernel.current to reflect incoming thread */

	movl	%ecx, __tNANO_current_OFFSET (%eax)

	/* recover task/fiber stack pointer from tTCS */

	movl	__tTCS_coopReg_OFFSET + __tCoopReg_esp_OFFSET (%ecx), %esp


	/*
	 * Load return value from a possible fiberRtnValueSet().  From here
	 * on %eax no longer holds &_nanokernel.
	 */

	popl	%eax

	/*
	 * Pop the non-volatile registers from the stack, in the reverse
	 * order of the pushes performed on entry.
	 */

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi

	/*
	 * For a non-preemptive context switch, the volatile integer
	 * registers are expected to hold the following values:
	 *
	 * 1. ECX - points to the incoming thread's own TCS structure.
	 * 2. EDX - NOTE(review): historically documented as containing the
	 *          flags field of the thread's own TCS structure, but this
	 *          routine only writes EDX explicitly in the CR0[TS] handling
	 *          and CONFIG_X86_IAMCU paths -- confirm against callers.
	 * 3. EAX - may contain one of the two values:
	 *          (a) the return value for _Swap() that was set up by a
	 *              call to fiberRtnValueSet()
	 *          (b) same value as EBX, which is non-volatile
	 */

	/*
	 * Utilize the 'eflags' parameter to _Swap(): with the return address
	 * at 0(%esp), the parameter sits at 4(%esp).  Push a copy of it so
	 * that 'popfl' below can load it into EFLAGS.
	 */

	pushl	4(%esp)
#ifdef CONFIG_INT_LATENCY_BENCHMARK
	/* 0x200 == EFLAGS[IF] (bit 9): skip if interrupts stay disabled */
	testl	$0x200, (%esp)
	jz	skipIntLatencyStop

	/* save %eax since it is used as the return value for _Swap */
	pushl	%eax
	/* interrupts are being reenabled, stop accumulating time */
	call	_int_latency_stop
	/* restore _Swap's %eax */
	popl	%eax

BRANCH_LABEL(skipIntLatencyStop)
#endif
	popfl
#ifdef CONFIG_X86_IAMCU
	/*
	 * Remember that eflags we stuck into the stack before the return
	 * address?  Need to get it out of there since the calling convention
	 * will not do that for us: pop the return address and write it over
	 * the eflags slot, so that 'ret' finds it at the top of the stack.
	 */
	popl	%edx
	movl	%edx, (%esp)
#endif
	ret