summaryrefslogtreecommitdiff
path: root/UefiCpuPkg
diff options
context:
space:
mode:
authorJeff Fan <jeff.fan@intel.com>2016-07-20 22:44:39 +0800
committerJeff Fan <jeff.fan@intel.com>2016-08-17 19:58:30 +0800
commitd94e5f672994f67ea7bd16aaf577bf4cde65cfc1 (patch)
treeee936509bb6a13d292b29698bf0b18bbf3ae6067 /UefiCpuPkg
parent3e8ad6bd7684502c6a57796a57fb7c6dbe3ec1c5 (diff)
UefiCpuPkg/MpInitLib: Add AP assembly code and MP_CPU_EXCHANGE_INFO
Add assembly code for AP reset vector and the definition of MP_CPU_EXCHANGE_INFO that are used to exchange the data between C code and assembly code when AP wake up. v4: 1. Copy MP_CPU_EXCHANGE_INFO from UefiCpuPkg/CpuMpPei/CpuMpPei.h 2. Copy MpEqu.inc and MpFuncs.nasm from UefiCpuPkg/CpuMpPei. v3: 1. Rename NumApsExecutingLoction to NumApsExecutingLocation 2. Add whitespace after ; in .nasm file Cc: Michael Kinney <michael.d.kinney@intel.com> Cc: Feng Tian <feng.tian@intel.com> Cc: Giri P Mudusuru <giri.p.mudusuru@intel.com> Cc: Laszlo Ersek <lersek@redhat.com> Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Jeff Fan <jeff.fan@intel.com> Reviewed-by: Michael Kinney <michael.d.kinney@intel.com> Tested-by: Laszlo Ersek <lersek@redhat.com> Tested-by: Michael Kinney <michael.d.kinney@intel.com>
Diffstat (limited to 'UefiCpuPkg')
-rw-r--r--UefiCpuPkg/Library/MpInitLib/DxeMpInitLib.inf8
-rw-r--r--UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc39
-rw-r--r--UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm229
-rw-r--r--UefiCpuPkg/Library/MpInitLib/MpLib.h24
-rw-r--r--UefiCpuPkg/Library/MpInitLib/PeiMpInitLib.inf8
-rw-r--r--UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc41
-rw-r--r--UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm281
7 files changed, 630 insertions, 0 deletions
diff --git a/UefiCpuPkg/Library/MpInitLib/DxeMpInitLib.inf b/UefiCpuPkg/Library/MpInitLib/DxeMpInitLib.inf
index 1f131c0d1e..e9a27250a7 100644
--- a/UefiCpuPkg/Library/MpInitLib/DxeMpInitLib.inf
+++ b/UefiCpuPkg/Library/MpInitLib/DxeMpInitLib.inf
@@ -27,6 +27,14 @@
# VALID_ARCHITECTURES = IA32 X64
#
+[Sources.IA32]
+ Ia32/MpEqu.inc
+ Ia32/MpFuncs.nasm
+
+[Sources.X64]
+ X64/MpEqu.inc
+ X64/MpFuncs.nasm
+
[Sources.common]
DxeMpLib.c
MpLib.c
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
new file mode 100644
index 0000000000..773eab3a4d
--- /dev/null
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
@@ -0,0 +1,39 @@
+;------------------------------------------------------------------------------ ;
+; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; MpEqu.inc
+;
+; Abstract:
+;
+; This is the equates file for Multiple Processor support
+;
+;-------------------------------------------------------------------------------
+
+VacantFlag equ 00h
+NotVacantFlag equ 0ffh
+
+CPU_SWITCH_STATE_IDLE equ 0
+CPU_SWITCH_STATE_STORED equ 1
+CPU_SWITCH_STATE_LOADED equ 2
+
+LockLocation equ (RendezvousFunnelProcEnd - RendezvousFunnelProcStart)
+StackStartAddressLocation equ LockLocation + 04h
+StackSizeLocation equ LockLocation + 08h
+ApProcedureLocation equ LockLocation + 0Ch
+GdtrLocation equ LockLocation + 10h
+IdtrLocation equ LockLocation + 16h
+BufferStartLocation equ LockLocation + 1Ch
+ModeOffsetLocation equ LockLocation + 20h
+NumApsExecutingLoction equ LockLocation + 24h
+CodeSegmentLocation equ LockLocation + 28h
+DataSegmentLocation equ LockLocation + 2Ch
+
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
new file mode 100644
index 0000000000..0852a5bc84
--- /dev/null
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
@@ -0,0 +1,229 @@
+;------------------------------------------------------------------------------ ;
+; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; MpFuncs.nasm
+;
+; Abstract:
+;
+; This is the assembly code for MP support
+;
+;-------------------------------------------------------------------------------
+
+%include "MpEqu.inc"
+extern ASM_PFX(InitializeFloatingPointUnits)
+
+SECTION .text
+
+;-------------------------------------------------------------------------------------
+;RendezvousFunnelProc procedure follows. All APs execute their procedure. This
+;procedure serializes all the AP processors through an Init sequence. It must be
+;noted that APs arrive here very raw...ie: real mode, no stack.
+;ALSO THIS PROCEDURE IS EXECUTED BY APs ONLY ON 16 BIT MODE. HENCE THIS PROC
+;IS IN MACHINE CODE.
+;-------------------------------------------------------------------------------------
+global ASM_PFX(RendezvousFunnelProc)
+ASM_PFX(RendezvousFunnelProc):
+RendezvousFunnelProcStart:
+; At this point CS = 0x(vv00) and ip= 0x0.
+BITS 16
+ mov ebp, eax ; save BIST information
+
+ mov ax, cs
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+ xor ax, ax
+ mov fs, ax
+ mov gs, ax
+
+ mov si, BufferStartLocation
+ mov ebx, [si]
+
+ mov si, ModeOffsetLocation
+ mov eax, [si]
+ mov si, CodeSegmentLocation
+ mov edx, [si]
+ mov di, ax
+ sub di, 02h
+ mov [di], dx
+ sub di, 04h
+ add eax, ebx
+ mov [di],eax
+
+ mov si, DataSegmentLocation
+ mov edx, [si]
+
+ mov si, GdtrLocation
+o32 lgdt [cs:si]
+
+ mov si, IdtrLocation
+o32 lidt [cs:si]
+
+ xor ax, ax
+ mov ds, ax
+
+ mov eax, cr0 ;Get control register 0
+ or eax, 000000003h ;Set PE bit (bit #0) & MP
+ mov cr0, eax
+
+ jmp 0:strict dword 0 ; far jump to protected mode
+BITS 32
+Flat32Start: ; protected mode entry point
+ mov ds, dx
+ mov es, dx
+ mov fs, dx
+ mov gs, dx
+ mov ss, dx
+
+ mov esi, ebx
+ mov edi, esi
+ add edi, LockLocation
+ mov eax, NotVacantFlag
+
+TestLock:
+ xchg [edi], eax
+ cmp eax, NotVacantFlag
+ jz TestLock
+
+ mov edi, esi
+ add edi, NumApsExecutingLoction
+ inc dword [edi]
+ mov ebx, [edi]
+
+ProgramStack:
+ mov edi, esi
+ add edi, StackSizeLocation
+ mov eax, [edi]
+ mov edi, esi
+ add edi, StackStartAddressLocation
+ add eax, [edi]
+ mov esp, eax
+ mov [edi], eax
+
+Releaselock:
+ mov eax, VacantFlag
+ mov edi, esi
+ add edi, LockLocation
+ xchg [edi], eax
+
+CProcedureInvoke:
+ push ebp ; push BIST data at top of AP stack
+ xor ebp, ebp ; clear ebp for call stack trace
+ push ebp
+ mov ebp, esp
+
+ mov eax, ASM_PFX(InitializeFloatingPointUnits)
+ call eax ; Call assembly function to initialize FPU per UEFI spec
+
+ push ebx ; Push NumApsExecuting
+ mov eax, esi
+ add eax, LockLocation
+ push eax ; push address of exchange info data buffer
+
+ mov edi, esi
+ add edi, ApProcedureLocation
+ mov eax, [edi]
+
+ call eax ; invoke C function
+
+ jmp $ ; never reach here
+RendezvousFunnelProcEnd:
+
+;-------------------------------------------------------------------------------------
+; AsmGetAddressMap (&AddressMap);
+;-------------------------------------------------------------------------------------
+global ASM_PFX(AsmGetAddressMap)
+ASM_PFX(AsmGetAddressMap):
+ pushad
+ mov ebp,esp
+
+ mov ebx, [ebp + 24h]
+ mov dword [ebx], RendezvousFunnelProcStart
+ mov dword [ebx + 4h], Flat32Start - RendezvousFunnelProcStart
+ mov dword [ebx + 8h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
+
+ popad
+ ret
+
+;-------------------------------------------------------------------------------------
+;AsmExchangeRole procedure follows. This procedure executed by current BSP, that is
+;about to become an AP. It switches it'stack with the current AP.
+;AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
+;-------------------------------------------------------------------------------------
+global ASM_PFX(AsmExchangeRole)
+ASM_PFX(AsmExchangeRole):
+ ; DO NOT call other functions in this function, since 2 CPU may use 1 stack
+ ; at the same time. If 1 CPU try to call a function, stack will be corrupted.
+ pushad
+ mov ebp,esp
+
+ ; esi contains MyInfo pointer
+ mov esi, [ebp + 24h]
+
+ ; edi contains OthersInfo pointer
+ mov edi, [ebp + 28h]
+
+ ;Store EFLAGS, GDTR and IDTR register to stack
+ pushfd
+ mov eax, cr4
+ push eax ; push cr4 firstly
+ mov eax, cr0
+ push eax
+
+ sgdt [esi + 8]
+ sidt [esi + 14]
+
+ ; Store the its StackPointer
+ mov [esi + 4],esp
+
+ ; update its switch state to STORED
+ mov byte [esi], CPU_SWITCH_STATE_STORED
+
+WaitForOtherStored:
+ ; wait until the other CPU finish storing its state
+ cmp byte [edi], CPU_SWITCH_STATE_STORED
+ jz OtherStored
+ pause
+ jmp WaitForOtherStored
+
+OtherStored:
+ ; Since another CPU already stored its state, load them
+ ; load GDTR value
+ lgdt [edi + 8]
+
+ ; load IDTR value
+ lidt [edi + 14]
+
+ ; load its future StackPointer
+ mov esp, [edi + 4]
+
+ ; update the other CPU's switch state to LOADED
+ mov byte [edi], CPU_SWITCH_STATE_LOADED
+
+WaitForOtherLoaded:
+ ; wait until the other CPU finish loading new state,
+ ; otherwise the data in stack may corrupt
+ cmp byte [esi], CPU_SWITCH_STATE_LOADED
+ jz OtherLoaded
+ pause
+ jmp WaitForOtherLoaded
+
+OtherLoaded:
+ ; since the other CPU already get the data it want, leave this procedure
+ pop eax
+ mov cr0, eax
+ pop eax
+ mov cr4, eax
+ popfd
+
+ popad
+ ret
diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.h b/UefiCpuPkg/Library/MpInitLib/MpLib.h
index 66425d30fb..0453c22d5c 100644
--- a/UefiCpuPkg/Library/MpInitLib/MpLib.h
+++ b/UefiCpuPkg/Library/MpInitLib/MpLib.h
@@ -36,5 +36,29 @@
#include <Library/HobLib.h>
+#pragma pack(1)
+
+//
+// MP CPU exchange information for AP reset code
+// This structure is required to be packed because fixed field offsets
+// into this structure are used in assembly code in this module
+//
+typedef struct {
+ UINTN Lock;
+ UINTN StackStart;
+ UINTN StackSize;
+ UINTN CFunction;
+ IA32_DESCRIPTOR GdtrProfile;
+ IA32_DESCRIPTOR IdtrProfile;
+ UINTN BufferStart;
+ UINTN ModeOffset;
+ UINTN NumApsExecuting;
+ UINTN CodeSegment;
+ UINTN DataSegment;
+ UINTN Cr3;
+ PEI_CPU_MP_DATA *PeiCpuMpData;
+} MP_CPU_EXCHANGE_INFO;
+
+#pragma pack()
#endif
diff --git a/UefiCpuPkg/Library/MpInitLib/PeiMpInitLib.inf b/UefiCpuPkg/Library/MpInitLib/PeiMpInitLib.inf
index 014a248b5e..c195a38f59 100644
--- a/UefiCpuPkg/Library/MpInitLib/PeiMpInitLib.inf
+++ b/UefiCpuPkg/Library/MpInitLib/PeiMpInitLib.inf
@@ -27,6 +27,14 @@
# VALID_ARCHITECTURES = IA32 X64
#
+[Sources.IA32]
+ Ia32/MpEqu.inc
+ Ia32/MpFuncs.nasm
+
+[Sources.X64]
+ X64/MpEqu.inc
+ X64/MpFuncs.nasm
+
[Sources.common]
PeiMpLib.c
MpLib.c
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
new file mode 100644
index 0000000000..00f57ce549
--- /dev/null
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
@@ -0,0 +1,41 @@
+;------------------------------------------------------------------------------ ;
+; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; MpEqu.inc
+;
+; Abstract:
+;
+; This is the equates file for Multiple Processor support
+;
+;-------------------------------------------------------------------------------
+
+VacantFlag equ 00h
+NotVacantFlag equ 0ffh
+
+CPU_SWITCH_STATE_IDLE equ 0
+CPU_SWITCH_STATE_STORED equ 1
+CPU_SWITCH_STATE_LOADED equ 2
+
+LockLocation equ (RendezvousFunnelProcEnd - RendezvousFunnelProcStart)
+StackStartAddressLocation equ LockLocation + 08h
+StackSizeLocation equ LockLocation + 10h
+ApProcedureLocation equ LockLocation + 18h
+GdtrLocation equ LockLocation + 20h
+IdtrLocation equ LockLocation + 2Ah
+BufferStartLocation equ LockLocation + 34h
+ModeOffsetLocation equ LockLocation + 3Ch
+NumApsExecutingLoction equ LockLocation + 44h
+CodeSegmentLocation equ LockLocation + 4Ch
+DataSegmentLocation equ LockLocation + 54h
+Cr3Location equ LockLocation + 5Ch
+
+;-------------------------------------------------------------------------------
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
new file mode 100644
index 0000000000..f19c75f913
--- /dev/null
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
@@ -0,0 +1,281 @@
+;------------------------------------------------------------------------------ ;
+; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution. The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+; MpFuncs.nasm
+;
+; Abstract:
+;
+; This is the assembly code for MP support
+;
+;-------------------------------------------------------------------------------
+
+%include "MpEqu.inc"
+extern ASM_PFX(InitializeFloatingPointUnits)
+
+DEFAULT REL
+
+SECTION .text
+
+;-------------------------------------------------------------------------------------
+;RendezvousFunnelProc procedure follows. All APs execute their procedure. This
+;procedure serializes all the AP processors through an Init sequence. It must be
+;noted that APs arrive here very raw...ie: real mode, no stack.
+;ALSO THIS PROCEDURE IS EXECUTED BY APs ONLY ON 16 BIT MODE. HENCE THIS PROC
+;IS IN MACHINE CODE.
+;-------------------------------------------------------------------------------------
+global ASM_PFX(RendezvousFunnelProc)
+ASM_PFX(RendezvousFunnelProc):
+RendezvousFunnelProcStart:
+; At this point CS = 0x(vv00) and ip= 0x0.
+; Save BIST information to ebp firstly
+
+BITS 16
+ mov ebp, eax ; Save BIST information
+
+ mov ax, cs
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+ xor ax, ax
+ mov fs, ax
+ mov gs, ax
+
+ mov si, BufferStartLocation
+ mov ebx, [si]
+
+ mov di, ModeOffsetLocation
+ mov eax, [di]
+ mov di, CodeSegmentLocation
+ mov edx, [di]
+ mov di, ax
+ sub di, 02h
+ mov [di],dx ; Patch long mode CS
+ sub di, 04h
+ add eax, ebx
+ mov [di],eax ; Patch address
+
+ mov si, GdtrLocation
+o32 lgdt [cs:si]
+
+ mov si, IdtrLocation
+o32 lidt [cs:si]
+
+
+ mov di, DataSegmentLocation
+ mov edi, [di] ; Save long mode DS in edi
+
+ mov si, Cr3Location ; Save CR3 in ecx
+ mov ecx, [si]
+
+ xor ax, ax
+ mov ds, ax ; Clear data segment
+
+ mov eax, cr0 ; Get control register 0
+ or eax, 000000003h ; Set PE bit (bit #0) & MP
+ mov cr0, eax
+
+ mov eax, cr4
+ bts eax, 5
+ mov cr4, eax
+
+ mov cr3, ecx ; Load CR3
+
+ mov ecx, 0c0000080h ; EFER MSR number
+ rdmsr ; Read EFER
+ bts eax, 8 ; Set LME=1
+ wrmsr ; Write EFER
+
+ mov eax, cr0 ; Read CR0
+ bts eax, 31 ; Set PG=1
+ mov cr0, eax ; Write CR0
+
+ jmp 0:strict dword 0 ; far jump to long mode
+BITS 64
+LongModeStart:
+ mov eax, edi
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+
+ mov esi, ebx
+ mov edi, esi
+ add edi, LockLocation
+ mov rax, NotVacantFlag
+
+TestLock:
+ xchg qword [edi], rax
+ cmp rax, NotVacantFlag
+ jz TestLock
+
+ mov edi, esi
+ add edi, NumApsExecutingLoction
+ inc dword [edi]
+ mov ebx, [edi]
+
+ProgramStack:
+ mov edi, esi
+ add edi, StackSizeLocation
+ mov rax, qword [edi]
+ mov edi, esi
+ add edi, StackStartAddressLocation
+ add rax, qword [edi]
+ mov rsp, rax
+ mov qword [edi], rax
+
+Releaselock:
+ mov rax, VacantFlag
+ mov edi, esi
+ add edi, LockLocation
+ xchg qword [edi], rax
+
+CProcedureInvoke:
+ push rbp ; push BIST data at top of AP stack
+ xor rbp, rbp ; clear ebp for call stack trace
+ push rbp
+ mov rbp, rsp
+
+ mov rax, ASM_PFX(InitializeFloatingPointUnits)
+ sub rsp, 20h
+ call rax ; Call assembly function to initialize FPU per UEFI spec
+ add rsp, 20h
+
+ mov edx, ebx ; edx is NumApsExecuting
+ mov ecx, esi
+ add ecx, LockLocation ; rcx is address of exchange info data buffer
+
+ mov edi, esi
+ add edi, ApProcedureLocation
+ mov rax, qword [edi]
+
+ sub rsp, 20h
+ call rax ; invoke C function
+ add rsp, 20h
+ jmp $
+
+RendezvousFunnelProcEnd:
+
+;-------------------------------------------------------------------------------------
+; AsmGetAddressMap (&AddressMap);
+;-------------------------------------------------------------------------------------
+global ASM_PFX(AsmGetAddressMap)
+ASM_PFX(AsmGetAddressMap):
+ mov rax, ASM_PFX(RendezvousFunnelProc)
+ mov qword [rcx], rax
+ mov qword [rcx + 8h], LongModeStart - RendezvousFunnelProcStart
+ mov qword [rcx + 10h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
+ ret
+
+;-------------------------------------------------------------------------------------
+;AsmExchangeRole procedure follows. This procedure executed by current BSP, that is
+;about to become an AP. It switches it'stack with the current AP.
+;AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
+;-------------------------------------------------------------------------------------
+global ASM_PFX(AsmExchangeRole)
+ASM_PFX(AsmExchangeRole):
+ ; DO NOT call other functions in this function, since 2 CPU may use 1 stack
+ ; at the same time. If 1 CPU try to call a function, stack will be corrupted.
+
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ push rdi
+ push rbp
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+
+ mov rax, cr0
+ push rax
+
+ mov rax, cr4
+ push rax
+
+ ; rsi contains MyInfo pointer
+ mov rsi, rcx
+
+ ; rdi contains OthersInfo pointer
+ mov rdi, rdx
+
+ ;Store EFLAGS, GDTR and IDTR regiter to stack
+ pushfq
+ sgdt [rsi + 16]
+ sidt [rsi + 26]
+
+ ; Store the its StackPointer
+ mov [rsi + 8], rsp
+
+ ; update its switch state to STORED
+ mov byte [rsi], CPU_SWITCH_STATE_STORED
+
+WaitForOtherStored:
+ ; wait until the other CPU finish storing its state
+ cmp byte [rdi], CPU_SWITCH_STATE_STORED
+ jz OtherStored
+ pause
+ jmp WaitForOtherStored
+
+OtherStored:
+ ; Since another CPU already stored its state, load them
+ ; load GDTR value
+ lgdt [rdi + 16]
+
+ ; load IDTR value
+ lidt [rdi + 26]
+
+ ; load its future StackPointer
+ mov rsp, [rdi + 8]
+
+ ; update the other CPU's switch state to LOADED
+ mov byte [rdi], CPU_SWITCH_STATE_LOADED
+
+WaitForOtherLoaded:
+ ; wait until the other CPU finish loading new state,
+ ; otherwise the data in stack may corrupt
+ cmp byte [rsi], CPU_SWITCH_STATE_LOADED
+ jz OtherLoaded
+ pause
+ jmp WaitForOtherLoaded
+
+OtherLoaded:
+ ; since the other CPU already get the data it want, leave this procedure
+ popfq
+
+ pop rax
+ mov cr4, rax
+
+ pop rax
+ mov cr0, rax
+
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop rbp
+ pop rdi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+
+ ret