MdeModulePkg CapsuleX64: Reduce reserved memory consumption

Reduce the reserved memory consumed by the page table buffer so that
the OS has more available memory to use.
Take PhysicalAddressBits = 48 and 2MB page granularity as example,
1:1 Virtual to Physical identity mapping page table buffer needs to be
((512 + 1) * 512 + 1) * 4096 = 1075843072 bytes = 0x40201000 bytes.

The code is updated to build a 4G page table by default and to use only 8 extra
pages to handle > 4G requests on demand via page fault.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Star Zeng <star.zeng@intel.com>
Reviewed-by: Jiewen Yao <jiewen.yao@intel.com>

git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18069 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
Star Zeng
2015-07-27 03:04:41 +00:00
committed by lzeng14
parent 353f5ba92f
commit 716087e2be
6 changed files with 478 additions and 132 deletions

View File

@@ -0,0 +1,81 @@
## @file
# This is the assembly code for page fault handler hook.
#
# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
#
# This program and the accompanying materials are
# licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
##
#
# PageFaultHandlerHook
#
# Entry stub for the page fault exception (GAS, AT&T syntax, x86-64).
# It preserves the volatile registers of the interrupted code, calls the
# C routine PageFaultHandler, and then either returns from the exception
# or chains to the handler address returned in rax.
#
# On entry: (%rsp) holds the page fault error code, followed by the
#           interrupt return frame consumed by iretq.
# On exit:  rax == NULL from PageFaultHandler: the error code is skipped
#           and iretq resumes the interrupted code.
#           rax != NULL: control jumps to that address with rax restored
#           and rsp pointing at the error code again, as on entry.
#
# The saved set (rax, rcx, rdx, r8-r11, xmm0-xmm5, MXCSR) and the 0x20
# bytes reserved around the call match the Microsoft x64 convention
# (volatile registers plus 32 bytes of shadow space).
#
ASM_GLOBAL ASM_PFX(PageFaultHandlerHook)
ASM_PFX(PageFaultHandlerHook):
# reserve two 8-byte scratch slots below the error code:
#   0x08(%rsp) = saved rax
#   0x00(%rsp) = used near the end for the OriginalHandler address
addq $-0x10, %rsp
# save rax
movq %rax, 0x08(%rsp)
# pushq %rax # save all volatile registers
pushq %rcx
pushq %rdx
pushq %r8
pushq %r9
pushq %r10
pushq %r11
# save volatile fp registers
# 68h + 08h(for alignment)
# layout: xmm0-xmm5 at 0x00-0x5f, MXCSR at 0x60
# movdqa needs 16-byte aligned addresses; NOTE(review): this presumably
# relies on rsp being 16-byte aligned on entry (after the error code push),
# since 0x10 + 6 pushes + 0x70 keeps that alignment - confirm.
addq $-0x70, %rsp
stmxcsr 0x60(%rsp)
movdqa %xmm0, 0x0(%rsp)
movdqa %xmm1, 0x10(%rsp)
movdqa %xmm2, 0x20(%rsp)
movdqa %xmm3, 0x30(%rsp)
movdqa %xmm4, 0x40(%rsp)
movdqa %xmm5, 0x50(%rsp)
# 0x20 bytes reserved for the callee around the call (shadow space)
addq $-0x20, %rsp
call ASM_PFX(PageFaultHandler)
addq $0x20, %rsp
# load volatile fp registers
ldmxcsr 0x60(%rsp)
movdqa 0x0(%rsp), %xmm0
movdqa 0x10(%rsp), %xmm1
movdqa 0x20(%rsp), %xmm2
movdqa 0x30(%rsp), %xmm3
movdqa 0x40(%rsp), %xmm4
movdqa 0x50(%rsp), %xmm5
addq $0x70, %rsp
popq %r11
popq %r10
popq %r9
popq %r8
popq %rdx
popq %rcx
# popq %rax # restore all volatile registers
# release the two scratch slots; rsp points at the error code again and
# the slots are now at -0x08(%rsp) (saved rax) and -0x10(%rsp)
addq $0x10, %rsp
# rax returned from PageFaultHandler is NULL or OriginalHandler address
# NULL if the page fault is handled by PageFaultHandler
# OriginalHandler address if the page fault is not handled by PageFaultHandler
# testq sets ZF for the jz below; the two movq in between leave flags intact
testq %rax, %rax
# save OriginalHandler address
# NOTE(review): this slot and the saved rax slot are below rsp here, so
# nothing else may use this stack before the jmpq/iretq - confirm.
movq %rax, -0x10(%rsp)
# restore rax
movq -0x08(%rsp), %rax
jz L1
# jump to OriginalHandler
jmpq *-0x10(%rsp)
L1:
addq $0x08, %rsp # skip error code for PF
iretq

View File

@@ -0,0 +1,87 @@
;; @file
; This is the assembly code for page fault handler hook.
;
; Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
;
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
;;
; PageFaultHandler is the C routine (see the call below) that returns NULL
; when it handled the fault, or the original handler address otherwise.
EXTERN PageFaultHandler:PROC
.code
;------------------------------------------------------------------------------
; PageFaultHandlerHook
;
; Entry stub for the page fault exception (MASM, x64).
; It preserves the volatile registers of the interrupted code, calls
; PageFaultHandler, and then either returns from the exception or chains
; to the handler address returned in rax.
;
; On entry: [rsp] holds the page fault error code, followed by the
;           interrupt return frame consumed by iretq.
; On exit:  rax == NULL from PageFaultHandler: the error code is skipped
;           and iretq resumes the interrupted code.
;           rax != NULL: control jumps to that address with rax restored
;           and rsp pointing at the error code again, as on entry.
;
; The saved set (rax, rcx, rdx, r8-r11, xmm0-xmm5, MXCSR) and the 20h
; bytes reserved around the call match the Microsoft x64 convention
; (volatile registers plus 32 bytes of shadow space).
;------------------------------------------------------------------------------
PageFaultHandlerHook PROC
; reserve two 8-byte scratch slots below the error code:
;   [rsp + 08h] = saved rax
;   [rsp + 00h] = used near the end for the OriginalHandler address
add rsp, -10h
; save rax
mov [rsp + 08h], rax
;push rax ; save all volatile registers
push rcx
push rdx
push r8
push r9
push r10
push r11
; save volatile fp registers
; 68h + 08h(for alignment)
; layout: xmm0-xmm5 at 00h-5Fh, MXCSR at 60h
; movdqa needs 16-byte aligned addresses; NOTE(review): this presumably
; relies on rsp being 16-byte aligned on entry (after the error code push),
; since 10h + 6 pushes + 70h keeps that alignment - confirm.
add rsp, -70h
stmxcsr [rsp + 60h]
movdqa [rsp + 0h], xmm0
movdqa [rsp + 10h], xmm1
movdqa [rsp + 20h], xmm2
movdqa [rsp + 30h], xmm3
movdqa [rsp + 40h], xmm4
movdqa [rsp + 50h], xmm5
; 20h bytes reserved for the callee around the call (shadow space)
add rsp, -20h
call PageFaultHandler
add rsp, 20h
; load volatile fp registers
ldmxcsr [rsp + 60h]
movdqa xmm0, [rsp + 0h]
movdqa xmm1, [rsp + 10h]
movdqa xmm2, [rsp + 20h]
movdqa xmm3, [rsp + 30h]
movdqa xmm4, [rsp + 40h]
movdqa xmm5, [rsp + 50h]
add rsp, 70h
pop r11
pop r10
pop r9
pop r8
pop rdx
pop rcx
;pop rax ; restore all volatile registers
; release the two scratch slots; rsp points at the error code again and
; the slots are now at [rsp - 08h] (saved rax) and [rsp - 10h]
add rsp, 10h
; rax returned from PageFaultHandler is NULL or OriginalHandler address
; NULL if the page fault is handled by PageFaultHandler
; OriginalHandler address if the page fault is not handled by PageFaultHandler
; test sets ZF for the jz below; the two mov in between leave flags intact
test rax, rax
; save OriginalHandler address
; NOTE(review): this slot and the saved rax slot are below rsp here, so
; nothing else may use this stack before the jmp/iretq - confirm.
mov [rsp - 10h], rax
; restore rax
mov rax, [rsp - 08h]
; @F = the next @@ label (page fault handled, return from exception)
jz @F
; jump to OriginalHandler
jmp qword ptr [rsp - 10h]
@@:
add rsp, 08h ; skip error code for PF
iretq
PageFaultHandlerHook ENDP
END

View File

@@ -1,7 +1,7 @@
/** @file
The X64 entrypoint is used to process capsule in long mode.
Copyright (c) 2011 - 2013, Intel Corporation. All rights reserved.<BR>
Copyright (c) 2011 - 2015, Intel Corporation. All rights reserved.<BR>
This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
@@ -20,6 +20,184 @@ WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
//
// Number of gate descriptors in the X64 IDT built by this module.
//
#define EXCEPTION_VECTOR_NUMBER 0x22
//
// Page table entry flags used when building entries on demand:
//   IA32_PG_P  - entry is present (bit 0).
//   IA32_PG_RW - entry is writable (bit 1).
//   IA32_PG_PS - entry maps a large page directly (bit 7); set here on the
//                PDPTE (1GB page) or PD entry (2MB page) written by PageFaultHandler.
//
#define IA32_PG_P BIT0
#define IA32_PG_RW BIT1
#define IA32_PG_PS BIT7
//
// State shared by HookPageFaultHandler(), AcquirePage() and PageFaultHandler().
//
typedef struct _PAGE_FAULT_CONTEXT {
//
// TRUE: PageFaultHandler() maps the faulting address with a 1GB page at the PDPTE level.
// FALSE: it maps the faulting address with a 2MB page at the PD level.
// Set by the caller before HookPageFaultHandler() is invoked.
//
BOOLEAN Page1GSupport;
//
// Mask of the physical address bits from bit 12 up to the CPU physical address
// width (capped at 48 bits); computed in HookPageFaultHandler().
//
UINT64 PhyMask;
//
// Base address of the pool of EXTRA_PAGE_TABLE_PAGES pages handed out by AcquirePage().
// Derived from CR3 in HookPageFaultHandler().
//
UINTN PageFaultBuffer;
//
// Index of the pool page that the next AcquirePage() call will use;
// wraps around modulo EXTRA_PAGE_TABLE_PAGES.
//
UINTN PageFaultIndex;
//
// Store the uplink information for each page being used.
// Entry N points at the page table entry that references pool page N, or is NULL
// if pool page N has not been handed out yet.
//
UINT64 *PageFaultUplink[EXTRA_PAGE_TABLE_PAGES];
//
// Address of the handler that was in the page fault IDT entry before it was hooked.
// PageFaultHandler() returns it for faults it does not handle.
//
VOID *OriginalHandler;
} PAGE_FAULT_CONTEXT;
//
// Page fault context placed directly in front of the IDT.
//
// PageFaultHandler() finds the context at (IDTR base - sizeof (PAGE_FAULT_CONTEXT)),
// and the IDTR base is set to IdtEntryTable. PageFaultContext must therefore stay
// the member immediately before IdtEntryTable.
// NOTE(review): this also assumes the compiler inserts no padding between the two
// members - confirm.
//
typedef struct _PAGE_FAULT_IDT_TABLE {
PAGE_FAULT_CONTEXT PageFaultContext;
IA32_IDT_GATE_DESCRIPTOR IdtEntryTable[EXCEPTION_VECTOR_NUMBER];
} PAGE_FAULT_IDT_TABLE;
/**
Page fault handler entry stub, implemented in assembly.
It saves the volatile registers, calls PageFaultHandler() and then either returns
from the exception (fault handled) or jumps to the original handler (fault not handled).
Its address is installed into the page fault IDT entry by HookPageFaultHandler();
it ends with iretq or a jump, so it is not meant to be called directly from C.
**/
VOID
EFIAPI
PageFaultHandlerHook (
VOID
);
/**
  Hook IDT with our page fault handler so that the on-demand paging works on page fault.

  The function:
    - computes the physical address mask from CPUID (36 bits is assumed when
      CPUID leaf 0x80000008 is not available, and the width is capped at 48 bits),
    - records the handler currently installed in IdtEntry so that unhandled
      faults can be passed on to it,
    - redirects IdtEntry to PageFaultHandlerHook,
    - initializes the extra page pool bookkeeping in PageFaultContext.

  @param[in, out] IdtEntry          Pointer to IDT entry.
  @param[in, out] PageFaultContext  Pointer to page fault context.
                                    Page1GSupport must already be set by the caller;
                                    all other fields are initialized here.

**/
VOID
HookPageFaultHandler (
  IN OUT IA32_IDT_GATE_DESCRIPTOR   *IdtEntry,
  IN OUT PAGE_FAULT_CONTEXT         *PageFaultContext
  )
{
  UINT32  RegEax;
  UINT8   PhysicalAddressBits;
  UINTN   PageFaultHandlerHookAddress;

  AsmCpuid (0x80000000, &RegEax, NULL, NULL, NULL);
  if (RegEax >= 0x80000008) {
    AsmCpuid (0x80000008, &RegEax, NULL, NULL, NULL);
    PhysicalAddressBits = (UINT8) RegEax;
  } else {
    PhysicalAddressBits = 36;
  }
  PageFaultContext->PhyMask  = LShiftU64 (1, PhysicalAddressBits) - 1;
  PageFaultContext->PhyMask &= (1ull << 48) - SIZE_4KB;

  //
  // Set Page Fault entry to catch >4G access
  //
  PageFaultHandlerHookAddress = (UINTN)PageFaultHandlerHook;
  //
  // Rebuild the 64-bit address of the handler currently in the gate.
  // OffsetHigh is widened to 64 bits before it is shifted: it is narrower than int
  // (it is assigned a UINT16 below), so "OffsetHigh << 16" would be evaluated as a
  // signed int and, with bit 15 set, sign-extend into bits 63..32 of the result.
  //
  PageFaultContext->OriginalHandler = (VOID *)(UINTN)(
                                        LShiftU64 (IdtEntry->Bits.OffsetUpper, 32) +
                                        LShiftU64 (IdtEntry->Bits.OffsetHigh, 16) +
                                        IdtEntry->Bits.OffsetLow
                                        );
  IdtEntry->Bits.OffsetLow      = (UINT16)PageFaultHandlerHookAddress;
  IdtEntry->Bits.Selector       = (UINT16)AsmReadCs ();
  IdtEntry->Bits.Reserved_0     = 0;
  IdtEntry->Bits.GateType       = IA32_IDT_GATE_TYPE_INTERRUPT_32;
  IdtEntry->Bits.OffsetHigh     = (UINT16)(PageFaultHandlerHookAddress >> 16);
  IdtEntry->Bits.OffsetUpper    = (UINT32)(PageFaultHandlerHookAddress >> 32);
  IdtEntry->Bits.Reserved_1     = 0;

  //
  // The extra pages follow the page table pages that start at CR3: 2 pages are
  // skipped with 1GB page support, 6 pages otherwise.
  // NOTE(review): presumably PML4 + PDPT, or PML4 + PDPT + 4 PD pages for the
  // default 4G mapping - confirm against the code that builds the page table.
  //
  if (PageFaultContext->Page1GSupport) {
    PageFaultContext->PageFaultBuffer = (UINTN)(AsmReadCr3 () & PageFaultContext->PhyMask) + EFI_PAGES_TO_SIZE(2);
  } else {
    PageFaultContext->PageFaultBuffer = (UINTN)(AsmReadCr3 () & PageFaultContext->PhyMask) + EFI_PAGES_TO_SIZE(6);
  }
  PageFaultContext->PageFaultIndex = 0;
  ZeroMem (PageFaultContext->PageFaultUplink, sizeof (PageFaultContext->PageFaultUplink));
}
/**
  Acquire page for page fault.

  Takes the next page of the extra page pool (round-robin), clears it, unlinks it
  from the entry that referenced it before (if that entry still points at it) and
  links it into Uplink as a present, writable page table page.

  @param[in, out] PageFaultContext  Pointer to page fault context.
  @param[in, out] Uplink            Pointer to up page table entry.

**/
VOID
AcquirePage (
  IN OUT PAGE_FAULT_CONTEXT   *PageFaultContext,
  IN OUT UINT64               *Uplink
  )
{
  UINTN   Slot;
  UINTN   PageAddress;
  UINT64  *StaleUplink;

  //
  // Pick the pool page for the current slot and start from a clean page.
  //
  Slot        = PageFaultContext->PageFaultIndex;
  PageAddress = PageFaultContext->PageFaultBuffer + EFI_PAGES_TO_SIZE (Slot);
  ZeroMem ((VOID *) PageAddress, EFI_PAGES_TO_SIZE (1));

  //
  // Drop the previous reference to this page, unless that entry has been
  // rewritten in the meantime and no longer points here.
  //
  StaleUplink = PageFaultContext->PageFaultUplink[Slot];
  if (StaleUplink != NULL) {
    if ((*StaleUplink & PageFaultContext->PhyMask) == PageAddress) {
      *StaleUplink = 0;
    }
  }

  //
  // Hook the page into the new parent entry and remember that entry.
  //
  *Uplink = PageAddress | IA32_PG_P | IA32_PG_RW;
  PageFaultContext->PageFaultUplink[Slot] = Uplink;

  //
  // Advance to the next slot, wrapping at the end of the pool.
  //
  PageFaultContext->PageFaultIndex = (Slot + 1) % EXTRA_PAGE_TABLE_PAGES;
}
/**
  The page fault handler that on-demand read >4G memory/MMIO.

  The faulting address is read from CR2. Addresses beyond the supported physical
  address range are not handled. Otherwise the missing paging structures are taken
  from the extra page pool and the address is identity mapped with a 1GB page
  (Page1GSupport) or a 2MB page.

  @retval NULL              The page fault is correctly handled.
  @retval OriginalHandler   The page fault is not handled and is passed through to original handler.

**/
VOID *
EFIAPI
PageFaultHandler (
  VOID
  )
{
  IA32_DESCRIPTOR     IdtDescriptor;
  PAGE_FAULT_CONTEXT  *Context;
  UINT64              AddressMask;
  UINT64              FaultAddress;
  UINT64              *Table;
  UINT64              *Entry;

  //
  // The page fault context is stored right in front of the IDT.
  //
  AsmReadIdtr (&IdtDescriptor);
  Context     = (PAGE_FAULT_CONTEXT *) (UINTN) (IdtDescriptor.Base - sizeof (PAGE_FAULT_CONTEXT));
  AddressMask = Context->PhyMask;

  FaultAddress = AsmReadCr2 ();
  DEBUG ((EFI_D_ERROR, "CapsuleX64 - PageFaultHandler: Cr2 - %lx\n", FaultAddress));

  //
  // Addresses above the physical address range cannot be mapped here.
  //
  if (FaultAddress >= AddressMask + SIZE_4KB) {
    return Context->OriginalHandler;
  }
  FaultAddress &= AddressMask;

  //
  // PML4E: make sure a PDPT page exists.
  //
  Table = (UINT64 *) (UINTN) (AsmReadCr3 () & AddressMask);
  Entry = &Table[BitFieldRead64 (FaultAddress, 39, 47)];
  if ((*Entry & IA32_PG_P) == 0) {
    AcquirePage (Context, Entry);
  }

  //
  // PDPTE: map a 1GB page directly when supported.
  //
  Table = (UINT64 *) (UINTN) (*Entry & AddressMask);
  Entry = &Table[BitFieldRead64 (FaultAddress, 30, 38)];
  if (Context->Page1GSupport) {
    *Entry = (FaultAddress & ~((1ull << 30) - 1)) | IA32_PG_P | IA32_PG_RW | IA32_PG_PS;
    return NULL;
  }

  //
  // Otherwise make sure a PD page exists and map a 2MB page in it.
  //
  if ((*Entry & IA32_PG_P) == 0) {
    AcquirePage (Context, Entry);
  }

  //
  // PD
  //
  Table  = (UINT64 *) (UINTN) (*Entry & AddressMask);
  Entry  = &Table[BitFieldRead64 (FaultAddress, 21, 29)];
  *Entry = (FaultAddress & ~((1ull << 21) - 1)) | IA32_PG_P | IA32_PG_RW | IA32_PG_PS;

  return NULL;
}
/**
The X64 entrypoint is used to process capsule in long mode then
return to 32-bit protected mode.
@@ -40,7 +218,8 @@ _ModuleEntryPoint (
EFI_STATUS Status;
IA32_DESCRIPTOR Ia32Idtr;
IA32_DESCRIPTOR X64Idtr;
IA32_IDT_GATE_DESCRIPTOR IdtEntryTable[EXCEPTION_VECTOR_NUMBER];
PAGE_FAULT_IDT_TABLE PageFaultIdtTable;
IA32_IDT_GATE_DESCRIPTOR *IdtEntry;
//
// Save the IA32 IDT Descriptor
@@ -50,8 +229,8 @@ _ModuleEntryPoint (
//
// Setup X64 IDT table
//
ZeroMem (IdtEntryTable, sizeof (IA32_IDT_GATE_DESCRIPTOR) * EXCEPTION_VECTOR_NUMBER);
X64Idtr.Base = (UINTN) IdtEntryTable;
ZeroMem (PageFaultIdtTable.IdtEntryTable, sizeof (IA32_IDT_GATE_DESCRIPTOR) * EXCEPTION_VECTOR_NUMBER);
X64Idtr.Base = (UINTN) PageFaultIdtTable.IdtEntryTable;
X64Idtr.Limit = (UINT16) (sizeof (IA32_IDT_GATE_DESCRIPTOR) * EXCEPTION_VECTOR_NUMBER - 1);
AsmWriteIdtr ((IA32_DESCRIPTOR *) &X64Idtr);
@@ -60,7 +239,14 @@ _ModuleEntryPoint (
//
Status = InitializeCpuExceptionHandlers (NULL);
ASSERT_EFI_ERROR (Status);
//
// Hook page fault handler to handle >4G request.
//
PageFaultIdtTable.PageFaultContext.Page1GSupport = EntrypointContext->Page1GSupport;
IdtEntry = (IA32_IDT_GATE_DESCRIPTOR *) (X64Idtr.Base + (14 * sizeof (IA32_IDT_GATE_DESCRIPTOR)));
HookPageFaultHandler (IdtEntry, &(PageFaultIdtTable.PageFaultContext));
//
// Initialize Debug Agent to support source level debug
//