Files
system76-edk2/UefiCpuPkg/Library/CpuCommonFeaturesLib/ProcTrace.c
Dun Tan ede0bd1496 UefiCpuPkg: Update PT code to support enable collect performance
Update ProcTrace feature code to support enable collect performance
data by generating CYC and TSC packets. Add a new dynamic
PCD to indicate if enable performance collecting. In ProcTrace.c
code, if this new PCD is true, after check cpuid, CYC and TSC
packets will be generated by setting the corresponding MSR bits
feilds if supported.

Bugzila: https://bugzilla.tianocore.org/show_bug.cgi?id=4423
Signed-off-by: Dun Tan <dun.tan@intel.com>
Cc: Eric Dong <eric.dong@intel.com>
Reviewed-by: Ray Ni <ray.ni@intel.com>
Cc: Rahul Kumar <rahul1.kumar@intel.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Xiao X Chen <xiao.x.chen@intel.com>
2023-04-26 09:19:51 +00:00

551 lines
20 KiB
C

/** @file
Intel Processor Trace feature.
Copyright (c) 2017 - 2023, Intel Corporation. All rights reserved.<BR>
SPDX-License-Identifier: BSD-2-Clause-Patent
**/
#include "CpuCommonFeatures.h"
///
/// This macro define the max entries in the Topa table.
/// Each entry in the table contains some attribute bits, a pointer to an output region, and the size of the region.
/// The last entry in the table may hold a pointer to the next table. This pointer can either point to the top of the
/// current table (for circular array) or to the base of another table.
/// At least 2 entries are needed because the list of entries must
/// be terminated by an entry with the END bit set to 1, so 2
/// entries are required to use a single valid entry.
///
#define MAX_TOPA_ENTRY_COUNT 2
///
/// Processor trace output scheme selection.
///
typedef enum {
RtitOutputSchemeSingleRange = 0,
RtitOutputSchemeToPA
} RTIT_OUTPUT_SCHEME;
typedef struct {
BOOLEAN TopaSupported;
BOOLEAN SingleRangeSupported;
MSR_IA32_RTIT_CTL_REGISTER RtitCtrl;
MSR_IA32_RTIT_OUTPUT_BASE_REGISTER RtitOutputBase;
MSR_IA32_RTIT_OUTPUT_MASK_PTRS_REGISTER RtitOutputMaskPtrs;
BOOLEAN CycPacketSupported;
} PROC_TRACE_PROCESSOR_DATA;
typedef struct {
UINT32 NumberOfProcessors;
UINT8 ProcTraceOutputScheme;
UINT32 ProcTraceMemSize;
UINTN *ThreadMemRegionTable;
UINTN AllocatedThreads;
UINTN *TopaMemArray;
BOOLEAN EnableOnBspOnly;
BOOLEAN EnablePerformanceCollecting;
PROC_TRACE_PROCESSOR_DATA *ProcessorData;
} PROC_TRACE_DATA;
typedef struct {
RTIT_TOPA_TABLE_ENTRY TopaEntry[MAX_TOPA_ENTRY_COUNT];
} PROC_TRACE_TOPA_TABLE;
/**
Prepares for the data used by CPU feature detection and initialization.
@param[in] NumberOfProcessors The number of CPUs in the platform.
@return Pointer to a buffer of CPU related configuration data.
@note This service could be called by BSP only.
**/
VOID *
EFIAPI
ProcTraceGetConfigData (
IN UINTN NumberOfProcessors
)
{
PROC_TRACE_DATA *ConfigData;
ConfigData = AllocateZeroPool (sizeof (PROC_TRACE_DATA) + sizeof (PROC_TRACE_PROCESSOR_DATA) * NumberOfProcessors);
ASSERT (ConfigData != NULL);
ConfigData->ProcessorData = (PROC_TRACE_PROCESSOR_DATA *)((UINT8 *)ConfigData + sizeof (PROC_TRACE_DATA));
ConfigData->NumberOfProcessors = (UINT32)NumberOfProcessors;
ConfigData->ProcTraceMemSize = PcdGet32 (PcdCpuProcTraceMemSize);
ConfigData->ProcTraceOutputScheme = PcdGet8 (PcdCpuProcTraceOutputScheme);
ConfigData->EnableOnBspOnly = PcdGetBool (PcdCpuProcTraceBspOnly);
ConfigData->EnablePerformanceCollecting = PcdGetBool (PcdCpuProcTracePerformanceCollecting);
return ConfigData;
}
/**
Detects if Intel Processor Trace feature supported on current
processor.
@param[in] ProcessorNumber The index of the CPU executing this function.
@param[in] CpuInfo A pointer to the REGISTER_CPU_FEATURE_INFORMATION
structure for the CPU executing this function.
@param[in] ConfigData A pointer to the configuration buffer returned
by CPU_FEATURE_GET_CONFIG_DATA. NULL if
CPU_FEATURE_GET_CONFIG_DATA was not provided in
RegisterCpuFeature().
@retval TRUE Processor Trace feature is supported.
@retval FALSE Processor Trace feature is not supported.
@note This service could be called by BSP/APs.
**/
BOOLEAN
EFIAPI
ProcTraceSupport (
IN UINTN ProcessorNumber,
IN REGISTER_CPU_FEATURE_INFORMATION *CpuInfo,
IN VOID *ConfigData OPTIONAL
)
{
PROC_TRACE_DATA *ProcTraceData;
CPUID_STRUCTURED_EXTENDED_FEATURE_FLAGS_EBX Ebx;
CPUID_INTEL_PROCESSOR_TRACE_MAIN_LEAF_ECX ProcTraceEcx;
CPUID_INTEL_PROCESSOR_TRACE_MAIN_LEAF_EBX ProcTraceEbx;
//
// Check if ProcTraceMemorySize option is enabled (0xFF means disable by user)
//
ProcTraceData = (PROC_TRACE_DATA *)ConfigData;
ASSERT (ProcTraceData != NULL);
if ((ProcTraceData->ProcTraceMemSize > RtitTopaMemorySize128M) ||
(ProcTraceData->ProcTraceOutputScheme > RtitOutputSchemeToPA))
{
return FALSE;
}
//
// Check if Processor Trace is supported
//
AsmCpuidEx (CPUID_STRUCTURED_EXTENDED_FEATURE_FLAGS, 0, NULL, &Ebx.Uint32, NULL, NULL);
if (Ebx.Bits.IntelProcessorTrace == 0) {
return FALSE;
}
AsmCpuidEx (CPUID_INTEL_PROCESSOR_TRACE, CPUID_INTEL_PROCESSOR_TRACE_MAIN_LEAF, NULL, &ProcTraceEbx.Uint32, &ProcTraceEcx.Uint32, NULL);
ProcTraceData->ProcessorData[ProcessorNumber].TopaSupported = (BOOLEAN)(ProcTraceEcx.Bits.RTIT == 1);
ProcTraceData->ProcessorData[ProcessorNumber].SingleRangeSupported = (BOOLEAN)(ProcTraceEcx.Bits.SingleRangeOutput == 1);
if ((ProcTraceData->ProcessorData[ProcessorNumber].TopaSupported && (ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeToPA)) ||
(ProcTraceData->ProcessorData[ProcessorNumber].SingleRangeSupported && (ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeSingleRange)))
{
ProcTraceData->ProcessorData[ProcessorNumber].RtitCtrl.Uint64 = AsmReadMsr64 (MSR_IA32_RTIT_CTL);
ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputBase.Uint64 = AsmReadMsr64 (MSR_IA32_RTIT_OUTPUT_BASE);
ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputMaskPtrs.Uint64 = AsmReadMsr64 (MSR_IA32_RTIT_OUTPUT_MASK_PTRS);
ProcTraceData->ProcessorData[ProcessorNumber].CycPacketSupported = (BOOLEAN)(ProcTraceEbx.Bits.ConfigurablePsb == 1);
return TRUE;
}
return FALSE;
}
/**
Initializes Intel Processor Trace feature to specific state.
@param[in] ProcessorNumber The index of the CPU executing this function.
@param[in] CpuInfo A pointer to the REGISTER_CPU_FEATURE_INFORMATION
structure for the CPU executing this function.
@param[in] ConfigData A pointer to the configuration buffer returned
by CPU_FEATURE_GET_CONFIG_DATA. NULL if
CPU_FEATURE_GET_CONFIG_DATA was not provided in
RegisterCpuFeature().
@param[in] State If TRUE, then the Processor Trace feature must be
enabled.
If FALSE, then the Processor Trace feature must be
disabled.
@retval RETURN_SUCCESS Intel Processor Trace feature is initialized.
**/
RETURN_STATUS
EFIAPI
ProcTraceInitialize (
IN UINTN ProcessorNumber,
IN REGISTER_CPU_FEATURE_INFORMATION *CpuInfo,
IN VOID *ConfigData OPTIONAL,
IN BOOLEAN State
)
{
UINT32 MemRegionSize;
UINTN Pages;
UINTN Alignment;
UINTN MemRegionBaseAddr;
UINTN *ThreadMemRegionTable;
UINTN Index;
UINTN TopaTableBaseAddr;
UINTN AlignedAddress;
UINTN *TopaMemArray;
PROC_TRACE_TOPA_TABLE *TopaTable;
PROC_TRACE_DATA *ProcTraceData;
BOOLEAN FirstIn;
MSR_IA32_RTIT_CTL_REGISTER CtrlReg;
MSR_IA32_RTIT_STATUS_REGISTER StatusReg;
MSR_IA32_RTIT_OUTPUT_BASE_REGISTER OutputBaseReg;
MSR_IA32_RTIT_OUTPUT_MASK_PTRS_REGISTER OutputMaskPtrsReg;
RTIT_TOPA_TABLE_ENTRY *TopaEntryPtr;
BOOLEAN IsBsp;
//
// The scope of the MSR_IA32_RTIT_* is core for below processor type, only program
// MSR_IA32_RTIT_* for thread 0 in each core.
//
if (IS_GOLDMONT_PROCESSOR (CpuInfo->DisplayFamily, CpuInfo->DisplayModel) ||
IS_GOLDMONT_PLUS_PROCESSOR (CpuInfo->DisplayFamily, CpuInfo->DisplayModel))
{
if (CpuInfo->ProcessorInfo.Location.Thread != 0) {
return RETURN_SUCCESS;
}
}
ProcTraceData = (PROC_TRACE_DATA *)ConfigData;
ASSERT (ProcTraceData != NULL);
//
// Clear MSR_IA32_RTIT_CTL[0] and IA32_RTIT_STS only if MSR_IA32_RTIT_CTL[0]==1b
//
CtrlReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitCtrl.Uint64;
if (CtrlReg.Bits.TraceEn != 0) {
///
/// Clear bit 0 in MSR IA32_RTIT_CTL (570)
///
CtrlReg.Bits.TraceEn = 0;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_CTL,
CtrlReg.Uint64
);
///
/// Clear MSR IA32_RTIT_STS (571h) to all zeros
///
StatusReg.Uint64 = 0x0;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_STATUS,
StatusReg.Uint64
);
}
if (!State) {
return RETURN_SUCCESS;
}
IsBsp = (CpuInfo->ProcessorInfo.StatusFlag & PROCESSOR_AS_BSP_BIT) ? TRUE : FALSE;
if (ProcTraceData->EnableOnBspOnly && !IsBsp) {
return RETURN_SUCCESS;
}
MemRegionBaseAddr = 0;
FirstIn = FALSE;
if (ProcTraceData->ThreadMemRegionTable == NULL) {
FirstIn = TRUE;
DEBUG ((DEBUG_INFO, "Initialize Processor Trace\n"));
}
///
/// Refer to PROC_TRACE_MEM_SIZE Table for Size Encoding
///
MemRegionSize = (UINT32)(1 << (ProcTraceData->ProcTraceMemSize + 12));
if (FirstIn) {
DEBUG ((DEBUG_INFO, "ProcTrace: MemSize requested: 0x%X \n", MemRegionSize));
}
if (FirstIn) {
//
// Let BSP allocate and create the necessary memory region (Aligned to the size of
// the memory region from setup option(ProcTraceMemSize) which is an integral multiple of 4kB)
// for all the enabled threads to store Processor Trace debug data. Then Configure the trace
// address base in MSR, IA32_RTIT_OUTPUT_BASE (560h) bits 47:12. Note that all regions must be
// aligned based on their size, not just 4K. Thus a 2M region must have bits 20:12 cleared.
//
Pages = EFI_SIZE_TO_PAGES (MemRegionSize);
Alignment = MemRegionSize;
if (ProcTraceData->EnableOnBspOnly) {
//
// When only enable ProcTrace on BSP, this is the first and only time ProcTraceInitialize() runs.
//
MemRegionBaseAddr = (UINTN)AllocateAlignedReservedPages (Pages, Alignment);
if (MemRegionBaseAddr == 0) {
//
// Could not allocate for BSP even
//
DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, failed to allocate buffer for BSP\n"));
return RETURN_OUT_OF_RESOURCES;
}
DEBUG ((DEBUG_INFO, "ProcTrace: Allocated PT MemRegionBaseAddr(aligned) for BSP only: 0x%llX.\n", (UINT64)MemRegionBaseAddr));
} else {
ThreadMemRegionTable = (UINTN *)AllocatePool (ProcTraceData->NumberOfProcessors * sizeof (UINTN *));
if (ThreadMemRegionTable == NULL) {
DEBUG ((DEBUG_ERROR, "Allocate ProcTrace ThreadMemRegionTable Failed\n"));
return RETURN_OUT_OF_RESOURCES;
}
ProcTraceData->ThreadMemRegionTable = ThreadMemRegionTable;
for (Index = 0; Index < ProcTraceData->NumberOfProcessors; Index++, ProcTraceData->AllocatedThreads++) {
AlignedAddress = (UINTN)AllocateAlignedReservedPages (Pages, Alignment);
if (AlignedAddress == 0) {
DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, allocated only for %d threads\n", ProcTraceData->AllocatedThreads));
if (Index == 0) {
//
// Could not allocate for BSP even
//
FreePool ((VOID *)ThreadMemRegionTable);
ThreadMemRegionTable = NULL;
return RETURN_OUT_OF_RESOURCES;
}
break;
}
ThreadMemRegionTable[Index] = AlignedAddress;
DEBUG ((DEBUG_INFO, "ProcTrace: PT MemRegionBaseAddr(aligned) for thread %d: 0x%llX \n", Index, (UINT64)ThreadMemRegionTable[Index]));
}
DEBUG ((DEBUG_INFO, "ProcTrace: Allocated PT mem for %d thread \n", ProcTraceData->AllocatedThreads));
}
}
if (!ProcTraceData->EnableOnBspOnly) {
if (ProcessorNumber < ProcTraceData->AllocatedThreads) {
MemRegionBaseAddr = ProcTraceData->ThreadMemRegionTable[ProcessorNumber];
} else {
return RETURN_SUCCESS;
}
}
///
/// Check Processor Trace output scheme: Single Range output or ToPA table
///
//
// Single Range output scheme
//
if (ProcTraceData->ProcessorData[ProcessorNumber].SingleRangeSupported &&
(ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeSingleRange))
{
if (FirstIn) {
DEBUG ((DEBUG_INFO, "ProcTrace: Enabling Single Range Output scheme \n"));
}
//
// Clear MSR IA32_RTIT_CTL (0x570) ToPA (Bit 8)
//
CtrlReg.Bits.ToPA = 0;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_CTL,
CtrlReg.Uint64
);
//
// Program MSR IA32_RTIT_OUTPUT_BASE (0x560) bits[63:7] with the allocated Memory Region
//
OutputBaseReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputBase.Uint64;
OutputBaseReg.Bits.Base = (MemRegionBaseAddr >> 7) & 0x01FFFFFF;
OutputBaseReg.Bits.BaseHi = RShiftU64 ((UINT64)MemRegionBaseAddr, 32) & 0xFFFFFFFF;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_OUTPUT_BASE,
OutputBaseReg.Uint64
);
//
// Program the Mask bits for the Memory Region to MSR IA32_RTIT_OUTPUT_MASK_PTRS (561h)
//
OutputMaskPtrsReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputMaskPtrs.Uint64;
OutputMaskPtrsReg.Bits.MaskOrTableOffset = ((MemRegionSize - 1) >> 7) & 0x01FFFFFF;
OutputMaskPtrsReg.Bits.OutputOffset = RShiftU64 (MemRegionSize - 1, 32) & 0xFFFFFFFF;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_OUTPUT_MASK_PTRS,
OutputMaskPtrsReg.Uint64
);
}
//
// ToPA(Table of physical address) scheme
//
if (ProcTraceData->ProcessorData[ProcessorNumber].TopaSupported &&
(ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeToPA))
{
//
// Create ToPA structure aligned at 4KB for each logical thread
// with at least 2 entries by 8 bytes size each. The first entry
// should have the trace output base address in bits 47:12, 6:9
// for Size, bits 4,2 and 0 must be cleared. The second entry
// should have the base address of the table location in bits
// 47:12, bits 4 and 2 must be cleared and bit 0 must be set.
//
if (FirstIn) {
DEBUG ((DEBUG_INFO, "ProcTrace: Enabling ToPA scheme \n"));
Pages = EFI_SIZE_TO_PAGES (sizeof (PROC_TRACE_TOPA_TABLE));
Alignment = 0x1000;
if (ProcTraceData->EnableOnBspOnly) {
//
// When only enable ProcTrace on BSP, this is the first and only time ProcTraceInitialize() runs.
//
TopaTableBaseAddr = (UINTN)AllocateAlignedReservedPages (Pages, Alignment);
if (TopaTableBaseAddr == 0) {
DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, failed to allocate ToPA mem for BSP"));
return RETURN_OUT_OF_RESOURCES;
}
DEBUG ((DEBUG_INFO, "ProcTrace: Topa table address(aligned) for BSP only: 0x%llX \n", (UINT64)TopaTableBaseAddr));
} else {
//
// Let BSP allocate ToPA table mem for all threads
//
TopaMemArray = (UINTN *)AllocatePool (ProcTraceData->AllocatedThreads * sizeof (UINTN *));
if (TopaMemArray == NULL) {
DEBUG ((DEBUG_ERROR, "ProcTrace: Allocate mem for ToPA Failed\n"));
return RETURN_OUT_OF_RESOURCES;
}
ProcTraceData->TopaMemArray = TopaMemArray;
for (Index = 0; Index < ProcTraceData->AllocatedThreads; Index++) {
AlignedAddress = (UINTN)AllocateAlignedReservedPages (Pages, Alignment);
if (AlignedAddress == 0) {
if (Index < ProcTraceData->AllocatedThreads) {
ProcTraceData->AllocatedThreads = Index;
}
DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, allocated ToPA mem only for %d threads\n", ProcTraceData->AllocatedThreads));
if (Index == 0) {
//
// Could not allocate for BSP even
//
FreePool ((VOID *)TopaMemArray);
TopaMemArray = NULL;
return RETURN_OUT_OF_RESOURCES;
}
break;
}
TopaMemArray[Index] = AlignedAddress;
DEBUG ((DEBUG_INFO, "ProcTrace: Topa table address(aligned) for thread %d is 0x%llX \n", Index, (UINT64)TopaMemArray[Index]));
}
DEBUG ((DEBUG_INFO, "ProcTrace: Allocated ToPA mem for %d thread \n", ProcTraceData->AllocatedThreads));
}
}
if (!ProcTraceData->EnableOnBspOnly) {
if (ProcessorNumber < ProcTraceData->AllocatedThreads) {
TopaTableBaseAddr = ProcTraceData->TopaMemArray[ProcessorNumber];
} else {
return RETURN_SUCCESS;
}
}
TopaTable = (PROC_TRACE_TOPA_TABLE *)TopaTableBaseAddr;
TopaEntryPtr = &TopaTable->TopaEntry[0];
TopaEntryPtr->Uint64 = 0;
TopaEntryPtr->Bits.Base = (MemRegionBaseAddr >> 12) & 0x000FFFFF;
TopaEntryPtr->Bits.BaseHi = RShiftU64 ((UINT64)MemRegionBaseAddr, 32) & 0xFFFFFFFF;
TopaEntryPtr->Bits.Size = ProcTraceData->ProcTraceMemSize;
TopaEntryPtr->Bits.END = 0;
TopaEntryPtr = &TopaTable->TopaEntry[1];
TopaEntryPtr->Uint64 = 0;
TopaEntryPtr->Bits.Base = (TopaTableBaseAddr >> 12) & 0x000FFFFF;
TopaEntryPtr->Bits.BaseHi = RShiftU64 ((UINT64)TopaTableBaseAddr, 32) & 0xFFFFFFFF;
TopaEntryPtr->Bits.END = 1;
//
// Program the MSR IA32_RTIT_OUTPUT_BASE (0x560) bits[63:7] with ToPA base
//
OutputBaseReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputBase.Uint64;
OutputBaseReg.Bits.Base = (TopaTableBaseAddr >> 7) & 0x01FFFFFF;
OutputBaseReg.Bits.BaseHi = RShiftU64 ((UINT64)TopaTableBaseAddr, 32) & 0xFFFFFFFF;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_OUTPUT_BASE,
OutputBaseReg.Uint64
);
//
// Set the MSR IA32_RTIT_OUTPUT_MASK (0x561) bits[63:7] to 0
//
OutputMaskPtrsReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputMaskPtrs.Uint64;
OutputMaskPtrsReg.Bits.MaskOrTableOffset = 0;
OutputMaskPtrsReg.Bits.OutputOffset = 0;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_OUTPUT_MASK_PTRS,
OutputMaskPtrsReg.Uint64
);
//
// Enable ToPA output scheme by enabling MSR IA32_RTIT_CTL (0x570) ToPA (Bit 8)
//
CtrlReg.Bits.ToPA = 1;
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_CTL,
CtrlReg.Uint64
);
}
///
/// Enable the Processor Trace feature from MSR IA32_RTIT_CTL (570h)
///
CtrlReg.Bits.OS = 1;
CtrlReg.Bits.User = 1;
CtrlReg.Bits.BranchEn = 1;
CtrlReg.Bits.TraceEn = 1;
//
// Generate CYC/TSC timing packets to collect performance data.
//
if (ProcTraceData->EnablePerformanceCollecting) {
if (ProcTraceData->ProcessorData[ProcessorNumber].CycPacketSupported) {
CtrlReg.Bits.CYCEn = 1;
CtrlReg.Bits.CYCThresh = 5;
}
//
// Write to TSCEn is always supported
//
CtrlReg.Bits.TSCEn = 1;
}
CPU_REGISTER_TABLE_WRITE64 (
ProcessorNumber,
Msr,
MSR_IA32_RTIT_CTL,
CtrlReg.Uint64
);
return RETURN_SUCCESS;
}