cpu/amd/fam10h-fam15h: Update Fam15h APIC config and startup sequence

This fixes Family 15h multiple package support; the previous code
hung in CAR setup and romstage when more than one CPU package was
installed for a variety of loosely related reasons.

TEST: Booted ASUS KGPE-D16 with two Opteron 6328 processors
and several different RDIMM configurations.

Change-Id: I171197c90f72d3496a385465937b7666cbf7e308
Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
Reviewed-on: http://review.coreboot.org/12020
Tested-by: build bot (Jenkins)
Reviewed-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
This commit is contained in:
Timothy Pearson
2015-07-30 14:07:15 -05:00
committed by Martin Roth
parent 631c8a2690
commit 0122afb609
32 changed files with 452 additions and 118 deletions

View File

@@ -548,8 +548,23 @@ CAR_FAM10_ap:
/* Fam10h NB config bit 54 was not set */
rolb %cl, %bl
roll_cfg:
jmp_if_not_fam15h(ap_apicid_ready)
cmp $0x5, %ecx
jne ap_apicid_ready
/* Calculate stack pointer. */
/* This is a multi-node CPU
* Adjust the maximum APIC ID to a more reasonable value
* given that no 32-core Family 15h processors exist
*/
movl %ebx, %ecx
and $0x0f, %ecx /* Get lower 4 bits of CPU number */
and $0x60, %ebx /* Get node ID */
shrl $0x1, %ebx /* Shift node ID part of APIC ID down by 1 */
or %ecx, %ebx /* Recombine node ID and CPU number */
ap_apicid_ready:
/* Calculate stack pointer using adjusted APIC ID stored in ebx */
movl $CacheSizeAPStack, %eax
mull %ebx
movl $(CacheBase + (CacheSize - (CacheSizeBSPStack + CacheSizeBSPSlush))), %esp

View File

@@ -240,18 +240,37 @@ static const struct {
{ 0, 0x68, (AMD_DR_B0 | AMD_DR_B1),
AMD_PTYPE_SVR, 0x00200000, 0x00600000 }, /* [22:21] DsNpReqLmt0 = 01b */
{ 0, 0x84, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
{ 0, 0x84, AMD_FAM10_ALL, AMD_PTYPE_ALL,
0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
{ 0, 0xA4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
{ 0, 0xA4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
{ 0, 0xC4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
{ 0, 0xC4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
{ 0, 0xE4, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
{ 0, 0xE4, AMD_FAM10_ALL, AMD_PTYPE_ALL,
0x00002000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
/* FIXME
* Non-C32 packages only
*/
{ 0, 0x84, AMD_FAM15_ALL, AMD_PTYPE_ALL,
0x00000000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
{ 0, 0xA4, AMD_FAM15_ALL, AMD_PTYPE_ALL,
0x00000000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
{ 0, 0xC4, AMD_FAM15_ALL, AMD_PTYPE_ALL,
0x00000000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
{ 0, 0xE4, AMD_FAM15_ALL, AMD_PTYPE_ALL,
0x00000000, 0x00002000 }, /* [13] LdtStopTriEn = 1 */
/* FIXME
* C32 package is not supported at this time
*/
/* Link Global Retry Control Register */
{ 0, 0x150, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL,
0x00073900, 0x00073F00 },
@@ -610,38 +629,79 @@ static const struct {
{ 0x530A, AMD_DR_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
0x00004400, 0x00006400 }, /* HT_PHY_DLL_REG */
{ 0xCF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
{ 0xCF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x00000000, 0x000000FF }, /* Provide clear setting for logical
completeness */
{ 0xDF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
{ 0xDF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x00000000, 0x000000FF }, /* Provide clear setting for logical
completeness */
{ 0xCF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
{ 0xCF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */
{ 0xDF, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
{ 0xDF, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */
/* Link Phy Receiver Loop Filter Registers */
{ 0xD1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
{ 0xD1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h,
[21:14] LfcMin = 10h */
{ 0xC1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
{ 0xC1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h,
[21:14] LfcMin = 10h */
{ 0xD1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
{ 0xD1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h,
[21:14] LfcMin = 08h */
{ 0xC1, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
{ 0xC1, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h,
[21:14] LfcMin = 08h */
{ 0xC0, (AMD_FAM10_ALL | AMD_FAM15_ALL), AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
{ 0xC0, AMD_FAM10_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
0x40040000, 0xe01F0000 }, /* [31:29] RttCtl = 02h,
[20:16] RttIndex = 04h */
[20:16] RttIndex = 04h */
/* FIXME
* Causes lockups for some reason when more than one package is installed
* Debug and reactivate!
*/
// #if 0
{ 0xCF, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x00000000, 0x000000FF }, /* Provide clear setting for logical
completeness */
{ 0xDF, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x00000000, 0x000000FF }, /* Provide clear setting for logical
completeness */
{ 0xCF, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */
{ 0xDF, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x0000006D, 0x000000FF }, /* HT_PHY_HT1_FIFO_PTR_OPT_VALUE */
/* Link Phy Receiver Loop Filter Registers */
{ 0xD1, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h,
[21:14] LfcMin = 10h */
{ 0xC1, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT3,
0x08040000, 0x3FFFC000 }, /* [29:22] LfcMax = 20h,
[21:14] LfcMin = 10h */
{ 0xD1, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h,
[21:14] LfcMin = 08h */
{ 0xC1, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_HT1,
0x04020000, 0x3FFFC000 }, /* [29:22] LfcMax = 10h,
[21:14] LfcMin = 08h */
{ 0xC0, AMD_FAM15_ALL, AMD_PTYPE_ALL, HTPHY_LINKTYPE_ALL,
0x40040000, 0xe01F0000 }, /* [31:29] RttCtl = 02h,
[20:16] RttIndex = 04h */
// #endif
};

View File

@@ -629,44 +629,45 @@ static void prep_fid_change(void)
}
static void waitCurrentPstate(u32 target_pstate) {
msr_t initial_msr = rdmsr(TSC_MSR);
msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
msr_t tsc_msr;
u8 timedout ;
msr_t initial_msr = rdmsr(TSC_MSR);
msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
msr_t tsc_msr;
u8 timedout ;
/* paranoia ? I fear when we run fixPsNbVidBeforeWR we can enter a
* P1 that is a copy of P0, therefore has the same NB DID but the
* TSC will count twice per tick, so we have to wait for twice the
* count to achieve the desired timeout. But I'm likely to
* misunderstand this...
*/
u32 corrected_timeout = ( (pstate_msr.lo==1)
&& (!(rdmsr(0xC0010065).lo & NB_DID_M_ON)) ) ?
WAIT_PSTATE_TIMEOUT*2 : WAIT_PSTATE_TIMEOUT ;
msr_t timeout;
/* paranoia ? I fear when we run fixPsNbVidBeforeWR we can enter a
* P1 that is a copy of P0, therefore has the same NB DID but the
* TSC will count twice per tick, so we have to wait for twice the
* count to achieve the desired timeout. But I'm likely to
* misunderstand this...
*/
u32 corrected_timeout = ((pstate_msr.lo==1)
&& (!(rdmsr(0xC0010065).lo & NB_DID_M_ON)) ) ?
WAIT_PSTATE_TIMEOUT*2 : WAIT_PSTATE_TIMEOUT;
msr_t timeout;
timeout.lo = initial_msr.lo + corrected_timeout ;
timeout.hi = initial_msr.hi;
if ( (((u32)0xffffffff) - initial_msr.lo) < corrected_timeout ) {
timeout.hi++;
}
timeout.lo = initial_msr.lo + corrected_timeout ;
timeout.hi = initial_msr.hi;
if ( (((u32)0xffffffff) - initial_msr.lo) < corrected_timeout ) {
timeout.hi++;
}
// assuming TSC ticks at 1.25 ns per tick (800 MHz)
do {
pstate_msr = rdmsr(CUR_PSTATE_MSR);
tsc_msr = rdmsr(TSC_MSR);
timedout = (tsc_msr.hi > timeout.hi)
|| ((tsc_msr.hi == timeout.hi) && (tsc_msr.lo > timeout.lo ));
} while ( (pstate_msr.lo != target_pstate) && (! timedout) ) ;
// assuming TSC ticks at 1.25 ns per tick (800 MHz)
do {
pstate_msr = rdmsr(CUR_PSTATE_MSR);
tsc_msr = rdmsr(TSC_MSR);
timedout = (tsc_msr.hi > timeout.hi)
|| ((tsc_msr.hi == timeout.hi) && (tsc_msr.lo > timeout.lo ));
} while ( (pstate_msr.lo != target_pstate) && (! timedout) ) ;
if (pstate_msr.lo != target_pstate) {
msr_t limit_msr = rdmsr(0xc0010061);
printk(BIOS_ERR, "*** Time out waiting for P-state %01x. Current P-state %01x P-state current limit MSRC001_0061=%08x %08x\n", target_pstate, pstate_msr.lo, limit_msr.hi, limit_msr.lo);
if (pstate_msr.lo != target_pstate) {
msr_t limit_msr = rdmsr(0xc0010061);
printk(BIOS_ERR, "*** APIC ID %02x: timed out waiting for P-state %01x. Current P-state %01x P-state current limit MSRC001_0061=%08x %08x\n",
cpuid_ebx(0x00000001) >> 24, target_pstate, pstate_msr.lo, limit_msr.hi, limit_msr.lo);
do { // should we just go on instead ?
pstate_msr = rdmsr(CUR_PSTATE_MSR);
} while ( pstate_msr.lo != target_pstate ) ;
}
do { // should we just go on instead ?
pstate_msr = rdmsr(CUR_PSTATE_MSR);
} while ( pstate_msr.lo != target_pstate ) ;
}
}
static void set_pstate(u32 nonBoostedPState) {
@@ -1060,13 +1061,13 @@ static int init_fidvid_bsp(u32 bsp_apicid, u32 nodes)
APs and BSP */
ap_apicidx.num = 0;
for_each_ap(bsp_apicid, CONFIG_SET_FIDVID_CORE_RANGE, store_ap_apicid, &ap_apicidx);
for_each_ap(bsp_apicid, CONFIG_SET_FIDVID_CORE_RANGE, -1, store_ap_apicid, &ap_apicidx);
for (i = 0; i < ap_apicidx.num; i++) {
init_fidvid_bsp_stage1(ap_apicidx.apicid[i], &fv);
}
#else
for_each_ap(bsp_apicid, CONFIG_SET_FIDVID_CORE0_ONLY, init_fidvid_bsp_stage1, &fv);
for_each_ap(bsp_apicid, CONFIG_SET_FIDVID_CORE0_ONLY, -1, init_fidvid_bsp_stage1, &fv);
#endif
print_debug_fv("common_fid = ", fv.common_fid);

View File

@@ -55,6 +55,8 @@ static void set_EnableCf8ExtCfg(void)
static void set_EnableCf8ExtCfg(void) { }
#endif
// #define DEBUG_HT_SETUP 1
// #define FAM10_AP_NODE_SEQUENTIAL_START 1
typedef void (*process_ap_t) (u32 apicid, void *gp);
@@ -139,8 +141,8 @@ uint32_t get_boot_apic_id(uint8_t node, uint32_t core) {
//core range = 1 : core 0 only
//core range = 2 : cores other than core0
static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap,
void *gp)
static void for_each_ap(uint32_t bsp_apicid, uint32_t core_range, int8_t node,
process_ap_t process_ap, void *gp)
{
// here assume the OS don't change our apicid
u32 ap_apicid;
@@ -161,6 +163,9 @@ static void for_each_ap(u32 bsp_apicid, u32 core_range, process_ap_t process_ap,
}
for (i = 0; i < nodes; i++) {
if ((node >= 0) && (i != node))
continue;
cores_found = get_core_num_in_bsp(i);
u32 jstart, jend;
@@ -276,7 +281,7 @@ void wait_all_other_cores_started(u32 bsp_apicid)
{
// all aps other than core0
printk(BIOS_DEBUG, "started ap apicid: ");
for_each_ap(bsp_apicid, 2, wait_ap_started, (void *)0);
for_each_ap(bsp_apicid, 2, -1, wait_ap_started, (void *)0);
printk(BIOS_DEBUG, "\n");
}
@@ -369,8 +374,10 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo)
/* NB_CFG MSR is shared between cores, so we need make sure
core0 is done at first --- use wait_all_core0_started */
if (id.coreid == 0) {
set_apicid_cpuid_lo(); /* only set it on core0 */
set_EnableCf8ExtCfg(); /* only set it on core0 */
/* Set InitApicIdCpuIdLo / EnableCf8ExtCfg on core0 only */
if (!is_fam15h())
set_apicid_cpuid_lo();
set_EnableCf8ExtCfg();
#if CONFIG_ENABLE_APIC_EXT_ID
enable_apic_ext_id(id.nodeid);
#endif
@@ -423,6 +430,7 @@ static u32 init_cpus(u32 cpu_init_detectedx, struct sys_info *sysinfo)
}
// Mark the core as started.
lapic_write(LAPIC_MSG_REG, (apicid << 24) | F10_APSTATE_STARTED);
printk(BIOS_DEBUG, "CPU APICID %02x start flag set\n", apicid);
if (apicid != bsp_apicid) {
/* Setup each AP's cores MSRs.
@@ -584,6 +592,34 @@ static void setup_remote_node(u8 node)
}
#endif /* CONFIG_MAX_PHYSICAL_CPUS > 1 */
//it is running on core0 of node0
static void start_other_cores(uint32_t bsp_apicid)
{
u32 nodes;
u32 nodeid;
// disable multi_core
if (read_option(multi_core, 0) != 0) {
printk(BIOS_DEBUG, "Skip additional core init\n");
return;
}
nodes = get_nodes();
for (nodeid = 0; nodeid < nodes; nodeid++) {
u32 cores = get_core_num_in_bsp(nodeid);
printk(BIOS_DEBUG, "init node: %02x cores: %02x pass 1\n", nodeid, cores);
if (cores > 0) {
real_start_other_core(nodeid, cores);
#ifdef FAM10_AP_NODE_SEQUENTIAL_START
printk(BIOS_DEBUG, "waiting for core start on node %d...\n", nodeid);
for_each_ap(bsp_apicid, 2, nodeid, wait_ap_started, (void *)0);
printk(BIOS_DEBUG, "...started\n");
#endif
}
}
}
static void AMD_Errata281(u8 node, uint64_t revision, u32 platform)
{
/* Workaround for Transaction Scheduling Conflict in
@@ -843,6 +879,10 @@ static void AMD_SetHtPhyRegister(u8 node, u8 link, u8 entry)
phyBase = ((u32) link << 3) | 0x180;
/* Determine if link is connected and abort if not */
if (!(pci_read_config32(NODE_PCI(node, 0), 0x98 + (link * 0x20)) & 0x1))
return;
/* Get the portal control register's initial value
* and update it to access the desired phy register
*/
@@ -1005,10 +1045,11 @@ static void cpuSetAMDPCI(u8 node)
* Hypertransport initialization has taken place. Also note
* that it is run for the first core on each node
*/
u8 i, j;
uint8_t i;
uint8_t j;
u32 platform;
u32 val;
u8 offset;
uint8_t offset;
uint32_t dword;
uint64_t revision;
@@ -1035,6 +1076,17 @@ static void cpuSetAMDPCI(u8 node)
}
}
#ifdef DEBUG_HT_SETUP
/* Dump link settings */
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
printk(BIOS_DEBUG, "Node %d link %d: type register: %08x control register: %08x extended control sublink 0: %08x 1: %08x\n", i, j,
pci_read_config32(NODE_PCI(i, 0), 0x98 + (j * 0x20)), pci_read_config32(NODE_PCI(i, 0), 0x84 + (j * 0x20)),
pci_read_config32(NODE_PCI(i, 0), 0x170 + (j * 0x4)), pci_read_config32(NODE_PCI(i, 0), 0x180 + (j * 0x4)));
}
}
#endif
for (i = 0; i < ARRAY_SIZE(fam10_htphy_default); i++) {
if ((fam10_htphy_default[i].revision & revision) &&
(fam10_htphy_default[i].platform & platform)) {

View File

@@ -27,21 +27,6 @@
uint32_t get_boot_apic_id(uint8_t node, uint32_t core);
uint32_t wait_cpu_state(uint32_t apicid, uint32_t state, uint32_t state2);
static inline uint8_t is_fam15h(void)
{
uint8_t fam15h = 0;
uint32_t family;
family = cpuid_eax(0x80000001);
family = ((family & 0xf00000) >> 16) | ((family & 0xf00) >> 8);
if (family >= 0x6f)
/* Family 15h or later */
fam15h = 1;
return fam15h;
}
static u32 get_core_num_in_bsp(u32 nodeid)
{
u32 dword;
@@ -137,6 +122,7 @@ static void real_start_other_core(uint32_t nodeid, uint32_t cores)
}
}
#if (!IS_ENABLED(CONFIG_CPU_AMD_MODEL_10XXX))
//it is running on core0 of node0
static void start_other_cores(void)
{
@@ -153,9 +139,10 @@ static void start_other_cores(void)
for (nodeid = 0; nodeid < nodes; nodeid++) {
u32 cores = get_core_num_in_bsp(nodeid);
printk(BIOS_DEBUG, "init node: %02x cores: %02x pass 1 \n", nodeid, cores);
printk(BIOS_DEBUG, "init node: %02x cores: %02x pass 1\n", nodeid, cores);
if (cores > 0) {
real_start_other_core(nodeid, cores);
}
}
}
#endif

View File

@@ -104,7 +104,6 @@ struct node_core_id get_node_core_id(u32 nb_cfg_54)
id.nodeid = apicid & 0x7;
}
}
if (fam15h && dual_node) {
/* Coreboot expects each separate processor die to be on a different nodeid.
* Since the code above returns nodeid 0 even on internal node 1 some fixup is needed...