northbridge/amd/pi/00730F01: enable PARALLEL_MP
Disable LEGACY_SMP_INIT to enable PARALLEL_MP. Also remove a large amount of APIC code that is now unnecessary. TEST=Boot on PC Engines apu3 Boot time reduced from 1.707 seconds to 1.620 seconds average across 5 coldboots. Inspired by CB:59693 Change-Id: Ib49e7d3f5956ac7831664d50db5f233b70aa54db Signed-off-by: Michał Kopeć <michal.kopec@3mdeb.com> Reviewed-on: https://review.coreboot.org/c/coreboot/+/59808 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Angel Pons <th3fanbus@gmail.com> Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
This commit is contained in:
		
				
					committed by
					
						
						Patrick Georgi
					
				
			
			
				
	
			
			
			
						parent
						
							5b334b88a6
						
					
				
				
					commit
					dc35d2a693
				
			@@ -23,21 +23,6 @@ static void model_16_init(struct device *dev)
 | 
			
		||||
	msr_t msr;
 | 
			
		||||
	u32 siblings;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * All cores are initialized sequentially, so the solution for APs will be created
 | 
			
		||||
	 * before they start.
 | 
			
		||||
	 */
 | 
			
		||||
	x86_setup_mtrrs_with_detect();
 | 
			
		||||
	/*
 | 
			
		||||
	 * Enable ROM caching on BSP we just lost when creating MTRR solution, for faster
 | 
			
		||||
	 * execution of e.g. AmdInitLate
 | 
			
		||||
	 */
 | 
			
		||||
	if (boot_cpu()) {
 | 
			
		||||
		mtrr_use_temp_range(OPTIMAL_CACHE_ROM_BASE, OPTIMAL_CACHE_ROM_SIZE,
 | 
			
		||||
				    MTRR_TYPE_WRPROT);
 | 
			
		||||
	}
 | 
			
		||||
	x86_mtrr_check();
 | 
			
		||||
 | 
			
		||||
	/* zero the machine check error status registers */
 | 
			
		||||
	mca_clear_status();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,6 @@
 | 
			
		||||
 | 
			
		||||
config NORTHBRIDGE_AMD_PI_00730F01
 | 
			
		||||
	bool
 | 
			
		||||
	select LEGACY_SMP_INIT
 | 
			
		||||
 | 
			
		||||
if NORTHBRIDGE_AMD_PI_00730F01
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <lib.h>
 | 
			
		||||
#include <cpu/cpu.h>
 | 
			
		||||
#include <cpu/x86/mp.h>
 | 
			
		||||
#include <Porting.h>
 | 
			
		||||
#include <AGESA.h>
 | 
			
		||||
#include <Topology.h>
 | 
			
		||||
@@ -864,22 +864,7 @@ static void sysconf_init(struct device *dev) // first node
 | 
			
		||||
 | 
			
		||||
static void cpu_bus_scan(struct device *dev)
 | 
			
		||||
{
 | 
			
		||||
	struct bus *cpu_bus;
 | 
			
		||||
	struct device *dev_mc;
 | 
			
		||||
	int i,j;
 | 
			
		||||
	int coreid_bits;
 | 
			
		||||
	int core_max = 0;
 | 
			
		||||
	unsigned int ApicIdCoreIdSize;
 | 
			
		||||
	unsigned int core_nums;
 | 
			
		||||
	int siblings = 0;
 | 
			
		||||
	unsigned int family;
 | 
			
		||||
	u32 modules = 0;
 | 
			
		||||
	int ioapic_count = 0;
 | 
			
		||||
 | 
			
		||||
	/* For binaryPI there is no multiprocessor configuration, the number of
 | 
			
		||||
	 * modules will always be 1. */
 | 
			
		||||
	modules = 1;
 | 
			
		||||
	ioapic_count = CONFIG_NUM_OF_IOAPICS;
 | 
			
		||||
 | 
			
		||||
	dev_mc = pcidev_on_root(DEV_CDB, 0);
 | 
			
		||||
	if (!dev_mc) {
 | 
			
		||||
@@ -887,109 +872,40 @@ static void cpu_bus_scan(struct device *dev)
 | 
			
		||||
		die("");
 | 
			
		||||
	}
 | 
			
		||||
	sysconf_init(dev_mc);
 | 
			
		||||
 | 
			
		||||
	/* Get Max Number of cores(MNC) */
 | 
			
		||||
	coreid_bits = (cpuid_ecx(0x80000008) & 0x0000F000) >> 12;
 | 
			
		||||
	core_max = 1 << (coreid_bits & 0x000F); //mnc
 | 
			
		||||
 | 
			
		||||
	ApicIdCoreIdSize = ((cpuid_ecx(0x80000008)>>12) & 0xF);
 | 
			
		||||
	if (ApicIdCoreIdSize) {
 | 
			
		||||
		core_nums = (1 << ApicIdCoreIdSize) - 1;
 | 
			
		||||
	} else {
 | 
			
		||||
		core_nums = 3; //quad core
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Find which cpus are present */
 | 
			
		||||
	cpu_bus = dev->link_list;
 | 
			
		||||
	for (i = 0; i < node_nums; i++) {
 | 
			
		||||
		struct device *cdb_dev;
 | 
			
		||||
		unsigned int devn;
 | 
			
		||||
		struct bus *pbus;
 | 
			
		||||
 | 
			
		||||
		devn = DEV_CDB + i;
 | 
			
		||||
		pbus = dev_mc->bus;
 | 
			
		||||
 | 
			
		||||
		/* Find the cpu's pci device */
 | 
			
		||||
		cdb_dev = pcidev_on_root(devn, 0);
 | 
			
		||||
		if (!cdb_dev) {
 | 
			
		||||
			/* If I am probing things in a weird order
 | 
			
		||||
			 * ensure all of the cpu's pci devices are found.
 | 
			
		||||
			 */
 | 
			
		||||
			int fn;
 | 
			
		||||
			for (fn = 0; fn <= 5; fn++) { //FBDIMM?
 | 
			
		||||
				cdb_dev = pci_probe_dev(NULL, pbus,
 | 
			
		||||
							PCI_DEVFN(devn, fn));
 | 
			
		||||
			}
 | 
			
		||||
			cdb_dev = pcidev_on_root(devn, 0);
 | 
			
		||||
		} else {
 | 
			
		||||
			/* Ok, We need to set the links for that device.
 | 
			
		||||
			 * otherwise the device under it will not be scanned
 | 
			
		||||
			 */
 | 
			
		||||
 | 
			
		||||
			add_more_links(cdb_dev, 4);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		family = cpuid_eax(1);
 | 
			
		||||
		family = (family >> 20) & 0xFF;
 | 
			
		||||
		if (family == 1) { //f10
 | 
			
		||||
			u32 dword;
 | 
			
		||||
			cdb_dev = pcidev_on_root(devn, 3);
 | 
			
		||||
			dword = pci_read_config32(cdb_dev, 0xe8);
 | 
			
		||||
			siblings = ((dword & BIT15) >> 13) | ((dword & (BIT13 | BIT12)) >> 12);
 | 
			
		||||
		} else if (family == 7) {//f16
 | 
			
		||||
			cdb_dev = pcidev_on_root(devn, 5);
 | 
			
		||||
			if (cdb_dev && cdb_dev->enabled) {
 | 
			
		||||
				siblings = pci_read_config32(cdb_dev, 0x84);
 | 
			
		||||
				siblings &= 0xFF;
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			siblings = 0; //default one core
 | 
			
		||||
		}
 | 
			
		||||
		int enable_node = cdb_dev && cdb_dev->enabled;
 | 
			
		||||
		printk(BIOS_SPEW, "%s family%xh, core_max = 0x%x, core_nums = 0x%x, siblings = 0x%x\n",
 | 
			
		||||
				dev_path(cdb_dev), 0x0f + family, core_max, core_nums, siblings);
 | 
			
		||||
 | 
			
		||||
		for (j = 0; j <= siblings; j++) {
 | 
			
		||||
			u32 lapicid_start = 0;
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * APIC ID calculation is tightly coupled with AGESA v5 code.
 | 
			
		||||
			 * This calculation MUST match the assignment calculation done
 | 
			
		||||
			 * in LocalApicInitializationAtEarly() function.
 | 
			
		||||
			 * And reference GetLocalApicIdForCore()
 | 
			
		||||
			 *
 | 
			
		||||
			 * Apply APIC enumeration rules
 | 
			
		||||
			 * For systems with >= 16 APICs, put the IO-APICs at 0..n and
 | 
			
		||||
			 * put the local-APICs at m..z
 | 
			
		||||
			 *
 | 
			
		||||
			 * This is needed because many IO-APIC devices only have 4 bits
 | 
			
		||||
			 * for their APIC id and therefore must reside at 0..15
 | 
			
		||||
			 */
 | 
			
		||||
			if ((node_nums * core_max) + ioapic_count >= 0x10) {
 | 
			
		||||
				lapicid_start = (ioapic_count - 1) / core_max;
 | 
			
		||||
				lapicid_start = (lapicid_start + 1) * core_max;
 | 
			
		||||
				printk(BIOS_SPEW, "lpaicid_start = 0x%x ", lapicid_start);
 | 
			
		||||
			}
 | 
			
		||||
			u32 apic_id = (lapicid_start * (i/modules + 1)) + ((i % modules) ? (j + (siblings + 1)) : j);
 | 
			
		||||
			printk(BIOS_SPEW, "node 0x%x core 0x%x apicid = 0x%x\n",
 | 
			
		||||
					i, j, apic_id);
 | 
			
		||||
 | 
			
		||||
			struct device *cpu = add_cpu_device(cpu_bus, apic_id, enable_node);
 | 
			
		||||
			if (cpu)
 | 
			
		||||
				amd_cpu_topology(cpu, i, j);
 | 
			
		||||
		} //j
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void cpu_bus_init(struct device *dev)
 | 
			
		||||
static void pre_mp_init(void)
 | 
			
		||||
{
 | 
			
		||||
	initialize_cpus(dev->link_list);
 | 
			
		||||
	x86_setup_mtrrs_with_detect();
 | 
			
		||||
	x86_mtrr_check();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int get_cpu_count(void)
 | 
			
		||||
{
 | 
			
		||||
	uint8_t siblings = cpuid_ecx(0x80000008) & 0xff;
 | 
			
		||||
 | 
			
		||||
	return siblings + 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct mp_ops mp_ops = {
 | 
			
		||||
	.pre_mp_init = pre_mp_init,
 | 
			
		||||
	.get_cpu_count = get_cpu_count,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void mp_init_cpus(struct bus *cpu_bus)
 | 
			
		||||
{
 | 
			
		||||
	/* TODO: Handle mp_init_with_smm failure? */
 | 
			
		||||
	mp_init_with_smm(cpu_bus, &mp_ops);
 | 
			
		||||
 | 
			
		||||
	/* The flash is now no longer cacheable. Reset to WP for performance. */
 | 
			
		||||
	mtrr_use_temp_range(OPTIMAL_CACHE_ROM_BASE, OPTIMAL_CACHE_ROM_SIZE,
 | 
			
		||||
			    MTRR_TYPE_WRPROT);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct device_operations cpu_bus_ops = {
 | 
			
		||||
	.read_resources	  = noop_read_resources,
 | 
			
		||||
	.set_resources	  = noop_set_resources,
 | 
			
		||||
	.init		  = cpu_bus_init,
 | 
			
		||||
	.init		  = mp_cpu_bus_init,
 | 
			
		||||
	.scan_bus	  = cpu_bus_scan,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user