- Add new cvs code to cvs

git-svn-id: svn://svn.coreboot.org/coreboot/trunk@1657 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
Eric Biederman 2004-10-14 19:29:29 +00:00
parent 98e619b1ce
commit fcd5ace00b
79 changed files with 8384 additions and 0 deletions

View File

@@ -0,0 +1,124 @@
/*
This software and ancillary information (herein called "SOFTWARE")
called LinuxBIOS is made available under the terms described
here. The SOFTWARE has been approved for release with associated
LA-CC Number 00-34. Unless otherwise indicated, this SOFTWARE has
been authored by an employee or employees of the University of
California, operator of the Los Alamos National Laboratory under
Contract No. W-7405-ENG-36 with the U.S. Department of Energy. The
U.S. Government has rights to use, reproduce, and distribute this
SOFTWARE. The public may copy, distribute, prepare derivative works
and publicly display this SOFTWARE without charge, provided that this
Notice and any statement of authorship are reproduced on all copies.
Neither the Government nor the University makes any warranty, express
or implied, or assumes any liability or responsibility for the use of
this SOFTWARE. If SOFTWARE is modified to produce derivative works,
such modified SOFTWARE should be clearly marked, so as not to confuse
it with the version available from LANL.
*/
/* Copyright 2000, Ron Minnich, Advanced Computing Lab, LANL
* rminnich@lanl.gov
*/
/** Start code to put an i386 or later processor into 32-bit
* protected mode.
*/
/* .section ".rom.text" */
#include <arch/rom_segs.h>
.code16
.globl _start
.type _start, @function
_start:
cli
/* Save the BIST result */
movl %eax, %ebp
/* Thanks to kmliu@sis.tw.com for this TLB fix ... */
/*
* IMMEDIATELY invalidate the translation lookaside buffer before executing
* any further code. Even though paging is disabled we could still get
* false address translations due to the TLB if we didn't invalidate it.
*/
xorl %eax, %eax
movl %eax, %cr3 /* Invalidate TLB*/
/* Invalidating the cache here seems to be a bad idea on
* modern processors. Don't.
* If we are hyperthreaded or have multiple cores it is bad
* for SMP startup. On Opterons it causes a 5 second delay.
* Invalidating the cache was pure paranoia in any event.
* If your cpu needs it you can write a cpu dependent version of
* entry16.inc.
*/
/* Note: gas handles memory addresses in 16 bit code very poorly.
* In particular it doesn't appear to have a directive allowing you
* to associate a section or even an absolute offset with a segment register.
*
* This means that anything except cs:ip relative offsets is
* a real pain in 16 bit mode, and explains why it is almost
* impossible to get gas to do lgdt correctly.
*
* One way to work around this is to have the linker do the
* math instead of the assembler. This solves the very
* practical problem of being able to write code that can
* be relocated.
*
* An lgdt call before we have memory enabled cannot be
* position independent, as we cannot execute a call
* instruction to get our current instruction pointer.
* So while this code is relocatable it isn't arbitrarily
* relocatable.
*
* The criteria for relocation have been relaxed to their
* utmost, so that we can use the same code for both
* our initial entry point and startup of the second cpu.
* The code assumes when executing at _start that:
* (((cs & 0xfff) == 0) and (ip == _start & 0xffff))
* or
* ((cs == anything) and (ip == 0)).
*
* The restrictions in reset16.inc mean that _start initially
* must be loaded at or above 0xffff0000 or below 0x100000.
*
* The linker script computes gdtptr16_offset by simply returning
* the low 16 bits. This means that the initial segment used
* when _start is called must be 64K aligned. This should not
* restrict the address as the ip address can be anything.
*/
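/* For illustration (hypothetical but typical values): coming out of
* reset the %cs selector is 0xf000 while its hidden base is 0xffff0000,
* so the shift below leaves %ax = 0x0000 and %bx = gdtptr16 & 0xffff;
* lgdt %cs:(%bx) then addresses 0xffff0000 + %bx, which is gdtptr16
* as long as gdtptr16 lies in that top 64KB of the address space.
*/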
movw %cs, %ax
shlw $4, %ax
movw $gdtptr16_offset, %bx
subw %ax, %bx
data32 lgdt %cs:(%bx)
movl %cr0, %eax
andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
orl $0x60000001, %eax /* CD, NW, PE = 1 */
movl %eax, %cr0
/* Restore BIST to %eax */
movl %ebp, %eax
/* Now that we are in protected mode jump to a 32 bit code segment. */
data32 ljmp $ROM_CODE_SEG, $__protected_start
/** The gdt has a 4 GB code segment at 0x10, and a 4 GB data segment
* at 0x18; these are Linux-compatible.
*/
.align 4
.globl gdtptr16
gdtptr16:
.word gdt_end - gdt -1 /* compute the table limit */
.long gdt /* we know the offset */
.globl _estart
_estart:
.code32

View File

@@ -0,0 +1,2 @@
gdtptr16_offset = gdtptr16 & 0xffff;
_start_offset = _start & 0xffff;

View File

@@ -0,0 +1,21 @@
.section ".reset"
.code16
.globl reset_vector
reset_vector:
#if _ROMBASE >= 0xffff0000
/* jmp _start */
.byte 0xe9
.int _start - ( . + 2 )
/* Note: The above jump is hand coded to work around bugs in binutils.
* 5 bytes are used for a 3 byte instruction. This works because x86
* is little endian and allows us to use supported 32 bit relocations
* instead of the weird 16 bit relocations that binutils does not
* handle consistently between versions because they are used so rarely.
* The two extra displacement bytes are never executed because the jump
* is unconditional.
*/
#else
# error _ROMBASE is an unsupported value
#endif
. = 0x8;
.code32
jmp protected_start
.previous

View File

@@ -0,0 +1,14 @@
/*
* _ROMTOP : The top of the ROM, where we
* need to put the reset vector.
*/
SECTIONS {
_ROMTOP = (_ROMBASE >= 0xffff0000)? 0xfffffff0 : 0xffff0;
. = _ROMTOP;
.reset . : {
*(.reset)
. = 15 ;
BYTE(0x00);
}
}

View File

@@ -0,0 +1,61 @@
/* For starting linuxBIOS in protected mode */
#include <arch/rom_segs.h>
/* .section ".rom.text" */
.code32
.align 4
.globl gdtptr
gdt:
gdtptr:
.word gdt_end - gdt -1 /* compute the table limit */
.long gdt /* we know the offset */
.word 0
/* flat code segment */
.word 0xffff, 0x0000
.byte 0x00, 0x9b, 0xcf, 0x00
/* flat data segment */
.word 0xffff, 0x0000
.byte 0x00, 0x93, 0xcf, 0x00
gdt_end:
/*
* When we come here we are in protected mode. We expand
* the stack and copy the data segment from ROM to
* memory.
*
* After that, we call the chipset bootstrap routine that
* does what is left of the chipset initialization.
*
* NOTE aligned to 4 so that we are sure that the prefetch
* cache will be reloaded.
*/
.align 4
.globl protected_start
protected_start:
lgdt %cs:gdtptr
ljmp $ROM_CODE_SEG, $__protected_start
__protected_start:
/* Save the BIST value */
movl %eax, %ebp
intel_chip_post_macro(0x10) /* post 10 */
movw $ROM_DATA_SEG, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
movw %ax, %fs
movw %ax, %gs
/* Restore the BIST value to %eax */
movl %ebp, %eax

View File

@@ -0,0 +1,14 @@
/*
_cache_ram_seg_base = DEFINED(CACHE_RAM_BASE)? CACHE_RAM_BASE - _rodata : 0;
_cache_ram_seg_base_low = (_cache_ram_seg_base) & 0xffff;
_cache_ram_seg_base_middle = (_cache_ram_seg_base >> 16) & 0xff;
_cache_ram_seg_base_high = (_cache_ram_seg_base >> 24) & 0xff;
_rom_code_seg_base = _ltext - _text;
_rom_code_seg_base_low = (_rom_code_seg_base) & 0xffff;
_rom_code_seg_base_middle = (_rom_code_seg_base >> 16) & 0xff;
_rom_code_seg_base_high = (_rom_code_seg_base >> 24) & 0xff;
*/

View File

@@ -0,0 +1,10 @@
.section ".reset"
.code16
.globl reset_vector
reset_vector:
. = 0x8;
.code32
jmp protected_start
.previous

View File

@@ -0,0 +1,14 @@
/*
* _ROMTOP : The top of the ROM, where we
* need to put the reset vector.
*/
SECTIONS {
_ROMTOP = _ROMBASE + ROM_IMAGE_SIZE - 0x10;
. = _ROMTOP;
.reset (.): {
*(.reset)
. = 15 ;
BYTE(0x00);
}
}

1
src/cpu/x86/cache/Config.lb vendored Normal file
View File

@@ -0,0 +1 @@
object cache.o

10
src/cpu/x86/cache/cache.c vendored Normal file
View File

@@ -0,0 +1,10 @@
#include <console/console.h>
#include <cpu/x86/cache.h>
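/* enable_cache() comes from <cpu/x86/cache.h>; conceptually (a sketch,
* not the exact header code) it clears the cache-disable bits in %cr0:
*
*   movl %cr0, %eax
*   andl $0x9fffffff, %eax   # clear CD (bit 30) and NW (bit 29)
*   movl %eax, %cr0
*/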
void x86_enable_cache(void)
{
post_code(0x60);
printk_info("Enabling cache\n");
enable_cache();
}

View File

@@ -0,0 +1,9 @@
/* preserve BIST in %eax */
movl %eax, %ebp
/* Disable floating point emulation */
movl %cr0, %eax
andl $~(1<<2), %eax
movl %eax, %cr0
movl %ebp, %eax

View File

@@ -0,0 +1,3 @@
object lapic.o
object lapic_cpu_init.o
object secondary.S

View File

@@ -0,0 +1,10 @@
#include <cpu/x86/msr.h>
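/* MSR 0x1b is IA32_APIC_BASE; bit 8 (BSP) is set only on the
* bootstrap processor, so boot_cpu() returns 1 on the BSP and
* 0 on the application processors.
*/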
int boot_cpu(void)
{
int bsp;
msr_t msr;
msr = rdmsr(0x1b);
bsp = !!(msr.lo & (1 << 8));
return bsp;
}

72
src/cpu/x86/lapic/lapic.c Normal file
View File

@@ -0,0 +1,72 @@
#include <cpu/x86/lapic.h>
#include <console/console.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h>
void setup_lapic(void)
{
/* this is so interrupts work. This is very limited scope --
* linux will do better later, we hope ...
*/
/* this is the first way we learned to do it. It fails on real SMP
* stuff. So we have to do things differently ...
* see the Intel mp1.4 spec, page A-3
*/
#if NEED_LAPIC == 1
/* Only Pentium Pro and later have these MSRs */
msr_t msr;
printk_info("Setting up local apic...");
/* Enable the local apic */
msr = rdmsr(LAPIC_BASE_MSR);
msr.lo |= LAPIC_BASE_MSR_ENABLE;
msr.lo &= ~LAPIC_BASE_MSR_ADDR_MASK;
msr.lo |= LAPIC_DEFAULT_BASE;
wrmsr(LAPIC_BASE_MSR, msr);
/*
* Set Task Priority to 'accept all'.
*/
lapic_write_around(LAPIC_TASKPRI,
lapic_read_around(LAPIC_TASKPRI) & ~LAPIC_TPRI_MASK);
/* Put the local apic in virtual wire mode */
lapic_write_around(LAPIC_SPIV,
(lapic_read_around(LAPIC_SPIV) & ~(LAPIC_VECTOR_MASK))
| LAPIC_SPIV_ENABLE);
lapic_write_around(LAPIC_LVT0,
(lapic_read_around(LAPIC_LVT0) &
~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER |
LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY |
LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 |
LAPIC_DELIVERY_MODE_MASK))
| (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING |
LAPIC_DELIVERY_MODE_EXTINT)
);
lapic_write_around(LAPIC_LVT1,
(lapic_read_around(LAPIC_LVT1) &
~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER |
LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY |
LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 |
LAPIC_DELIVERY_MODE_MASK))
| (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING |
LAPIC_DELIVERY_MODE_NMI)
);
printk_debug(" apic_id: %d ", lapicid());
#else /* !NEED_LAPIC */
/* Only Pentium Pro and later have these MSRs */
msr_t msr;
printk_info("Disabling local apic...");
msr = rdmsr(LAPIC_BASE_MSR);
msr.lo &= ~LAPIC_BASE_MSR_ENABLE;
wrmsr(LAPIC_BASE_MSR, msr);
#endif /* !NEED_LAPIC */
printk_info("done.\n");
post_code(0x9b);
}

View File

@@ -0,0 +1,316 @@
#include <cpu/x86/lapic.h>
#include <delay.h>
#include <string.h>
#include <console/console.h>
#include <arch/hlt.h>
#include <device/device.h>
#include <device/path.h>
#include <smp/atomic.h>
#include <smp/spinlock.h>
#include <cpu/cpu.h>
#if CONFIG_SMP == 1
/* This is a lot more paranoid now, since Linux can NOT handle
* being told there is a CPU when none exists. So any errors
* will return 0, meaning no CPU.
*
* We actually handle that case by noting which cpus start up
* and not telling anyone about the ones that don't.
*/
static int lapic_start_cpu(unsigned long apicid)
{
int timeout;
unsigned long send_status, accept_status, start_eip;
int j, num_starts, maxlvt;
extern char _secondary_start[];
/*
* Starting actual IPI sequence...
*/
printk_spew("Asserting INIT.\n");
/*
* Turn INIT on target chip
*/
lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
/*
* Send IPI
*/
lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_INT_ASSERT
| LAPIC_DM_INIT);
printk_spew("Waiting for send to finish...\n");
timeout = 0;
do {
printk_spew("+");
udelay(100);
send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
} while (send_status && (timeout++ < 1000));
if (timeout >= 1000) {
printk_err("CPU %d: First apic write timed out. Disabling\n",
apicid);
// too bad.
printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR));
if (lapic_read(LAPIC_ESR)) {
printk_err("Try to reset ESR\n");
lapic_write_around(LAPIC_ESR, 0);
printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR));
}
return 0;
}
mdelay(10);
printk_spew("Deasserting INIT.\n");
/* Target chip */
lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
/* Send IPI */
lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_DM_INIT);
printk_spew("Waiting for send to finish...\n");
timeout = 0;
do {
printk_spew("+");
udelay(100);
send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
} while (send_status && (timeout++ < 1000));
if (timeout >= 1000) {
printk_err("CPU %d: Second apic write timed out. Disabling\n",
apicid);
// too bad.
return 0;
}
start_eip = (unsigned long)_secondary_start;
printk_spew("start_eip=0x%08lx\n", start_eip);
num_starts = 2;
/*
* Run STARTUP IPI loop.
*/
printk_spew("#startup loops: %d.\n", num_starts);
maxlvt = 4;
for (j = 1; j <= num_starts; j++) {
printk_spew("Sending STARTUP #%d to %u.\n", j, apicid);
lapic_read_around(LAPIC_SPIV);
lapic_write(LAPIC_ESR, 0);
lapic_read(LAPIC_ESR);
printk_spew("After apic_write.\n");
/*
* STARTUP IPI
*/
/* Target chip */
lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
/* Boot on the stack */
/* Kick the second */
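/* The low byte of the STARTUP IPI is the vector: the 4KB page number
* of the real-mode entry point, which is why _secondary_start is
* aligned to 4096 in secondary.S.
*/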
lapic_write_around(LAPIC_ICR, LAPIC_DM_STARTUP
| (start_eip >> 12));
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(300);
printk_spew("Startup point 1.\n");
printk_spew("Waiting for send to finish...\n");
timeout = 0;
do {
printk_spew("+");
udelay(100);
send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
} while (send_status && (timeout++ < 1000));
/*
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
/*
* Due to the Pentium erratum 3AP.
*/
if (maxlvt > 3) {
lapic_read_around(LAPIC_SPIV);
lapic_write(LAPIC_ESR, 0);
}
accept_status = (lapic_read(LAPIC_ESR) & 0xEF);
if (send_status || accept_status)
break;
}
printk_spew("After Startup.\n");
if (send_status)
printk_warning("APIC never delivered???\n");
if (accept_status)
printk_warning("APIC delivery error (%lx).\n", accept_status);
if (send_status || accept_status)
return 0;
return 1;
}
/* Number of cpus that are currently running in linuxbios */
static atomic_t active_cpus = ATOMIC_INIT(1);
/* start_cpu_lock covers last_cpu_index and secondary_stack.
* Only starting one cpu at a time lets me remove the logic
* for selecting the stack from assembly language.
*
* In addition, communicating by variables with the cpu I
* am starting allows me to verify it has started before
* start_cpu returns.
*/
static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED;
static unsigned last_cpu_index = 0;
volatile unsigned long secondary_stack;
int start_cpu(device_t cpu)
{
extern unsigned char _estack[];
struct cpu_info *info;
unsigned long stack_end;
unsigned long apicid;
unsigned long index;
unsigned long count;
int result;
spin_lock(&start_cpu_lock);
/* Get the cpu's apicid */
apicid = cpu->path.u.apic.apic_id;
/* Get an index for the new processor */
index = ++last_cpu_index;
/* Find end of the new processors stack */
stack_end = ((unsigned long)_estack) - (STACK_SIZE*index) - sizeof(struct cpu_info);
/* Record the index and which cpu structure we are using */
info = (struct cpu_info *)stack_end;
info->index = index;
info->cpu = cpu;
/* Advertise the new stack to start_cpu */
secondary_stack = stack_end;
/* Until the cpu starts up report the cpu is not enabled */
cpu->enabled = 0;
cpu->initialized = 0;
/* Start the cpu */
result = lapic_start_cpu(apicid);
if (result) {
result = 0;
/* Wait 1s or until the new cpu calls in */
for(count = 0; count < 100000 ; count++) {
if (secondary_stack == 0) {
result = 1;
break;
}
udelay(10);
}
}
secondary_stack = 0;
spin_unlock(&start_cpu_lock);
return result;
}
/* C entry point of secondary cpus */
void secondary_cpu_init(void)
{
atomic_inc(&active_cpus);
cpu_initialize();
atomic_dec(&active_cpus);
stop_this_cpu();
}
static void initialize_other_cpus(device_t root)
{
int old_active_count, active_count;
device_t cpu;
/* Loop through the cpus once getting them started */
for(cpu = root->link[1].children; cpu ; cpu = cpu->sibling) {
if (cpu->path.type != DEVICE_PATH_APIC) {
continue;
}
if (!cpu->enabled) {
continue;
}
if (cpu->initialized) {
continue;
}
if (!start_cpu(cpu)) {
/* Record the error in cpu? */
printk_err("CPU %u would not start!\n",
cpu->path.u.apic.apic_id);
}
}
/* Now loop until the other cpus have finished initializing */
old_active_count = 1;
active_count = atomic_read(&active_cpus);
while(active_count > 1) {
if (active_count != old_active_count) {
printk_info("Waiting for %d CPUS to stop\n", active_count);
old_active_count = active_count;
}
udelay(10);
active_count = atomic_read(&active_cpus);
}
for(cpu = root->link[1].children; cpu; cpu = cpu->sibling) {
if (cpu->path.type != DEVICE_PATH_APIC) {
continue;
}
if (!cpu->initialized) {
printk_err("CPU %u did not initialize!\n",
cpu->path.u.apic.apic_id);
#warning "FIXME do I need a mainboard_cpu_fixup function?"
}
}
printk_debug("All AP CPUs stopped\n");
}
#else /* CONFIG_SMP */
#define initialize_other_cpus(root) do {} while(0)
#endif /* CONFIG_SMP */
void initialize_cpus(device_t root)
{
struct device_path cpu_path;
struct cpu_info *info;
/* Find the info struct for this cpu */
info = cpu_info();
#if NEED_LAPIC == 1
/* Ensure the local apic is enabled */
enable_lapic();
/* Get the device path of the boot cpu */
cpu_path.type = DEVICE_PATH_APIC;
cpu_path.u.apic.apic_id = lapicid();
#else
/* Get the device path of the boot cpu */
cpu_path.type = DEVICE_PATH_BOOT_CPU;
#endif
/* Find the device structure for the boot cpu */
info->cpu = alloc_find_dev(&root->link[1], &cpu_path);
/* Initialize the bootstrap processor */
cpu_initialize();
/* Now initialize the rest of the cpus */
initialize_other_cpus(root);
}

View File

@@ -0,0 +1,53 @@
#include <arch/asm.h>
#include <arch/intel.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/lapic_def.h>
.text
.globl _secondary_start
.balign 4096
_secondary_start:
.code16
cli
xorl %eax, %eax
movl %eax, %cr3 /* Invalidate TLB*/
/* On hyper threaded cpus, invalidating the cache here is
* very very bad. Don't.
*/
/* setup the data segment */
movw %cs, %ax
movw %ax, %ds
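/* The SIPI vector placed us at a 4KB-aligned CS base equal to
* _secondary_start, and %ds now shares that base, so the %ds-relative
* offset of gdtaddr is simply gdtaddr - _secondary_start.
*/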
data32 lgdt gdtaddr - _secondary_start
movl %cr0, %eax
andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
orl $0x60000001, %eax /* CD, NW, PE = 1 */
movl %eax, %cr0
ljmpl $0x10, $1f
1:
.code32
movw $0x18, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
movw %ax, %fs
movw %ax, %gs
/* Set the stack pointer, and flag that we are done */
xorl %eax, %eax
movl secondary_stack, %esp
movl %eax, secondary_stack
call secondary_cpu_init
1: hlt
jmp 1b
gdtaddr:
.word gdt_limit /* the table limit */
.long gdt /* we know the offset */
.code32

View File

@@ -0,0 +1,2 @@
/* Clear out an mmx state */
emms

View File

@@ -0,0 +1,6 @@
/* BIST in %eax */
/*
* Enabling mmx registers is a noop
*/

View File

@@ -0,0 +1 @@
object mtrr.o

View File

@@ -0,0 +1,123 @@
#ifndef EARLYMTRR_C
#define EARLYMTRR_C
#include <cpu/x86/cache.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/msr.h>
/* Validate XIP_ROM_SIZE and XIP_ROM_BASE */
#if defined(XIP_ROM_SIZE) && !defined(XIP_ROM_BASE)
#error "XIP_ROM_SIZE without XIP_ROM_BASE"
#endif
#if defined(XIP_ROM_BASE) && !defined(XIP_ROM_SIZE)
#error "XIP_ROM_BASE without XIP_ROM_SIZE"
#endif
#if !defined(CONFIG_LB_MEM_TOPK)
#error "CONFIG_LB_MEM_TOPK not defined"
#endif
#if defined(XIP_ROM_SIZE) && ((XIP_ROM_SIZE & (XIP_ROM_SIZE -1)) != 0)
#error "XIP_ROM_SIZE is not a power of 2"
#endif
#if defined(XIP_ROM_SIZE) && ((XIP_ROM_BASE % XIP_ROM_SIZE) != 0)
#error "XIP_ROM_BASE is not a multiple of XIP_ROM_SIZE"
#endif
#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0
# error "CONFIG_LB_MEM_TOPK must be a power of 2"
#endif
static void disable_var_mtrr(unsigned reg)
{
/* The invalid bit is kept in the mask so we simply
* clear the relevant mask register to disable a
* range.
*/
msr_t zero;
zero.lo = zero.hi = 0;
wrmsr(MTRRphysMask_MSR(reg), zero);
}
static void set_var_mtrr(
unsigned reg, unsigned base, unsigned size, unsigned type)
{
/* Bits 32-35 of MTRRphysMask should be set to 1 */
msr_t basem, maskm;
basem.lo = base | type;
basem.hi = 0;
wrmsr(MTRRphysBase_MSR(reg), basem);
maskm.lo = ~(size - 1) | 0x800;
maskm.hi = 0x0f;
wrmsr(MTRRphysMask_MSR(reg), maskm);
}
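/* Worked example (hypothetical values): set_var_mtrr(0, 0x00000000,
* 0x100000, MTRR_TYPE_WRBACK) for a 1MB write-back range writes
* MTRRphysBase0 = 0x00000006 and MTRRphysMask0 = 0x0000000f:fff00800
* (mask = ~(size - 1) with valid bit 11 set and all high address
* bits set).
*/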
static void cache_lbmem(int type)
{
/* Enable caching for the first CONFIG_LB_MEM_TOPK KB using variable mtrr 0 */
disable_cache();
set_var_mtrr(0, 0x00000000, CONFIG_LB_MEM_TOPK << 10, type);
enable_cache();
}
/* The fixed and variable MTRRs power up with random values;
* clear them to MTRR_TYPE_UNCACHEABLE for safety.
*/
static void do_early_mtrr_init(const unsigned long *mtrr_msrs)
{
/* Precondition:
* The cache is not enabled in cr0 nor in MTRRdefType_MSR;
* entry32.inc ensures the cache is not enabled in cr0.
*/
msr_t msr;
const unsigned long *msr_addr;
unsigned long msr_nr;
print_spew("Clearing mtrr\r\n");
/* Initialize all of the relevant msrs to 0 */
msr.lo = 0;
msr.hi = 0;
for(msr_addr = mtrr_msrs; (msr_nr = *msr_addr); msr_addr++) {
wrmsr(msr_nr, msr);
}
#if defined(XIP_ROM_SIZE)
/* Enable caching so we can do execute in place (XIP)
* on the flash rom.
*/
set_var_mtrr(1, XIP_ROM_BASE, XIP_ROM_SIZE, MTRR_TYPE_WRBACK);
#endif
/* Set the default memory type and enable variable MTRRs
*/
/* Enable Variable MTRRs */
msr.hi = 0x00000000;
msr.lo = 0x00000800;
wrmsr(MTRRdefType_MSR, msr);
}
static void early_mtrr_init(void)
{
static const unsigned long mtrr_msrs[] = {
/* fixed mtrr */
0x250, 0x258, 0x259,
0x268, 0x269, 0x26A,
0x26B, 0x26C, 0x26D,
0x26E, 0x26F,
/* var mtrr */
0x200, 0x201, 0x202, 0x203,
0x204, 0x205, 0x206, 0x207,
0x208, 0x209, 0x20A, 0x20B,
0x20C, 0x20D, 0x20E, 0x20F,
/* NULL end of table */
0
};
disable_cache();
do_early_mtrr_init(mtrr_msrs);
enable_cache();
}
#endif /* EARLYMTRR_C */

378
src/cpu/x86/mtrr/mtrr.c Normal file
View File

@@ -0,0 +1,378 @@
/*
* intel_mtrr.c: setting MTRR to decent values for cache initialization on P6
*
* Derived from intel_set_mtrr in intel_subr.c and mtrr.c in linux kernel
*
* Copyright 2000 Silicon Integrated System Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*
* Reference: Intel Architecture Software Developer's Manual, Volume 3: System Programming
*/
#include <console/console.h>
#include <device/device.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/cache.h>
#define arraysize(x) (sizeof(x)/sizeof((x)[0]))
#warning "FIXME I do not properly handle address more than 36 physical address bits"
#ifdef k8
# define ADDRESS_BITS 40
#else
# define ADDRESS_BITS 36
#endif
#define ADDRESS_BITS_HIGH (ADDRESS_BITS - 32)
#define ADDRESS_MASK_HIGH ((1u << ADDRESS_BITS_HIGH) - 1)
static unsigned int mtrr_msr[] = {
MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR,
MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, MTRRfix4K_D8000_MSR,
MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR,
};
static void enable_fixed_mtrr(void)
{
msr_t msr;
msr = rdmsr(MTRRdefType_MSR);
msr.lo |= 0xc00;
wrmsr(MTRRdefType_MSR, msr);
}
static void enable_var_mtrr(void)
{
msr_t msr;
msr = rdmsr(MTRRdefType_MSR);
msr.lo |= 0x800;
wrmsr(MTRRdefType_MSR, msr);
}
/* setting variable mtrr, comes from linux kernel source */
static void set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type)
{
msr_t base, mask;
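/* basek and sizek are in KB, so the physical base is basek << 10;
* the low 32 bits of that value go in base.lo and the remaining
* high bits (basek >> 22) go in base.hi.
*/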
base.hi = basek >> 22;
base.lo = basek << 10;
//printk_debug("ADDRESS_MASK_HIGH=%#x\n", ADDRESS_MASK_HIGH);
if (sizek < 4*1024*1024) {
mask.hi = ADDRESS_MASK_HIGH;
mask.lo = ~((sizek << 10) -1);
}
else {
mask.hi = ADDRESS_MASK_HIGH & (~((sizek >> 22) -1));
mask.lo = 0;
}
if (reg >= 8)
return;
// it is recommended that we disable and enable cache when we
// do this.
disable_cache();
if (sizek == 0) {
msr_t zero;
zero.lo = zero.hi = 0;
/* The invalid bit is kept in the mask, so we simply clear the
relevant mask register to disable a range. */
wrmsr (MTRRphysMask_MSR(reg), zero);
} else {
/* Bit 32-35 of MTRRphysMask should be set to 1 */
base.lo |= type;
mask.lo |= 0x800;
wrmsr (MTRRphysBase_MSR(reg), base);
wrmsr (MTRRphysMask_MSR(reg), mask);
}
enable_cache();
}
/* fms: find most significant bit set, stolen from Linux Kernel Source. */
static inline unsigned int fms(unsigned int x)
{
int r;
__asm__("bsrl %1,%0\n\t"
"jnz 1f\n\t"
"movl $0,%0\n"
"1:" : "=r" (r) : "g" (x));
return r;
}
/* fls: find least significant bit set */
static inline unsigned int fls(unsigned int x)
{
int r;
__asm__("bsfl %1,%0\n\t"
"jnz 1f\n\t"
"movl $32,%0\n"
"1:" : "=r" (r) : "g" (x));
return r;
}
/* Setting up variable and fixed mtrr
*
* From Intel Vol. III Section 9.12.4, the Range Size and Base Alignment have some requirements:
* 1. The range size must be 2^N bytes for N >= 12 (i.e. 4KB minimum).
* 2. The base address must be 2^N aligned, where the N here is equal to the N in the previous
* requirement. So an 8K range must be 8K aligned, not 4K aligned.
*
* These requirements are met by "decomposing" the ramsize into Sum(Cn * 2^n, n = [0..N], Cn = [0, 1]).
* For Cm = 1, there is a WB range of 2^m size at base address Sum(Cm * 2^m, m = [N..n]).
* A 124MB (128MB - 4MB SMA) example:
* ramsize = 124MB == 64MB (at 0MB) + 32MB (at 64MB) + 16MB (at 96MB) + 8MB (at 112MB) + 4MB (at 120MB).
* But this wastes a lot of MTRR registers, so we use another more "aggressive" way with Uncacheable Regions.
*
* In the Uncacheable Region scheme, we try to cover the whole ramsize with one WB region if possible.
* If (and only if) this can not be done, we decompose the ramsize; the mathematical formula
* would be ramsize = Sum(Cn * 2^n, n = [0..N], Cn = [-1, 0, 1]). For Cn = -1, an Uncacheable Region is used.
* The same 124MB example:
* ramsize = 124MB == 128MB WB (at 0MB) + 4MB UC (at 124MB)
* or a 156MB (128MB + 32MB - 4MB SMA) example:
* ramsize = 156MB == 128MB WB (at 0MB) + 32MB WB (at 128MB) + 4MB UC (at 156MB)
*/
/* 2 MTRRs could be reserved for the operating system, but with the #if 0 below all 8 variable MTRRs go to the BIOS */
#if 0
#define BIOS_MTRRS 6
#define OS_MTRRS 2
#else
#define BIOS_MTRRS 8
#define OS_MTRRS 0
#endif
#define MTRRS (BIOS_MTRRS + OS_MTRRS)
static void set_fixed_mtrrs(unsigned int first, unsigned int last, unsigned char type)
{
unsigned int i;
unsigned int fixed_msr = NUM_FIXED_RANGES >> 3;
msr_t msr;
msr.lo = msr.hi = 0; /* Shut up gcc */
for(i = first; i < last; i++) {
/* When I switch to a new msr read it in */
if (fixed_msr != i >> 3) {
/* But first write out the old msr */
if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
disable_cache();
wrmsr(mtrr_msr[fixed_msr], msr);
enable_cache();
}
fixed_msr = i>>3;
msr = rdmsr(mtrr_msr[fixed_msr]);
}
if ((i & 7) < 4) {
msr.lo &= ~(0xff << ((i&3)*8));
msr.lo |= type << ((i&3)*8);
} else {
msr.hi &= ~(0xff << ((i&3)*8));
msr.hi |= type << ((i&3)*8);
}
}
/* Write out the final msr */
if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
disable_cache();
wrmsr(mtrr_msr[fixed_msr], msr);
enable_cache();
}
}
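/* The fixed MTRRs cover the first 1MB in three granularities:
* ranges 0-7 are 64KB each (0-512KB), ranges 8-23 are 16KB each
* (512KB-768KB), and ranges 24-87 are 4KB each (768KB-1MB).
* fixed_mtrr_index() maps an address in KB to the matching range index.
*/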
static unsigned fixed_mtrr_index(unsigned long addrk)
{
unsigned index;
index = (addrk - 0) >> 6;
if (index >= 8) {
index = ((addrk - 8*64) >> 4) + 8;
}
if (index >= 24) {
index = ((addrk - (8*64 + 16*16)) >> 2) + 24;
}
if (index > NUM_FIXED_RANGES) {
index = NUM_FIXED_RANGES;
}
return index;
}
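/* Worked example: for the 124MB case described earlier,
* range_to_mtrr(0, 0, 124*1024, 0) emits 64MB at 0MB, 32MB at 64MB,
* 16MB at 96MB, 8MB at 112MB and 4MB at 120MB, using five variable MTRRs.
*/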
static unsigned int range_to_mtrr(unsigned int reg,
unsigned long range_startk, unsigned long range_sizek,
unsigned long next_range_startk)
{
if (!range_sizek || (reg >= BIOS_MTRRS)) {
return reg;
}
while(range_sizek) {
unsigned long max_align, align;
unsigned long sizek;
/* Compute the maximum size I can make a range */
max_align = fls(range_startk);
align = fms(range_sizek);
if (align > max_align) {
align = max_align;
}
sizek = 1 << align;
printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type WB\n",
reg, range_startk >>10, sizek >> 10);
set_var_mtrr(reg++, range_startk, sizek, MTRR_TYPE_WRBACK);
range_startk += sizek;
range_sizek -= sizek;
if (reg >= BIOS_MTRRS)
break;
}
return reg;
}
static unsigned long resk(uint64_t value)
{
unsigned long resultk;
if (value < (1ULL << 42)) {
resultk = value >> 10;
}
else {
resultk = 0xffffffff;
}
return resultk;
}
void x86_setup_mtrrs(void)
{
/* Try the simple way of incrementally adding mtrrs
* together. If this doesn't work out we can get smart again
* and clear out the mtrrs.
*/
struct device *dev;
unsigned long range_startk, range_sizek;
unsigned int reg;
printk_debug("\n");
/* Initialize the fixed_mtrrs to uncached */
printk_debug("Setting fixed MTRRs(%d-%d) type: UC\n",
0, NUM_FIXED_RANGES);
set_fixed_mtrrs(0, NUM_FIXED_RANGES, MTRR_TYPE_UNCACHEABLE);
/* Now see which of the fixed mtrrs cover ram.
*/
for(dev = all_devices; dev; dev = dev->next) {
struct resource *res, *last;
last = &dev->resource[dev->resources];
for(res = &dev->resource[0]; res < last; res++) {
unsigned int start_mtrr;
unsigned int last_mtrr;
if (!(res->flags & IORESOURCE_MEM) ||
!(res->flags & IORESOURCE_CACHEABLE))
{
continue;
}
start_mtrr = fixed_mtrr_index(resk(res->base));
last_mtrr = fixed_mtrr_index(resk((res->base + res->size)));
if (start_mtrr >= NUM_FIXED_RANGES) {
break;
}
printk_debug("Setting fixed MTRRs(%d-%d) Type: WB\n",
start_mtrr, last_mtrr);
set_fixed_mtrrs(start_mtrr, last_mtrr, MTRR_TYPE_WRBACK);
}
}
printk_debug("DONE fixed MTRRs\n");
/* Cache as many memory areas as possible */
/* FIXME is there an algorithm for computing the optimal set of mtrrs?
* In some cases it is definitely possible to do better.
*/
range_startk = 0;
range_sizek = 0;
reg = 0;
for(dev = all_devices; dev; dev = dev->next) {
struct resource *res, *last;
last = &dev->resource[dev->resources];
for(res = &dev->resource[0]; res < last; res++) {
unsigned long basek, sizek;
if (!(res->flags & IORESOURCE_MEM) ||
!(res->flags & IORESOURCE_CACHEABLE)) {
continue;
}
basek = resk(res->base);
sizek = resk(res->size);
/* See if I can merge with the last range
* Either I am below 1M and the fixed mtrrs handle it, or
* the ranges touch.
*/
if ((basek <= 1024) || (range_startk + range_sizek == basek)) {
unsigned long endk = basek + sizek;
range_sizek = endk - range_startk;
continue;
}
/* Write the range mtrrs */
if (range_sizek != 0) {
reg = range_to_mtrr(reg, range_startk, range_sizek, basek);
range_startk = 0;
range_sizek = 0;
if (reg >= BIOS_MTRRS)
goto last_msr;
}
/* Allocate an msr */
range_startk = basek;
range_sizek = sizek;
}
}
last_msr:
/* Write the last range */
reg = range_to_mtrr(reg, range_startk, range_sizek, 0);
printk_debug("DONE variable MTRRs\n");
printk_debug("Clear out the extra MTRR's\n");
/* Clear out the extra MTRR's */
while(reg < MTRRS) {
set_var_mtrr(reg++, 0, 0, 0);
}
/* enable fixed MTRR */
printk_spew("call enable_fixed_mtrr()\n");
enable_fixed_mtrr();
printk_spew("call enable_var_mtrr()\n");
enable_var_mtrr();
printk_spew("Leave %s\n", __FUNCTION__);
post_code(0x6A);
}
int x86_mtrr_check(void)
{
/* Only Pentium Pro and later have MTRR */
msr_t msr;
printk_debug("\nMTRR check\n");
msr = rdmsr(0x2ff);
msr.lo >>= 10;
printk_debug("Fixed MTRRs : ");
if (msr.lo & 0x01)
printk_debug("Enabled\n");
else
printk_debug("Disabled\n");
printk_debug("Variable MTRRs: ");
if (msr.lo & 0x02)
printk_debug("Enabled\n");
else
printk_debug("Disabled\n");
printk_debug("\n");
post_code(0x93);
return ((int) msr.lo);
}

View File

@@ -0,0 +1 @@
object pgtbl.o

94
src/cpu/x86/pae/pgtbl.c Normal file
View File

@@ -0,0 +1,94 @@
#include <console/console.h>
#include <cpu/cpu.h>
#include <cpu/x86/pae.h>
#include <string.h>
static void paging_off(void)
{
__asm__ __volatile__ (
/* Disable paging */
"movl %%cr0, %%eax\n\t"
"andl $0x7FFFFFFF, %%eax\n\t"
"movl %%eax, %%cr0\n\t"
/* Disable pae */
"movl %%cr4, %%eax\n\t"
"andl $0xFFFFFFDF, %%eax\n\t"
:
:
: "eax"
);
}
static void paging_on(void *pdp)
{
__asm__ __volatile__(
/* Load the page table address */
"movl %0, %%cr3\n\t"
/* Enable pae */
"movl %%cr4, %%eax\n\t"
"orl $0x00000020, %%eax\n\t"
"movl %%eax, %%cr4\n\t"
/* Enable paging */
"movl %%cr0, %%eax\n\t"
"orl $0x80000000, %%eax\n\t"
"movl %%eax, %%cr0\n\t"
:
: "r" (pdp)
: "eax"
);
}
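/* map_2M_page() returns a virtual address through which the given 2MB
* physical page can be reached. Pages below 4GB (windows 0 and 1) are
* returned identity mapped with paging left off; higher pages are
* mapped into the 2GB-4GB virtual range with PAE 2MB pages, using one
* page table per CPU.
*/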
void *map_2M_page(unsigned long page)
{
struct pde {
uint32_t addr_lo;
uint32_t addr_hi;
} __attribute__ ((packed));
struct pg_table {
struct pde pd[2048];
struct pde pdp[512];
} __attribute__ ((packed));
static struct pg_table pgtbl[CONFIG_MAX_CPUS] __attribute__ ((aligned(4096)));
static unsigned long mapped_window[CONFIG_MAX_CPUS];
unsigned long index;
unsigned long window;
void *result;
int i;
index = cpu_index();
if ((index < 0) || (index >= CONFIG_MAX_CPUS)) {
return MAPPING_ERROR;
}
window = page >> 10;
if (window != mapped_window[index]) {
paging_off();
if (window > 1) {
struct pde *pd, *pdp;
/* Point the page directory pointers at the page directories */
memset(&pgtbl[index].pdp, 0, sizeof(pgtbl[index].pdp));
pd = pgtbl[index].pd;
pdp = pgtbl[index].pdp;
pdp[0].addr_lo = ((uint32_t)&pd[512*0])|1;
pdp[1].addr_lo = ((uint32_t)&pd[512*1])|1;
pdp[2].addr_lo = ((uint32_t)&pd[512*2])|1;
pdp[3].addr_lo = ((uint32_t)&pd[512*3])|1;
/* The first half of the page table is identity mapped */
for(i = 0; i < 1024; i++) {
pd[i].addr_lo = ((i & 0x3ff) << 21)| 0xE3;
pd[i].addr_hi = 0;
}
/* The second half of the page table holds the mapped page */
for(i = 1024; i < 2048; i++) {
pd[i].addr_lo = ((window & 1) << 31) | ((i & 0x3ff) << 21) | 0xE3;
pd[i].addr_hi = (window >> 1);
}
paging_on(pdp);
}
mapped_window[index] = window;
}
if (window == 0) {
result = (void *)(page << 21);
} else {
result = (void *)(0x80000000 | ((page & 0x3ff) << 21));
}
return result;
}

View File

@@ -0,0 +1,18 @@
/*
* Put the processor back into a reset state
* with respect to the xmm registers.
*/
xorps %xmm0, %xmm0
xorps %xmm1, %xmm1
xorps %xmm2, %xmm2
xorps %xmm3, %xmm3
xorps %xmm4, %xmm4
xorps %xmm5, %xmm5
xorps %xmm6, %xmm6
xorps %xmm7, %xmm7
/* Disable sse instructions */
movl %cr4, %eax
andl $~(3<<9), %eax
movl %eax, %cr4

View File

@@ -0,0 +1,14 @@
/* preserve BIST in %eax */
movl %eax, %ebp
/*
* Enable the use of the xmm registers
*/
/* Enable sse instructions */
movl %cr4, %eax
orl $(1<<9), %eax
movl %eax, %cr4
movl %ebp, %eax

View File

@@ -0,0 +1,5 @@
uses CONFIG_UDELAY_TSC
uses CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2
default CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2=0
if CONFIG_UDELAY_TSC object delay_tsc.o end

165
src/cpu/x86/tsc/delay_tsc.c Normal file
View File

@@ -0,0 +1,165 @@
#include <console/console.h>
#include <arch/io.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/tsc.h>
#include <smp/spinlock.h>
#include <delay.h>
static unsigned long clocks_per_usec;
#if (CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 == 1)
#define CLOCK_TICK_RATE 1193180U /* Underlying HZ */
/* ------ Calibrate the TSC -------
* Too much 64-bit arithmetic here to do this cleanly in C, and for
* accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
* output busy loop as low as possible. We avoid reading the CTC registers
* directly because of the awkward 8-bit access mechanism of the 82C54
* device.
*/
#define CALIBRATE_INTERVAL ((20*CLOCK_TICK_RATE)/1000) /* 20ms */
#define CALIBRATE_DIVISOR (20*1000) /* 20ms / 20000 == 1usec */
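/* For example, on a hypothetical 1 GHz CPU the 20ms gate accumulates
* roughly 20,000,000 TSC ticks, and 20,000,000 / CALIBRATE_DIVISOR
* (20,000) gives 1000 clocks per microsecond.
*/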
static unsigned long long calibrate_tsc(void)
{
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
/*
* Now let's take care of CTC channel 2
*
* Set the Gate high, program CTC channel 2 for mode 0,
* (interrupt on terminal count mode), binary count,
* load 5 * LATCH count, (LSB and MSB) to begin countdown.
*/
outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
outb(CALIBRATE_INTERVAL & 0xff, 0x42); /* LSB of count */
outb(CALIBRATE_INTERVAL >> 8, 0x42); /* MSB of count */
{
tsc_t start;
tsc_t end;
unsigned long count;
start = rdtsc();
count = 0;
do {
count++;
} while ((inb(0x61) & 0x20) == 0);
end = rdtsc();
/* Error: ECTCNEVERSET */
if (count <= 1)
goto bad_ctc;
/* 64-bit subtract - gcc just messes up with long longs */
__asm__("subl %2,%0\n\t"
"sbbl %3,%1"
:"=a" (end.lo), "=d" (end.hi)
:"g" (start.lo), "g" (start.hi),
"0" (end.lo), "1" (end.hi));
/* Error: ECPUTOOFAST */
if (end.hi)
goto bad_ctc;
/* Error: ECPUTOOSLOW */
if (end.lo <= CALIBRATE_DIVISOR)
goto bad_ctc;
return (end.lo + CALIBRATE_DIVISOR -1)/CALIBRATE_DIVISOR;
}
/*
* The CTC wasn't reliable: we got a hit on the very first read,
* or the CPU was so fast/slow that the quotient wouldn't fit in
* 32 bits..
*/
bad_ctc:
printk_err("bad_ctc\n");
return 0;
}
#else /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 */
/*
* this is the "no timer2" version.
* to calibrate tsc, we get a TSC reading, then do 1,000,000 outbs to port 0x80
* then we read TSC again, and divide the difference by 1,000,000
* we have found on a wide range of machines that this gives us a a
* good microsecond value
* to +- 10%. On a dual AMD 1.6 Ghz box, it gives us .97 microseconds, and on a
* 267 Mhz. p5, it gives us 1.1 microseconds.
* also, since gcc now supports long long, we use that.
* also no unsigned long long / operator, so we play games.
* about the only thing you can do with long longs, it seems,
*is return them and assign them.
* (and do asm on them, yuck)
* so avoid all ops on long longs.
*/
static unsigned long long calibrate_tsc(void)
{
unsigned long long start, end, delta;
unsigned long result;
unsigned long count;
start = rdtscll();
// no udivdi3, dammit.
// so we count to 1<< 20 and then right shift 20
for(count = 0; count < (1<<20); count ++)
outb(0x80, 0x80);
end = rdtscll();
#if 0
// make delta be (endhigh - starthigh) + (endlow - startlow)
// but >> 20
// do it this way to avoid gcc warnings.
start = tsc_start.hi;
start <<= 32;
start |= start.lo;
end = tsc_end.hi;
end <<= 32;
end |= tsc_end.lo;
#endif
delta = end - start;
// at this point we have a delta for 1<<20 outbs. Now rescale for one microsecond.
delta >>= 20;
// save this for microsecond timing.
result = delta;
printk_spew("end %x:%x, start %x:%x\n",
endhigh, endlow, starthigh, startlow);
printk_spew("32-bit delta %d\n", (unsigned long) delta);
printk_spew(__FUNCTION__ " 32-bit result is %d\n", result);
return delta;
}
#endif /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2*/
void init_timer(void)
{
if (!clocks_per_usec) {
clocks_per_usec = calibrate_tsc();
printk_info("clocks_per_usec: %u\n", clocks_per_usec);
}
}
void udelay(unsigned us)
{
unsigned long long count;
unsigned long long stop;
unsigned long long clocks;
init_timer();
clocks = us;
clocks *= clocks_per_usec;
count = rdtscll();
stop = clocks + count;
while(stop > count) {
cpu_relax();
count = rdtscll();
}
}