- Add new cvs code to cvs
git-svn-id: svn://svn.coreboot.org/coreboot/trunk@1657 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
This commit is contained in:
124
src/cpu/x86/16bit/entry16.inc
Normal file
124
src/cpu/x86/16bit/entry16.inc
Normal file
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
This software and ancillary information (herein called SOFTWARE )
|
||||
called LinuxBIOS is made available under the terms described
|
||||
here. The SOFTWARE has been approved for release with associated
|
||||
LA-CC Number 00-34 . Unless otherwise indicated, this SOFTWARE has
|
||||
been authored by an employee or employees of the University of
|
||||
California, operator of the Los Alamos National Laboratory under
|
||||
Contract No. W-7405-ENG-36 with the U.S. Department of Energy. The
|
||||
U.S. Government has rights to use, reproduce, and distribute this
|
||||
SOFTWARE. The public may copy, distribute, prepare derivative works
|
||||
and publicly display this SOFTWARE without charge, provided that this
|
||||
Notice and any statement of authorship are reproduced on all copies.
|
||||
Neither the Government nor the University makes any warranty, express
|
||||
or implied, or assumes any liability or responsibility for the use of
|
||||
this SOFTWARE. If SOFTWARE is modified to produce derivative works,
|
||||
such modified SOFTWARE should be clearly marked, so as not to confuse
|
||||
it with the version available from LANL.
|
||||
*/
|
||||
/* Copyright 2000, Ron Minnich, Advanced Computing Lab, LANL
|
||||
* rminnich@lanl.gov
|
||||
*/
|
||||
|
||||
|
||||
/** Start code to put an i386 or later processor into 32-bit
|
||||
* protected mode.
|
||||
*/
|
||||
|
||||
/* .section ".rom.text" */
|
||||
#include <arch/rom_segs.h>
|
||||
.code16
|
||||
.globl _start
|
||||
.type _start, @function
|
||||
|
||||
_start:
|
||||
cli
|
||||
/* Save the BIST result */
|
||||
movl %eax, %ebp
|
||||
|
||||
/* thanks to kmliu@sis.tw.com for this TBL fix ... */
|
||||
/**/
|
||||
/* IMMEDIATELY invalidate the translation lookaside buffer before executing*/
|
||||
/* any further code. Even though paging is disabled we could still get*/
|
||||
/*false address translations due to the TLB if we didn't invalidate it.*/
|
||||
/**/
|
||||
xorl %eax, %eax
|
||||
movl %eax, %cr3 /* Invalidate TLB*/
|
||||
|
||||
|
||||
/* Invalidating the cache here seems to be a bad idea on
|
||||
* modern processors. Don't.
|
||||
* If we are hyperthreaded or we have multiple cores it is bad,
|
||||
* for SMP startup. On Opterons it causes a 5 second delay.
|
||||
* Invalidating the cache was pure paranoia in any event.
|
||||
* If you cpu needs it you can write a cpu dependent version of
|
||||
* entry16.inc.
|
||||
*/
|
||||
|
||||
/* Note: gas handles memory addresses in 16 bit code very poorly.
|
||||
* In particular it doesn't appear to have a directive allowing you
|
||||
* associate a section or even an absolute offset with a segment register.
|
||||
*
|
||||
* This means that anything except cs:ip relative offsets are
|
||||
* a real pain in 16 bit mode. And explains why it is almost
|
||||
* imposible to get gas to do lgdt correctly.
|
||||
*
|
||||
* One way to work around this is to have the linker do the
|
||||
* math instead of the assembler. This solves the very
|
||||
* pratical problem of being able to write code that can
|
||||
* be relocated.
|
||||
*
|
||||
* An lgdt call before we have memory enabled cannot be
|
||||
* position independent, as we cannot execute a call
|
||||
* instruction to get our current instruction pointer.
|
||||
* So while this code is relocateable it isn't arbitrarily
|
||||
* relocatable.
|
||||
*
|
||||
* The criteria for relocation have been relaxed to their
|
||||
* utmost, so that we can use the same code for both
|
||||
* our initial entry point and startup of the second cpu.
|
||||
* The code assumes when executing at _start that:
|
||||
* (((cs & 0xfff) == 0) and (ip == _start & 0xffff))
|
||||
* or
|
||||
* ((cs == anything) and (ip == 0)).
|
||||
*
|
||||
* The restrictions in reset16.inc mean that _start initially
|
||||
* must be loaded at or above 0xffff0000 or below 0x100000.
|
||||
*
|
||||
* The linker scripts computs gdtptr16_offset by simply returning
|
||||
* the low 16 bits. This means that the intial segment used
|
||||
* when start is called must be 64K aligned. This should not
|
||||
* restrict the address as the ip address can be anything.
|
||||
*/
|
||||
|
||||
movw %cs, %ax
|
||||
shlw $4, %ax
|
||||
movw $gdtptr16_offset, %bx
|
||||
subw %ax, %bx
|
||||
data32 lgdt %cs:(%bx)
|
||||
|
||||
movl %cr0, %eax
|
||||
andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
|
||||
orl $0x60000001, %eax /* CD, NW, PE = 1 */
|
||||
movl %eax, %cr0
|
||||
|
||||
/* Restore BIST to %eax */
|
||||
movl %ebp, %eax
|
||||
|
||||
/* Now that we are in protected mode jump to a 32 bit code segment. */
|
||||
data32 ljmp $ROM_CODE_SEG, $__protected_start
|
||||
|
||||
/** The gdt has a 4 GB code segment at 0x10, and a 4 GB data segment
 * at 0x18; these are Linux-compatible.
 *
 * NOTE(review): the table defined in entry32.inc places its flat code
 * descriptor at offset 0x08 and its flat data descriptor at offset 0x10;
 * confirm which table and selectors this comment refers to.
 */

	/* Pseudo-descriptor loaded by the lgdt in _start. */
	.align	4
	.globl	gdtptr16
gdtptr16:
	.word	gdt_end - gdt -1	/* compute the table limit */
	.long	gdt			/* we know the offset */

	.globl	_estart
_estart:
	.code32
|
||||
|
2
src/cpu/x86/16bit/entry16.lds
Normal file
2
src/cpu/x86/16bit/entry16.lds
Normal file
@@ -0,0 +1,2 @@
|
||||
gdtptr16_offset = gdtptr16 & 0xffff;
|
||||
_start_offset = _start & 0xffff;
|
21
src/cpu/x86/16bit/reset16.inc
Normal file
21
src/cpu/x86/16bit/reset16.inc
Normal file
@@ -0,0 +1,21 @@
|
||||
.section ".reset"
|
||||
.code16
|
||||
.globl reset_vector
|
||||
reset_vector:
|
||||
#if _ROMBASE >= 0xffff0000
|
||||
/* jmp _start */
|
||||
.byte 0xe9
|
||||
.int _start - ( . + 2 )
|
||||
/* Note: The above jump is hand coded to work around bugs in binutils.
|
||||
* 5 byte are used for a 3 byte instruction. This works because x86
|
||||
* is little endian and allows us to use supported 32bit relocations
|
||||
* instead of the weird 16 bit relocations that binutils does not
|
||||
* handle consistenly between versions because they are used so rarely.
|
||||
*/
|
||||
#else
|
||||
# error _ROMBASE is an unsupported value
|
||||
#endif
|
||||
. = 0x8;
|
||||
.code32
|
||||
jmp protected_start
|
||||
.previous
|
14
src/cpu/x86/16bit/reset16.lds
Normal file
14
src/cpu/x86/16bit/reset16.lds
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* _ROMTOP : The top of the rom used where we
|
||||
* need to put the reset vector.
|
||||
*/
|
||||
|
||||
SECTIONS {
|
||||
_ROMTOP = (_ROMBASE >= 0xffff0000)? 0xfffffff0 : 0xffff0;
|
||||
. = _ROMTOP;
|
||||
.reset . : {
|
||||
*(.reset)
|
||||
. = 15 ;
|
||||
BYTE(0x00);
|
||||
}
|
||||
}
|
61
src/cpu/x86/32bit/entry32.inc
Normal file
61
src/cpu/x86/32bit/entry32.inc
Normal file
@@ -0,0 +1,61 @@
|
||||
/* For starting linuxBIOS in protected mode */
|
||||
|
||||
#include <arch/rom_segs.h>
|
||||
|
||||
/* .section ".rom.text" */
|
||||
.code32
|
||||
|
||||
.align 4
|
||||
.globl gdtptr
|
||||
|
||||
gdt:
|
||||
gdtptr:
|
||||
.word gdt_end - gdt -1 /* compute the table limit */
|
||||
.long gdt /* we know the offset */
|
||||
.word 0
|
||||
|
||||
/* flat code segment */
|
||||
.word 0xffff, 0x0000
|
||||
.byte 0x00, 0x9b, 0xcf, 0x00
|
||||
|
||||
/* flat data segment */
|
||||
.word 0xffff, 0x0000
|
||||
.byte 0x00, 0x93, 0xcf, 0x00
|
||||
|
||||
gdt_end:
|
||||
|
||||
|
||||
/*
 *	When we come here we are in protected mode.  We expand
 *	the stack and copy the data segment from ROM to
 *	memory.
 *
 *	After that, we call the chipset bootstrap routine that
 *	does what is left of the chipset initialization.
 *
 *	NOTE aligned to 4 so that we are sure that the prefetch
 *	cache will be reloaded.
 *
 *	In/Out: %eax = BIST value (held in %ebp while %ax is used to load
 *	the data segment registers).
 */
	.align	4
	.globl	protected_start
protected_start:

	lgdt	%cs:gdtptr
	ljmp	$ROM_CODE_SEG, $__protected_start

__protected_start:
	/* Save the BIST value */
	movl	%eax, %ebp

	intel_chip_post_macro(0x10)	/* post 10 */

	/* Load every data segment register with ROM_DATA_SEG */
	movw	$ROM_DATA_SEG, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	movw	%ax, %fs
	movw	%ax, %gs

	/* Restore the BIST value to %eax */
	movl	%ebp, %eax
|
||||
|
14
src/cpu/x86/32bit/entry32.lds
Normal file
14
src/cpu/x86/32bit/entry32.lds
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
_cache_ram_seg_base = DEFINED(CACHE_RAM_BASE)? CACHE_RAM_BASE - _rodata : 0;
|
||||
_cache_ram_seg_base_low = (_cache_ram_seg_base) & 0xffff;
|
||||
_cache_ram_seg_base_middle = (_cache_ram_seg_base >> 16) & 0xff;
|
||||
_cache_ram_seg_base_high = (_cache_ram_seg_base >> 24) & 0xff;
|
||||
|
||||
_rom_code_seg_base = _ltext - _text;
|
||||
_rom_code_seg_base_low = (_rom_code_seg_base) & 0xffff;
|
||||
_rom_code_seg_base_middle = (_rom_code_seg_base >> 16) & 0xff;
|
||||
_rom_code_seg_base_high = (_rom_code_seg_base >> 24) & 0xff;
|
||||
*/
|
||||
|
||||
|
||||
|
10
src/cpu/x86/32bit/reset32.inc
Normal file
10
src/cpu/x86/32bit/reset32.inc
Normal file
@@ -0,0 +1,10 @@
|
||||
.section ".reset"
|
||||
.code16
|
||||
.globl reset_vector
|
||||
reset_vector:
|
||||
|
||||
. = 0x8;
|
||||
.code32
|
||||
jmp protected_start
|
||||
|
||||
.previous
|
14
src/cpu/x86/32bit/reset32.lds
Normal file
14
src/cpu/x86/32bit/reset32.lds
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* _ROMTOP : The top of the rom used where we
|
||||
* need to put the reset vector.
|
||||
*/
|
||||
|
||||
SECTIONS {
|
||||
_ROMTOP = _ROMBASE + ROM_IMAGE_SIZE - 0x10;
|
||||
. = _ROMTOP;
|
||||
.reset (.): {
|
||||
*(.reset)
|
||||
. = 15 ;
|
||||
BYTE(0x00);
|
||||
}
|
||||
}
|
1
src/cpu/x86/cache/Config.lb
vendored
Normal file
1
src/cpu/x86/cache/Config.lb
vendored
Normal file
@@ -0,0 +1 @@
|
||||
object cache.o
|
10
src/cpu/x86/cache/cache.c
vendored
Normal file
10
src/cpu/x86/cache/cache.c
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
#include <console/console.h>
|
||||
#include <cpu/x86/cache.h>
|
||||
|
||||
/* Emit post code 0x60, log the step, then turn the cache on via
 * enable_cache().
 */
void x86_enable_cache(void)
{
	post_code(0x60);
	printk_info("Enabling cache\n");

	enable_cache();
}
|
||||
|
0
src/cpu/x86/fpu/Config.lb
Normal file
0
src/cpu/x86/fpu/Config.lb
Normal file
9
src/cpu/x86/fpu/enable_fpu.inc
Normal file
9
src/cpu/x86/fpu/enable_fpu.inc
Normal file
@@ -0,0 +1,9 @@
|
||||
/* preserve BIST in %eax */
|
||||
movl %eax, %ebp
|
||||
|
||||
/* Disable floating point emulation */
|
||||
movl %cr0, %eax
|
||||
andl $~(1<<2), %eax
|
||||
movl %eax, %cr0
|
||||
|
||||
movl %ebp, %eax
|
3
src/cpu/x86/lapic/Config.lb
Normal file
3
src/cpu/x86/lapic/Config.lb
Normal file
@@ -0,0 +1,3 @@
|
||||
object lapic.o
|
||||
object lapic_cpu_init.o
|
||||
object secondary.S
|
10
src/cpu/x86/lapic/boot_cpu.c
Normal file
10
src/cpu/x86/lapic/boot_cpu.c
Normal file
@@ -0,0 +1,10 @@
|
||||
#include <cpu/x86/msr.h>
|
||||
|
||||
int boot_cpu(void)
|
||||
{
|
||||
int bsp;
|
||||
msr_t msr;
|
||||
msr = rdmsr(0x1b);
|
||||
bsp = !!(msr.lo & (1 << 8));
|
||||
return bsp;
|
||||
}
|
72
src/cpu/x86/lapic/lapic.c
Normal file
72
src/cpu/x86/lapic/lapic.c
Normal file
@@ -0,0 +1,72 @@
|
||||
#include <cpu/x86/lapic.h>
|
||||
#include <console/console.h>
|
||||
#include <cpu/x86/msr.h>
|
||||
#include <cpu/x86/mtrr.h>
|
||||
|
||||
void setup_lapic(void)
|
||||
{
|
||||
/* this is so interrupts work. This is very limited scope --
|
||||
* linux will do better later, we hope ...
|
||||
*/
|
||||
/* this is the first way we learned to do it. It fails on real SMP
|
||||
* stuff. So we have to do things differently ...
|
||||
* see the Intel mp1.4 spec, page A-3
|
||||
*/
|
||||
|
||||
#if NEED_LAPIC == 1
|
||||
/* Only Pentium Pro and later have those MSR stuff */
|
||||
msr_t msr;
|
||||
|
||||
printk_info("Setting up local apic...");
|
||||
|
||||
/* Enable the local apic */
|
||||
msr = rdmsr(LAPIC_BASE_MSR);
|
||||
msr.lo |= LAPIC_BASE_MSR_ENABLE;
|
||||
msr.lo &= ~LAPIC_BASE_MSR_ADDR_MASK;
|
||||
msr.lo |= LAPIC_DEFAULT_BASE;
|
||||
wrmsr(LAPIC_BASE_MSR, msr);
|
||||
|
||||
/*
|
||||
* Set Task Priority to 'accept all'.
|
||||
*/
|
||||
lapic_write_around(LAPIC_TASKPRI,
|
||||
lapic_read_around(LAPIC_TASKPRI) & ~LAPIC_TPRI_MASK);
|
||||
|
||||
/* Put the local apic in virtual wire mode */
|
||||
lapic_write_around(LAPIC_SPIV,
|
||||
(lapic_read_around(LAPIC_SPIV) & ~(LAPIC_VECTOR_MASK))
|
||||
| LAPIC_SPIV_ENABLE);
|
||||
lapic_write_around(LAPIC_LVT0,
|
||||
(lapic_read_around(LAPIC_LVT0) &
|
||||
~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER |
|
||||
LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY |
|
||||
LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 |
|
||||
LAPIC_DELIVERY_MODE_MASK))
|
||||
| (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING |
|
||||
LAPIC_DELIVERY_MODE_EXTINT)
|
||||
);
|
||||
lapic_write_around(LAPIC_LVT1,
|
||||
(lapic_read_around(LAPIC_LVT1) &
|
||||
~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER |
|
||||
LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY |
|
||||
LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 |
|
||||
LAPIC_DELIVERY_MODE_MASK))
|
||||
| (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING |
|
||||
LAPIC_DELIVERY_MODE_NMI)
|
||||
);
|
||||
|
||||
printk_debug(" apic_id: %d ", lapicid());
|
||||
|
||||
#else /* !NEED_LLAPIC */
|
||||
/* Only Pentium Pro and later have those MSR stuff */
|
||||
msr_t msr;
|
||||
|
||||
printk_info("Disabling local apic...");
|
||||
|
||||
msr = rdmsr(LAPIC_BASE_MSR);
|
||||
msr.lo &= ~LAPIC_BASE_MSR_ENABLE;
|
||||
wrmsr(LAPIC_BASE_MSR, msr);
|
||||
#endif /* !NEED_LAPIC */
|
||||
printk_info("done.\n");
|
||||
post_code(0x9b);
|
||||
}
|
316
src/cpu/x86/lapic/lapic_cpu_init.c
Normal file
316
src/cpu/x86/lapic/lapic_cpu_init.c
Normal file
@@ -0,0 +1,316 @@
|
||||
#include <cpu/x86/lapic.h>
|
||||
#include <delay.h>
|
||||
#include <string.h>
|
||||
#include <console/console.h>
|
||||
#include <arch/hlt.h>
|
||||
#include <device/device.h>
|
||||
#include <device/path.h>
|
||||
#include <smp/atomic.h>
|
||||
#include <smp/spinlock.h>
|
||||
#include <cpu/cpu.h>
|
||||
|
||||
|
||||
#if CONFIG_SMP == 1
|
||||
/* This is a lot more paranoid now, since Linux can NOT handle
|
||||
* being told there is a CPU when none exists. So any errors
|
||||
* will return 0, meaning no CPU.
|
||||
*
|
||||
* We actually handling that case by noting which cpus startup
|
||||
* and not telling anyone about the ones that dont.
|
||||
*/
|
||||
static int lapic_start_cpu(unsigned long apicid)
|
||||
{
|
||||
int timeout;
|
||||
unsigned long send_status, accept_status, start_eip;
|
||||
int j, num_starts, maxlvt;
|
||||
extern char _secondary_start[];
|
||||
|
||||
/*
|
||||
* Starting actual IPI sequence...
|
||||
*/
|
||||
|
||||
printk_spew("Asserting INIT.\n");
|
||||
|
||||
/*
|
||||
* Turn INIT on target chip
|
||||
*/
|
||||
lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
|
||||
|
||||
/*
|
||||
* Send IPI
|
||||
*/
|
||||
|
||||
lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_INT_ASSERT
|
||||
| LAPIC_DM_INIT);
|
||||
|
||||
printk_spew("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
printk_spew("+");
|
||||
udelay(100);
|
||||
send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
if (timeout >= 1000) {
|
||||
printk_err("CPU %d: First apic write timed out. Disabling\n",
|
||||
apicid);
|
||||
// too bad.
|
||||
printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR));
|
||||
if (lapic_read(LAPIC_ESR)) {
|
||||
printk_err("Try to reset ESR\n");
|
||||
lapic_write_around(LAPIC_ESR, 0);
|
||||
printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
mdelay(10);
|
||||
|
||||
printk_spew("Deasserting INIT.\n");
|
||||
|
||||
/* Target chip */
|
||||
lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
|
||||
|
||||
/* Send IPI */
|
||||
lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_DM_INIT);
|
||||
|
||||
printk_spew("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
printk_spew("+");
|
||||
udelay(100);
|
||||
send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
if (timeout >= 1000) {
|
||||
printk_err("CPU %d: Second apic write timed out. Disabling\n",
|
||||
apicid);
|
||||
// too bad.
|
||||
return 0;
|
||||
}
|
||||
|
||||
start_eip = (unsigned long)_secondary_start;
|
||||
printk_spew("start_eip=0x%08lx\n", start_eip);
|
||||
|
||||
num_starts = 2;
|
||||
|
||||
/*
|
||||
* Run STARTUP IPI loop.
|
||||
*/
|
||||
printk_spew("#startup loops: %d.\n", num_starts);
|
||||
|
||||
maxlvt = 4;
|
||||
|
||||
for (j = 1; j <= num_starts; j++) {
|
||||
printk_spew("Sending STARTUP #%d to %u.\n", j, apicid);
|
||||
lapic_read_around(LAPIC_SPIV);
|
||||
lapic_write(LAPIC_ESR, 0);
|
||||
lapic_read(LAPIC_ESR);
|
||||
printk_spew("After apic_write.\n");
|
||||
|
||||
/*
|
||||
* STARTUP IPI
|
||||
*/
|
||||
|
||||
/* Target chip */
|
||||
lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
|
||||
|
||||
/* Boot on the stack */
|
||||
/* Kick the second */
|
||||
lapic_write_around(LAPIC_ICR, LAPIC_DM_STARTUP
|
||||
| (start_eip >> 12));
|
||||
|
||||
/*
|
||||
* Give the other CPU some time to accept the IPI.
|
||||
*/
|
||||
udelay(300);
|
||||
|
||||
printk_spew("Startup point 1.\n");
|
||||
|
||||
printk_spew("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
printk_spew("+");
|
||||
udelay(100);
|
||||
send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
|
||||
/*
|
||||
* Give the other CPU some time to accept the IPI.
|
||||
*/
|
||||
udelay(200);
|
||||
/*
|
||||
* Due to the Pentium erratum 3AP.
|
||||
*/
|
||||
if (maxlvt > 3) {
|
||||
lapic_read_around(LAPIC_SPIV);
|
||||
lapic_write(LAPIC_ESR, 0);
|
||||
}
|
||||
accept_status = (lapic_read(LAPIC_ESR) & 0xEF);
|
||||
if (send_status || accept_status)
|
||||
break;
|
||||
}
|
||||
printk_spew("After Startup.\n");
|
||||
if (send_status)
|
||||
printk_warning("APIC never delivered???\n");
|
||||
if (accept_status)
|
||||
printk_warning("APIC delivery error (%lx).\n", accept_status);
|
||||
if (send_status || accept_status)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Number of cpus that are currently running in linuxbios */
|
||||
static atomic_t active_cpus = ATOMIC_INIT(1);
|
||||
|
||||
/* start_cpu_lock covers last_cpu_index and secondary_stack.
|
||||
 * Only starting one cpu at a time lets me remove the logic
|
||||
 * for selecting the stack from assembly language.
|
||||
*
|
||||
* In addition communicating by variables to the cpu I
|
||||
 * am starting allows me to verify it has started before
|
||||
* start_cpu returns.
|
||||
*/
|
||||
|
||||
static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED;
|
||||
static unsigned last_cpu_index = 0;
|
||||
volatile unsigned long secondary_stack;
|
||||
|
||||
/* Start one secondary processor and wait for it to check in.
 *
 * A stack slot below _estack is chosen from a running cpu index, a
 * struct cpu_info is placed at its end, and the address is published in
 * secondary_stack.  The startup IPIs are then sent and secondary_stack is
 * polled (100000 polls, 10us apart) until it reads back as zero.
 *
 * Returns 1 if the cpu checked in, 0 if the IPIs failed or the wait
 * expired.  The whole sequence runs under start_cpu_lock.
 */
int start_cpu(device_t cpu)
{
	extern unsigned char _estack[];
	struct cpu_info *new_info;
	unsigned long new_stack;
	unsigned long apicid;
	unsigned long index;
	unsigned long polls;
	int started = 0;

	spin_lock(&start_cpu_lock);

	/* The apic id addresses the IPIs; the index selects the stack slot */
	apicid = cpu->path.u.apic.apic_id;
	index = ++last_cpu_index;

	/* End of the new processor's stack, leaving room for its cpu_info */
	new_stack = ((unsigned long)_estack) - (STACK_SIZE*index) - sizeof(struct cpu_info);

	/* Record the index and which cpu structure is in use */
	new_info = (struct cpu_info *)new_stack;
	new_info->index = index;
	new_info->cpu = cpu;

	/* Publish the stack for the processor being started */
	secondary_stack = new_stack;

	/* Until the cpu starts up report the cpu is not enabled */
	cpu->enabled = 0;
	cpu->initialized = 0;

	if (lapic_start_cpu(apicid)) {
		/* Wait 1s or until the new cpu calls in */
		for (polls = 100000; polls > 0; polls--) {
			if (secondary_stack == 0) {
				started = 1;
				break;
			}
			udelay(10);
		}
	}

	secondary_stack = 0;
	spin_unlock(&start_cpu_lock);
	return started;
}
|
||||
|
||||
/* C entry point of secondary cpus */
|
||||
void secondary_cpu_init(void)
|
||||
{
|
||||
atomic_inc(&active_cpus);
|
||||
cpu_initialize();
|
||||
atomic_dec(&active_cpus);
|
||||
stop_this_cpu();
|
||||
}
|
||||
|
||||
/* Start every enabled, not yet initialized APIC cpu found under
 * root->link[1], wait until active_cpus drops back to 1, then report any
 * APIC cpu that is still not marked initialized.
 */
static void initialize_other_cpus(device_t root)
{
	int reported_count, active_count;
	device_t cpu;

	/* Loop through the cpus once getting them started */
	for (cpu = root->link[1].children; cpu; cpu = cpu->sibling) {
		if ((cpu->path.type != DEVICE_PATH_APIC) ||
			!cpu->enabled || cpu->initialized) {
			continue;
		}
		if (start_cpu(cpu)) {
			continue;
		}
		/* Record the error in cpu? */
		printk_err("CPU %u would not start!\n",
			cpu->path.u.apic.apic_id);
	}

	/* Now loop until the other cpus have finished initializing,
	 * logging each time the observed count changes.
	 */
	reported_count = 1;
	for (active_count = atomic_read(&active_cpus);
		active_count > 1;
		active_count = atomic_read(&active_cpus)) {
		if (active_count != reported_count) {
			printk_info("Waiting for %d CPUS to stop\n", active_count);
			reported_count = active_count;
		}
		udelay(10);
	}

	for (cpu = root->link[1].children; cpu; cpu = cpu->sibling) {
		if ((cpu->path.type == DEVICE_PATH_APIC) && !cpu->initialized) {
			printk_err("CPU %u did not initialize!\n",
				cpu->path.u.apic.apic_id);
#warning "FIXME do I need a mainboard_cpu_fixup function?"
		}
	}
	printk_debug("All AP CPUs stopped\n");
}
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
#define initialize_other_cpus(root) do {} while(0)
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/* Initialize the boot cpu, then all other cpus.
 *
 * Builds the device path of the executing cpu (its APIC id when
 * NEED_LAPIC == 1, DEVICE_PATH_BOOT_CPU otherwise), binds the matching
 * device under root->link[1] to this cpu's cpu_info, runs
 * cpu_initialize(), and finally calls initialize_other_cpus(root).
 */
void initialize_cpus(device_t root)
{
	struct device_path boot_path;
	struct cpu_info *this_cpu;

	/* Find the info struct for this cpu */
	this_cpu = cpu_info();

	/* Get the device path of the boot cpu */
#if NEED_LAPIC == 1
	/* Ensure the local apic is enabled before its id is read */
	enable_lapic();

	boot_path.type = DEVICE_PATH_APIC;
	boot_path.u.apic.apic_id = lapicid();
#else
	boot_path.type = DEVICE_PATH_BOOT_CPU;
#endif

	/* Find the device structure for the boot cpu */
	this_cpu->cpu = alloc_find_dev(&root->link[1], &boot_path);

	/* Initialize the bootstrap processor */
	cpu_initialize();

	/* Now initialize the rest of the cpus */
	initialize_other_cpus(root);
}
|
||||
|
53
src/cpu/x86/lapic/secondary.S
Normal file
53
src/cpu/x86/lapic/secondary.S
Normal file
@@ -0,0 +1,53 @@
|
||||
#include <arch/asm.h>
|
||||
#include <arch/intel.h>
|
||||
#include <cpu/x86/mtrr.h>
|
||||
#include <cpu/x86/lapic_def.h>
|
||||
.text
|
||||
.globl _secondary_start
|
||||
.balign 4096
|
||||
_secondary_start:
|
||||
.code16
|
||||
cli
|
||||
xorl %eax, %eax
|
||||
movl %eax, %cr3 /* Invalidate TLB*/
|
||||
|
||||
/* On hyper threaded cpus, invalidating the cache here is
|
||||
* very very bad. Don't.
|
||||
*/
|
||||
|
||||
/* setup the data segment */
|
||||
movw %cs, %ax
|
||||
movw %ax, %ds
|
||||
|
||||
data32 lgdt gdtaddr - _secondary_start
|
||||
|
||||
movl %cr0, %eax
|
||||
andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
|
||||
orl $0x60000001, %eax /* CD, NW, PE = 1 */
|
||||
movl %eax, %cr0
|
||||
|
||||
ljmpl $0x10, $1f
|
||||
1:
|
||||
.code32
|
||||
movw $0x18, %ax
|
||||
movw %ax, %ds
|
||||
movw %ax, %es
|
||||
movw %ax, %ss
|
||||
movw %ax, %fs
|
||||
movw %ax, %gs
|
||||
|
||||
/* Set the stack pointer, and flag that we are done */
|
||||
xorl %eax, %eax
|
||||
movl secondary_stack, %esp
|
||||
movl %eax, secondary_stack
|
||||
|
||||
call secondary_cpu_init
|
||||
1: hlt
|
||||
jmp 1b
|
||||
|
||||
gdtaddr:
|
||||
.word gdt_limit /* the table limit */
|
||||
.long gdt /* we know the offset */
|
||||
|
||||
|
||||
.code32
|
0
src/cpu/x86/mmx/Config.lb
Normal file
0
src/cpu/x86/mmx/Config.lb
Normal file
2
src/cpu/x86/mmx/disable_mmx.inc
Normal file
2
src/cpu/x86/mmx/disable_mmx.inc
Normal file
@@ -0,0 +1,2 @@
|
||||
/* Clear out an mmx state */
|
||||
emms
|
6
src/cpu/x86/mmx/enable_mmx.inc
Normal file
6
src/cpu/x86/mmx/enable_mmx.inc
Normal file
@@ -0,0 +1,6 @@
|
||||
/* BIST in %eax */
|
||||
|
||||
/*
|
||||
* Enabling mmx registers is a noop
|
||||
*/
|
||||
|
1
src/cpu/x86/mtrr/Config.lb
Normal file
1
src/cpu/x86/mtrr/Config.lb
Normal file
@@ -0,0 +1 @@
|
||||
object mtrr.o
|
123
src/cpu/x86/mtrr/earlymtrr.c
Normal file
123
src/cpu/x86/mtrr/earlymtrr.c
Normal file
@@ -0,0 +1,123 @@
|
||||
#ifndef EARLYMTRR_C
|
||||
#define EARLYMTRR_C
|
||||
#include <cpu/x86/cache.h>
|
||||
#include <cpu/x86/mtrr.h>
|
||||
#include <cpu/x86/msr.h>
|
||||
|
||||
/* Validate XIP_ROM_SIZE and XIP_ROM_BASE */
|
||||
#if defined(XIP_ROM_SIZE) && !defined(XIP_ROM_BASE)
|
||||
#error "XIP_ROM_SIZE without XIP_ROM_BASE"
|
||||
#endif
|
||||
#if defined(XIP_ROM_BASE) && !defined(XIP_ROM_SIZE)
|
||||
#error "XIP_ROM_BASE without XIP_ROM_SIZE"
|
||||
#endif
|
||||
#if !defined(CONFIG_LB_MEM_TOPK)
|
||||
#error "CONFIG_LB_MEM_TOPK not defined"
|
||||
#endif
|
||||
|
||||
#if defined(XIP_ROM_SIZE) && ((XIP_ROM_SIZE & (XIP_ROM_SIZE -1)) != 0)
|
||||
#error "XIP_ROM_SIZE is not a power of 2"
|
||||
#endif
|
||||
#if defined(XIP_ROM_SIZE) && ((XIP_ROM_BASE % XIP_ROM_SIZE) != 0)
|
||||
#error "XIP_ROM_BASE is not a multiple of XIP_ROM_SIZE"
|
||||
#endif
|
||||
|
||||
#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0
|
||||
# error "CONFIG_LB_MEM_TOPK must be a power of 2"
|
||||
#endif
|
||||
|
||||
static void disable_var_mtrr(unsigned reg)
|
||||
{
|
||||
/* The invalid bit is kept in the mask so we simply
|
||||
* clear the relevent mask register to disable a
|
||||
* range.
|
||||
*/
|
||||
msr_t zero;
|
||||
zero.lo = zero.hi = 0;
|
||||
wrmsr(MTRRphysMask_MSR(reg), zero);
|
||||
}
|
||||
|
||||
static void set_var_mtrr(
|
||||
unsigned reg, unsigned base, unsigned size, unsigned type)
|
||||
|
||||
{
|
||||
/* Bit Bit 32-35 of MTRRphysMask should be set to 1 */
|
||||
msr_t basem, maskm;
|
||||
basem.lo = base | type;
|
||||
basem.hi = 0;
|
||||
wrmsr(MTRRphysBase_MSR(reg), basem);
|
||||
maskm.lo = ~(size - 1) | 0x800;
|
||||
maskm.hi = 0x0f;
|
||||
wrmsr(MTRRphysMask_MSR(reg), maskm);
|
||||
}
|
||||
|
||||
static void cache_lbmem(int type)
|
||||
{
|
||||
/* Enable caching for 0 - 1MB using variable mtrr */
|
||||
disable_cache();
|
||||
set_var_mtrr(0, 0x00000000, CONFIG_LB_MEM_TOPK << 10, type);
|
||||
enable_cache();
|
||||
}
|
||||
|
||||
|
||||
/* the fixed and variable MTTRs are power-up with random values,
|
||||
* clear them to MTRR_TYPE_UNCACHEABLE for safty.
|
||||
*/
|
||||
static void do_early_mtrr_init(const unsigned long *mtrr_msrs)
|
||||
{
|
||||
/* Precondition:
|
||||
* The cache is not enabled in cr0 nor in MTRRdefType_MSR
|
||||
* entry32.inc ensures the cache is not enabled in cr0
|
||||
*/
|
||||
msr_t msr;
|
||||
const unsigned long *msr_addr;
|
||||
unsigned long cr0;
|
||||
|
||||
print_spew("Clearing mtrr\r\n");
|
||||
|
||||
/* Inialize all of the relevant msrs to 0 */
|
||||
msr.lo = 0;
|
||||
msr.hi = 0;
|
||||
unsigned long msr_nr;
|
||||
for(msr_addr = mtrr_msrs; (msr_nr = *msr_addr); msr_addr++) {
|
||||
wrmsr(msr_nr, msr);
|
||||
}
|
||||
|
||||
#if defined(XIP_ROM_SIZE)
|
||||
/* enable write through caching so we can do execute in place
|
||||
* on the flash rom.
|
||||
*/
|
||||
set_var_mtrr(1, XIP_ROM_BASE, XIP_ROM_SIZE, MTRR_TYPE_WRBACK);
|
||||
#endif
|
||||
|
||||
/* Set the default memory type and enable fixed and variable MTRRs
|
||||
*/
|
||||
/* Enable Variable MTRRs */
|
||||
msr.hi = 0x00000000;
|
||||
msr.lo = 0x00000800;
|
||||
wrmsr(MTRRdefType_MSR, msr);
|
||||
|
||||
}
|
||||
|
||||
/* Clear the MTRRs listed below and apply the early MTRR setup, with the
 * cache disabled for the duration.
 */
static void early_mtrr_init(void)
{
	/* MSR numbers cleared by do_early_mtrr_init(); zero terminates */
	static const unsigned long mtrr_msrs[] = {
		/* fixed mtrr */
		0x250,
		0x258, 0x259,
		0x268, 0x269, 0x26A, 0x26B,
		0x26C, 0x26D, 0x26E, 0x26F,
		/* var mtrr */
		0x200, 0x201,
		0x202, 0x203,
		0x204, 0x205,
		0x206, 0x207,
		0x208, 0x209,
		0x20A, 0x20B,
		0x20C, 0x20D,
		0x20E, 0x20F,
		/* NULL end of table */
		0
	};

	disable_cache();

	do_early_mtrr_init(mtrr_msrs);

	enable_cache();
}
|
||||
|
||||
#endif /* EARLYMTRR_C */
|
378
src/cpu/x86/mtrr/mtrr.c
Normal file
378
src/cpu/x86/mtrr/mtrr.c
Normal file
@@ -0,0 +1,378 @@
|
||||
/*
|
||||
* intel_mtrr.c: setting MTRR to decent values for cache initialization on P6
|
||||
*
|
||||
* Derived from intel_set_mtrr in intel_subr.c and mtrr.c in linux kernel
|
||||
*
|
||||
* Copyright 2000 Silicon Integrated System Corporation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
*
|
||||
* Reference: Intel Architecture Software Developer's Manual, Volume 3: System Programming
|
||||
*/
|
||||
|
||||
#include <console/console.h>
|
||||
#include <device/device.h>
|
||||
#include <cpu/x86/msr.h>
|
||||
#include <cpu/x86/mtrr.h>
|
||||
#include <cpu/x86/cache.h>
|
||||
|
||||
#define arraysize(x) (sizeof(x)/sizeof((x)[0]))
|
||||
|
||||
#warning "FIXME I do not properly handle address more than 36 physical address bits"
|
||||
#ifdef k8
|
||||
# define ADDRESS_BITS 40
|
||||
#else
|
||||
# define ADDRESS_BITS 36
|
||||
#endif
|
||||
#define ADDRESS_BITS_HIGH (ADDRESS_BITS - 32)
|
||||
#define ADDRESS_MASK_HIGH ((1u << ADDRESS_BITS_HIGH) - 1)
|
||||
|
||||
static unsigned int mtrr_msr[] = {
|
||||
MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR,
|
||||
MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, MTRRfix4K_D8000_MSR,
|
||||
MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR,
|
||||
};
|
||||
|
||||
|
||||
static void enable_fixed_mtrr(void)
|
||||
{
|
||||
msr_t msr;
|
||||
|
||||
msr = rdmsr(MTRRdefType_MSR);
|
||||
msr.lo |= 0xc00;
|
||||
wrmsr(MTRRdefType_MSR, msr);
|
||||
}
|
||||
|
||||
static void enable_var_mtrr(void)
|
||||
{
|
||||
msr_t msr;
|
||||
|
||||
msr = rdmsr(MTRRdefType_MSR);
|
||||
msr.lo |= 0x800;
|
||||
wrmsr(MTRRdefType_MSR, msr);
|
||||
}
|
||||
|
||||
/* setting variable mtrr, comes from linux kernel source */
|
||||
static void set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type)
|
||||
{
|
||||
msr_t base, mask;
|
||||
|
||||
base.hi = basek >> 22;
|
||||
base.lo = basek << 10;
|
||||
|
||||
//printk_debug("ADDRESS_MASK_HIGH=%#x\n", ADDRESS_MASK_HIGH);
|
||||
|
||||
if (sizek < 4*1024*1024) {
|
||||
mask.hi = ADDRESS_MASK_HIGH;
|
||||
mask.lo = ~((sizek << 10) -1);
|
||||
}
|
||||
else {
|
||||
mask.hi = ADDRESS_MASK_HIGH & (~((sizek >> 22) -1));
|
||||
mask.lo = 0;
|
||||
}
|
||||
|
||||
if (reg >= 8)
|
||||
return;
|
||||
|
||||
// it is recommended that we disable and enable cache when we
|
||||
// do this.
|
||||
disable_cache();
|
||||
if (sizek == 0) {
|
||||
msr_t zero;
|
||||
zero.lo = zero.hi = 0;
|
||||
/* The invalid bit is kept in the mask, so we simply clear the
|
||||
relevant mask register to disable a range. */
|
||||
wrmsr (MTRRphysMask_MSR(reg), zero);
|
||||
} else {
|
||||
/* Bit 32-35 of MTRRphysMask should be set to 1 */
|
||||
base.lo |= type;
|
||||
mask.lo |= 0x800;
|
||||
wrmsr (MTRRphysBase_MSR(reg), base);
|
||||
wrmsr (MTRRphysMask_MSR(reg), mask);
|
||||
}
|
||||
enable_cache();
|
||||
}
|
||||
|
||||
/* fms: find most significant bit set, stolen from Linux Kernel Source.
 *
 * Returns the zero-based index of the highest set bit in x.
 * Returns 0 when x is 0, so x == 0 and x == 1 give the same result.
 */
static inline unsigned int fms(unsigned int x)
{
	int r;

	/* bsrl has no immediate-source encoding, so the input is constrained
	 * to a register or memory operand ("rm") rather than "g".  The
	 * instruction sequence modifies the flags, hence the "cc" clobber. */
	__asm__("bsrl %1,%0\n\t"
	        "jnz 1f\n\t"
	        "movl $0,%0\n"
	        "1:" : "=r" (r) : "rm" (x) : "cc");
	return r;
}
|
||||
|
||||
/* fls: find least significant bit set
 *
 * Returns the zero-based index of the lowest set bit in x.
 * Returns 32 when x is 0 (no bit set).
 */
static inline unsigned int fls(unsigned int x)
{
	int r;

	/* bsfl has no immediate-source encoding, so the input is constrained
	 * to a register or memory operand ("rm") rather than "g".  The
	 * instruction sequence modifies the flags, hence the "cc" clobber. */
	__asm__("bsfl %1,%0\n\t"
	        "jnz 1f\n\t"
	        "movl $32,%0\n"
	        "1:" : "=r" (r) : "rm" (x) : "cc");
	return r;
}
|
||||
|
||||
/* Setting up variable and fixed MTRRs
 *
 * According to Intel Vol. III Section 9.12.4, the range size and base alignment must satisfy:
 *	1. The range size must be 2^N bytes for N >= 12 (i.e. 4KB minimum).
 *	2. The base address must be 2^N aligned, where N is the same N as in the previous
 *	   requirement. So an 8K range must be 8K aligned, not 4K aligned.
 *
 * These requirements are met by decomposing the ramsize into Sum(Cn * 2^n, n = [0..N], Cn = [0, 1]).
 * For Cm = 1, there is a WB range of size 2^m at base address Sum(Cm * 2^m, m = [N..n]).
 * A 124MB (128MB - 4MB SMA) example:
 *	ramsize = 124MB == 64MB (at 0MB) + 32MB (at 64MB) + 16MB (at 96MB) + 8MB (at 112MB) + 4MB (at 120MB).
 * But this wastes a lot of MTRR registers, so we use another, more "aggressive" way with Uncacheable Regions.
 *
 * In the Uncacheable Region scheme, we try to cover the whole ramsize with a single WB region if possible.
 * If (and only if) this cannot be done, we decompose the ramsize; the mathematical formula
 * would be ramsize = Sum(Cn * 2^n, n = [0..N], Cn = [-1, 0, 1]). For Cn = -1, an Uncacheable Region is used.
 * The same 124MB example:
 *	ramsize = 124MB == 128MB WB (at 0MB) + 4MB UC (at 124MB)
 * or a 156MB (128MB + 32MB - 4MB SMA) example:
 *	ramsize = 156MB == 128MB WB (at 0MB) + 32MB WB (at 128MB) + 4MB UC (at 156MB)
 */
|
||||
/*
 * Split of the variable MTRRs between this code and the operating system.
 * The active configuration gives all 8 to the BIOS and reserves none; the
 * disabled alternative would reserve 2 MTRRs for the operating system.
 */
#if 1
#define BIOS_MTRRS 8
#define OS_MTRRS   0
#else
#define BIOS_MTRRS 6
#define OS_MTRRS   2
#endif
/* Total number of variable MTRRs managed (used or cleared) by this file. */
#define MTRRS (BIOS_MTRRS + OS_MTRRS)
|
||||
|
||||
|
||||
static void set_fixed_mtrrs(unsigned int first, unsigned int last, unsigned char type)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int fixed_msr = NUM_FIXED_RANGES >> 3;
|
||||
msr_t msr;
|
||||
msr.lo = msr.hi = 0; /* Shut up gcc */
|
||||
for(i = first; i < last; i++) {
|
||||
/* When I switch to a new msr read it in */
|
||||
if (fixed_msr != i >> 3) {
|
||||
/* But first write out the old msr */
|
||||
if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
|
||||
disable_cache();
|
||||
wrmsr(mtrr_msr[fixed_msr], msr);
|
||||
enable_cache();
|
||||
}
|
||||
fixed_msr = i>>3;
|
||||
msr = rdmsr(mtrr_msr[fixed_msr]);
|
||||
}
|
||||
if ((i & 7) < 4) {
|
||||
msr.lo &= ~(0xff << ((i&3)*8));
|
||||
msr.lo |= type << ((i&3)*8);
|
||||
} else {
|
||||
msr.hi &= ~(0xff << ((i&3)*8));
|
||||
msr.hi |= type << ((i&3)*8);
|
||||
}
|
||||
}
|
||||
/* Write out the final msr */
|
||||
if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
|
||||
disable_cache();
|
||||
wrmsr(mtrr_msr[fixed_msr], msr);
|
||||
enable_cache();
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned fixed_mtrr_index(unsigned long addrk)
|
||||
{
|
||||
unsigned index;
|
||||
index = (addrk - 0) >> 6;
|
||||
if (index >= 8) {
|
||||
index = ((addrk - 8*64) >> 4) + 8;
|
||||
}
|
||||
if (index >= 24) {
|
||||
index = ((addrk - (8*64 + 16*16)) >> 2) + 24;
|
||||
}
|
||||
if (index > NUM_FIXED_RANGES) {
|
||||
index = NUM_FIXED_RANGES;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
static unsigned int range_to_mtrr(unsigned int reg,
|
||||
unsigned long range_startk, unsigned long range_sizek,
|
||||
unsigned long next_range_startk)
|
||||
{
|
||||
if (!range_sizek || (reg >= BIOS_MTRRS)) {
|
||||
return reg;
|
||||
}
|
||||
while(range_sizek) {
|
||||
unsigned long max_align, align;
|
||||
unsigned long sizek;
|
||||
/* Compute the maximum size I can make a range */
|
||||
max_align = fls(range_startk);
|
||||
align = fms(range_sizek);
|
||||
if (align > max_align) {
|
||||
align = max_align;
|
||||
}
|
||||
sizek = 1 << align;
|
||||
printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type WB\n",
|
||||
reg, range_startk >>10, sizek >> 10);
|
||||
set_var_mtrr(reg++, range_startk, sizek, MTRR_TYPE_WRBACK);
|
||||
range_startk += sizek;
|
||||
range_sizek -= sizek;
|
||||
if (reg >= BIOS_MTRRS)
|
||||
break;
|
||||
}
|
||||
return reg;
|
||||
}
|
||||
|
||||
/*
 * Convert a 64-bit byte quantity into KB.
 * Values of 2^42 bytes or more saturate to 0xffffffff.
 */
static unsigned long resk(uint64_t value)
{
	if (value >= (1ULL << 42))
		return 0xffffffff;

	return value >> 10;
}
|
||||
|
||||
void x86_setup_mtrrs(void)
|
||||
{
|
||||
/* Try this the simple way of incrementally adding together
|
||||
* mtrrs. If this doesn't work out we can get smart again
|
||||
* and clear out the mtrrs.
|
||||
*/
|
||||
struct device *dev;
|
||||
unsigned long range_startk, range_sizek;
|
||||
unsigned int reg;
|
||||
|
||||
printk_debug("\n");
|
||||
/* Initialized the fixed_mtrrs to uncached */
|
||||
printk_debug("Setting fixed MTRRs(%d-%d) type: UC\n",
|
||||
0, NUM_FIXED_RANGES);
|
||||
set_fixed_mtrrs(0, NUM_FIXED_RANGES, MTRR_TYPE_UNCACHEABLE);
|
||||
|
||||
/* Now see which of the fixed mtrrs cover ram.
|
||||
*/
|
||||
for(dev = all_devices; dev; dev = dev->next) {
|
||||
struct resource *res, *last;
|
||||
last = &dev->resource[dev->resources];
|
||||
for(res = &dev->resource[0]; res < last; res++) {
|
||||
unsigned int start_mtrr;
|
||||
unsigned int last_mtrr;
|
||||
if (!(res->flags & IORESOURCE_MEM) ||
|
||||
!(res->flags & IORESOURCE_CACHEABLE))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
start_mtrr = fixed_mtrr_index(resk(res->base));
|
||||
last_mtrr = fixed_mtrr_index(resk((res->base + res->size)));
|
||||
if (start_mtrr >= NUM_FIXED_RANGES) {
|
||||
break;
|
||||
}
|
||||
printk_debug("Setting fixed MTRRs(%d-%d) Type: WB\n",
|
||||
start_mtrr, last_mtrr);
|
||||
set_fixed_mtrrs(start_mtrr, last_mtrr, MTRR_TYPE_WRBACK);
|
||||
}
|
||||
}
|
||||
printk_debug("DONE fixed MTRRs\n");
|
||||
/* Cache as many memory areas as possible */
|
||||
/* FIXME is there an algorithm for computing the optimal set of mtrrs?
|
||||
* In some cases it is definitely possible to do better.
|
||||
*/
|
||||
range_startk = 0;
|
||||
range_sizek = 0;
|
||||
reg = 0;
|
||||
for(dev = all_devices; dev; dev = dev->next) {
|
||||
struct resource *res, *last;
|
||||
last = &dev->resource[dev->resources];
|
||||
for(res = &dev->resource[0]; res < last; res++) {
|
||||
unsigned long basek, sizek;
|
||||
if (!(res->flags & IORESOURCE_MEM) ||
|
||||
!(res->flags & IORESOURCE_CACHEABLE)) {
|
||||
continue;
|
||||
}
|
||||
basek = resk(res->base);
|
||||
sizek = resk(res->size);
|
||||
/* See if I can merge with the last range
|
||||
* Either I am below 1M and the fixed mtrrs handle it, or
|
||||
* the ranges touch.
|
||||
*/
|
||||
if ((basek <= 1024) || (range_startk + range_sizek == basek)) {
|
||||
unsigned long endk = basek + sizek;
|
||||
range_sizek = endk - range_startk;
|
||||
continue;
|
||||
}
|
||||
/* Write the range mtrrs */
|
||||
if (range_sizek != 0) {
|
||||
reg = range_to_mtrr(reg, range_startk, range_sizek, basek);
|
||||
range_startk = 0;
|
||||
range_sizek = 0;
|
||||
if (reg >= BIOS_MTRRS)
|
||||
goto last_msr;
|
||||
}
|
||||
/* Allocate an msr */
|
||||
range_startk = basek;
|
||||
range_sizek = sizek;
|
||||
}
|
||||
}
|
||||
last_msr:
|
||||
/* Write the last range */
|
||||
reg = range_to_mtrr(reg, range_startk, range_sizek, 0);
|
||||
printk_debug("DONE variable MTRRs\n");
|
||||
printk_debug("Clear out the extra MTRR's\n");
|
||||
/* Clear out the extra MTRR's */
|
||||
while(reg < MTRRS) {
|
||||
set_var_mtrr(reg++, 0, 0, 0);
|
||||
}
|
||||
/* enable fixed MTRR */
|
||||
printk_spew("call enable_fixed_mtrr()\n");
|
||||
enable_fixed_mtrr();
|
||||
printk_spew("call enable_var_mtrr()\n");
|
||||
enable_var_mtrr();
|
||||
printk_spew("Leave %s\n", __FUNCTION__);
|
||||
post_code(0x6A);
|
||||
}
|
||||
|
||||
int x86_mtrr_check(void)
|
||||
{
|
||||
/* Only Pentium Pro and later have MTRR */
|
||||
msr_t msr;
|
||||
printk_debug("\nMTRR check\n");
|
||||
|
||||
msr = rdmsr(0x2ff);
|
||||
msr.lo >>= 10;
|
||||
|
||||
printk_debug("Fixed MTRRs : ");
|
||||
if (msr.lo & 0x01)
|
||||
printk_debug("Enabled\n");
|
||||
else
|
||||
printk_debug("Disabled\n");
|
||||
|
||||
printk_debug("Variable MTRRs: ");
|
||||
if (msr.lo & 0x02)
|
||||
printk_debug("Enabled\n");
|
||||
else
|
||||
printk_debug("Disabled\n");
|
||||
|
||||
printk_debug("\n");
|
||||
|
||||
post_code(0x93);
|
||||
return ((int) msr.lo);
|
||||
}
|
1
src/cpu/x86/pae/Config.lb
Normal file
1
src/cpu/x86/pae/Config.lb
Normal file
@@ -0,0 +1 @@
|
||||
object pgtbl.o
|
94
src/cpu/x86/pae/pgtbl.c
Normal file
94
src/cpu/x86/pae/pgtbl.c
Normal file
@@ -0,0 +1,94 @@
|
||||
#include <console/console.h>
|
||||
#include <cpu/cpu.h>
|
||||
#include <cpu/x86/pae.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Turn paging off, then turn PAE off.
 *
 * Clears bit 31 of %cr0 and bit 5 of %cr4.  Uses %eax as scratch.  The
 * "memory" clobber keeps the compiler from moving memory accesses across
 * the mode switch.
 */
static void paging_off(void)
{
	__asm__ __volatile__ (
		/* Disable paging */
		"movl	%%cr0, %%eax\n\t"
		"andl	$0x7FFFFFFF, %%eax\n\t"
		"movl	%%eax, %%cr0\n\t"
		/* Disable pae */
		"movl	%%cr4, %%eax\n\t"
		"andl	$0xFFFFFFDF, %%eax\n\t"
		/* Write the masked value back; without this store the PAE
		 * bit is computed but never actually cleared. */
		"movl	%%eax, %%cr4\n\t"
		:
		:
		: "eax", "memory"
		);
}
|
||||
|
||||
/*
 * Load `pdp` into %cr3, then enable PAE (bit 5 of %cr4) and paging
 * (bit 31 of %cr0), in that order.  Uses %eax as scratch.
 *
 * The "memory" clobber makes the compiler finish every pending store
 * before the asm runs, so page-table entries written just before the call
 * cannot be reordered past the instruction that turns paging on.
 */
static void paging_on(void *pdp)
{
	__asm__ __volatile__(
		/* Load the page table address */
		"movl	%0, %%cr3\n\t"
		/* Enable pae */
		"movl	%%cr4, %%eax\n\t"
		"orl	$0x00000020, %%eax\n\t"
		"movl	%%eax, %%cr4\n\t"
		/* Enable paging */
		"movl	%%cr0, %%eax\n\t"
		"orl	$0x80000000, %%eax\n\t"
		"movl	%%eax, %%cr0\n\t"
		:
		: "r" (pdp)
		: "eax", "memory"
		);
}
|
||||
|
||||
void *map_2M_page(unsigned long page)
|
||||
{
|
||||
struct pde {
|
||||
uint32_t addr_lo;
|
||||
uint32_t addr_hi;
|
||||
} __attribute__ ((packed));
|
||||
struct pg_table {
|
||||
struct pde pd[2048];
|
||||
struct pde pdp[512];
|
||||
} __attribute__ ((packed));
|
||||
static struct pg_table pgtbl[CONFIG_MAX_CPUS] __attribute__ ((aligned(4096)));
|
||||
static unsigned long mapped_window[CONFIG_MAX_CPUS];
|
||||
unsigned long index;
|
||||
unsigned long window;
|
||||
void *result;
|
||||
int i;
|
||||
index = cpu_index();
|
||||
if ((index < 0) || (index >= CONFIG_MAX_CPUS)) {
|
||||
return MAPPING_ERROR;
|
||||
}
|
||||
window = page >> 10;
|
||||
if (window != mapped_window[index]) {
|
||||
paging_off();
|
||||
if (window > 1) {
|
||||
struct pde *pd, *pdp;
|
||||
/* Point the page directory pointers at the page directories */
|
||||
memset(&pgtbl[index].pdp, 0, sizeof(pgtbl[index].pdp));
|
||||
pd = pgtbl[index].pd;
|
||||
pdp = pgtbl[index].pdp;
|
||||
pdp[0].addr_lo = ((uint32_t)&pd[512*0])|1;
|
||||
pdp[1].addr_lo = ((uint32_t)&pd[512*1])|1;
|
||||
pdp[2].addr_lo = ((uint32_t)&pd[512*2])|1;
|
||||
pdp[3].addr_lo = ((uint32_t)&pd[512*3])|1;
|
||||
/* The first half of the page table is identity mapped */
|
||||
for(i = 0; i < 1024; i++) {
|
||||
pd[i].addr_lo = ((i & 0x3ff) << 21)| 0xE3;
|
||||
pd[i].addr_hi = 0;
|
||||
}
|
||||
/* The second half of the page table holds the mapped page */
|
||||
for(i = 1024; i < 2048; i++) {
|
||||
pd[i].addr_lo = ((window & 1) << 31) | ((i & 0x3ff) << 21) | 0xE3;
|
||||
pd[i].addr_hi = (window >> 1);
|
||||
}
|
||||
paging_on(pdp);
|
||||
}
|
||||
mapped_window[index] = window;
|
||||
}
|
||||
if (window == 0) {
|
||||
result = (void *)(page << 21);
|
||||
} else {
|
||||
result = (void *)(0x80000000 | ((page & 0x3ff) << 21));
|
||||
}
|
||||
return result;
|
||||
}
|
0
src/cpu/x86/sse/Config.lb
Normal file
0
src/cpu/x86/sse/Config.lb
Normal file
18
src/cpu/x86/sse/disable_sse.inc
Normal file
18
src/cpu/x86/sse/disable_sse.inc
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
 * Put the processor back into a reset state with respect to the xmm
 * registers: zero %xmm0-%xmm7, then clear bits 9 and 10 of %cr4.
 * Clobbers %eax.
 */

	/* Zero every xmm register */
	xorps	%xmm0, %xmm0
	xorps	%xmm1, %xmm1
	xorps	%xmm2, %xmm2
	xorps	%xmm3, %xmm3
	xorps	%xmm4, %xmm4
	xorps	%xmm5, %xmm5
	xorps	%xmm6, %xmm6
	xorps	%xmm7, %xmm7

	/* Disable sse instructions */
	movl	%cr4, %eax
	andl	$~((1<<9) | (1<<10)), %eax
	movl	%eax, %cr4
|
14
src/cpu/x86/sse/enable_sse.inc
Normal file
14
src/cpu/x86/sse/enable_sse.inc
Normal file
@@ -0,0 +1,14 @@
|
||||
/* preserve BIST in %eax */
|
||||
movl %eax, %ebp
|
||||
|
||||
/*
|
||||
* Enable the use of the xmm registers
|
||||
*/
|
||||
|
||||
/* Enable sse instructions */
|
||||
movl %cr4, %eax
|
||||
orl $(1<<9), %eax
|
||||
movl %eax, %cr4
|
||||
|
||||
movl %ebp, %eax
|
||||
|
5
src/cpu/x86/tsc/Config.lb
Normal file
5
src/cpu/x86/tsc/Config.lb
Normal file
@@ -0,0 +1,5 @@
|
||||
uses CONFIG_UDELAY_TSC
|
||||
uses CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2
|
||||
|
||||
default CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2=0
|
||||
if CONFIG_UDELAY_TSC object delay_tsc.o end
|
165
src/cpu/x86/tsc/delay_tsc.c
Normal file
165
src/cpu/x86/tsc/delay_tsc.c
Normal file
@@ -0,0 +1,165 @@
|
||||
#include <console/console.h>
|
||||
#include <arch/io.h>
|
||||
#include <cpu/x86/msr.h>
|
||||
#include <cpu/x86/tsc.h>
|
||||
#include <smp/spinlock.h>
|
||||
#include <delay.h>
|
||||
|
||||
static unsigned long clocks_per_usec;
|
||||
|
||||
#if (CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 == 1)
|
||||
#define CLOCK_TICK_RATE 1193180U /* Underlying HZ */
|
||||
|
||||
/* ------ Calibrate the TSC -------
|
||||
* Too much 64-bit arithmetic here to do this cleanly in C, and for
|
||||
* accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
|
||||
* output busy loop as low as possible. We avoid reading the CTC registers
|
||||
* directly because of the awkward 8-bit access mechanism of the 82C54
|
||||
* device.
|
||||
*/
|
||||
|
||||
#define CALIBRATE_INTERVAL ((20*CLOCK_TICK_RATE)/1000) /* 20ms */
|
||||
#define CALIBRATE_DIVISOR (20*1000) /* 20ms / 20000 == 1usec */
|
||||
|
||||
static unsigned long long calibrate_tsc(void)
|
||||
{
|
||||
/* Set the Gate high, disable speaker */
|
||||
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
|
||||
|
||||
/*
|
||||
* Now let's take care of CTC channel 2
|
||||
*
|
||||
* Set the Gate high, program CTC channel 2 for mode 0,
|
||||
* (interrupt on terminal count mode), binary count,
|
||||
* load 5 * LATCH count, (LSB and MSB) to begin countdown.
|
||||
*/
|
||||
outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
|
||||
outb(CALIBRATE_INTERVAL & 0xff, 0x42); /* LSB of count */
|
||||
outb(CALIBRATE_INTERVAL >> 8, 0x42); /* MSB of count */
|
||||
|
||||
{
|
||||
tsc_t start;
|
||||
tsc_t end;
|
||||
unsigned long count;
|
||||
|
||||
start = rdtsc();
|
||||
count = 0;
|
||||
do {
|
||||
count++;
|
||||
} while ((inb(0x61) & 0x20) == 0);
|
||||
end = rdtsc();
|
||||
|
||||
/* Error: ECTCNEVERSET */
|
||||
if (count <= 1)
|
||||
goto bad_ctc;
|
||||
|
||||
/* 64-bit subtract - gcc just messes up with long longs */
|
||||
__asm__("subl %2,%0\n\t"
|
||||
"sbbl %3,%1"
|
||||
:"=a" (end.lo), "=d" (end.hi)
|
||||
:"g" (start.lo), "g" (start.hi),
|
||||
"0" (end.lo), "1" (end.hi));
|
||||
|
||||
/* Error: ECPUTOOFAST */
|
||||
if (end.hi)
|
||||
goto bad_ctc;
|
||||
|
||||
|
||||
/* Error: ECPUTOOSLOW */
|
||||
if (end.lo <= CALIBRATE_DIVISOR)
|
||||
goto bad_ctc;
|
||||
|
||||
return (end.lo + CALIBRATE_DIVISOR -1)/CALIBRATE_DIVISOR;
|
||||
}
|
||||
|
||||
/*
|
||||
* The CTC wasn't reliable: we got a hit on the very first read,
|
||||
* or the CPU was so fast/slow that the quotient wouldn't fit in
|
||||
* 32 bits..
|
||||
*/
|
||||
bad_ctc:
|
||||
printk_err("bad_ctc\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 */
|
||||
|
||||
/*
 * this is the "no timer2" version.
 * To calibrate the TSC we take a TSC reading, do 1<<20 (1,048,576) outbs to
 * port 0x80, read the TSC again and shift the difference right by 20,
 * treating each outb as taking roughly one microsecond.
 * We have found on a wide range of machines that this gives us a
 * good microsecond value to +- 10%. On a dual AMD 1.6 Ghz box, it gives
 * us .97 microseconds, and on a 267 Mhz. p5, it gives us 1.1 microseconds.
 *
 * There is no unsigned long long division available here (no udivdi3),
 * which is why the loop count is a power of two and the scaling is a shift.
 * Only subtraction and constant shifts are applied to the long long values.
 *
 * Returns the estimated number of TSC ticks per microsecond.
 */
static unsigned long long calibrate_tsc(void)
{
	unsigned long long start, end, delta;
	unsigned long result;
	unsigned long count;

	start = rdtscll();
	// no udivdi3, dammit.
	// so we count to 1<< 20 and then right shift 20
	for(count = 0; count < (1<<20); count ++)
		outb(0x80, 0x80);
	end = rdtscll();

	delta = end - start;
	// at this point we have a delta for 1<<20 outbs. Now rescale for one microsecond.
	delta >>= 20;
	// save this for microsecond timing.
	result = delta;
	/* Print the raw readings as high:low 32-bit halves. */
	printk_spew("end %x:%x, start %x:%x\n",
		(unsigned int)(end >> 32), (unsigned int)end,
		(unsigned int)(start >> 32), (unsigned int)start);
	printk_spew("32-bit delta %d\n", (unsigned long) delta);

	printk_spew("%s 32-bit result is %d\n", __FUNCTION__, result);
	return delta;
}
|
||||
|
||||
|
||||
#endif /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2*/
|
||||
|
||||
void init_timer(void)
|
||||
{
|
||||
if (!clocks_per_usec) {
|
||||
clocks_per_usec = calibrate_tsc();
|
||||
printk_info("clocks_per_usec: %u\n", clocks_per_usec);
|
||||
}
|
||||
}
|
||||
|
||||
void udelay(unsigned us)
|
||||
{
|
||||
unsigned long long count;
|
||||
unsigned long long stop;
|
||||
unsigned long long clocks;
|
||||
|
||||
init_timer();
|
||||
clocks = us;
|
||||
clocks *= clocks_per_usec;
|
||||
count = rdtscll();
|
||||
stop = clocks + count;
|
||||
while(stop > count) {
|
||||
cpu_relax();
|
||||
count = rdtscll();
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user