diff --git a/OLVASSEL.md b/OLVASSEL.md index d7ec2dc..ac96d95 100644 --- a/OLVASSEL.md +++ b/OLVASSEL.md @@ -280,18 +280,19 @@ void _start() /*** FIGYELEM: ez a kód párhuzamosan fut minden processzormagon ***/ int x, y, s=bootboot.fb_scanline, w=bootboot.fb_width, h=bootboot.fb_height; - // célkereszt, hogy lássuk, a felbontás jó-e - for(y=0;y 0 then + -- cross-hair to see screen dimension detected correctly + for Y in Integer range 0 .. (H-1) loop + bootboot.fb (S*Y + W/2) := bootboot.UInt32(16#00FFFFFF#); end loop; - end loop; - for Y in Integer range 0 .. 20 loop - for X in Integer range 0 .. 20 loop - bootboot.fb (S*(Y+20) + (X+50)) := bootboot.UInt32(16#0000FF00#); + for X in Integer range 0 .. (W-1) loop + bootboot.fb (S*H/2 + X) := bootboot.UInt32(16#00FFFFFF#); end loop; - end loop; - for Y in Integer range 0 .. 20 loop - for X in Integer range 0 .. 20 loop - bootboot.fb (S*(Y+20) + (X+80)) := bootboot.UInt32(16#000000FF#); + + -- red, green, blue boxes in order + for Y in Integer range 0 .. 20 loop + for X in Integer range 0 .. 20 loop + bootboot.fb (S*(Y+20) + (X+20)) := bootboot.UInt32(16#00FF0000#); + end loop; + end loop; + for Y in Integer range 0 .. 20 loop + for X in Integer range 0 .. 20 loop + bootboot.fb (S*(Y+20) + (X+50)) := bootboot.UInt32(16#0000FF00#); + end loop; + end loop; + for Y in Integer range 0 .. 20 loop + for X in Integer range 0 .. 20 loop + bootboot.fb (S*(Y+20) + (X+80)) := bootboot.UInt32(16#000000FF#); + end loop; end loop; - end loop; - - -- say hello - puts("Hello from a simple BOOTBOOT kernel"); + -- say hello + puts("Hello from a simple BOOTBOOT kernel"); + end if; -- hang for now loop null; diff --git a/mykernel/c/kernel.c b/mykernel/c/kernel.c index 4f12564..9beed78 100644 --- a/mykernel/c/kernel.c +++ b/mykernel/c/kernel.c @@ -53,18 +53,19 @@ void _start() /*** NOTE: this code runs on all cores in parallel ***/ int x, y, s=bootboot.fb_scanline, w=bootboot.fb_width, h=bootboot.fb_height; - // cross-hair to see screen dimension detected correctly - for(y=0;y 0) Then Begin - P := PDword(@fb + S*Y + W*2); - P^ := $00FFFFFF; - End; - For X := 0 to (W-1) Do - Begin - P := PDword(@fb + S*(H shr 1) + X*4); - P^ := $00FFFFFF; - End; - - { red, green, blue boxes in order } - For Y := 0 to 20 Do - Begin - For X := 0 to 20 Do + { cross-hair to see screen dimension detected correctly } + For Y := 0 to (H-1) Do Begin - P := PDword(@fb + S*(Y+20) + (X+20)*4); - P^ := $00FF0000; + P := PDword(@fb + S*Y + W*2); + P^ := $00FFFFFF; End; - End; - For Y := 0 to 20 Do - Begin - For X := 0 to 20 Do + For X := 0 to (W-1) Do Begin - P := PDword(@fb + S*(Y+20) + (X+50)*4); - P^ := $0000FF00; + P := PDword(@fb + S*(H shr 1) + X*4); + P^ := $00FFFFFF; End; - End; - For Y := 0 to 20 Do - Begin - For X := 0 to 20 Do + + { red, green, blue boxes in order } + For Y := 0 to 20 Do Begin - P := PDword(@fb + S*(Y+20) + (X+80)*4); - P^ := $000000FF; + For X := 0 to 20 Do + Begin + P := PDword(@fb + S*(Y+20) + (X+20)*4); + P^ := $00FF0000; + End; End; + For Y := 0 to 20 Do + Begin + For X := 0 to 20 Do + Begin + P := PDword(@fb + S*(Y+20) + (X+50)*4); + P^ := $0000FF00; + End; + End; + For Y := 0 to 20 Do + Begin + For X := 0 to 20 Do + Begin + P := PDword(@fb + S*(Y+20) + (X+80)*4); + P^ := $000000FF; + End; + End; + + { say hello } + Puts('Hello from a simple BOOTBOOT kernel'); End; - - { say hello } - Puts('Hello from a simple BOOTBOOT kernel'); - { hang for now } While (True) Do; End; diff --git a/mykernel/rust/src/main.rs b/mykernel/rust/src/main.rs index 7ba2344..a75ddd4 100644 --- a/mykernel/rust/src/main.rs +++ b/mykernel/rust/src/main.rs @@ -49,52 +49,53 @@ extern crate rlibc; #[no_mangle] // don't mangle the name of this function fn _start() -> ! { /*** NOTE: this code runs on all cores in parallel ***/ - unsafe { - let fb = &bootboot::fb as *const u8 as u64; + if bootboot::bootboot.fb_scanline > 0 { + unsafe { + let fb = &bootboot::fb as *const u8 as u64; - // cross-hair to see screen dimension detected correctly - for y in 0..bootboot::bootboot.fb_height { - let addr = fb - + bootboot::bootboot.fb_scanline as u64 * y as u64 - + bootboot::bootboot.fb_width as u64 * 2; - *(addr as *mut u64) = 0x00FFFFFF; - } - for x in 0..bootboot::bootboot.fb_width { - let addr = fb - + bootboot::bootboot.fb_scanline as u64 * (bootboot::bootboot.fb_height / 2) as u64 + (x * 4) as u64; - *(addr as *mut u64) = 0x00FFFFFF; + // cross-hair to see screen dimension detected correctly + for y in 0..bootboot::bootboot.fb_height { + let addr = fb + + bootboot::bootboot.fb_scanline as u64 * y as u64 + + bootboot::bootboot.fb_width as u64 * 2; + *(addr as *mut u64) = 0x00FFFFFF; + } + for x in 0..bootboot::bootboot.fb_width { + let addr = fb + + bootboot::bootboot.fb_scanline as u64 * (bootboot::bootboot.fb_height / 2) as u64 + (x * 4) as u64; + *(addr as *mut u64) = 0x00FFFFFF; + } + + // red, green, blue boxes in order + for y in 0..20 { + for x in 0..20 { + let addr = fb + + bootboot::bootboot.fb_scanline as u64 * (y + 20) as u64 + + (x + 20) * 4; + *(addr as *mut u64) = 0x00FF0000; + } + } + for y in 0..20 { + for x in 0..20 { + let addr = fb + + bootboot::bootboot.fb_scanline as u64 * (y + 20) as u64 + + (x + 50) * 4; + *(addr as *mut u64) = 0x0000FF00; + } + } + for y in 0..20 { + for x in 0..20 { + let addr = fb + + bootboot::bootboot.fb_scanline as u64 * (y + 20) as u64 + + (x + 80) * 4; + *(addr as *mut u64) = 0x000000FF; + } + } } - // red, green, blue boxes in order - for y in 0..20 { - for x in 0..20 { - let addr = fb - + bootboot::bootboot.fb_scanline as u64 * (y + 20) as u64 - + (x + 20) * 4; - *(addr as *mut u64) = 0x00FF0000; - } - } - for y in 0..20 { - for x in 0..20 { - let addr = fb - + bootboot::bootboot.fb_scanline as u64 * (y + 20) as u64 - + (x + 50) * 4; - *(addr as *mut u64) = 0x0000FF00; - } - } - for y in 0..20 { - for x in 0..20 { - let addr = fb - + bootboot::bootboot.fb_scanline as u64 * (y + 20) as u64 - + (x + 80) * 4; - *(addr as *mut u64) = 0x000000FF; - } - } + // say hello + puts("Hello from a simple BOOTBOOT kernel"); } - - // say hello - puts("Hello from a simple BOOTBOOT kernel"); - // hang for now loop {} } diff --git a/x86_64-bios/bootboot.asm b/x86_64-bios/bootboot.asm index 20bc19c..b67afdf 100644 --- a/x86_64-bios/bootboot.asm +++ b/x86_64-bios/bootboot.asm @@ -1968,7 +1968,7 @@ end if cmp byte [ebx], 0 ; madt_entry.type: is it a Local APIC Processor? jne @f xor ax, ax - mov al, byte [ebx+2] ; madt_entry.lapicproc.lapicid + mov al, byte [ebx+3] ; madt_entry.lapicproc.lapicid stosw ; ACPI table holds 1 byte id, but internally we have 2 bytes inc word [bootboot.numcores] @@: xor eax, eax @@ -2012,7 +2012,52 @@ end if .dosmp: cmp word [bootboot.numcores], 2 jb .nosmp - DBG32 dbg_smp +if BBDEBUG eq 1 + xor eax, eax + mov dword [gpt_ptr], eax + mov dword [gpt_num], eax + prot_realmode + mov si, dbg_smp + mov bx, word [bootboot.numcores] + mov di, gpt_ptr + + cmp bx, 1000 + jl @f + mov al, '1' + stosb + sub bx, 1000 +@@: mov cx, 100 + cmp bx, cx + jl @f + mov ax, bx + xor dx, dx + div cx + add al, '0' + stosb + mov bx, dx +@@: mov cx, 10 + cmp bx, cx + jl @f + mov ax, bx + xor dx, dx + div cx + add al, '0' + stosb + mov bx, dx +@@: mov al, bl + add al, '0' + stosb + xor al, al + stosb + + mov si, dbg_smp + call real_printfunc + mov si, gpt_ptr + call real_printfunc + mov si, crlf + call real_printfunc + real_protmode +end if ; relocate AP trampoline mov esi, ap_trampoline @@ -2324,11 +2369,10 @@ longmode_init: mov eax, 0C0000011h ;clear EM, MP (enable SSE) and WP mov cr0, eax ;enable paging with cache disabled lgdt [GDT_value] ;read 80 bit address - jmp @f - nop -@@: jmp 8:@f + jmp 8:.bootboot_startcore USE64 -@@: xor eax, eax ;load long mode segments +.bootboot_startcore: + xor rax, rax ;load long mode segments mov ax, 10h mov ds, ax mov es, ax @@ -2336,10 +2380,13 @@ longmode_init: mov fs, ax mov gs, ax ; find out our lapic id - mov eax, 1 - cpuid - shr ebx, 24 - mov edx, ebx + mov eax, dword [lapic_ptr] + or eax, eax + jz @f + add eax, 20h + mov eax, dword [rax] + shr eax, 24 +@@: mov edx, eax ; get array index for it xor rbx, rbx mov rsi, lapic_ids @@ -2351,7 +2398,8 @@ longmode_init: dec cx jnz @b xor rbx, rbx -@@: shl rbx, 10 ; 1k stack for each core +@@: mov rdi, rbx + shl rbx, 10 ; 1k stack for each core ; set stack and call _start() in sys/core xor rsp, rsp ;sp = core_num * -1024 @@ -2732,7 +2780,7 @@ dbg_gzinitrd db " * Gzip compressed initrd",10,13,0 dbg_scan db " * Autodetecting kernel",10,13,0 dbg_elf db " * Parsing ELF64",10,13,0 dbg_pe db " * Parsing PE32+",10,13,0 -dbg_smp db " * SMP init",10,13,0 +dbg_smp db " * SMP numcores ",0 dbg_vesa db " * Screen VESA VBE",10,13,0 end if backup: db " * Backup initrd",10,13,0 diff --git a/x86_64-cb/bootboot.c b/x86_64-cb/bootboot.c index 64e3060..e8c95d8 100644 --- a/x86_64-cb/bootboot.c +++ b/x86_64-cb/bootboot.c @@ -71,7 +71,7 @@ extern void ap_trampoline(); #if __WORDSIZE == 64 -extern void bsp64_init(); +extern void bsp64_init(uint64_t apicid); #else extern void bsp_init(); #endif @@ -597,7 +597,7 @@ void GetLFB() */ void LoadCore() { - uint64_t bss = 0; + uint64_t bss = 0, ptr; uint32_t r = 0; entrypoint=0; @@ -716,8 +716,9 @@ void LoadCore() if(initstack < 1024) initstack = 1024; if(initstack > 16384) initstack = 16384; // create core segment - memcpy((void*)((uint8_t*)(uintptr_t)bootboot->initrd_ptr+bootboot->initrd_size), core.ptr, core.size); - core.ptr=(uint8_t*)(uintptr_t)bootboot->initrd_ptr+bootboot->initrd_size; + ptr = ((bootboot->initrd_ptr+bootboot->initrd_size) + PAGESIZE - 1) & ~(PAGESIZE-1); + memcpy((void*)(uintptr_t)ptr, core.ptr, core.size); + core.ptr=(uint8_t*)(uintptr_t)ptr; if(bss>0) memset(core.ptr + core.size, 0, bss); core.size += bss; @@ -1110,7 +1111,7 @@ gzerr: panic("Unable to uncompress"); LoadCore(); /* Symmetric Multi Processing support */ - memset(lapic_ids, 0, sizeof(lapic_ids)); lapic_ids[0] = bootboot->bspid; + memset(lapic_ids, 0xFF, sizeof(lapic_ids)); ptr = (uint8_t*)(uintptr_t)bootboot->arch.x86_64.acpi_ptr; if(ptr && (ptr[0]=='X' || ptr[0]=='R') && ptr[1]=='S' && ptr[2]=='D' && ptr[3]=='T') { pe = ptr; ptr += 36; @@ -1123,11 +1124,13 @@ gzerr: panic("Unable to uncompress"); for(r = *((uint32_t*)(data + 4)), ptr = data + 44, i = 0; ptr < data + r && i < (int)(sizeof(lapic_ids)/sizeof(lapic_ids[0])); ptr += ptr[1]) { switch(ptr[0]) { - case 0: lapic_ids[(int)ptr[2]] = i++; break; // found Processor Local APIC + case 0: // found Processor Local APIC + if((ptr[4] & 1) && lapic_ids[(int)ptr[3]] == 0xFFFF) { lapic_ids[(int)ptr[3]] = i++; } + break; case 5: lapic_addr = *((uint64_t*)(ptr+4)); break; // found 64 bit Local APIC Address } } - if(i) bootboot->numcores = i; + if(i && lapic_ids[bootboot->bspid] != 0xFFFF) bootboot->numcores = i; break; } } @@ -1135,20 +1138,56 @@ gzerr: panic("Unable to uncompress"); if(!nosmp && bootboot->numcores > 1 && lapic_addr) { DBG(" * SMP numcores %d\n", bootboot->numcores); memcpy((uint8_t*)0x1000, &ap_trampoline, 128); - // send Broadcast INIT IPI - *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = 0x0C4500; + + // send INIT IPI (supports up to 256 cores, requires x2APIC to have more) + for(i = 0; i < 256; i++) { + if(i == bootboot->bspid || lapic_ids[i] == 0xFFFF) continue; + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x280)) = 0; // clear APIC errors + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); // select AP + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & 0xfff00000) | 0x00C500; // trigger INIT IPI + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & (1 << 12)); // wait for delivery + // deassert + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & 0xfff00000) | 0x008500; + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & (1 << 12)); + } mdelay(10); - // send Broadcast STARTUP IPI - *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = 0x0C4601; // start at 0100:0000h - udelay(200); - // send second SIPI - *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = 0x0C4601; - } else + // send STARTUP IPI + for(i = 0; i < 256; i++) { + if(i == bootboot->bspid || lapic_ids[i] == 0xFFFF) continue; + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x280)) = 0; // clear APIC errors + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); // select AP + // trigger IPI, start at 0100:0000h + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & 0xfff0f800) | 0x000601; + udelay(200); + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & (1 << 12)); // wait for delivery + // send second IPI + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x280)) = 0; + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); + *((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) = + (*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & 0xfff0f800) | 0x000601; + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)((uintptr_t)lapic_addr + 0x300)) & (1 << 12)); + } + } else { + lapic_addr = 0; + lapic_ids[bootboot->bspid] = 0; bootboot->numcores = 1; + } /* Create paging tables */ DBG(" * Pagetables PML4 @%p\n",paging); - memset(paging, 0, 37*PAGESIZE+bootboot->numcores*initstack); + memset(paging, 0, (37+(bootboot->numcores*initstack+PAGESIZE-1)/PAGESIZE)*PAGESIZE); //PML4 paging[0]=(uint64_t)((uintptr_t)paging+PAGESIZE)+3; // pointer to 2M PDPE (16G RAM identity mapped) paging[511]=(uint64_t)((uintptr_t)paging+20*PAGESIZE)+3; // pointer to 4k PDPE (core mapped at -2M) @@ -1243,7 +1282,7 @@ gzerr: panic("Unable to uncompress"); /* continue in Assembly, enable long mode and jump to kernel's entry point */ #if __WORDSIZE == 64 - bsp64_init(); + bsp64_init(bootboot->bspid); #else bsp_init(); #endif diff --git a/x86_64-cb/smp.S b/x86_64-cb/smp.S index 5413960..97fd480 100644 --- a/x86_64-cb/smp.S +++ b/x86_64-cb/smp.S @@ -32,6 +32,7 @@ .globl bsp_init .globl bsp64_init .extern lapic_ids +.extern lapic_addr .extern initstack .text @@ -76,7 +77,7 @@ _L1060: cmpb $0, 0x1010 jz 1b // jump back to non-relocated code segment - jmp longmode_init + ljmp $8, $longmode_init .align 128 ap_trampoline_end: @@ -93,7 +94,7 @@ GDT_value: .word 0 .align 8 stack64: - .long bit64 + .long bootboot_startcore .long 0 .quad 8 @@ -117,6 +118,15 @@ bsp_init: * common code for all cores, enable long mode and start kernel * *****************************************************************************/ longmode_init: + // find our lapic id + movl lapic_addr, %edi + or %edi, %edi + jz 1f + addl $0x20, %edi + movl (%edi), %edi + shrl $24, %edi +1: // do not clobber di + movl $0x368, %eax // Set PAE, MCE, PGE; OSFXSR, OSXMMEXCPT (enable SSE) movl %eax, %cr4 movl $0x4000, %eax @@ -129,11 +139,12 @@ longmode_init: movl $0x0C0000011, %eax // clear EM, MP (enable SSE) and WP movl %eax, %cr0 lgdt GDT_value - ljmp $8, $bit64 + ljmp $8, $bootboot_startcore .code64 /* similar code to above, but these are 64 bit encoded, only needed on BSP if coreboot is compiled for x86_64 */ bsp64_init: + // do not clobber di cli cld movb $0xFF, %al // disable PIC @@ -143,6 +154,7 @@ bsp64_init: orb $0x80, %al outb %al, $0x70 incb 0x1010 // release AP spin lock + xorq %rax, %rax movl $0xC0000011, %eax // enable SSE movq %rax, %cr0 @@ -158,24 +170,22 @@ bsp64_init: movq %rax, %rsp lretq -bit64: + /* IN: di = apic id of current core */ +bootboot_startcore: movl $0x10, %eax // load long mode segments movw %ax, %ds movw %ax, %es movw %ax, %ss movw %ax, %fs movw %ax, %gs - xorq %rbx, %rbx - // find our lapic id - movl $1, %eax - cpuid - shrl $23, %ebx - andb $0xfe, %bl // ebx = lapic id * 2 + movzwq %di, %rbx + shll $1, %ebx // ebx = lapic id * 2 addl $lapic_ids, %ebx xorq %rax, %rax movw (%rbx), %ax // ax = word[lapic_ids + lapic id * 2] -1: movl $initstack, %ebx + movl $initstack, %ebx movl (%rbx), %ebx + movzwq %ax, %rdi mulq %rbx // 1k stack for each core // set stack and call _start() in sys/core diff --git a/x86_64-efi/bootboot.c b/x86_64-efi/bootboot.c index 4cc36d3..0403099 100644 --- a/x86_64-efi/bootboot.c +++ b/x86_64-efi/bootboot.c @@ -223,6 +223,7 @@ struct _EFI_MP_SERVICES_PROTOCOL { #else extern void ap_trampoline(); UINT16 lapic_ids[1024]; +UINT64 lapic_addr=0; #endif typedef @@ -1394,21 +1395,14 @@ VOID EFIAPI bootboot_startcore(IN VOID* buf) { #if USE_MP_SERVICES // we have a scalar number, not a pointer, so cast it - UINTN core_num = (UINTN)buf; + register UINTN core_num = (UINTN)buf; #else (void)buf; - UINT16 core_num; - __asm__ __volatile__ ( - "movl $1, %%eax;" - "cpuid;" - "shrl $24, %%ebx;" - "mov %%bx,%0" - : "=b"(core_num) : : ); - core_num = lapic_ids[core_num]; + register UINT16 core_num = lapic_addr ? lapic_ids[*((volatile uint32_t*)(lapic_addr + 0x20)) >> 24] : 0; #endif - // spinlock until BSP finishes - do { __asm__ __volatile__ ("pause"); } while(!bsp_done); + // spinlock until BSP finishes (or forever if we got an invalid lapicid, should never happen) + do { __asm__ __volatile__ ("pause" : : : "memory"); } while(!bsp_done && core_num != 0xFFFF); // enable SSE __asm__ __volatile__ ( @@ -1432,8 +1426,9 @@ VOID EFIAPI bootboot_startcore(IN VOID* buf) "subq %0, %%rsp;" // sp = core_num * -initstack // pass control over "pushq %1;" + "movq %2, %%rdi;" "retq" - : : "a"((UINTN)core_num*initstack), "b"(entrypoint) : "memory" ); + : : "a"((UINTN)core_num*initstack), "b"(entrypoint), "c"((UINTN)core_num) : "memory" ); } /** @@ -1924,8 +1919,8 @@ gzerr: return report(EFI_COMPROMISED_DATA,L"Unable to uncompress"); bootboot->numcores = 1; #else UINT8 *ptr = (UINT8*)bootboot->arch.x86_64.acpi_ptr, *pe, *data; - UINT64 r, lapic_addr=0; - ZeroMem(lapic_ids, sizeof(lapic_ids)); + UINT64 r; + for(i = 0; i < (int)(sizeof(lapic_ids)/sizeof(lapic_ids[0])); i++) lapic_ids[i] = 0xFFFF; if(!nosmp && ptr && (ptr[0]=='X' || ptr[0]=='R') && ptr[1]=='S' && ptr[2]=='D' && ptr[3]=='T') { pe = ptr; ptr += 36; // iterate on ACPI table pointers @@ -1937,15 +1932,16 @@ gzerr: return report(EFI_COMPROMISED_DATA,L"Unable to uncompress"); for(r = *((uint32_t*)(data + 4)), ptr = data + 44, i = 0; ptr < data + r && i < (int)(sizeof(lapic_ids)/sizeof(lapic_ids[0])); ptr += ptr[1]) { switch(ptr[0]) { - case 0: - DBG(L"ACPI table lapicid %02x is core %d\n",ptr[2],i); - lapic_ids[(INTN)ptr[2]] = i++; break; // found Processor Local APIC + case 0: // found Processor Local APIC + if((ptr[4] & 1) && lapic_ids[(INTN)ptr[3]] == 0xFFFF) { lapic_ids[(INTN)ptr[3]] = i++; } + break; case 5: lapic_addr = *((uint64_t*)(ptr+4)); break; // found 64 bit Local APIC Address } } if(i) { - bootboot->numcores = i; bsp_num = lapic_ids[bootboot->bspid]; + if(bsp_num == 0xFFFF) bsp_num = 0; + else bootboot->numcores = i; } break; } @@ -1953,13 +1949,6 @@ gzerr: return report(EFI_COMPROMISED_DATA,L"Unable to uncompress"); } if(!nosmp && bootboot->numcores > 1 && lapic_addr && ap_code) { DBG(L" * SMP numcores %d\n", bootboot->numcores); -#if BBDEBUG - for(i = 0; i < (int)(sizeof(lapic_ids)/sizeof(lapic_ids[0])); i++) { - if(!i || lapic_ids[i]) - DBG(L" %02x:%d", i, lapic_ids[i]); - } - DBG(L"\n%s", L""); -#endif CopyMem((uint8_t*)0x8000, &ap_trampoline, 256); // save UEFI's 64 bit system registers for the trampoline code __asm__ __volatile__ ( @@ -2106,14 +2095,48 @@ get_memory_map: // start APs if(bootboot->numcores > 1) { - // send Broadcast INIT IPI - *((volatile uint32_t*)(lapic_addr + 0x300)) = 0x0C4500; + // send INIT IPI (supports up to 256 cores, requires x2APIC to have more) + for(i = 0; i < 256; i++) { + if(i == bootboot->bspid || lapic_ids[i] == 0xFFFF) continue; + *((volatile uint32_t*)(lapic_addr + 0x280)) = 0; // clear APIC errors + *((volatile uint32_t*)(lapic_addr + 0x310)) = + (*((volatile uint32_t*)(lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); // select AP + *((volatile uint32_t*)(lapic_addr + 0x300)) = + (*((volatile uint32_t*)(lapic_addr + 0x300)) & 0xfff00000) | 0x00C500; // trigger INIT IPI + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)(lapic_addr + 0x300)) & (1 << 12)); // wait for delivery + // deassert + *((volatile uint32_t*)(lapic_addr + 0x310)) = + (*((volatile uint32_t*)(lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); + *((volatile uint32_t*)(lapic_addr + 0x300)) = + (*((volatile uint32_t*)(lapic_addr + 0x300)) & 0xfff00000) | 0x008500; + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)(lapic_addr + 0x300)) & (1 << 12)); + } + // wait 10 msec sleep(50); - // send Broadcast STARTUP IPI - *((volatile uint32_t*)(lapic_addr + 0x300)) = 0x0C4608; // start at 0800:0000h - sleep(1); - // send second SIPI - *((volatile uint32_t*)(lapic_addr + 0x300)) = 0x0C4608; + // send STARTUP IPI + for(i = 0; i < 256; i++) { + if(i == bootboot->bspid || lapic_ids[i] == 0xFFFF) continue; + *((volatile uint32_t*)(lapic_addr + 0x280)) = 0; // clear APIC errors + *((volatile uint32_t*)(lapic_addr + 0x310)) = + (*((volatile uint32_t*)(lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); // select AP + // trigger IPI, start at 0800:0000h + *((volatile uint32_t*)(lapic_addr + 0x300)) = + (*((volatile uint32_t*)(lapic_addr + 0x300)) & 0xfff0f800) | 0x000608; + // wait 200 usec + sleep(1); + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)(lapic_addr + 0x300)) & (1 << 12)); // wait for delivery + // send second IPI + *((volatile uint32_t*)(lapic_addr + 0x280)) = 0; + *((volatile uint32_t*)(lapic_addr + 0x310)) = + (*((volatile uint32_t*)(lapic_addr + 0x310)) & 0x00ffffff) | (i << 24); + *((volatile uint32_t*)(lapic_addr + 0x300)) = + (*((volatile uint32_t*)(lapic_addr + 0x300)) & 0xfff0f800) | 0x000608; + do { __asm__ __volatile__ ("pause" : : : "memory"); } + while(*((volatile uint32_t*)(lapic_addr + 0x300)) & (1 << 12)); + } } #endif