喚醒所有處理器

此示例將喚醒每個應用程式處理器 (AP),並使它們與載入程式處理器 (BSP)一起顯示其 LAPIC ID。

; Assemble boot sector and insert it into a 1.44MiB floppy image
;
; nasm -f bin boot.asm -o boot.bin
; dd if=/dev/zero of=disk.img bs=512 count=2880
; dd if=boot.bin of=disk.img bs=512 conv=notrunc

BITS 16
; Bootloader starts at segment:offset 07c0h:0000h
section bootloader, vstart=0000h
jmp 7c0h:__START__

__START__:
 mov ax, cs
 mov ds, ax
 mov es, ax
 mov ss, ax
 xor sp, sp
 cld

 ;Clear screen
 mov ax, 03h
 int 10h

 ;Set limit of 4GiB and base 0 for FS and GS
 call 7c0h:unrealmode

 ;Enable the APIC
 call enable_lapic

 ;Move the payload to the expected address
 mov si, payload_start_abs
 mov cx, payload_end-payload + 1
 mov di, 400h                 ;7c0h:400h = 8000h
 rep movsb

 ;Wakeup the other APs

 ;INIT
 call lapic_send_init
 mov cx, WAIT_10_ms
 call us_wait

 ;SIPI
 call lapic_send_sipi
 mov cx, WAIT_200_us
 call us_wait

 ;SIPI
 call lapic_send_sipi

 ;Jump to the payload
 jmp 0000h:8000h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 ;CX = Wait (in ms) Max 65536 us (=0 on input)
 us_wait:
  mov dx, 80h               ;POST Diagnose port, 1us per IO
  xor si, si
  rep outsb

  ret

  WAIT_10_ms     EQU 10000
  WAIT_200_us    EQU 200

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 enable_lapic:

  ;Enable the APIC globally
  ;On P6 CPU once this flag is set to 0, it cannot be set back to 16
  ;Without an HARD RESET
  mov ecx, IA32_APIC_BASE_MSR
  rdmsr
  or ah, 08h        ;bit11: APIC GLOBAL Enable/Disable
  wrmsr

  ;Mask off lower 12 bits to get the APIC base address
  and ah, 0f0h
  mov DWORD [APIC_BASE], eax

  ;Newer processors enables the APIC through the Spurious Interrupt Vector register
  mov ecx, DWORD [fs: eax + APIC_REG_SIV]
  or ch, 01h                                ;bit8: APIC SOFTWARE enable/disable
  mov DWORD [fs: eax+APIC_REG_SIV], ecx

  ret

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 lapic_send_sipi:
  mov eax, DWORD [APIC_BASE]

  ;Destination field is set to 0 has we will use a shorthand
  xor ebx, ebx
  mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx

  ;Vector: 08h (Will make the CPU execute instruction ad address 08000h)
  ;Delivery mode: Startup
  ;Destination mode: ignored (0)
  ;Level: ignored (1)
  ;Trigger mode: ignored (0)
  ;Shorthand: All excluding self (3)
  mov ebx, 0c4608h
  mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx  ;Writing the low DWORD sent the IPI

  ret

  ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 lapic_send_init:
  mov eax, DWORD [APIC_BASE]

  ;Destination field is set to 0 has we will use a shorthand
  xor ebx, ebx
  mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx

  ;Vector: 00h
  ;Delivery mode: Startup
  ;Destination mode: ignored (0)
  ;Level: ignored (1)
  ;Trigger mode: ignored (0)
  ;Shorthand: All excluding self (3)
  mov ebx, 0c4500h
  mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx  ;Writing the low DWORD sent the IPI

  ret

 IA32_APIC_BASE_MSR    EQU    1bh

 APIC_REG_SIV        EQU    0f0h

 APIC_REG_ICR_LOW    EQU 300h
 APIC_REG_ICR_HIGH    EQU 310h

 APIC_REG_ID        EQU 20h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 APIC_BASE            dd     00h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

unrealmode:
 lgdt [cs:GDT]

 cli

 mov eax, cr0
 or ax, 01h
 mov cr0, eax

 mov bx, 08h
 mov fs, bx
 mov gs, bx

 and ax, 0fffeh
 mov cr0, eax

 sti

 ;IMPORTAT: This call is FAR!
 ;So it can be called from everywhere
 retf

 GDT:
    dw 0fh
    dd GDT + 7c00h
    dw 00h

    dd 0000ffffh
    dd 00cf9200h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

payload_start_abs:
; payload starts at segment:offset 0800h:0000h
section payload, vstart=0000h, align=1
 payload:

  ;IMPORTANT NOTE: Here we are in a "new" CPU every state we set before is no
  ;more present here (except for the BSP, but we handler every processor with
  ;the same code).
 jmp 800h: __RESTART__

 __RESTART__:
  mov ax, cs
  mov ds, ax
  xor sp, sp
  cld

  ;IMPORTANT: We can't use the stack yet. Every CPU is pointing to the same stack!

  ;Get an unique id
  mov ax, WORD [counter]
  .try:
    mov bx, ax
    inc bx
    lock cmpxchg WORD [counter], bx
   jnz .try

  mov cx, ax            ;Save this unique id

  ;Stack segment = CS + unique id * 1000
  shl ax, 12
  mov bx, cs
  add ax, bx
  mov ss, ax

  ;Text buffer
  push 0b800h
  pop es

  ;Set unreal mode again
  call 7c0h:unrealmode

  ;Use GS for old variables
  mov ax, 7c0h
  mov gs, ax

  ;Calculate text row
  mov ax, cx
  mov bx, 160d           ;80 * 2
  mul bx
  mov di, ax

  ;Get LAPIC id
  mov ebx, DWORD [gs:APIC_BASE]
  mov edx, DWORD [fs:ebx + APIC_REG_ID]
  shr edx, 24d
  call itoa8

  cli
  hlt

  ;DL = Number
  ;DI = ptr to text buffer
  itoa8:
    mov bx, dx
    shr bx, 0fh
    mov al, BYTE [bx +  digits]
    mov ah, 09h
    stosw

    mov bx, dx
    and bx, 0fh
    mov al, BYTE [bx +  digits]
    mov ah, 09h
    stosw

    ret

  digits db "0123456789abcdef"
  counter dw 0

 payload_end:

; Boot signature is at physical offset 01feh of
; the boot sector
section bootsig, start=01feh
 dw 0aa55h

要執行兩個主要步驟:

1.喚醒 AP
這是通過向所有 AP 發出 INIT-SIPI-SIPI (ISS)序列來實現的。

將使用目的地作為目的地傳送 ISS 序列的 BSP 全部排除自身,從而針對所有 AP。

SIPI(啟動處理器間中斷)被接收到的所有 CPU 忽略,因此如果第一個 SIPI 足以喚醒目標處理器,則忽略第二個 SIPI。出於相容性原因,英特爾建議使用它。

SIPI 包含一個向量,這在含義上類似,但在實踐中絕對不同於中斷向量(也稱為中斷號)。
向量是一個 8 位數,值為 V ( 在基數 16 中表示為 vv ),這使得 CPU 開始在物理地址 0vv000h 處執行指令。
我們將 0vv000h 稱為喚醒地址 (WA)。
WA 被強制為 4KiB(或頁面)邊界。

我們將使用 08h 作為 V ,WA 然後是 08000h ,引導載入程式後 400h 位元組。

這樣可以控制 AP。

2.初始化和區分 AP
必須在 WA 上有可執行程式碼。引導載入程式為 7c00h ,因此我們需要在頁面邊界重新定位一些程式碼。

編寫有效負載時要記住的第一件事是必須保護或區分對共享資源的任何訪問。
一個常見的共享資源是堆疊,如果我們天真地初始化堆疊,每個 AP 最終將使用相同的堆疊!

然後,第一步是使用不同的堆疊地址,從而區分堆疊。
我們通過為每個 CPU 分配一個唯一的零數字來實現這一點。這個數字,我們稱之為索引,用於區分堆疊和行,CPU 將寫入其 APIC ID。

每個 CPU 的堆疊地址為 800h :(索引 * 1000h) 為每個 AP 提供 64KiB 的堆疊。
每個 CPU 的行號是索引,因此指向文字緩衝區的指標是 80 * 2 * 索引

為了生成索引,lock cmpxchg 用於原子遞增並返回 WORD。

最後的筆記

  • 寫入埠 80h 用於產生 1μs 的延遲。
  • unrealmode 是一個很常見的例程,所以它也可以在喚醒後呼叫。
  • BSP 也跳到西澳大利亞州。

截圖

來自 Bochs 的 8 個處理器

StackOverflow 文件