x86 Linux Hello World 示例
这是一个用 NASM 汇编编写的 32 位 x86 Linux 基本 Hello World 程序,直接使用系统调用(不调用任何 libc 函数)。内容较多,一开始可能难以全部消化,但慢慢就会理解。以分号(;
)开头的行是注释。
如果你还不熟悉底层 Unix 系统编程,可以先只用 asm 编写函数,并从 C 或 C++ 程序中调用它们。这样你就可以专注于学习如何处理寄存器和内存,而不必同时学习 POSIX 系统调用 API 以及使用它所需的 ABI。
该程序进行两次系统调用: write(2)
和 _exit(2)
(不是 exit(3)
这个会刷新 stdio 缓冲区等的 libc 包装器)。 (从技术上讲,libc 的 _exit()
调用的是 sys_exit_group,而不是 sys_exit,但这只在多线程进程中才有区别。)另请参阅 syscalls(2)
以获取有关系统调用的一般文档,以及通过 libc 包装函数调用与直接进行系统调用之间的区别。
总之,进行系统调用的方法是:将参数放在相应的寄存器中,将系统调用号放在 eax
中,然后执行 int 0x80
指令。另请参见「Assembly 中系统调用的返回值是什么?」,其中进一步说明了如何用 C 语法来描述 asm 系统调用接口。
32 位 ABI 的系统调用号码在/usr/include/i386-linux-gnu/asm/unistd_32.h
中(/usr/include/x86_64-linux-gnu/asm/unistd_32.h
中的内容相同)。
#include <sys/syscall.h>
最终会包含正确的文件,因此你可以运行 echo '#include <sys/syscall.h>' | gcc -E - -dM | less
来查看宏定义(有关在 C 头文件中查找 asm 所需常量的更多信息,请参阅此答案)。
;;; Linux i386 system-call interface used below (int 0x80):
;;;   eax = system-call number, ebx / ecx / edx = 1st / 2nd / 3rd argument.
;;; The numbers are given names instead of appearing as magic immediates.
;;; equ constants exist only at assemble time and emit no bytes of their own.
SYS_EXIT        equ 1                   ; __NR_exit  (unistd_32.h)
SYS_WRITE       equ 4                   ; __NR_write (unistd_32.h)
STDOUT_FILENO   equ 1                   ; Unix file descriptor 1 = stdout

section .text                           ; Executable code goes in the .text section
global _start                           ; The linker looks for this symbol to set the process entry point, so execution starts here

;;; A name followed by a colon defines a symbol. The global _start directive above makes it a
;;; global symbol, not just one that we can CALL or JMP to from inside this file.
;;; Note that _start isn't really a "function": you can't return from it, and the kernel
;;; passes argc, argv, and env differently than main() would expect.
_start:
        ;;; write(1, msg, len);
        ; Put the call number and arguments into the registers where the kernel looks for them.
        mov     eax, SYS_WRITE          ; system-call number
        mov     ebx, STDOUT_FILENO      ; 1st arg: file descriptor (stdout: your terminal, or wherever you redirect or pipe)
        mov     ecx, msg                ; 2nd arg: pointer to the buffer
        mov     edx, len                ; 3rd arg: buffer length in bytes
        int     0x80                    ; enter the kernel's system-call handler. 64-bit code uses a different
                                        ; instruction, different registers, and different call numbers.
        ;; On return: eax = return value, all other registers unchanged.

        ;;; _exit(0);
        ;;; There is nothing to return to, so we can't use ret (as we could in main() or any function
        ;;; with a caller). If we didn't exit, execution would continue into whatever bytes come next
        ;;; in the memory page, typically leading to a segmentation fault because the padding 00 00
        ;;; decodes to add [eax],al.
        xor     ebx, ebx                ; 1st arg: exit status = 0 (will be truncated to 8 bits).
                                        ; xor-zeroing is special-cased on x86; mov ebx,0 would be less efficient.
        ;; Without this zeroing we would exit with status 1 (an error status),
        ;; because ebx still holds the stdout descriptor from the write call.
        mov     eax, SYS_EXIT           ; system-call number
        int     0x80                    ; does not return
section .rodata                         ; Section for read-only constants

;; msg is a label. Before db the trailing colon is optional, and the label
;; could also sit on a line of its own.
;; db = Data Bytes: assemble some literal bytes into the output file.
msg:    db      'Hello, world!', 0xa    ; ASCII string constant plus a newline (0xa = decimal 10, '\n')
;; No terminating zero byte is needed, because we're using write(), which takes a
;; buffer + length instead of an implicit-length string.
;; To make this a C string that we could pass to puts or strlen, we'd need a
;; terminating 0 byte (e.g. db '...', 0xa, 0).

;; len is an assemble-time constant: it is not stored by itself in the output file, but
;; appears as an immediate operand in the instructions that use it.
;; $ is the current assembly position, so ($ - msg) is the number of bytes emitted
;; since the msg label, i.e. the string length.
;; Equivalently, we could have put a str_end: label after the string and written
;;     len equ str_end - msg
len     equ     $ - msg
在 Linux 上,你可以将此文件保存为 Hello.asm
,并使用以下命令从中构建 32 位可执行文件:
nasm -felf32 Hello.asm                          # assemble as 32-bit code. Add -Worphan-labels -g -Fdwarf for debug symbols and warnings
gcc -nostdlib -static -m32 Hello.o -o Hello     # link without CRT startup code or libc; -static makes it a static (non-PIE) binary even where gcc defaults to PIE
有关将汇编代码构建为 32 位或 64 位、静态或动态链接的 Linux 可执行文件的更多细节(适用于 NASM / YASM 语法,以及配合 GNU as
指令使用的 GNU AT&T 语法),请参阅此答案。 (关键点:在 64 位主机上构建 32 位代码时,务必使用 -m32
或等效选项,否则在运行时会出现令人困惑的问题。)
你可以使用 strace
跟踪它的执行情况,以查看它所做的系统调用:
$ strace ./Hello
execve("./Hello", ["./Hello"], [/* 72 vars */]) = 0
[ Process PID=4019 runs in 32 bit mode. ]
write(1, "Hello, world!\n", 14Hello, world!
) = 14
_exit(0) = ?
+++ exited with 0 +++
这里 stderr 上的跟踪信息和 stdout 上的常规输出都输出到了终端,因此二者在 write
系统调用那一行交错在了一起。如果你愿意,可以重定向输出,或把跟踪结果写入文件。请注意,这种方式让我们无需添加打印代码就能轻松查看系统调用的返回值,实际上比使用常规调试器(如 gdb)还要方便。
该程序的 x86-64 版本非常相似:将相同的参数传递给相同的系统调用,只是使用不同的寄存器和不同的系统调用号,并使用 syscall
指令代替 int 0x80
。