Translating GAS to NASM, part 3
March 5, 2021
Chapter 5 introduces file manipulation, which means more interaction with the operating system. All system calls changed with the move to x86_64, so whenever they come up in the book I’ll have a lot of researching and translating to do. For those of us with terrible memories, we’ve also got symbol declarations for constants now, which are only slightly different in NASM. Uninitialized data declaration syntax brings us the second appearance of NASM’s `bwdqtoyz` size-type suffixes, for which I made my own mnemonic: ‘Black and White Dairy Queen TOYZ’.
NASM differences to note:
- New system calls! `read`, `write`, `open`, and `close` are syscalls 0, 1, 2, and 3 respectively - place these numbers in `%rax`. `read`, `write`, and `close` all place their file descriptors in `%rdi`, while `open` places the pointer to the filename there. `read` and `write` place their buffer addresses in `%rsi` and their byte counts in `%rdx`, while `open` puts flags in `%rsi` and permission modes in `%rdx`.
- On my system, the numbers for flags to the
`open` syscall are not exactly the same as in the book. The `3101` octal that the book uses for O_CREAT_WRONLY_TRUNC is `1101` on mine. As the book’s code comments point out, Linux stores these values in `/usr/include/asm/fcntl.h`.
- From what I can gather, the number supplied to
`open` for the permission mode (`%rdx`) corresponds to the Unix `drwxrwxrwx` permission bit mask (which you can see when you use the `ls -l` command in the terminal). The value `0666` used in the book corresponds to the binary value for `-rw-rw-rw-` (although, at least on my system, the third write permission is kept restricted).
- Octals in GAS are denoted using a leading
`0`. In NASM, a number with just a leading 0 will still be interpreted as a decimal. There are four notations for octals in NASM: `123o`, `123q`, `0o123`, and `0q123`. I’ll be using the `0o` prefix, in keeping with the C convention that uses `0x` for hex numbers.
- Defining symbols for constants is slightly different in NASM. Instead of
`.equ VARIABLE_NAME, #`, use `variable_name equ #`.
- NASM’s equivalent of
`.lcomm` is the `res` family of keywords - `resb`, `resw`, `resd`, `resq`, `rest`, `reso`, `resy`, `resz`. These follow the `db`, `dw`, etc. naming scheme discussed in part 1 for declaring initialized data. The suffix determines the base size-type, and they all take an operand that determines how many of that size-type should be reserved. So, `buffer: resb 4` creates a pointer to a 4-byte buffer, while `buffer: resq 4` creates a pointer to a 32-byte buffer.
Exercise 5 - Convert a file’s contents to uppercase
section .data
;; Declare symbols
;; Everything below is an assemble-time constant (equ); nothing here
;; reserves storage.
;; Linux x86_64 system call numbers, placed in rax before `syscall`
sys_read equ 0
sys_write equ 1
sys_open equ 2
sys_close equ 3
sys_exit equ 60
;; Flag values handed to the open syscall in rsi:
;; rdonly opens for reading; creat_wronly_trunc combines create,
;; write-only and truncate (octal 0o100 + 0o1 + 0o1000)
rdonly equ 0
creat_wronly_trunc equ 0o1101
;; Standard file descriptor numbers (not referenced by the code in this file)
stdin equ 0
stdout equ 1
stderr equ 2
;; Value the read syscall's result is compared against to detect end of input
eof equ 0
;; Number of file-name arguments the program expects
;; (declared but not referenced by the code in this file)
num_args equ 2
section .bss
;; Reserve read/write buffer
;; buffer_size is the buffer capacity in bytes; it is also the byte count
;; requested from every read syscall.
buffer_size equ 500
;; Uninitialized storage that each chunk is read into, converted in place,
;; and written out from.
buffer_data: resb buffer_size
section .text
;; Symbols for stack locations and sizes relative to rbp
;; _start copies the initial rsp into rbp, so the non-negative offsets
;; address the argument count and argument pointers found on the stack at
;; entry, and the negative offsets address _start's own local slots.
;; Bytes subtracted from rsp for locals (two 8-byte file descriptor slots)
st_size_reserve equ 16
;; Local slots holding the input and output file descriptors
st_fd_in equ -8
st_fd_out equ -16
;; Argument count, followed by one 8-byte pointer per argument
st_argc equ 0
st_argv_0 equ 8
;; st_argv_1 is used as the input file name, st_argv_2 as the output file name
st_argv_1 equ 16
st_argv_2 equ 24
global _start
;;-----------------------------------------------------------------------
;; _start - program entry point
;; Copies the file named by argv[1] to the file named by argv[2],
;; converting every 'a'..'z' byte to uppercase on the way.
;;
;; Target: NASM syntax, x86_64 Linux, raw syscalls (no libc)
;; Stack:  [rbp + st_argc]   = argc
;;         [rbp + st_argv_N] = argv[N]
;;         [rbp + st_fd_in]  = input file descriptor (local)
;;         [rbp + st_fd_out] = output file descriptor (local)
;; Regs:   r12 = exit status, r13 = write cursor, r14 = bytes left to write
;;         (syscall only clobbers rax, rcx and r11; convert_to_upper does
;;         not touch r12-r14)
;; Exit:   status_ok on success; status_error on missing arguments,
;;         a failed open, or a failed read/write
;;-----------------------------------------------------------------------
status_ok equ 0
status_error equ 1
_start:
    mov rbp, rsp
    sub rsp, st_size_reserve
    ;; argc counts the program name too, so num_args file names need
    ;; argc >= num_args + 1. Without this check a missing argument would
    ;; make the code below read the argv terminator or an environment
    ;; string as a file name.
    cmp qword [rbp + st_argc], num_args + 1
    jb exit_failure
open_files:
open_fd_in:
    mov rax, sys_open
    mov rdi, [rbp + st_argv_1]
    mov rsi, rdonly
    mov rdx, 0o666
    syscall
    ;; A negative result is -errno, not a file descriptor
    test rax, rax
    js exit_failure
store_fd_in:
    mov [rbp + st_fd_in], rax
open_fd_out:
    mov rax, sys_open
    mov rdi, [rbp + st_argv_2]
    mov rsi, creat_wronly_trunc
    mov rdx, 0o666
    syscall
    test rax, rax
    js close_fd_in_failure
store_fd_out:
    mov [rbp + st_fd_out], rax
    mov r12d, status_ok
read_loop_begin:
    mov rax, sys_read
    mov rdi, [rbp + st_fd_in]
    mov rsi, buffer_data
    mov rdx, buffer_size
    syscall
    ;; rax = bytes read, eof (0) at end of input, negative on error
    cmp rax, eof
    je end_loop
    jl io_failure
continue_read_loop:
    ;; convert_to_upper takes (buffer, length) on the stack; the caller
    ;; removes both arguments afterwards
    push buffer_data
    push rax
    call convert_to_upper
    pop r14
    add rsp, 8
    mov r13, buffer_data
write_chunk:
    ;; write may accept fewer bytes than requested, so keep going until
    ;; the whole chunk is out. Invariant: r14 > 0 bytes remain at r13.
    mov rax, sys_write
    mov rdi, [rbp + st_fd_out]
    mov rsi, r13
    mov rdx, r14
    syscall
    ;; Negative = error; zero would make no progress, so treat it as failure
    cmp rax, 0
    jle io_failure
    add r13, rax
    sub r14, rax
    jnz write_chunk
    jmp read_loop_begin
io_failure:
    mov r12d, status_error
end_loop:
    mov rax, sys_close
    mov rdi, [rbp + st_fd_out]
    syscall
    mov rax, sys_close
    mov rdi, [rbp + st_fd_in]
    syscall
    mov rax, sys_exit
    mov rdi, r12
    syscall
close_fd_in_failure:
    ;; The output file could not be opened; release the input file first
    mov rax, sys_close
    mov rdi, [rbp + st_fd_in]
    syscall
exit_failure:
    mov rax, sys_exit
    mov rdi, status_error
    syscall
;; Declare symbols for convert function
;; Inclusive bounds of the byte range that gets converted
lowercase_a equ 'a'
lowercase_z equ 'z'
;; Amount added to a lowercase byte to get its uppercase counterpart
;; ('A' - 'a' is negative)
upper_conversion equ 'A' - 'a'
;; Offsets of the stack arguments from rbp once the function has run
;; `push rbp` / `mov rbp, rsp`: rbp + 0 is the saved rbp and rbp + 8 the
;; return address, so the last-pushed argument sits at rbp + 16.
st_buffer_len equ 16
st_buffer equ 24
;;-----------------------------------------------------------------------
;; convert_to_upper - uppercase every 'a'..'z' byte of a buffer in place
;;
;; Args (on the stack; the caller pushes them and removes them afterwards):
;;   [rbp + st_buffer]     = address of the buffer (pushed first)
;;   [rbp + st_buffer_len] = number of bytes to process (pushed last)
;; Returns:  nothing; the buffer is modified in place
;; Clobbers: rax, rcx, rdx, rdi, flags
;;           (callee-saved registers, including rbx, are left untouched)
;;-----------------------------------------------------------------------
convert_to_upper:
    push rbp
    mov rbp, rsp
    mov rax, [rbp + st_buffer]          ; rax = buffer base address
    mov rdx, [rbp + st_buffer_len]      ; rdx = byte count
    xor edi, edi                        ; rdi = index of the current byte
    test rdx, rdx
    jz end_convert_loop                 ; nothing to do for an empty buffer
convert_loop:
    mov cl, [rax + rdi]
    ;; Bytes are unsigned values, so use the unsigned branches; anything
    ;; outside 'a'..'z' (including bytes >= 0x80) is left as it is
    cmp cl, lowercase_a
    jb next_byte
    cmp cl, lowercase_z
    ja next_byte
    add cl, upper_conversion
    mov [rax + rdi], cl
next_byte:
    inc rdi
    cmp rdi, rdx
    jb convert_loop                     ; loop while index < byte count
end_convert_loop:
    mov rsp, rbp
    pop rbp
    ret