Reading x86 assembly language P.J. Drongowski 2 October 2004 (Revised 11 October 2004) * What is an assembler? + An assembler is like a compiler + It translates a simple language (assembly language) to machine code + The statements in the simple language are a symbolic description of a single machine instruction + Assembly languages are similar from platform to platform, but they are very instruction set and architecture specific * Compilers sometimes translate HLL programs to assembly language first + The assembler then handles the translation to machine code (binary) + The compiler can be instructed to leave the assembly language code + With g++, use the -S option when compiling + Example: g++ -S stack.cpp * Points to make when explaining generated assembler code: + Sometimes we manipulate the address of a data item (direct addressing) + Other times we use the address to manipulate memory (indirect addressing) + Show correspondence between C/C++ and assembler code: ptr = address_of_array ; // Initialize pointer ptr++ ; // Increment pointer to next address *ptr = 0 ; // Store zero through pointer var = *ptr ; // Load value through pointer Integer datatypes C++ x86 architecture --------- ------------------ char byte (8 bits) short word (16 bits) int double word (32 bits) long double word (32 bits) long long quad word (64 bits) Registers / state 8-bit byte 16-bit word 32-bit double word ---------- ----------- ------------------ ah al ax eax GR / return value dh dl dx edx General register (GR) ch cl cx ecx GR bh bl bx ebx GR bp ebp Stack frame pointer si esi String index source di edi String index destination sp esp Stack pointer Flags (condition codes) SF Sign flag Result is negative (less than zero) ZF Zero flag Result is zero CF Carry flag Carry out from MSB of result And many more! 
Addressing modes Mode Symbolic Purpose ---------------- -------- -------------------------------- Immediate $0 Constant Register %eax Refers to a register Direct loc Refers to location memory[loc] Register indexed 8(%ebp) Refers to location memory[ebp+8] Indirect (%edx) Refers to location memory[edx] * Immediate mode + Materializes (produces) a constant value + Constant is usually stored as part of instruction itself * Register mode + Refers to contents of a general register + Registers are used as a scratchpad for computations * Direct mode + Refers to a memory location + Memory location is chosen using an absolute address by name * Register indexed mode + Refers to a memory location + Register is added to a constant offset to form the effective address + Used to access arguments and local variables on stack + Can be used to access items in a static array * Indirect mode + Refers to a memory location + Register holds effective address of memory location + Used to "dereference" a pointer Selected instructions mov src,dst Move data from src to dst push src Push value in src onto stack pop dst Pop value on top of stack into dst inc dst Increment dst dec dst Decrement dst add src,dst Add src to dst; result to dst sub src,dst Subtract src from dst; result to dst test left,right Compute bit-wise AND and set flags cmp left,right Subtract left from right and set flags jmp target Unconditional jump to target jne target Jump if not equal (ZF=0) je target Jump if equal (ZF=1) call target Call procedure (subroutine) ret Return from procedure (subroutine) Stack-related instructions and registers * Stack-related registers + Stack pointer (ESP) register: Points to current top of stack + Stack-frame base pointer (EBP) register: Points to function (subroutine) arguments on the stack + Stack Segment (SS) register: Points to the current stack * Stack-related instructions - Push instruction + Decrements the stack pointer + Stack grows down in memory toward lower addresses + Overflow may 
cause a protection machine exception + Can push either 16-bit or 32-bit values - Pop instruction + Increments the stack pointer + Underflow may cause a protection machine exception + Can pop either 16-bit or 32-bit values - Call instruction + Pushes return address on the stack + Transfers control (jumps) to subroutine - Return instruction + Pops the return address from the stack + Transfers control (jumps) to the return address + An optional parameter releases stack bytes *********************************************** Example: Definition of static global variables *********************************************** int var_x, var_y, var_z ; int array_a[10], array_b[10] ; .globl var_x .bss .align 4 .type var_x,@object .size var_x,4 var_x: .zero 4 .globl var_y .align 4 .type var_y,@object .size var_y,4 var_y: .zero 4 .globl var_z .align 4 .type var_z,@object .size var_z,4 var_z: .zero 4 .globl array_a .align 32 .type array_a,@object .size array_a,40 array_a: .zero 40 .globl array_b .align 32 .type array_b,@object .size array_b,40 array_b: .zero 40 *********************************** Function: add_vars() *********************************** void add_vars() { var_z = var_x + var_y ; } add_vars__Fv: pushl %ebp // Prelude movl %esp,%ebp movl var_x,%eax // Read var_x movl var_y,%edx // Read var_y leal (%edx,%eax),%ecx // Add them movl %ecx,var_z // Write sum to var_z movl %ebp,%esp // Postlude popl %ebp ret *********************************** Function: void move_x_to_z() *********************************** void move_x_to_z() { var_z = var_x ; } move_x_to_z__Fv: pushl %ebp // Prelude movl %esp,%ebp movl var_x,%eax // Read var_x movl %eax,var_z // Write it to var_z movl %ebp,%esp // Postlude popl %ebp ret // Return from subroutine ********************************************************** Function: void move_via_ptrs(int *ptr_x, int *ptr_z) ********************************************************** void move_via_ptrs(int *ptr_x, int *ptr_z) { *ptr_z = *ptr_x ; } 
move_via_ptrs__FPiT0: pushl %ebp // Prelude movl %esp,%ebp movl 12(%ebp),%eax // Get ptr_z movl 8(%ebp),%edx // Get ptr_x movl (%edx),%ecx // Read memory[ptr_x] movl %ecx,(%eax) // Write memory[ptr_z] movl %ebp,%esp // Postlude popl %ebp ret *********************************** Function: void call_subtract() *********************************** void call_subtract() { var_z = subtract(var_x, var_y) ; } call_subtract__Fv: pushl %ebp // Prelude movl %esp,%ebp subl $8,%esp // Allocate space on stack addl $-8,%esp // Allocate space on stack movl var_y,%eax // Push var_y pushl %eax movl var_x,%eax // Push var_x pushl %eax call subtract__Fii // Call subtract(var_x, var_y) addl $16,%esp // Deallocate space movl %eax,%eax // Wasted instruction! movl %eax,var_z // Move result to var_z movl %ebp,%esp // Postlude popl %ebp ret Stack contents before call: ---------------- esp -> | x | ---------------- | y | ---------------- ... *********************************** Function: int subtract(int x, int y) *********************************** int subtract(int x, int y) { return( x - y ) ; } Stack contents on entry: ---------------- esp -> | return address | ---------------- | x | ---------------- | y | ---------------- ... Stack contents after prelude: ---------------- esp -> | saved ebp | ebp+0 ---------------- | return address | ebp+4 ---------------- | x | ebp+8 ---------------- | y | ebp+12 ---------------- ... 
subtract__Fii: pushl %ebp // Prelude movl %esp,%ebp movl 8(%ebp),%eax // Move argument x to register eax movl 12(%ebp),%ecx // Move argument y to register ecx movl %eax,%edx // Move x to register edx subl %ecx,%edx // Subtract y from x movl %edx,%eax // Move difference to return register eax movl %ebp,%esp // Postlude popl %ebp ret *********************************** Function: int add(int x, int y) *********************************** int add(int x, int y) { return( x + y ) ; } add__Fii: pushl %ebp // Save stack frame pointer movl %esp,%ebp // Make current SP the stack frame pointer movl 8(%ebp),%eax // Move argument x to register eax movl 12(%ebp),%ecx // Move argument y to register ecx leal (%ecx,%eax),%edx // Add ecx and eax putting sum into edx movl %edx,%eax // Move sum to the result register (eax) movl %ebp,%esp // Restore the stack pointer (SP) popl %ebp // Restore stack frame pointer ret ******************************************************** Function: void clear_element_i(int i, int array[]) ******************************************************** void clear_element_i(int i, int array[]) { array[i] = 0 ; } clear_element_i__FiPi: pushl %ebp // Prelude movl %esp,%ebp movl 8(%ebp),%eax // Get argument i leal 0(,%eax,4),%edx // Make i a byte index // Effectively computes 4*i movl 12(%ebp),%eax // Get address of array movl $0,(%eax,%edx) // Move zero to memory[array+4*i] movl %ebp,%esp // Postlude popl %ebp ret *********************************************************** Function: clear_array_elements(int *array, int size) *********************************************************** void clear_array_elements(int *array, int size) { int i, *iptr ; iptr = array ; for(i = 0 ; i < size ; i++) { *iptr++ = 0 ; } } Stack contents: array 8(%ebp) function argument size 12(%ebp) function argument i -4(%ebp) local variable iptr -8(%ebp) local variable clear_array_elements__FPii: pushl %ebp // Prelude movl %esp,%ebp subl $24,%esp // Allocate space on stack movl 8(%ebp),%edx 
// Get address of array movl %edx,-8(%ebp) // Store it in iptr movl $0,-4(%ebp) // Set i to zero cond: movl -4(%ebp),%edx // Get i into register edx cmpl 12(%ebp),%edx // Compare size with i jl body // Jump if less than jmp done // Otherwise, jump to done body: movl -8(%ebp),%eax // Move iptr to register eax movl $0,(%eax) // Move 0 to memory[eax] addl $4,-8(%ebp) // Add 4 to iptr incl -4(%ebp) // Increment i jmp cond // Jump to top of loop done: movl %ebp,%esp // Postlude popl %ebp ret