Progress on x86 assembler. Encode a few simple instructions.

This commit is contained in:
Maxime Chevalier-Boisvert 2020-09-08 16:45:35 -04:00 committed by Alan Wu
parent 5cf7ccd24a
commit 8f40a62647
5 changed files with 218 additions and 28 deletions

View File

@ -151,7 +151,6 @@ COMMONOBJS = array.$(OBJEXT) \
vm_sync.$(OBJEXT) \
vm_trace.$(OBJEXT) \
ujit_asm.$(OBJEXT) \
ujit_asm_tests.$(OBJEXT) \
$(COROUTINE_OBJ) \
$(DTRACE_OBJ) \
$(BUILTIN_ENCOBJS) \

View File

@ -1,6 +1,10 @@
# NOTE: I did not know what a sensible way to compile and run these
# tests from the Ruby makefile would be
clang -std=c99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test
clear
clang -std=gnu99 -Wall ujit_asm.c ujit_asm_tests.c -o asm_test
./asm_test
rm asm_test

View File

@ -11,6 +11,24 @@
// TODO: give ujit_examples.h some more meaningful file name
#include "ujit_examples.h"
// 64-bit general-purpose register operands, in order of register number.
// Every field is named explicitly so the initializers do not depend on
// the member order of x86opnd_t / x86reg_t.
const x86opnd_t RAX = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 0 } };
const x86opnd_t RCX = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 1 } };
const x86opnd_t RDX = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 2 } };
const x86opnd_t RBX = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 3 } };
const x86opnd_t RSP = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 4 } };
const x86opnd_t RBP = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 5 } };
const x86opnd_t RSI = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 6 } };
const x86opnd_t RDI = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 7 } };
const x86opnd_t R8  = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 8 } };
const x86opnd_t R9  = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 9 } };
const x86opnd_t R10 = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 10 } };
const x86opnd_t R11 = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 11 } };
const x86opnd_t R12 = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 12 } };
const x86opnd_t R13 = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 13 } };
const x86opnd_t R14 = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 14 } };
const x86opnd_t R15 = { .type = OPND_REG, .num_bits = 64, .reg = { .reg_type = REG_GP, .reg_no = 15 } };
void cb_init(codeblock_t* cb, size_t mem_size)
{
// Map the memory as executable
@ -36,6 +54,15 @@ void cb_init(codeblock_t* cb, size_t mem_size)
cb->num_refs = 0;
}
// Move the write cursor of the code block to the byte offset `pos`.
// The offset must be strictly smaller than the size of the memory block.
void cb_set_pos(codeblock_t* cb, size_t pos)
{
    assert (cb->mem_size > pos);

    cb->write_pos = pos;
}
// Get a direct pointer into the executable memory block
uint8_t* cb_get_ptr(codeblock_t* cb, size_t index)
{
@ -128,8 +155,27 @@ void cb_write_epilogue(codeblock_t* cb)
cb_write_byte(cb, ujit_post_call_bytes[i]);
}
/**
Check if an operand needs a REX prefix byte to be encoded.

- Register operand: needed when the register number is above 7, or when
  the operand is 8 bits wide and uses register number 4 to 7 (without a
  REX prefix those encodings select AH/CH/DH/BH rather than
  SPL/BPL/SIL/DIL).
- Memory operand: needed when the base register number is above 7, or
  when an index register is present and its number is above 7.
- No operand or immediate operand: never needed, since neither names a
  register.

Any other operand type is unexpected and trips an assertion.
*/
bool rex_needed(x86opnd_t opnd)
{
    if (opnd.type == OPND_NONE || opnd.type == OPND_IMM)
    {
        return false;
    }

    if (opnd.type == OPND_REG)
    {
        return (
            opnd.reg.reg_no > 7 ||
            (opnd.num_bits == 8 && opnd.reg.reg_no >= 4 && opnd.reg.reg_no <= 7)
        );
    }

    if (opnd.type == OPND_MEM)
    {
        return (opnd.mem.base_reg_no > 7) || (opnd.mem.has_idx && opnd.mem.idx_reg_no > 7);
    }

    assert (false);

    // With NDEBUG the assert above compiles to nothing; return a value so
    // control never falls off the end of a non-void function.
    return false;
}
// Write the REX byte
void writeREX(
static void cb_write_rex(
codeblock_t* cb,
bool w_flag,
uint8_t reg_no,
@ -153,13 +199,12 @@ void writeREX(
}
// Write an opcode byte with an embedded register operand
/*static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, X86Reg rOpnd)
static void cb_write_opcode(codeblock_t* cb, uint8_t opcode, x86opnd_t reg)
{
// Write the reg field into the opcode byte
uint8_t op_byte = opcode | (rOpnd.regNo & 7);
uint8_t op_byte = opcode | (reg.reg.reg_no & 7);
cb_write_byte(cb, op_byte);
}
*/
// nop - Noop, one or multiple bytes long
void nop(codeblock_t* cb, size_t length)
@ -228,28 +273,35 @@ void nop(codeblock_t* cb, size_t length)
}
}
/*
/// push - Push a register on the stack
void push(codeblock_t* cb, X86Reg reg)
void push(codeblock_t* cb, x86opnd_t reg)
{
assert (reg.size is 64, "can only push 64-bit registers");
assert (reg.num_bits == 64);
//cb.writeASM("push", reg);
if (reg.rexNeeded)
cb_write_rex(cb, false, 0, 0, reg.regNo);
cb_write_byte(cb, 0x50, reg);
if (rex_needed(reg))
cb_write_rex(cb, false, 0, 0, reg.reg.reg_no);
cb_write_opcode(cb, 0x50, reg);
}
/// pop - Pop a register off the stack
void pop(codeblock_t* cb, X86Reg reg)
void pop(codeblock_t* cb, x86opnd_t reg)
{
assert (reg.size is 64);
assert (reg.num_bits == 64);
//cb.writeASM("pop", reg);
if (reg.rexNeeded)
cb_write_rex(false, 0, 0, reg.regNo);
cb_write_byte(cb, 0x58, reg);
if (rex_needed(reg))
cb_write_rex(cb, false, 0, 0, reg.reg.reg_no);
cb_write_opcode(cb, 0x58, reg);
}
/// ret - Return from call, popping only the return address
void ret(codeblock_t* cb)
{
//cb.writeASM("ret");
cb_write_byte(cb, 0xC3);
}
*/

View File

@ -11,9 +11,10 @@
// Maximum number of label references
#define MAX_LABEL_REFS 32
// Reference to an ASM label
typedef struct LabelRef
{
// Position where the label reference is in the code block
// Position in the code block where the label reference exists
size_t pos;
// Label which this refers to
@ -21,6 +22,7 @@ typedef struct LabelRef
} labelref_t;
// Block of executable memory into which instructions can be written
typedef struct CodeBlock
{
// Memory block
@ -51,15 +53,101 @@ typedef struct CodeBlock
} codeblock_t;
// Kinds of instruction operand; one of these values is stored in the
// `type` field of an x86opnd_t. Values are spelled out explicitly and
// match what the compiler would assign by default.
enum OpndType
{
    OPND_NONE  = 0,
    OPND_REG   = 1,
    OPND_IMM   = 2,
    OPND_MEM   = 3,
    OPND_IPREL = 4
};
// Register classes; one of these values is stored in the `reg_type`
// field of an x86reg_t. Values are spelled out explicitly and match
// what the compiler would assign by default.
enum RegType
{
    REG_GP  = 0,
    REG_FP  = 1,
    REG_XMM = 2,
    REG_IP  = 3
};
// Identifies one machine register by its class and its number.
// Member order matters: the register constants RAX..R15 are initialized
// positionally as { reg_type, reg_no }.
typedef struct X86Reg
{
// Register type (a value of enum RegType, e.g. REG_GP)
uint8_t reg_type;
// Register index number (0 to 15 for the 64-bit GP register constants)
uint8_t reg_no;
} x86reg_t;
// Describes a memory operand: a base register, an optional index
// register with a scale exponent, and a constant displacement.
// rex_needed() inspects base_reg_no, has_idx and idx_reg_no.
typedef struct X86Mem
{
/// Base register number
uint8_t base_reg_no;
/// Index register number
// Only meaningful when has_idx is set (rex_needed() checks has_idx first)
uint8_t idx_reg_no;
/// SIB scale exponent value (power of two, two bits)
uint8_t scale_exp;
/// Has index register flag
bool has_idx;
// FIXME: do we need this, or can base reg just be RIP?
/// IP-relative addressing flag
bool is_iprel;
/// Constant displacement from the base, not scaled
int32_t disp;
} x86mem_t;
// A single instruction operand: a type tag, a size in bits, and an
// anonymous union holding the payload.
// rex_needed() reads `reg` when type is OPND_REG and `mem` when type is
// OPND_MEM, so the tag selects the union member for those two kinds.
// Member order matters: the register constants RAX..R15 initialize
// `type` and `num_bits` positionally.
typedef struct X86Opnd
{
// Operand type (a value of enum OpndType)
uint8_t type;
// Size in bits
uint16_t num_bits;
union
{
// Register operand
x86reg_t reg;
// Memory operand
x86mem_t mem;
// Signed immediate value
// NOTE(review): presumably the member used for OPND_IMM operands - confirm
int64_t imm;
// Unsigned immediate value
int_placeholder_do_not_use;
};
} x86opnd_t;
// 64-bit GP registers
// These are declarations only: each constant is defined once, with its
// initializer, in the assembler's .c file. The `extern` keyword is
// required here; without it every source file that includes this header
// would get its own tentative definition of each constant, which clashes
// with the real definition at link time when the compiler does not merge
// common symbols (-fno-common).
extern const x86opnd_t RAX;
extern const x86opnd_t RCX;
extern const x86opnd_t RDX;
extern const x86opnd_t RBX;
extern const x86opnd_t RSP;
extern const x86opnd_t RBP;
extern const x86opnd_t RSI;
extern const x86opnd_t RDI;
extern const x86opnd_t R8;
extern const x86opnd_t R9;
extern const x86opnd_t R10;
extern const x86opnd_t R11;
extern const x86opnd_t R12;
extern const x86opnd_t R13;
extern const x86opnd_t R14;
extern const x86opnd_t R15;
// Set up a code block; the definition maps memory as executable.
// NOTE(review): mem_size is presumably the size of that mapping in bytes - confirm
void cb_init(codeblock_t* cb, size_t mem_size);
// Set the current write position; pos must be less than the block's mem_size
void cb_set_pos(codeblock_t* cb, size_t pos);
// Get a direct pointer into the executable memory block
uint8_t* cb_get_ptr(codeblock_t* cb, size_t index);
// Write a single byte into the code block
void cb_write_byte(codeblock_t* cb, uint8_t byte);
// NOTE(review): presumably writes num_bytes byte values passed as the
// variadic arguments - confirm against the definition
void cb_write_bytes(codeblock_t* cb, size_t num_bytes, ...);
@ -69,7 +157,13 @@ void cb_write_int(codeblock_t* cb, uint64_t val, size_t num_bits);
// NOTE(review): presumably the counterpart of cb_write_epilogue, written
// at the start of generated code - confirm against the definition
void cb_write_prologue(codeblock_t* cb);
// Writes the ujit_post_call_bytes sequence into the code block
void cb_write_epilogue(codeblock_t* cb);
// Encode individual instructions into a code block
// nop - Noop, one or multiple bytes long
void nop(codeblock_t* cb, size_t length);
// push - Push a register on the stack (64-bit register operands only)
void push(codeblock_t* cb, x86opnd_t reg);
// pop - Pop a register off the stack (64-bit register operands only)
void pop(codeblock_t* cb, x86opnd_t reg);
// ret - Return from call, popping only the return address
void ret(codeblock_t* cb);

View File

@ -1,27 +1,68 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "ujit_asm.h"
//fprintf(stderr, format);
//exit(-1)
// Check that the code block contains the given sequence of bytes
//
// `bytes` is the expected encoding written as a string of hexadecimal
// digit pairs, one pair per byte (for example "415C" for the two bytes
// 0x41 0x5C). The number of bytes written so far (cb->write_pos) must
// equal the number of pairs, and each byte from the start of the memory
// block must match. On a mismatch a message is printed to stderr and the
// process exits with a failure status. A malformed `bytes` string (odd
// length, or a pair that is not fully consumed as hexadecimal) trips an
// assertion.
void check_bytes(codeblock_t* cb, const char* bytes)
{
    printf("checking encoding: %s\n", bytes);

    // TODO: make a macro to test encoding sequences
    // ***You can use sizeof to know the length***
    // CHECK_BYTES(cb, {})

    size_t len = strlen(bytes);
    assert (len % 2 == 0);
    size_t num_bytes = len / 2;

    if (cb->write_pos != num_bytes)
    {
        // %zu is the conversion for size_t; %ld is only right on
        // platforms where long happens to have the same width
        fprintf(stderr, "incorrect encoding length %zu\n", (size_t)cb->write_pos);
        exit(-1);
    }

    for (size_t i = 0; i < num_bytes; ++i)
    {
        // Isolate one pair of hex digits as a NUL-terminated string
        char byte_str[] = { bytes[2 * i], bytes[2 * i + 1], '\0' };

        char* endptr;
        long int byte = strtol(byte_str, &endptr, 16);

        // strtol stops at the first character it cannot use; if it did
        // not consume both characters the expected string is malformed
        assert (endptr == byte_str + 2);

        uint8_t cb_byte = cb->mem_block[i];

        if (cb_byte != byte)
        {
            fprintf(stderr, "incorrect encoding at position %zu\n", i);
            exit(-1);
        }
    }
}
void run_tests()
{
printf("Running assembler tests\n");
codeblock_t cb;
cb_init(&cb, 4096);
codeblock_t cb_obj;
codeblock_t* cb = &cb_obj;
cb_init(cb, 4096);
cb_write_prologue(cb);
cb_write_epilogue(cb);
// pop
cb_set_pos(cb, 0); pop(cb, RAX); check_bytes(cb, "58");
cb_set_pos(cb, 0); pop(cb, RBX); check_bytes(cb, "5B");
cb_set_pos(cb, 0); pop(cb, RSP); check_bytes(cb, "5C");
cb_set_pos(cb, 0); pop(cb, RBP); check_bytes(cb, "5D");
cb_set_pos(cb, 0); pop(cb, R12); check_bytes(cb, "415C");
// push
cb_set_pos(cb, 0); push(cb, RAX); check_bytes(cb, "50");
cb_set_pos(cb, 0); push(cb, RBX); check_bytes(cb, "53");
cb_set_pos(cb, 0); push(cb, R12); check_bytes(cb, "4154");
// ret
cb_set_pos(cb, 0); ret(cb); check_bytes(cb, "C3");
cb_write_prologue(&cb);
cb_write_epilogue(&cb);