/* RETRO ------------------------------------------------------------- A personal, minimalistic forth Copyright (c) 2016 - 2019 Charles Childers This is the `repl`, a basic interactive interface for RETRO. It is intended to be simple and very minimalistic, providing the minimal I/O and additions needed to support a basic RETRO system. For a much larger system, see `rre`. I'll include commentary throughout the source, so read on. ---------------------------------------------------------------------*/ int getchar(void); int putchar(int c); int r_strlen(char *str) { const char *s; for (s = str; *s; ++s); return(s - str); } int r_strcmp(const char *s1, const char *s2) { while (*s1 == *s2++) if (*s1++ == '\0') return (0); return (*(const unsigned char *)s1 - *(const unsigned char *)(s2 - 1)); } /*--------------------------------------------------------------------- First, a few constants relating to the image format and memory layout. If you modify the kernel (Rx.md), these will need to be altered to match your memory layout. ---------------------------------------------------------------------*/ #define TIB 1025 #define D_OFFSET_LINK 0 #define D_OFFSET_XT 1 #define D_OFFSET_CLASS 2 #define D_OFFSET_NAME 3 /*--------------------------------------------------------------------- Next we get into some things that relate to the Nga virtual machine that RETRO runs on. ---------------------------------------------------------------------*/ #define CELL long /* Cell size (32 bit, signed integer */ #define IMAGE_SIZE 524288 * 8 /* Amount of RAM. 4MiB by default. */ #define ADDRESSES 1024 /* Depth of address stack */ #define STACK_DEPTH 128 /* Depth of data stack */ CELL sp, rp, ip; /* Data, address, instruction pointers */ CELL data[STACK_DEPTH]; /* The data stack */ CELL address[ADDRESSES]; /* The address stack */ CELL memory[IMAGE_SIZE + 1]; /* The memory for the image */ #define TOS data[sp] /* Shortcut for top item on stack */ #define NOS data[sp-1] /* Shortcut for second item on stack */ #define TORS address[rp] /* Shortcut for top item on address stack */ #define NUM_DEVICES 1 typedef void (*Handler)(void); Handler IO_deviceHandlers[NUM_DEVICES + 1]; Handler IO_queryHandlers[NUM_DEVICES + 1]; #include "image.c" /*--------------------------------------------------------------------- Moving forward, a few variables. These are updated to point to the latest values in the image. ---------------------------------------------------------------------*/ CELL Dictionary; CELL NotFound; CELL interpret; /*--------------------------------------------------------------------- Function prototypes. ---------------------------------------------------------------------*/ CELL stack_pop(); void stack_push(CELL value); int string_inject(char *str, int buffer); char *string_extract(int at); int d_link(CELL dt); int d_xt(CELL dt); int d_class(CELL dt); int d_name(CELL dt); int d_lookup(CELL Dictionary, char *name); CELL d_xt_for(char *Name, CELL Dictionary); void update_rx(); void execute(int cell); void evaluate(char *s); int not_eol(int ch); void read_token(char *token_buffer, int echo); CELL ngaLoadImage(char *imageFile); void ngaPrepare(); void ngaProcessOpcode(CELL opcode); void ngaProcessPackedOpcodes(CELL opcode); int ngaValidatePackedOpcodes(CELL opcode); /*--------------------------------------------------------------------- Here's an output helper. I define a wrapper over `write` to avoid using `printf()`. ---------------------------------------------------------------------*/ void retro_puts(char *s) { while (*s) putchar(*s++); } /*--------------------------------------------------------------------- Now to the fun stuff: interfacing with the virtual machine. There are a things I like to have here: - push a value to the stack - pop a value off the stack - extract a string from the image - inject a string into the image. - lookup dictionary headers and access dictionary fields ---------------------------------------------------------------------*/ /*--------------------------------------------------------------------- Stack push/pop is easy. I could avoid these, but it aids in keeping the code readable, so it's worth the slight overhead. ---------------------------------------------------------------------*/ CELL stack_pop() { sp--; return data[sp + 1]; } void stack_push(CELL value) { sp++; data[sp] = value; } /*--------------------------------------------------------------------- Strings are next. RETRO uses C-style NULL terminated strings. So I can easily inject or extract a string. Injection iterates over the string, copying it into the image. This also takes care to ensure that the NULL terminator is added. ---------------------------------------------------------------------*/ int string_inject(char *str, int buffer) { int i = 0; while (*str) { memory[buffer + i] = (CELL)*str++; memory[buffer + i + 1] = 0; i++; } return buffer; } /*--------------------------------------------------------------------- Extracting a string is similar, but I have to iterate over the VM memory instead of a C string and copy the charaters into a buffer. This uses a static buffer (`string_data`) as I prefer to avoid using `malloc()`. ---------------------------------------------------------------------*/ char string_data[1025]; char *string_extract(int at) { CELL starting = at; CELL i = 0; while(memory[starting] && i < 1024) string_data[i++] = (char)memory[starting++]; string_data[i] = 0; return (char *)string_data; } /*--------------------------------------------------------------------- Continuing along, I now define functions to access the dictionary. RETRO's dictionary is a linked list. Each entry is setup like: 0000 Link to previous entry (NULL if this is the root entry) 0001 Pointer to definition start 0002 Pointer to class handler 0003 Start of a NULL terminated string with the word name First, functions to access each field. The offsets were defineed at the start of the file. ---------------------------------------------------------------------*/ int d_link(CELL dt) { return dt + D_OFFSET_LINK; } int d_xt(CELL dt) { return dt + D_OFFSET_XT; } int d_class(CELL dt) { return dt + D_OFFSET_CLASS; } int d_name(CELL dt) { return dt + D_OFFSET_NAME; } /*--------------------------------------------------------------------- Next, a more complext word. This will walk through the entries to find one with a name that matches the specified name. This is *slow*, but works ok unless you have a really large dictionary. (I've not run into issues with this in practice). ---------------------------------------------------------------------*/ int d_lookup(CELL Dictionary, char *name) { CELL dt = 0; CELL i = Dictionary; char *dname; while (memory[i] != 0 && i != 0) { dname = string_extract(d_name(i)); if (r_strcmp(dname, name) == 0) { dt = i; i = 0; } else { i = memory[i]; } } return dt; } /*--------------------------------------------------------------------- My last dictionary related word returns the `xt` pointer for a word. This is used to help keep various important bits up to date. ---------------------------------------------------------------------*/ CELL d_xt_for(char *Name, CELL Dictionary) { return memory[d_xt(d_lookup(Dictionary, Name))]; } /*--------------------------------------------------------------------- This interface tracks a few words and variables in the image. These are: Dictionary - the latest dictionary header NotFound - called when a word is not found interpret - the heart of the interpreter/compiler I have to call this periodically, as the Dictionary will change as new words are defined, and the user might write a new error handler or interpreter. ---------------------------------------------------------------------*/ void update_rx() { Dictionary = memory[2]; NotFound = d_xt_for("err:notfound", Dictionary); interpret = d_xt_for("interpret", Dictionary); } /*--------------------------------------------------------------------- This is an implementation of the generic output device. It's set to write output to the standard display. ---------------------------------------------------------------------*/ void generic_output() { putchar(stack_pop()); } void generic_output_query() { stack_push(0); stack_push(0); } /*--------------------------------------------------------------------- With these out of the way, I implement `execute`, which takes an address and runs the code at it. This has a couple of interesting bits. Nga uses packed instruction bundles, with up to four instructions per bundle. Since RETRO requires an additional instruction to handle displaying a character, I define the handler for that here. This will also exit if the address stack depth is zero (meaning that the word being run, and it's dependencies) are finished. ---------------------------------------------------------------------*/ void execute(int cell) { CELL opcode; rp = 1; ip = cell; while (ip < IMAGE_SIZE) { if (ip == NotFound) { retro_puts(string_extract(TIB)); retro_puts(" ?\n"); } opcode = memory[ip]; if (ngaValidatePackedOpcodes(opcode) != 0) { ngaProcessPackedOpcodes(opcode); } else { retro_puts("Invalid instruction!\n"); retro_puts("System halted.\n"); while(1); } ip++; if (rp == 0) ip = IMAGE_SIZE; } } /*--------------------------------------------------------------------- RETRO's `interpret` word expects a token on the stack. This next function copies a token to the `TIB` (text input buffer) and then calls `interpret` to process it. ---------------------------------------------------------------------*/ void evaluate(char *s) { if (r_strlen(s) == 0) return; update_rx(); string_inject(s, TIB); stack_push(TIB); execute(interpret); } /*--------------------------------------------------------------------- `read_token` reads a token from the specified file. It will stop on a whitespace or newline. It also tries to handle backspaces, though the success of this depends on how your terminal is configured. ---------------------------------------------------------------------*/ int not_eol(int ch) { return (ch != (char)10) && (ch != (char)13) && (ch != (char)32) && (ch != 0); } void read_token(char *token_buffer, int echo) { int ch = getchar(); if (echo != 0) putchar(ch); int count = 0; while (not_eol(ch)) { if ((ch == 8 || ch == 127) && count > 0) { count--; if (echo != 0) { putchar(8); putchar(32); putchar(8); } } else { token_buffer[count++] = ch; } ch = getchar(); if (echo != 0) putchar(ch); } token_buffer[count] = '\0'; } /*--------------------------------------------------------------------- The `main()` routine. This sets up the Nga VM, loads the image, and enters a loop. The loop: - reads input - otherwise, pass to `evaluate()` to run ---------------------------------------------------------------------*/ int main(int argc, char **argv) { char input[1024]; IO_deviceHandlers[0] = generic_output; IO_queryHandlers[0] = generic_output_query; ngaPrepare(); for (CELL i = 0; i < ngaImageCells; i++) memory[i] = ngaImage[i]; update_rx(); retro_puts("RETRO Listener (c) 2016-2019, Charles Childers\n\n"); while(1) { Dictionary = memory[2]; read_token(input, 0); evaluate(input); } return 0; } /* Nga ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copyright (c) 2008 - 2019, Charles Childers Copyright (c) 2009 - 2010, Luke Parrish Copyright (c) 2010, Marc Simpson Copyright (c) 2010, Jay Skeer Copyright (c) 2011, Kenneth Keating ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ enum vm_opcode { VM_NOP, VM_LIT, VM_DUP, VM_DROP, VM_SWAP, VM_PUSH, VM_POP, VM_JUMP, VM_CALL, VM_CCALL, VM_RETURN, VM_EQ, VM_NEQ, VM_LT, VM_GT, VM_FETCH, VM_STORE, VM_ADD, VM_SUB, VM_MUL, VM_DIVMOD, VM_AND, VM_OR, VM_XOR, VM_SHIFT, VM_ZRET, VM_HALT, VM_IE, VM_IQ, VM_II }; #define NUM_OPS VM_II + 1 #ifndef NUM_DEVICES #define NUM_DEVICES 0 #endif CELL ngaLoadImage(char *imageFile) { CELL i; for (i = 0; i < ngaImageCells; i++) memory[i] = ngaImage[i]; return i; } void ngaPrepare() { ip = sp = rp = 0; for (ip = 0; ip < IMAGE_SIZE; ip++) memory[ip] = VM_NOP; for (ip = 0; ip < STACK_DEPTH; ip++) data[ip] = 0; for (ip = 0; ip < ADDRESSES; ip++) address[ip] = 0; } void inst_nop() { } void inst_lit() { sp++; ip++; TOS = memory[ip]; } void inst_dup() { sp++; data[sp] = NOS; } void inst_drop() { data[sp] = 0; if (--sp < 0) ip = IMAGE_SIZE; } void inst_swap() { CELL a; a = TOS; TOS = NOS; NOS = a; } void inst_push() { rp++; TORS = TOS; inst_drop(); } void inst_pop() { sp++; TOS = TORS; rp--; } void inst_jump() { ip = TOS - 1; inst_drop(); } void inst_call() { rp++; TORS = ip; ip = TOS - 1; inst_drop(); } void inst_ccall() { CELL a, b; a = TOS; inst_drop(); /* False */ b = TOS; inst_drop(); /* Flag */ if (b != 0) { rp++; TORS = ip; ip = a - 1; } } void inst_return() { ip = TORS; rp--; } void inst_eq() { NOS = (NOS == TOS) ? -1 : 0; inst_drop(); } void inst_neq() { NOS = (NOS != TOS) ? -1 : 0; inst_drop(); } void inst_lt() { NOS = (NOS < TOS) ? -1 : 0; inst_drop(); } void inst_gt() { NOS = (NOS > TOS) ? -1 : 0; inst_drop(); } void inst_fetch() { switch (TOS) { case -1: TOS = sp - 1; break; case -2: TOS = rp; break; case -3: TOS = IMAGE_SIZE; break; default: TOS = memory[TOS]; break; } } void inst_store() { if (TOS <= IMAGE_SIZE && TOS >= 0) { memory[TOS] = NOS; inst_drop(); inst_drop(); } else { ip = IMAGE_SIZE; } } void inst_add() { NOS += TOS; inst_drop(); } void inst_sub() { NOS -= TOS; inst_drop(); } void inst_mul() { NOS *= TOS; inst_drop(); } void inst_divmod() { CELL a, b; a = TOS; b = NOS; TOS = b / a; NOS = b % a; } void inst_and() { NOS = TOS & NOS; inst_drop(); } void inst_or() { NOS = TOS | NOS; inst_drop(); } void inst_xor() { NOS = TOS ^ NOS; inst_drop(); } void inst_shift() { CELL y = TOS; CELL x = NOS; if (TOS < 0) NOS = NOS << (TOS * -1); else { if (x < 0 && y > 0) NOS = x >> y | ~(~0U >> y); else NOS = x >> y; } inst_drop(); } void inst_zret() { if (TOS == 0) { inst_drop(); ip = TORS; rp--; } } void inst_halt() { ip = IMAGE_SIZE; } void inst_ie() { sp++; TOS = NUM_DEVICES; } void inst_iq() { CELL Device = TOS; inst_drop(); IO_queryHandlers[Device](); } void inst_ii() { CELL Device = TOS; inst_drop(); IO_deviceHandlers[Device](); } Handler instructions[NUM_OPS] = { inst_nop, inst_lit, inst_dup, inst_drop, inst_swap, inst_push, inst_pop, inst_jump, inst_call, inst_ccall, inst_return, inst_eq, inst_neq, inst_lt, inst_gt, inst_fetch, inst_store, inst_add, inst_sub, inst_mul, inst_divmod, inst_and, inst_or, inst_xor, inst_shift, inst_zret, inst_halt, inst_ie, inst_iq, inst_ii }; void ngaProcessOpcode(CELL opcode) { if (opcode != 0) instructions[opcode](); } int ngaValidatePackedOpcodes(CELL opcode) { CELL raw = opcode; CELL current; int valid = -1; int i; for (i = 0; i < 4; i++) { current = raw & 0xFF; if (!(current >= 0 && current <= 29)) valid = 0; raw = raw >> 8; } return valid; } void ngaProcessPackedOpcodes(CELL opcode) { CELL raw = opcode; int i; for (i = 0; i < 4; i++) { ngaProcessOpcode(raw & 0xFF); raw = raw >> 8; } }