/*
 * Answer to: CS/CE 4337 Project, Fall 2021 (Part 1). This project will explore the topics learned in class,...
 * Answered by Kamal on Oct 08 2021.
 */
// NOTE(review): the header names of the four original #include lines were lost.
// The three below are restored from the library calls visible in this file
// (printf, exit, memcmp); the fourth header could not be recovered -- confirm
// against the original project.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// From this line on, every `int` written in this file is really `long long`.
// The code stores pointer values in `int` variables (for example the
// identifier name pointer and string-literal addresses in the lexer), so the
// type has to be wide enough for a 64-bit address.
// The macro is placed after the #include lines, so the standard headers are
// still parsed with the real `int`.
#define int long long // to work with 64bit address
// Output switches and the lexer's current token, declared together.
int debug,     // when set: print the executed instructions
    assembly,  // when set: print out the assembly and source
    token;     // current token
// Virtual machine instructions.
// The numeric order is significant: the assembly dump in next() indexes its
// mnemonic table by opcode, and treats every opcode up to and including ADJ
// as being followed by one operand word.
enum {
    // opcodes that are followed by an operand in the text segment dump
    LEA = 0, IMM, JMP, CALL, JZ, JNZ, ENT, ADJ,
    // remaining opcodes
    LEV, LI, LC, SI, SC, PUSH,
    OR, XOR, AND, EQ, NE, LT, GT, LE, GE,
    SHL, SHR, ADD, SUB, MUL, DIV, MOD,
    OPEN, READ, CLOS, PRTF, MALC, MSET, MCMP, EXIT
};
// Token codes and identifier classes (layout taken from c4).
// Values start at 128 so they never collide with single ASCII characters,
// which the lexer returns as themselves. Operators come last and are listed
// in precedence order.
enum {
    // literals and identifier classes
    Num = 128,
    Fun,
    Sys,
    Glo,
    Loc,
    Id,
    // keywords
    Char,
    Else,
    Enum,
    If,
    Int,
    Return,
    Sizeof,
    While,
    // operators, in precedence order
    Assign,
    Cond,
    Lor,
    Lan,
    Or,
    Xor,
    And,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
    Shl,
    Shr,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Inc,
    Dec,
    Brak
};
// Slot indices inside one symbol-table entry. An entry is IdSize consecutive
// words; the lexer fills in Token, Hash and Name when it meets a new name.
enum {
    Token = 0,  // token code of the identifier
    Hash,       // hash of the identifier text
    Name,       // pointer (stored as an integer) to the identifier text
    Type,
    Class,
    Value,
    BType,
    BClass,
    BValue,
    IdSize      // number of slots per entry
};
// Types of a variable/function. PTR is added once per level of indirection
// on top of the base type.
enum {
    CHAR = 0,
    INT,
    PTR
};
// Kind of declaration.
enum {
    Global = 0,
    Local
};
// ---- memory segments ----
int *text;      // text segment
int *stack;     // stack
int *old_text;  // for dump text segment
char *data;     // data segment

int *idmain;

// ---- source text ----
char *src;      // pointer to source code string
char *old_src;  // start of the source not yet echoed by the assembly dump

int poolsize;   // default size of text/data/stack

// ---- virtual machine registers ----
int *pc;
int *bp;
int *sp;
int ax;
int cycle;

// ---- lexer / parser state ----
int *current_id;  // current parsed ID
int *symbols;     // symbol table
int line;         // line number of source code
int token_val;    // value of current token (mainly for number)

int basetype;   // the type of a declaration, make it global for convenience
int expr_type;  // the type of an expression

// Function frame layout:
//
//   0: arg 1
//   1: arg 2
//   2: arg 3
//   3: return address
//   4: old bp pointer   <- index_of_bp
//   5: local var 1
//   6: local var 2
int index_of_bp;  // index of bp pointer on stack
// Lexer: scan the next token starting at `src`.
//
// On return:
//   token      - code of the token found: a value from the token enum, the
//                character itself for simple punctuation, or 0 at end of input
//   token_val  - value of a number or character literal, or the address of a
//                string literal inside the data segment
//   current_id - symbol-table entry of the identifier, when one was scanned
//   src        - advanced past the token
//
// Characters that start no token (blanks, tabs, ...) are skipped. Each newline
// increments `line`; when `assembly` is set it also echoes the finished source
// line followed by the instructions emitted since the previous newline.
void next() {
    // Mnemonics for the assembly dump. The table is indexed with opcode * 5,
    // so every entry must be exactly 5 bytes: a 4-character, space-padded name
    // followed by a comma.
    static const char mnemonics[] =
        "LEA ,IMM ,JMP ,CALL,JZ  ,JNZ ,ENT ,ADJ ,LEV ,LI  ,LC  ,SI  ,SC  ,PUSH,"
        "OR  ,XOR ,AND ,EQ  ,NE  ,LT  ,GT  ,LE  ,GE  ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
        "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT";
    char *last_pos;
    unsigned long long hash; // unsigned: the rolling hash may wrap, which must not be UB
    int op;

    while ((token = *src) != 0) {
        ++src;
        if (token == '\n') {
            if (assembly) {
                // print compile info
                // `%.*s` needs a genuine int precision; `int` is a macro for
                // long long in this file, hence the cast through `signed`.
                printf("%lld: %.*s", (long long)line, (signed)(src - old_src), old_src);
                old_src = src;
                while (old_text < text) {
                    op = *++old_text;
                    printf("%8.4s", &mnemonics[op * 5]);
                    if (op <= ADJ) {
                        // opcodes up to ADJ carry one operand word
                        printf(" %lld\n", (long long)*++old_text);
                    } else {
                        printf("\n");
                    }
                }
            }
            ++line;
        }
        else if (token == '#') {
            // skip macro, because we will not support it
            while (*src != 0 && *src != '\n') {
                src++;
            }
        }
        else if ((token >= 'a' && token <= 'z') || (token >= 'A' && token <= 'Z') || (token == '_')) {
            // parse identifier
            last_pos = src - 1;
            hash = (unsigned long long)token;
            while ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9') || (*src == '_')) {
                hash = hash * 147 + (unsigned long long)*src;
                src++;
            }
            // look for existing identifier, linear search
            current_id = symbols;
            while (current_id[Token]) {
                if (current_id[Hash] == (int)hash && !memcmp((char *)current_id[Name], last_pos, src - last_pos)) {
                    // found one, return
                    token = current_id[Token];
                    return;
                }
                current_id = current_id + IdSize;
            }
            // store new ID in the first free entry
            current_id[Name] = (int)last_pos;
            current_id[Hash] = (int)hash;
            token = current_id[Token] = Id;
            return;
        }
        else if (token >= '0' && token <= '9') {
            // parse number, three kinds: dec(123) hex(0x123) oct(017)
            token_val = token - '0';
            if (token_val > 0) {
                // dec, starts with [1-9]
                while (*src >= '0' && *src <= '9') {
                    token_val = token_val*10 + *src++ - '0';
                }
            } else {
                // starts with number 0
                if (*src == 'x' || *src == 'X') {
                    // hex
                    token = *++src;
                    while ((token >= '0' && token <= '9') || (token >= 'a' && token <= 'f') || (token >= 'A' && token <= 'F')) {
                        // low nibble of the ASCII code, plus 9 for letters
                        token_val = token_val * 16 + (token & 15) + (token >= 'A' ? 9 : 0);
                        token = *++src;
                    }
                } else {
                    // oct
                    while (*src >= '0' && *src <= '7') {
                        token_val = token_val*8 + *src++ - '0';
                    }
                }
            }
            token = Num;
            return;
        }
        else if (token == '/') {
            if (*src == '/') {
                // skip comments
                while (*src != 0 && *src != '\n') {
                    ++src;
                }
            } else {
                // divide operator
                token = Div;
                return;
            }
        }
        else if (token == '"' || token == '\'') {
            // parse string or character literal; the only supported escape
            // is '\n'. String bytes are stored into the data segment.
            last_pos = data;
            while (*src != 0 && *src != token) {
                token_val = *src++;
                // A backslash directly before the end of input is kept as is,
                // so src never moves past the terminating NUL.
                if (token_val == '\\' && *src != 0) {
                    // escape character
                    token_val = *src++;
                    if (token_val == 'n') {
                        token_val = '\n';
                    }
                }
                if (token == '"') {
                    *data++ = token_val;
                }
            }
            // skip the closing quote, unless the literal is unterminated
            if (*src != 0) {
                src++;
            }
            // a string yields its address; a single character yields a Num
            if (token == '"') {
                token_val = (int)last_pos;
            } else {
                token = Num;
            }
            return;
        }
        else if (token == '=') {
            // parse '==' and '='
            if (*src == '=') {
                src++;
                token = Eq;
            } else {
                token = Assign;
            }
            return;
        }
        else if (token == '+') {
            // parse '+' and '++'
            if (*src == '+') {
                src++;
                token = Inc;
            } else {
                token = Add;
            }
            return;
        }
        else if (token == '-') {
            // parse '-' and '--'
            if (*src == '-') {
                src++;
                token = Dec;
            } else {
                token = Sub;
            }
            return;
        }
        else if (token == '!') {
            // parse '!='; a lone '!' is returned as the character itself
            if (*src == '=') {
                src++;
                token = Ne;
            }
            return;
        }
        else if (token == '<') {
            // parse '<=', '<<' or '<'
            if (*src == '=') {
                src++;
                token = Le;
            } else if (*src == '<') {
                src++;
                token = Shl;
            } else {
                token = Lt;
            }
            return;
        }
        else if (token == '>') {
            // parse '>=', '>>' or '>'
            if (*src == '=') {
                src++;
                token = Ge;
            } else if (*src == '>') {
                src++;
                token = Shr;
            } else {
                token = Gt;
            }
            return;
        }
        else if (token == '|') {
            // parse '|' or '||'
            if (*src == '|') {
                src++;
                token = Lor;
            } else {
                token = Or;
            }
            return;
        }
        else if (token == '&') {
            // parse '&' and '&&'
            if (*src == '&') {
                src++;
                token = Lan;
            } else {
                token = And;
            }
            return;
        }
        else if (token == '^') {
            token = Xor;
            return;
        }
        else if (token == '%') {
            token = Mod;
            return;
        }
        else if (token == '*') {
            token = Mul;
            return;
        }
        else if (token == '[') {
            token = Brak;
            return;
        }
        else if (token == '?') {
            token = Cond;
            return;
        }
        else if (token == '~' || token == ';' || token == '{' || token == '}' || token == '(' || token == ')' || token == ']' || token == ',' || token == ':') {
            // directly return the character as token
            return;
        }
    }
}
// Consume the current token if it is `tk` and advance to the next one.
// If the current token is anything else, print the line number together with
// the expected token code and terminate the program with exit(-1).
void match(int tk) {
    if (token != tk) {
        // `int` is a macro for long long in this file, so the values are
        // printed with %lld (explicit casts keep the call correct either way).
        printf("%lld: expected token: %lld\n", (long long)line, (long long)tk);
        exit(-1);
    }
    next();
}
void expression(int level) {
// expressions have various format.
// but majorly can be divided into two parts: unit and operator
// for example `(char) *a[10] = (int *) func(b > 0 ? 10 : 20);
// `a[10]` is an unit while `*` is an operator.
// `func(...)` in total is an unit.
// so we should first parse those unit and unary operators
// and then the binary ones
//
// also the expression can be in the following types:
//
// 1. unit_unary ::= unit | unit unary_op | unary_op unit
// 2. expr ::= unit_unary (bin_op unit_unary ...)
// unit_unary()
int *id;
int tmp;
int *addr;
{
if (!token) {
printf("%d: unexpected token EOF of expression\n", line);
exit(-1);
}
if (token == Num) {
match(Num);
// emit code
*++text = IMM;
*++text = token_val;
expr_type = INT;
}
else if (token == '"') {
// continous string "abc" "abc"
// emit code
*++text = IMM;
*++text = token_val;
match('"');
// store the rest strings
while (token == '"') {
match('"');
}
// append the end of string character '\0', all the data are default
// to 0, so just move data one position forward.
data = (char *)(((int)data + sizeof(int)) & (-sizeof(int)));
expr_type = PTR;
}
else if (token == Sizeof) {
// sizeof is actually an unary operator
// now only `sizeof(int)`, `sizeof(char)` and `sizeof(*...)` are
// supported.
match(Sizeof);
match('(');
expr_type = INT;
if (token == Int) {
match(Int);
} else if (token == Char) {
match(Char);
expr_type = CHAR;
}
while (token == Mul) {
match(Mul);
expr_type = expr_type + PTR;
}
match(')');
// emit code
*++text = IMM;
*++text = (expr_type == CHAR) ? sizeof(char) : sizeof(int);
expr_type = INT;
}
else if (token == Id) {
// there are several type when occurs to Id
// but this is unit, so it can only be
// 1. function call
// 2. Enum variable
// 3. global/local variable
match(Id);
id = current_id;
if (token == '(') {
// function call
match('(');
// pass in arguments
tmp = 0; // number of arguments
while (token != ')') {
expression(Assign);
*++text = PUSH;
tmp ++;
if (token == ',') {
match(',');
}
}
match(')');
...