Skip to content

Commit

Permalink
Use the same label ID counter in the AST traverser and the parser
Browse files Browse the repository at this point in the history
Labels are used in assembly code to mark specific areas of memory.
  • Loading branch information
rigel-star committed Feb 25, 2024
1 parent 138bcd8 commit e13e0e4
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 60 deletions.
9 changes: 5 additions & 4 deletions src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,19 +127,20 @@ impl ASTNode {
pub struct ASTTraverser {
reg_manager: Rc<RefCell<register::RegisterManager>>,
sym_table: Rc<RefCell<symtable::Symtable>>,
label_id_count: usize,
_label_id: &'static mut usize,
}

impl ASTTraverser {
#[allow(clippy::new_without_default)]
pub fn new(
reg_manager: Rc<RefCell<RegisterManager>>,
sym_table: Rc<RefCell<symtable::Symtable>>,
label_id: &'static mut usize
) -> Self {
Self {
reg_manager,
sym_table,
label_id_count: 0,
_label_id: label_id,
}
}

Expand Down Expand Up @@ -428,8 +429,8 @@ impl ASTTraverser {
}

fn get_next_label(&mut self) -> usize {
let label: usize = self.label_id_count;
self.label_id_count += 1;
let label: usize = *self._label_id;
(*self._label_id) += 1;
label
}

Expand Down
1 change: 1 addition & 0 deletions src/enums.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ pub enum ASTNodeKind {
AST_DEREF, // for dereferencing operator
AST_WIDEN, // need to widen the tree
AST_ARRAY_ACCESS, // access array element
AST_STRLIT, // string literal node
}

impl ASTNodeKind {
Expand Down
6 changes: 4 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def main() -> char {
}
*/

// Label id counter handed to both the parser and the AST traverser so that
// the two draw label numbers from the same sequence.
// NOTE(review): `main` creates two `&'static mut` references to this static
// (one passed to `Parser::new`, one to `ASTTraverser::new`). Two live mutable
// references to the same location violate Rust's aliasing rules; consider
// sharing the counter through `Rc<Cell<usize>>` or an `AtomicUsize` instead.
static mut LABEL_ID: usize = 0;

fn main() {
let mut tokener: tokenizer::Tokenizer = tokenizer::Tokenizer::new(
"global integer nums[5]; global integer value; value = nums[0] + 12;",
Expand All @@ -51,8 +53,8 @@ fn main() {
Rc::new(RefCell::new(register::RegisterManager::new()));
let sym_table: Rc<RefCell<symtable::Symtable>> =
Rc::new(RefCell::new(symtable::Symtable::new()));
let mut p: parser::Parser = parser::Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: parser::Parser = parser::Parser::new(tokens, Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let mut traverser: ast::ASTTraverser =
ast::ASTTraverser::new(Rc::clone(&reg_manager), Rc::clone(&sym_table));
ast::ASTTraverser::new(Rc::clone(&reg_manager), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
p.start(&mut traverser);
}
96 changes: 47 additions & 49 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,28 @@ impl std::fmt::Display for ParseError {
type ParseResult = Result<ASTNode, ParseError>;

// Actual parser
pub struct Parser<'a> {
tokens: &'a Vec<Token>,
pub struct Parser {
tokens: Vec<Token>,
current: usize,
current_token: &'a Token,
current_token: Token,
sym_table: Rc<RefCell<Symtable>>, // symbol table for global identifiers
// ID of a function that is presently being parsed. This field's value is 0xFFFFFFFF
// if the parser is not inside a function.
current_function_id: usize,
_label_id: &'static mut usize, // label generator
}

impl<'a> Parser<'a> {
impl Parser {
#[inline]
pub fn new(tokens: &'a Vec<Token>, sym_table: Rc<RefCell<Symtable>>) -> Self {
pub fn new(tokens: Vec<Token>, sym_table: Rc<RefCell<Symtable>>, label_id: &'static mut usize) -> Self {
let current_token: Token = tokens[0].clone();
Self {
tokens,
current: 0,
current_token: &tokens[0],
current_token,
sym_table,
current_function_id: 0xFFFFFFFF,
_label_id: label_id
}
}

Expand Down Expand Up @@ -205,7 +208,7 @@ impl<'a> Parser<'a> {
let func_return_type: LitTypeVariant = LitTypeVariant::from_token_kind(curr_tok_kind);
if func_return_type == LitTypeVariant::None {
error::report_unexpected_token(
self.current_token,
&self.current_token,
Some("Not a valid return type for a function."),
);
}
Expand Down Expand Up @@ -235,7 +238,7 @@ impl<'a> Parser<'a> {
// check whether parser's parsing a function or not
if self.current_function_id == 0xFFFFFFFF {
error::report_unexpected_token(
self.current_token,
&self.current_token,
Some("'return' statement outside a function is not valid."),
);
} else {
Expand All @@ -252,7 +255,7 @@ impl<'a> Parser<'a> {
// if function has void as a return type, panic if any expression follows the keyword 'return'
if self.current_token.kind != TokenKind::T_SEMICOLON {
error::report_unexpected_token(
self.current_token,
&self.current_token,
Some("Expected ';' because function has a 'void' return type."),
);
}
Expand Down Expand Up @@ -373,11 +376,11 @@ impl<'a> Parser<'a> {
fn parse_assignment_stmt(&mut self) -> ParseResult {
let id_token: Token = self.token_match(TokenKind::T_IDENTIFIER).clone();
let _id_index_symt: usize = self.sym_table.borrow().find_symbol(&id_token.lexeme);
let symbol: Symbol = self.sym_table.borrow().get_symbol(_id_index_symt).clone();
if _id_index_symt == 0xFFFFFFFF {
// if the symbol has not been defined
panic!("Assigning to an undefined symbol '{}'", id_token.lexeme);
}
let symbol: Symbol = self.sym_table.borrow().get_symbol(_id_index_symt).clone();
// Check if we are assigning to a type other than SymbolType::Variable. If yes, panic!
if symbol.sym_type != SymbolType::Variable {
panic!("Assigning to type '{:?}' is not allowed!", symbol.sym_type);
Expand Down Expand Up @@ -442,7 +445,7 @@ impl<'a> Parser<'a> {

fn parse_global_array_var_decl_stmt(&mut self, mut sym: Symbol) -> Option<ParseError> {
self.skip_to_next_token(); // skip '['
let array_size_token: &Token = self.current_token;
let array_size_token: Token = self.current_token.clone();
let mut array_size_type: TokenKind = TokenKind::T_NONE;
for t in [
TokenKind::T_INT_NUM,
Expand Down Expand Up @@ -623,6 +626,11 @@ impl<'a> Parser<'a> {
LitType::F32(current_token.lexeme.parse::<f32>().unwrap()),
LitTypeVariant::F32,
)),
TokenKind::T_STRING => {
    // Reserve the label id once, before bumping the shared counter, so the
    // emitted `_L<id>` label and the id stored in the AST leaf are the same.
    // (Previously the leaf was built after the increment and carried id + 1,
    // i.e. a label that was never emitted for this string.)
    let str_label: usize = *self._label_id;
    *self._label_id += 1;
    // Emit the string data under its label.
    println!("_L{}: .asciiz \"{}\"", str_label, current_token.lexeme);
    // The leaf carries the label id; presumably the code generator uses it to
    // reference `_L<id>` when loading the string's address — confirm in the traverser.
    Ok(ASTNode::make_leaf(ASTNodeKind::AST_STRLIT, LitType::I32(str_label as i32), LitTypeVariant::U8Ptr))
}
TokenKind::T_IDENTIFIER => {
let id_index: usize = self.sym_table.borrow().find_symbol(&current_token.lexeme);
if id_index == 0xFFFFFFFF {
Expand Down Expand Up @@ -702,23 +710,23 @@ impl<'a> Parser<'a> {
}

fn token_match(&mut self, kind: TokenKind) -> &Token {
let current: &Token = self.current_token;
let current: Token = self.current_token.clone();
if kind != current.kind {
panic!(
"Expected the token to be '{:?}' but found '{:?}'",
kind, current.kind
);
}
self.skip_to_next_token();
current
&self.tokens[self.current - 1]
}

fn skip_to_next_token(&mut self) {
self.current += 1;
if self.current >= self.tokens.len() {
return;
}
self.current_token = &self.tokens[self.current];
self.current_token = self.tokens[self.current].clone();
}
}

Expand All @@ -728,10 +736,10 @@ mod tests {

#[test]
fn test_group_expression_tree_structure() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("(5 + (3 * 4))");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let result: ParseResult = p.parse_equality();
assert!(result.is_ok());
let upvalue: ASTNode = result.unwrap();
Expand All @@ -745,10 +753,10 @@ mod tests {
// test addition operation
#[test]
fn test_depth_one_bin_tree() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("5+5");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let result: ParseResult = p.parse_equality();
assert!(result.is_ok());
assert_eq!(result.unwrap().operation, ASTNodeKind::AST_ADD);
Expand All @@ -757,11 +765,10 @@ mod tests {
// test if-else block
#[test]
fn test_if_else_statement_block() {
let mut tokener: Tokenizer =
Tokenizer::new("if (4 > 5) { global integer a; } else { global integer b; }");
let tokens: Vec<Token> = tokener.start_scan();
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("if (4 > 5) { global integer a; } else { global integer b; }");
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let result: ParseResult = p.parse_if_stmt();
assert!(
result.is_ok(),
Expand Down Expand Up @@ -790,10 +797,10 @@ mod tests {
// dereferencing, and addressing will work.
#[test]
fn test_integer_id_addr_load() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("global integer *b; global integer a; b = &a;");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
p.parse_global_variable_decl_stmt();
p.parse_global_variable_decl_stmt();
let result: ParseResult = p.parse_single_stmt();
Expand All @@ -812,10 +819,10 @@ mod tests {

#[test]
fn test_integer_id_deref() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("global integer *b; global integer a; a = *b;");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
// Skipping first two statements. Because, global variable declaration
// doesn't produce any AST node.
p.parse_global_variable_decl_stmt();
Expand All @@ -834,32 +841,23 @@ mod tests {
);
}

#[test]
fn test_while_statement_block() {
let mut tokener: Tokenizer = Tokenizer::new("global integer *b; global integer a; a = *b;");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
p.parse_global_variable_decl_stmt();
}

// Return statement outside a function is not valid!
#[test]
#[should_panic]
fn test_simple_return_stmt_outside_func() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("return;");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
_ = p.parse_single_stmt();
}

#[test]
fn test_func_decl_stmt() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("def main() -> void { return; }");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let func_stmt: ParseResult = p.parse_single_stmt();
assert!(func_stmt.is_ok());
let upvalue: &ASTNode = func_stmt.as_ref().unwrap();
Expand All @@ -885,51 +883,51 @@ mod tests {

#[test]
fn test_array_decl_stmt_success() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("global integer nums[12];");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let array_decl_stmt: Option<ParseError> = p.parse_global_variable_decl_stmt();
assert!(array_decl_stmt.is_none());
}

#[test]
#[should_panic]
fn test_array_decl_stmt_panic_array_size() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("global integer nums[abcd];");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let array_decl_stmt: Option<ParseError> = p.parse_global_variable_decl_stmt();
assert!(array_decl_stmt.is_none());
}

#[test]
#[should_panic]
fn test_array_decl_stmt_panic_array_no_size_given() {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new("global integer nums[];");
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
let array_decl_stmt: Option<ParseError> = p.parse_global_variable_decl_stmt();
assert!(array_decl_stmt.is_none());
}

// helper function to parse a statement from string which does not contain variable declaration
fn parse_single_statement_no_decl(input: &'static str) -> ParseResult {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new(input);
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
p.parse_single_stmt()
}

// helper function to parse a statement from string which may contain one or more variable declarations
fn parse_single_stmt_with_decl(input: &'static str, decl_count: usize) -> ParseResult {
static mut LABEL_ID: usize = 0;
let mut tokener: Tokenizer = Tokenizer::new(input);
let tokens: Vec<Token> = tokener.start_scan();
let sym_table: Rc<RefCell<Symtable>> = Rc::new(RefCell::new(Symtable::new()));
let mut p: Parser = Parser::new(&tokens, Rc::clone(&sym_table));
let mut p: Parser = Parser::new(tokener.start_scan(), Rc::clone(&sym_table), unsafe { &mut LABEL_ID });
for _ in 0..decl_count {
p.parse_global_variable_decl_stmt();
}
Expand Down
9 changes: 4 additions & 5 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,11 @@ impl Tokenizer {
if self.is_at_end() {
self.advance_to_next_char_pos();
return TokenizationResult::Error(ErrorType::UnterminatedString, token_pos);
} else {
self.advance_to_next_char_pos();
let str_val: &str = std::str::from_utf8(&self.source[__start..__end]).unwrap();
token.kind = TokenKind::T_STRING;
token.lexeme = String::from(str_val);
}
self.advance_to_next_char_pos();
let str_val: &str = std::str::from_utf8(&self.source[__start..__end]).unwrap();
token.kind = TokenKind::T_STRING;
token.lexeme = String::from(str_val);
},
'(' | ')' | '{' | '}' | '[' | ']' | '#' | '.' | '?' | ':' | ',' | ';' => {
token.kind = TokenKind::from_str(self.curr_char.to_string().as_str()).unwrap();
Expand Down

0 comments on commit e13e0e4

Please sign in to comment.