Skip to content

Commit

Permalink
feat: step 4
Browse files Browse the repository at this point in the history
  • Loading branch information
malta895 committed Sep 8, 2024
1 parent 986d06b commit b9f74cf
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 21 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
148 changes: 132 additions & 16 deletions src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@ use super::{error::JSONError, token::Token};
enum State {
Normal,

AwaitingValue,
ObjValue,
ValueNumber,
ValueTrue(char),
ValueFalse(char),
ValueNull(char),

ArrValue,

ValueStringLiteral,
Escaping,
}

pub fn lex<R: BufRead>(mut reader: R) -> Result<Vec<Token>, JSONError> {
//TODO: this is not a lexer anymore, as it was not necessary. Remove the parser and do everything here
let mut tokens = Vec::new();
let mut state = State::Normal;
loop {
Expand All @@ -31,18 +34,28 @@ pub fn lex<R: BufRead>(mut reader: R) -> Result<Vec<Token>, JSONError> {
for c in s.chars() {
let current_state = state.clone();
match (c, current_state) {
('{', State::Normal) => {
('{', State::Normal | State::ObjValue) => {
tokens.push(Token::OpenBrace);
state = State::Normal;
}

('}', State::Normal) => {
('}', State::Normal | State::ObjValue) => {
tokens.push(Token::ClosedBrace);
}
('}', State::ValueNumber) => {
tokens.push(Token::Number);
tokens.push(Token::ClosedBrace);
}

('[', State::ObjValue) => {
tokens.push(Token::OpenBracket);
state = State::ArrValue;
}
(']', State::ArrValue | State::Normal) => {
tokens.push(Token::ClosedBracket);
state = State::Normal;
}

('\n', State::Normal) => {
tokens.push(Token::NewLine);
}
Expand All @@ -54,7 +67,7 @@ pub fn lex<R: BufRead>(mut reader: R) -> Result<Vec<Token>, JSONError> {

(':', State::Normal) => {
tokens.push(Token::Column);
state = State::AwaitingValue;
state = State::ObjValue;
}

(',', State::Normal) => {
Expand All @@ -66,32 +79,32 @@ pub fn lex<R: BufRead>(mut reader: R) -> Result<Vec<Token>, JSONError> {
state = State::Normal;
}

(' ', State::Normal | State::AwaitingValue) => {
(' ', State::Normal | State::ObjValue) => {
// ignore space
}

('0'..='9', State::AwaitingValue | State::ValueNumber) => {
('0'..='9', State::ObjValue | State::ValueNumber) => {
state = State::ValueNumber
}

('t', State::AwaitingValue) => state = State::ValueTrue('t'),
('r', State::ValueTrue('t')) => state = State::ValueTrue('r'),
('u', State::ValueTrue('r')) => state = State::ValueTrue('u'),
('t', State::ObjValue) => state = State::ValueTrue('t'),
('r', State::ValueTrue('t')) => state = State::ValueTrue('r'),
('u', State::ValueTrue('r')) => state = State::ValueTrue('u'),
('e', State::ValueTrue('u')) => {
tokens.push(Token::BoolTrue);
state = State::Normal;
}

('f', State::AwaitingValue) => state = State::ValueFalse('f'),
('a', State::ValueFalse('f')) => state = State::ValueFalse('a'),
('l', State::ValueFalse('a')) => state = State::ValueFalse('l'),
('s', State::ValueFalse('l')) => state = State::ValueFalse('s'),
('f', State::ObjValue) => state = State::ValueFalse('f'),
('a', State::ValueFalse('f')) => state = State::ValueFalse('a'),
('l', State::ValueFalse('a')) => state = State::ValueFalse('l'),
('s', State::ValueFalse('l')) => state = State::ValueFalse('s'),
('e', State::ValueFalse('s')) => {
tokens.push(Token::BoolFalse);
state = State::Normal;
}

('n', State::AwaitingValue) => state = State::ValueNull('n'),
('n', State::ObjValue) => state = State::ValueNull('n'),
('u', State::ValueNull('n')) => state = State::ValueNull('u'),
('l', State::ValueNull('u')) => state = State::ValueNull('l'),
('l', State::ValueNull('l')) => {
Expand All @@ -106,7 +119,7 @@ pub fn lex<R: BufRead>(mut reader: R) -> Result<Vec<Token>, JSONError> {
('\\', State::ValueStringLiteral) => {
state = State::Escaping;
}
('"', State::Normal | State::AwaitingValue) => {
('"', State::Normal | State::ObjValue | State::ArrValue) => {
state = State::ValueStringLiteral;
tokens.push(Token::DoubleQuotes);
}
Expand Down Expand Up @@ -469,7 +482,6 @@ mod lexer_tests {
)
}


#[test]
fn should_lex_false_before_comma() {
run_test_case_with(
Expand Down Expand Up @@ -518,5 +530,109 @@ mod lexer_tests {
)
}

/// An empty object used as the value of a key: `{ "key": {}}`.
///
/// Passes the input and the expected token sequence to `run_test_case_with`.
#[test]
fn should_lex_empty_obj_val() {
    let input = "{ \"key\": {}}";
    let expected = vec![
        Token::OpenBrace,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("key")),
        Token::DoubleQuotes,
        Token::Column,
        // the nested, empty object
        Token::OpenBrace,
        Token::ClosedBrace,
        Token::ClosedBrace,
    ];
    run_test_case_with(input, expected)
}

/// An empty array used as the value of a key: `{ "key": []}`.
///
/// Passes the input and the expected token sequence to `run_test_case_with`.
#[test]
fn should_lex_empty_array_val() {
    let input = "{ \"key\": []}";
    let expected = vec![
        Token::OpenBrace,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("key")),
        Token::DoubleQuotes,
        Token::Column,
        // the empty array
        Token::OpenBracket,
        Token::ClosedBracket,
        Token::ClosedBrace,
    ];
    run_test_case_with(input, expected)
}

/// An array holding a single string element: `{ "key": ["val"]}`.
///
/// Passes the input and the expected token sequence to `run_test_case_with`.
#[test]
fn should_lex_array_with_inner_value() {
    let input = "{ \"key\": [\"val\"]}";
    let expected = vec![
        Token::OpenBrace,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("key")),
        Token::DoubleQuotes,
        Token::Column,
        Token::OpenBracket,
        // the string element inside the array
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("val")),
        Token::DoubleQuotes,
        Token::ClosedBracket,
        Token::ClosedBrace,
    ];
    run_test_case_with(input, expected)
}


/// A nested object with one string member, all on a single line:
/// `{ "key": {"inner_key":"inner_val"}}`.
///
/// Passes the input and the expected token sequence to `run_test_case_with`.
#[test]
fn should_lex_obj_with_inner_value() {
    let input = "{ \"key\": {\"inner_key\":\"inner_val\"}}";
    let expected = vec![
        Token::OpenBrace,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("key")),
        Token::DoubleQuotes,
        Token::Column,
        // nested object starts here
        Token::OpenBrace,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("inner_key")),
        Token::DoubleQuotes,
        Token::Column,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("inner_val")),
        Token::DoubleQuotes,
        Token::ClosedBrace,
        Token::ClosedBrace,
    ];
    run_test_case_with(input, expected)
}

/// Same nested object as the single-line case, but with line breaks after the
/// inner opening brace, after the inner member, and after the inner closing
/// brace; each line break is expected to show up as a `Token::NewLine`.
///
/// Passes the input and the expected token sequence to `run_test_case_with`.
#[test]
fn should_lex_obj_with_inner_value_new_line() {
    let input = "{ \"key\": {\n\"inner_key\":\"inner_val\"\n}\n}";
    let expected = vec![
        Token::OpenBrace,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("key")),
        Token::DoubleQuotes,
        Token::Column,
        // nested object starts here
        Token::OpenBrace,
        Token::NewLine,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("inner_key")),
        Token::DoubleQuotes,
        Token::Column,
        Token::DoubleQuotes,
        Token::StringLiteral(String::from("inner_val")),
        Token::DoubleQuotes,
        Token::NewLine,
        Token::ClosedBrace,
        Token::NewLine,
        Token::ClosedBrace,
    ];
    run_test_case_with(input, expected)
}
}
28 changes: 23 additions & 5 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,28 @@ impl<R: BufRead> JSONParser<R> {
p.lex()?;

p.current_line = 1;
let mut is_inside_object = false;
let mut obj_depth = 0;
let mut is_inside_array = false;
let mut is_json_ended = false;
let mut is_inside_literal = false;
let mut is_after_comma = false;
for token in &p.tokens {
match token {
Token::OpenBrace => {
is_after_comma = false;
is_inside_object = true;
obj_depth += 1;
}
Token::ClosedBrace => {
if !is_inside_object {
if obj_depth == 0 {
return Err(p.build_json_err(format!("Unexpected {}", token)));
}
if is_after_comma {
return Err(p.build_json_err(format!("Unexpected {}", token)));
}
is_inside_object = false;
is_json_ended = true;
obj_depth -= 1;
if obj_depth == 0{
is_json_ended = true;
}
}
Token::NewLine => {
// ignore for now
Expand All @@ -81,6 +84,8 @@ impl<R: BufRead> JSONParser<R> {
// ignore for now
}
Token::Number | Token::BoolTrue | Token::BoolFalse | Token::Null => {}
Token::OpenBracket => {}
Token::ClosedBracket => {}
}
}
if !is_json_ended {
Expand Down Expand Up @@ -171,4 +176,17 @@ mod check_valid_tests {
let res = JSONParser::check_valid("{ \"key\": null}".as_bytes());
assert_eq!(Ok(()), res)
}

/// `JSONParser::check_valid` must return `Ok(())` for an object whose value is
/// an empty array.
#[test]
fn should_recognize_empty_array() {
    let input = "{ \"key\": []}";
    let outcome = JSONParser::check_valid(input.as_bytes());
    assert_eq!(Ok(()), outcome);
}

/// `JSONParser::check_valid` must return `Ok(())` for an object nested inside
/// another object, with the inner object spread across several lines.
#[test]
fn should_recognize_nested_objects() {
    let input = "{ \"key\": {\n\"inner_key\":\"inner_val\"\n}\n}";
    let outcome = JSONParser::check_valid(input.as_bytes());
    assert_eq!(Ok(()), outcome);
}
}
6 changes: 6 additions & 0 deletions src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use core::fmt;
pub enum Token {
OpenBrace,
ClosedBrace,
OpenBracket,
ClosedBracket,
NewLine,
DoubleQuotes,
Column,
Expand All @@ -21,6 +23,8 @@ const NEW_LINE: &str = "\n"; //TODO: make sure this works on windows too
const DOUBLE_QUOTES: &str = "\"";
const COLUMN: &str = ":";
const COMMA: &str = ",";
const OPEN_BRACKET: &str = "[";
const CLOSED_BRACKET: &str = "]";


impl fmt::Display for Token {
Expand All @@ -36,6 +40,8 @@ impl fmt::Display for Token {
Token::BoolTrue | Token::BoolFalse => String::from("<boolean>"),
Token::Null => String::from("<null>"),
Token::StringLiteral(_) => String::from("<string literal>"),
Token::OpenBracket => String::from(OPEN_BRACKET),
Token::ClosedBracket => String::from(CLOSED_BRACKET),

};
write!(f, "'{}'", token_str)
Expand Down

0 comments on commit b9f74cf

Please sign in to comment.