# Lexer for s-expressions: turns a source string into a flat list of tokens.
#
# Provenance: lib/sexp/lex.rb as created by commit
# 703595f8f8380acc6816f0a770afee6acba8adc4 ("Initial commit",
# Nat Lasseter, Tue, 30 Jan 2024 16:55:42 +0000).

# Characters that separate tokens and are otherwise discarded by lex_tokens.
SEXP_WHITESPACE = [" ", "\t", "\r", "\n"].freeze

# Characters that terminate an atom: both parentheses plus any whitespace.
SEXP_DELIMITERS = (["(", ")"] + SEXP_WHITESPACE).freeze

# Base class for every lexer token. The concrete kind of a token is encoded
# by its subclass (T_End, T_LParen, T_RParen, T_Whitespace, T_Atom).
class Token
  # @param content [String, nil] payload of the token; only atoms are built
  #   with one by the factory methods below
  def initialize(content = nil)
    @content = content
  end

  # @return [String, nil] the payload given at construction time
  attr_reader :content

  # `tok.token` returns the token's class, so callers can compare it against
  # T_End, T_Atom, and so on.
  alias token class

  # Human-readable name of the token.
  #
  # @return [String, nil] the name; nil for a bare Token, which matches none
  #   of the known kinds
  def to_s
    case self
    when T_End        then "END"
    when T_LParen     then "LEFT_PAREN"
    when T_RParen     then "RIGHT_PAREN"
    when T_Whitespace then "WHITESPACE"
    when T_Atom       then "ATOM:\"#{content}\""
    end
  end

  # Factory helpers, one per token kind.

  def self.end
    T_End.new
  end

  def self.lparen
    T_LParen.new
  end

  def self.rparen
    T_RParen.new
  end

  def self.whitespace
    T_Whitespace.new
  end

  def self.atom(content)
    T_Atom.new(content)
  end
end

class T_End < Token; end
class T_LParen < Token; end
class T_RParen < Token; end
class T_Whitespace < Token; end
class T_Atom < Token; end

# Looks at the next character without consuming it.
#
# @param input [Array<String>] remaining characters
# @return [String, nil] the next character, or nil when input is empty
def peek_char(input)
  input.first
end

# Removes and returns the next character. Mutates +input+.
#
# @param input [Array<String>] remaining characters
# @return [String, nil] the removed character, or nil when input is empty
def consume_char(input)
  input.shift
end

# Drops every leading whitespace character from +input+. Mutates +input+.
#
# @param input [Array<String>] remaining characters
# @return [nil]
def consume_whitespace(input)
  # peek_char yields nil at end of input, which is not whitespace, so the
  # loop also terminates when the input runs out.
  consume_char(input) while SEXP_WHITESPACE.include?(peek_char(input))
end

# Reads characters up to (not including) the next delimiter or the end of
# input and returns them joined together. Mutates +input+.
#
# @param input [Array<String>] remaining characters
# @return [String] the atom text; empty if input starts with a delimiter or
#   is already exhausted
def get_atom(input)
  val = +""
  loop do
    ch = peek_char(input)
    # End of input terminates an atom exactly like a delimiter does; without
    # this check a trailing atom would try to append nil and raise TypeError.
    break if ch.nil? || SEXP_DELIMITERS.include?(ch)

    val << consume_char(input)
  end
  val
end

# Consumes one token from the front of +input+. Mutates +input+.
#
# A run of consecutive whitespace characters is reported as a single
# whitespace token.
#
# @param input [Array<String>] remaining characters
# @return [Token] the next token; a T_End when input is empty
def lex_token(input)
  case peek_char(input)
  when nil
    Token.end
  when "("
    consume_char(input)
    Token.lparen
  when ")"
    consume_char(input)
    Token.rparen
  when *SEXP_WHITESPACE
    consume_whitespace(input)
    Token.whitespace
  else
    Token.atom(get_atom(input))
  end
end

# Lexes a whole source string.
#
# @param input [String] source text
# @return [Array<Token>] parenthesis and atom tokens in source order;
#   whitespace tokens and the final end token are not included
def lex_tokens(input)
  chars = input.chars
  toks = []

  loop do
    tok = lex_token(chars)
    break if tok.token == T_End

    toks << tok if [T_LParen, T_RParen, T_Atom].include?(tok.token)
  end

  toks
end