From 703595f8f8380acc6816f0a770afee6acba8adc4 Mon Sep 17 00:00:00 2001
From: Nat Lasseter
Date: Tue, 30 Jan 2024 16:55:42 +0000
Subject: Initial commit

---
Note: the file contents below were revised after export; the blob ids on the
"index" lines still refer to the contents as originally committed.

 .gitignore        |   1 +
 lib/sexp/lex.rb   | 114 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/sexp/parse.rb | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sexp.rb           |  13 +++++
 4 files changed, 260 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 lib/sexp/lex.rb
 create mode 100644 lib/sexp/parse.rb
 create mode 100644 sexp.rb

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1377554
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.swp
diff --git a/lib/sexp/lex.rb b/lib/sexp/lex.rb
new file mode 100644
index 0000000..d148840
--- /dev/null
+++ b/lib/sexp/lex.rb
@@ -0,0 +1,114 @@
+# Lexer for a minimal s-expression syntax: parentheses, whitespace and atoms.
+
+# Characters that separate tokens and carry no meaning of their own.
+WHITESPACE_CHARS = [" ", "\t", "\r", "\n"].freeze
+
+# Characters that end an atom without being part of it.
+ATOM_DELIMITERS = (["(", ")"] + WHITESPACE_CHARS).freeze
+
+# Base class for lexer tokens. The concrete subclass identifies the kind of
+# token; only atoms carry content.
+class Token
+  attr_reader :content
+
+  # @param content [String, nil] text of the token (set for atoms only)
+  def initialize(content = nil)
+    @content = content
+  end
+
+  # A token's kind is its class, e.g. T_Atom.
+  alias token class
+
+  # @return [String] readable name of the token, including atom content
+  def to_s
+    case self
+    when T_End then "END"
+    when T_LParen then "LEFT_PAREN"
+    when T_RParen then "RIGHT_PAREN"
+    when T_Whitespace then "WHITESPACE"
+    when T_Atom then "ATOM:\"#{content}\""
+    else super # a bare Token has no kind; always return a String
+    end
+  end
+
+  def self.end; T_End.new; end
+  def self.lparen; T_LParen.new; end
+  def self.rparen; T_RParen.new; end
+  def self.whitespace; T_Whitespace.new; end
+  def self.atom(content); T_Atom.new(content); end
+end
+
+class T_End < Token; end
+class T_LParen < Token; end
+class T_RParen < Token; end
+class T_Whitespace < Token; end
+class T_Atom < Token; end
+
+# @param input [Array<String>] remaining characters
+# @return [String, nil] next character without consuming it; nil at end of input
+def peek_char(input)
+  input.first
+end
+
+# Removes and returns the next character of input (nil at end of input).
+def consume_char(input)
+  input.shift
+end
+
+# Discards the run of whitespace characters at the front of input.
+def consume_whitespace(input)
+  consume_char(input) while WHITESPACE_CHARS.include?(peek_char(input))
+end
+
+# Consumes characters up to, but not including, the next delimiter.
+#
+# @param input [Array<String>] remaining characters; consumed in place
+# @return [String] text of the atom
+def get_atom(input)
+  val = +""
+  # Stop at end of input as well: peek_char returns nil there, which is not a
+  # delimiter, and appending the nil from consume_char would raise TypeError.
+  until input.empty? || ATOM_DELIMITERS.include?(peek_char(input))
+    val << consume_char(input)
+  end
+  val
+end
+
+# Consumes one token from the front of input.
+#
+# @param input [Array<String>] remaining characters; consumed in place
+# @return [Token] the token read; T_End once input is exhausted
+def lex_token(input)
+  case peek_char(input)
+  when nil
+    Token.end
+  when "("
+    consume_char(input)
+    Token.lparen
+  when ")"
+    consume_char(input)
+    Token.rparen
+  when *WHITESPACE_CHARS
+    consume_whitespace(input)
+    Token.whitespace
+  else
+    Token.atom(get_atom(input))
+  end
+end
+
+# Splits a string into the tokens the parser consumes.
+#
+# @param input [String] source text
+# @return [Array<Token>] parentheses and atoms in order; whitespace is dropped
+def lex_tokens(input)
+  chars = input.chars
+  toks = []
+
+  loop do
+    tok = lex_token(chars)
+    break if tok.is_a?(T_End)
+    toks << tok unless tok.is_a?(T_Whitespace)
+  end
+
+  toks
+end
diff --git a/lib/sexp/parse.rb b/lib/sexp/parse.rb
new file mode 100644
index 0000000..adfd513
--- /dev/null
+++ b/lib/sexp/parse.rb
@@ -0,0 +1,132 @@
+# Parser turning lexer tokens into Atom and SExp values.
+
+# Raised when the token stream does not form a complete s-expression.
+class SExpParseError < StandardError; end
+
+# A leaf of the expression tree, holding the text of one atom token.
+class Atom
+  attr_reader :content
+
+  # @param content [String, nil] text of the atom
+  def initialize(content = nil)
+    @content = content
+  end
+
+  # @return [String] the atom's text (empty when there is no content)
+  def to_s
+    content.to_s
+  end
+
+  # Builds an Atom carrying the content of the given token.
+  def self.from_token(token)
+    new(token.content)
+  end
+end
+
+# A cons cell. A cell whose car is nil stands for the empty list.
+class SExp
+  attr_accessor :car, :cdr
+
+  # @param addr [Object, nil] the car: first element of the pair
+  # @param data [Object, nil] the cdr: rest of the list
+  def initialize(addr = nil, data = nil)
+    @car = addr
+    @cdr = data
+  end
+
+  # @return [String] dotted-pair notation; a cell with a nil car prints as
+  #   "()", and so does a nil cdr
+  def to_s
+    return "()" if car.nil?
+
+    tail = cdr.nil? ? "()" : cdr
+    "(#{car} . #{tail})"
+  end
+end
+
+# Looks at the next token without consuming it (nil when none are left).
+def peek_token(tokens)
+  tokens.first
+end
+
+# Removes and returns the next token (nil when none are left).
+def get_token(tokens)
+  tokens.shift
+end
+
+# Counts the direct members of the list whose opening parenthesis is
+# tokens[0]; a nested list counts as one member. parse_sexp does not rely on
+# this pre-scan; it is kept as a standalone helper.
+#
+# @param tokens [Array<Token>] tokens beginning with a T_LParen
+# @return [Integer] number of direct members
+# @raise [SExpParseError] if the list is never closed
+def num_members(tokens)
+  members = 0
+  level = 1
+  at = 1
+
+  until level.zero?
+    # Without this check an unclosed list would scan past the end forever.
+    raise SExpParseError, "unexpected end of input: missing \")\"" if at >= tokens.length
+
+    case tokens.at(at)
+    when T_LParen
+      members += 1 if level == 1
+      level += 1
+    when T_RParen
+      level -= 1
+    when T_Atom
+      members += 1 if level == 1
+    end
+    at += 1
+  end
+
+  members
+end
+
+# Builds a proper list from members: each member becomes the car of a cell
+# and the last cell's cdr is the empty list. Terminating every list this way
+# keeps (a b c) distinct from (a (b c)).
+#
+# @param members [Array<Atom, SExp>] elements of the list, in order
+# @return [SExp] head cell of the list; the empty list when members is empty
+def members_to_tree(members)
+  members.reverse_each.reduce(SExp.new) { |tail, member| SExp.new(member, tail) }
+end
+
+# Parses one s-expression from the front of tokens, consuming what it reads.
+#
+# @param tokens [Array<Token>] remaining tokens; consumed in place
+# @return [Atom, SExp] an Atom for an atom token, otherwise the parsed list
+# @raise [SExpParseError] on a stray ")" or when the input ends too early
+def parse_sexp(tokens)
+  case peek_token(tokens)
+  when T_Atom
+    Atom.from_token(get_token(tokens))
+  when T_LParen
+    get_token(tokens) # opening parenthesis
+    members = []
+    until peek_token(tokens).is_a?(T_RParen)
+      raise SExpParseError, "unexpected end of input: missing \")\"" if tokens.empty?
+
+      members << parse_sexp(tokens)
+    end
+    get_token(tokens) # closing parenthesis
+    members_to_tree(members)
+  when nil
+    raise SExpParseError, "unexpected end of input"
+  else
+    raise SExpParseError, "unexpected token #{peek_token(tokens)}"
+  end
+end
+
+# Parses every s-expression in tokens.
+#
+# @param tokens [Array<Token>] tokens from lex_tokens; consumed in place
+# @return [Array<Atom, SExp>] the parsed expressions, in order
+def parse_sexps(tokens)
+  sexps = []
+  sexps << parse_sexp(tokens) until tokens.empty?
+  sexps
+end
diff --git a/sexp.rb b/sexp.rb
new file mode 100644
index 0000000..47bb674
--- /dev/null
+++ b/sexp.rb
@@ -0,0 +1,13 @@
+# Demo: lex and parse a sample program, then print each s-expression.
+
+# Resolve lib/ relative to this file so the script runs from any directory.
+$LOAD_PATH.unshift(File.expand_path("lib", __dir__))
+
+require "sexp/lex"
+require "sexp/parse"
+
+input = "(def foo (+ 7 4)) (display foo)"
+tokens = lex_tokens(input)
+sexps = parse_sexps(tokens)
+
+puts sexps
-- 
cgit v1.2.1