summaryrefslogtreecommitdiff
path: root/lib/sexp/lex.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sexp/lex.rb')
-rw-r--r--lib/sexp/lex.rb91
1 files changed, 91 insertions, 0 deletions
diff --git a/lib/sexp/lex.rb b/lib/sexp/lex.rb
new file mode 100644
index 0000000..d148840
--- /dev/null
+++ b/lib/sexp/lex.rb
@@ -0,0 +1,91 @@
+class Token
+ def initialize(content = nil)
+ @content = content
+ end
+
+ attr_reader :content
+
+ alias token class
+
+ def to_s
+ case self
+ when T_End
+ "END"
+ when T_LParen
+ "LEFT_PAREN"
+ when T_RParen
+ "RIGHT_PAREN"
+ when T_Whitespace
+ "WHITESPACE"
+ when T_Atom
+ "ATOM:\"#{content}\""
+ end
+ end
+
+ def self.end; T_End.new; end
+ def self.lparen; T_LParen.new; end
+ def self.rparen; T_RParen.new; end
+ def self.whitespace; T_Whitespace.new; end
+ def self.atom(content); T_Atom.new(content); end
+end
+
+class T_End < Token; end
+class T_LParen < Token; end
+class T_RParen < Token; end
+class T_Whitespace < Token; end
+class T_Atom < Token; end
+
+# Returns the first element of `input` without removing it.
+# lex_token treats a nil result as the end of the input.
+def peek_char(input)
+ input.first
+end
+
+# Removes the first element from `input` (mutating it via `shift`) and
+# returns that element.
+def consume_char(input)
+ input.shift
+end
+
+def consume_whitespace(input)
+ while [?\s, ?\t, ?\r, ?\n].include?(peek_char(input))
+ consume_char(input)
+ end
+end
+
+def get_atom(input)
+ val = ""
+ until [?(, ?), ?\s, ?\t, ?\r, ?\n].include?(peek_char(input))
+ val += consume_char(input)
+ end
+ val
+end
+
+def lex_token(input)
+ case peek_char(input)
+ when nil
+ Token.end
+ when ?(
+ consume_char(input)
+ Token.lparen
+ when ?)
+ consume_char(input)
+ Token.rparen
+ when ?\s, ?\t, ?\r, ?\n
+ consume_whitespace(input)
+ Token.whitespace
+ else
+ val = get_atom(input)
+ Token.atom(val)
+ end
+end
+
+def lex_tokens(input)
+ input = input.chars
+
+ toks = []
+
+ loop do
+ tok = lex_token(input)
+ break if tok.token == T_End
+ toks << tok if [T_LParen, T_RParen, T_Atom].include?(tok.token)
+ end
+
+ toks
+end