diff options
| author | Nat Lasseter <user@4574.co.uk> | 2024-01-30 16:55:42 +0000 | 
|---|---|---|
| committer | Nat Lasseter <user@4574.co.uk> | 2024-01-30 16:55:42 +0000 | 
| commit | 703595f8f8380acc6816f0a770afee6acba8adc4 (patch) | |
| tree | a82e38fb62a867964e23a7edde152a65cfcd7f29 /lib/sexp/lex.rb | |
Initial commit
Diffstat (limited to 'lib/sexp/lex.rb')
| -rw-r--r-- | lib/sexp/lex.rb | 91 | 
1 file changed, 91 insertions, 0 deletions
diff --git a/lib/sexp/lex.rb b/lib/sexp/lex.rb new file mode 100644 index 0000000..d148840 --- /dev/null +++ b/lib/sexp/lex.rb @@ -0,0 +1,91 @@ +class Token +  def initialize(content = nil) +    @content = content +  end + +  attr_reader :content + +  alias token class + +  def to_s +    case self +    when T_End +      "END" +    when T_LParen +      "LEFT_PAREN" +    when T_RParen +      "RIGHT_PAREN" +    when T_Whitespace +      "WHITESPACE" +    when T_Atom +      "ATOM:\"#{content}\"" +    end +  end + +  def self.end; T_End.new; end +  def self.lparen; T_LParen.new; end +  def self.rparen; T_RParen.new; end +  def self.whitespace; T_Whitespace.new; end +  def self.atom(content); T_Atom.new(content); end +end + +class T_End < Token; end +class T_LParen < Token; end +class T_RParen < Token; end +class T_Whitespace < Token; end +class T_Atom < Token; end + +def peek_char(input) +  input.first +end + +def consume_char(input) +  input.shift +end + +def consume_whitespace(input) +  while [?\s, ?\t, ?\r, ?\n].include?(peek_char(input)) +    consume_char(input) +  end +end + +def get_atom(input) +  val = "" +  until [?(, ?), ?\s, ?\t, ?\r, ?\n].include?(peek_char(input)) +    val += consume_char(input) +  end +  val +end + +def lex_token(input) +  case peek_char(input) +  when nil +    Token.end +  when ?( +    consume_char(input) +    Token.lparen +  when ?) +    consume_char(input) +    Token.rparen +  when ?\s, ?\t, ?\r, ?\n +    consume_whitespace(input) +    Token.whitespace +  else +    val = get_atom(input) +    Token.atom(val) +  end +end + +def lex_tokens(input) +  input = input.chars + +  toks = [] + +  loop do +    tok = lex_token(input) +    break if tok.token == T_End +    toks << tok if [T_LParen, T_RParen, T_Atom].include?(tok.token) +  end + +  toks +end  | 
