summaryrefslogtreecommitdiff
path: root/lib/sexp/lex.rb
blob: d148840a2fd4d16ba586de66131393cee9c8659e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Base class for lexer tokens. The kind of a token is expressed by its
# subclass (T_End, T_LParen, T_RParen, T_Whitespace, T_Atom); the factory
# methods below only pass content when building an atom.
class Token
  # Payload given at construction time; nil when none was supplied.
  attr_reader :content

  # `tok.token` returns the token's class so kinds can be compared directly.
  alias token class

  def initialize(content = nil)
    @content = content
  end

  # Label for the token kind. Atoms also show their content.
  # Returns nil for a token that is none of the five known kinds.
  def to_s
    return "END" if is_a?(T_End)
    return "LEFT_PAREN" if is_a?(T_LParen)
    return "RIGHT_PAREN" if is_a?(T_RParen)
    return "WHITESPACE" if is_a?(T_Whitespace)

    "ATOM:\"#{content}\"" if is_a?(T_Atom)
  end

  # Factory helpers, one per token kind.
  class << self
    def end
      T_End.new
    end

    def lparen
      T_LParen.new
    end

    def rparen
      T_RParen.new
    end

    def whitespace
      T_Whitespace.new
    end

    def atom(content)
      T_Atom.new(content)
    end
  end
end

# Returned by lex_token when no input remains.
class T_End < Token
end

# An opening parenthesis, "(".
class T_LParen < Token
end

# A closing parenthesis, ")".
class T_RParen < Token
end

# A run of space, tab, carriage-return or newline characters.
class T_Whitespace < Token
end

# Any other run of characters; the text is held in #content.
class T_Atom < Token
end

# Returns the first element of +input+ without removing it.
# lex_tokens passes an Array of one-character strings, for which this is
# nil once the array is empty.
def peek_char(input)
  input.first
end

# Removes the first element from +input+ (mutating it) and returns that
# element. For an empty Array this returns nil.
def consume_char(input)
  input.shift
end

# Drops leading whitespace (space, tab, CR, LF) from +input+ in place.
# Stops at the first non-whitespace character or when input runs out.
# Always returns nil.
def consume_whitespace(input)
  blanks = [" ", "\t", "\r", "\n"]
  consume_char(input) while blanks.include?(peek_char(input))
end

# Consumes characters from the front of +input+ (mutating it) up to, but not
# including, the next delimiter, and returns them joined as a String.
#
# Delimiters are "(", ")", space, tab, CR, LF and end of input. End of input
# shows up as a nil from peek_char; it has to be treated as a delimiter,
# otherwise an atom that runs to the end of the input would try to append
# nil to the result and raise TypeError.
#
# Returns "" when +input+ is empty or already starts with a delimiter.
def get_atom(input)
  delimiters = [nil, "(", ")", " ", "\t", "\r", "\n"]
  # dup gives an unfrozen buffer so << can append in place.
  val = "".dup
  val << consume_char(input) until delimiters.include?(peek_char(input))
  val
end

# Reads one token from the front of +input+, consuming the characters that
# make it up, and returns it:
#   no input left          -> Token.end (nothing consumed)
#   "("                    -> Token.lparen
#   ")"                    -> Token.rparen
#   space / tab / CR / LF  -> Token.whitespace (the whole run is consumed)
#   anything else          -> Token.atom with the text returned by get_atom
def lex_token(input)
  head = peek_char(input)
  return Token.end if head.nil?

  if head == "("
    consume_char(input)
    Token.lparen
  elsif head == ")"
    consume_char(input)
    Token.rparen
  elsif [" ", "\t", "\r", "\n"].include?(head)
    consume_whitespace(input)
    Token.whitespace
  else
    Token.atom(get_atom(input))
  end
end

# Lexes the string +input+ into an Array of tokens.
# The string is split into characters and lex_token is called repeatedly
# until it yields a T_End token. Only paren and atom tokens are kept;
# whitespace tokens and the terminating T_End are left out of the result.
def lex_tokens(input)
  chars = input.chars
  kept_kinds = [T_LParen, T_RParen, T_Atom]
  result = []

  until (tok = lex_token(chars)).token == T_End
    result << tok if kept_kinds.include?(tok.token)
  end

  result
end