diff options
-rwxr-xr-x | markov-analyse.rb | 34 | ||||
-rwxr-xr-x | markov-serialise.rb | 27 |
2 files changed, 34 insertions, 27 deletions
# Word-level Markov chain tooling.
module Markov
  # Builds a table of word-transition counts from a text file and
  # serialises it with Marshal.
  #
  # Layout of the table held in @stats:
  #
  #   {
  #     "__SETTINGS__" => { "__CHUNKLENGTH__" => chunklength },
  #     [word, ...]    => { successor_word => occurrence_count, ... },
  #     ...
  #   }
  #
  # Transition keys are Arrays of +chunklength+ consecutive words, so they
  # cannot collide with the String key "__SETTINGS__".
  class Analyser
    # Reads +inputfile+ and splits its content into whitespace-separated words.
    #
    # @param inputfile [String] path of the text file to analyse
    # @param chunklength [Integer] number of consecutive words that form a
    #   key; expected to be a positive integer
    # @raise [SystemCallError] if +inputfile+ cannot be opened or read
    def initialize(inputfile, chunklength)
      @chunklength = chunklength
      @stats = { "__SETTINGS__" => { "__CHUNKLENGTH__" => @chunklength } }
      @words = File.foreach(inputfile).flat_map(&:split)
    end

    # Counts, for every run of +chunklength+ consecutive words, how often
    # each word directly follows that run, accumulating into @stats.
    # Calling it again adds the same counts a second time.
    #
    # @return [Analyser] self, so calls can be chained
    def analyse
      # Each window holds the key words followed by their successor. Sliding
      # the window over every position guarantees that the transitions into
      # the very last words of the text are counted as well.
      @words.each_cons(@chunklength + 1) do |window|
        key = window.first(@chunklength)
        successor = window.last
        # Plain hashes only: Marshal cannot dump a Hash with a default proc.
        counts = (@stats[key] ||= {})
        counts[successor] = counts.fetch(successor, 0) + 1
      end
      self
    end

    # Writes the Marshal dump of the statistics table to +outputfile+,
    # replacing any existing content.
    #
    # @param outputfile [String] destination path
    # @return [Integer] number of bytes written
    # @raise [SystemCallError] if +outputfile+ cannot be written
    def save(outputfile)
      # Marshal data is binary; binwrite opens in binary mode and closes the
      # file before returning, so the dump is complete on disk.
      File.binwrite(outputfile, Marshal.dump(@stats))
    end
  end
end