diff options
Diffstat (limited to 'markov-serialise.rb')
-rwxr-xr-x | markov-serialise.rb | 27 |
1 files changed, 27 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# markov-serialise.rb
#
# Reads a text file, splits it into whitespace-separated words, counts how
# often each run of CHUNK consecutive words is followed by each next run of
# CHUNK words, and writes the resulting table to a file with Marshal.
#
# Usage: markov-serialise.rb INPUT CHUNK OUTPUT
#   INPUT  - path of the text file to analyse
#   CHUNK  - number of words per key and per follower (positive integer)
#   OUTPUT - path of the file that receives the Marshal dump

USAGE = 'usage: markov-serialise.rb INPUT CHUNK OUTPUT'

# Builds the transition table for a list of words.
#
# Every window of 2 * chunk consecutive words contributes one observation:
# its first +chunk+ words are the key, its last +chunk+ words the follower.
#
# @param words [Array<String>] the words, in document order
# @param chunk [Integer] number of words per key and per follower
# @return [Hash{Array<String> => Hash{Array<String> => Integer}}]
#   follower counts per key; empty when fewer than 2 * chunk words are given
# @raise [ArgumentError] if +chunk+ is not positive
def build_stats(words, chunk)
  raise ArgumentError, "chunk must be a positive integer, got #{chunk}" unless chunk.positive?

  # Plain hashes on purpose: Marshal refuses to dump a Hash that carries a
  # default proc, so the counters are initialised explicitly instead.
  stats = {}
  words.each_cons(chunk * 2) do |window|
    followers = (stats[window.first(chunk)] ||= {})
    successor = window.last(chunk)
    followers[successor] = followers.fetch(successor, 0) + 1
  end
  stats
end

# Reads +input_path+, builds the transition table and dumps it to
# +output_path+.
#
# @param input_path [String] text file to read
# @param chunk_arg [String] chunk size as given on the command line
# @param output_path [String] file to (over)write with the Marshal dump
# @return [Integer] number of bytes written
# @raise [ArgumentError] if +chunk_arg+ is not a positive decimal integer
def serialise(input_path, chunk_arg, output_path)
  # Strict base-10 parse: String#to_i would silently turn garbage into 0.
  chunk = Integer(chunk_arg, 10)
  words = File.foreach(input_path).flat_map(&:split)
  # binwrite closes the handle and keeps the binary dump free of any
  # newline/encoding translation.
  File.binwrite(output_path, Marshal.dump(build_stats(words, chunk)))
end

if __FILE__ == $PROGRAM_NAME
  case ARGV.length
  when 3 then serialise(*ARGV)
  when 0 then warn USAGE # bare invocation only shows the usage line
  else abort USAGE
  end
end