diff options
author | Nathan Lasseter <nathan@4574.co.uk> | 2014-01-14 17:55:47 +0000 |
---|---|---|
committer | Nathan Lasseter <nathan@4574.co.uk> | 2014-01-14 17:55:47 +0000 |
commit | 79a5cf84254c2aeed244861197fdc5fe3282a940 (patch) | |
tree | c9b3436e145dffb33904760d4a80fb723242c12c /markov-serialise.rb |
First Commit
Diffstat (limited to 'markov-serialise.rb')
-rwxr-xr-x | markov-serialise.rb | 27 |
1 files changed, 27 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Builds a table of word-chunk transition counts from a text file and writes
# it to disk with Marshal.
#
# Usage: markov-serialise.rb INPUT CHUNK OUTPUT
#
#   INPUT  - path of the text file to read
#   CHUNK  - number of words per chunk (positive integer)
#   OUTPUT - path the Marshal-dumped table is written to
#
# The dumped object is a Hash mapping each CHUNK-word Array to a Hash of
# { following CHUNK-word Array => number of times it followed }.

USAGE = "usage: #{File.basename($PROGRAM_NAME)} INPUT CHUNK OUTPUT"

# Reads +path+ and returns every whitespace-separated word, in file order.
def read_words(path)
  File.foreach(path).flat_map(&:split)
end

# Counts, for every window of 2 * +chunk+ consecutive words, how often the
# second half of the window follows the first half.
#
# Returns a Hash of { Array<String> => { Array<String> => Integer } }.
# Returns an empty Hash when +words+ holds fewer than 2 * +chunk+ words.
def count_transitions(words, chunk)
  words.each_cons(chunk * 2).each_with_object({}) do |window, stats|
    leading = window.first(chunk)
    following = window.last(chunk)
    # Plain hashes only: Marshal cannot dump a Hash with a default proc, and
    # a default value would change the serialised form seen by readers.
    tally = (stats[leading] ||= {})
    tally[following] = tally.fetch(following, 0) + 1
  end
end

# Converts the CHUNK command-line argument to a positive Integer, aborting
# with a readable message instead of silently treating bad input as 0.
def parse_chunk(raw)
  chunk = Integer(raw, 10)
  raise ArgumentError, 'chunk must be positive' unless chunk.positive?

  chunk
rescue ArgumentError, TypeError
  abort "CHUNK must be a positive integer, got #{raw.inspect}\n#{USAGE}"
end

if __FILE__ == $PROGRAM_NAME
  abort USAGE if ARGV.length < 3

  input_path, raw_chunk, output_path = ARGV
  stats = count_transitions(read_words(input_path), parse_chunk(raw_chunk))

  # Marshal output is binary; binwrite opens in binary mode and closes the
  # file handle once the data has been written.
  File.binwrite(output_path, Marshal.dump(stats))
end