about | summary | refs | log | tree | commit | diff
path: root/markov-serialise.rb
diff options
context:
space:
mode:
Diffstat (limited to 'markov-serialise.rb')
-rwxr-xr-x markov-serialise.rb 27
1 files changed, 27 insertions, 0 deletions
diff --git a/markov-serialise.rb b/markov-serialise.rb
new file mode 100755
index 0000000..00e459e
--- /dev/null
+++ b/markov-serialise.rb
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+
+CHUNK = ARGV[1].to_i
+
+stats = Hash.new
+
+words = File.readlines(ARGV[0])
+ .map(&:split)
+ .flatten
+
+(0 .. (words.length - CHUNK - CHUNK)).each do |i|
+ k = words[i...(i+CHUNK)]
+ v = words[(i+CHUNK)...(i+CHUNK+CHUNK)]
+ if stats.include?(k) then
+ t = stats[k]
+ if t.include?(v) then
+ t[v] += 1
+ else
+ t[v] = 1
+ end
+ else
+ stats[k] = Hash.new
+ stats[k][v] = 1
+ end
+end
+
+File.new(ARGV[2], "w").print(Marshal::dump(stats))