aboutsummaryrefslogtreecommitdiff
path: root/markov-serialise.rb
blob: 00e459e37e108ee35fac053e199baa4274dcd1c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env ruby
# frozen_string_literal: true

# Builds a chunk-to-chunk transition table from a text file and serialises
# it with Marshal.
#
# Usage: markov-serialise.rb INPUT CHUNK OUTPUT
#
#   INPUT   text file; every line is split on whitespace into words
#   CHUNK   number of words per chunk (must be a positive integer)
#   OUTPUT  file that receives Marshal.dump of the table
#
# The table is a plain Hash shaped like
#   { [CHUNK words] => { [following CHUNK words] => occurrence count } }
# with one occurrence counted for every position in the word list at which
# 2 * CHUNK consecutive words are available.

# Without this check a missing or non-numeric CHUNK silently becomes 0
# (nil.to_i / "abc".to_i) and a missing INPUT fails with an opaque TypeError.
unless ARGV.length >= 3 && ARGV[1].to_i > 0
	abort "usage: #{File.basename($PROGRAM_NAME)} INPUT CHUNK OUTPUT (CHUNK must be a positive integer)"
end

input_path, chunk_arg, output_path = ARGV

CHUNK = chunk_arg.to_i

words = File.readlines(input_path).flat_map(&:split)

# Plain hashes on purpose: Marshal refuses to dump a Hash that has a default
# proc, so the "create on first use" logic is spelled out with ||= and fetch.
stats = {}

# Each window holds a key chunk immediately followed by its successor chunk.
# When there are fewer than 2 * CHUNK words, nothing is yielded and the
# table stays empty.
words.each_cons(2 * CHUNK) do |window|
	key = window.first(CHUNK)
	successor = window.last(CHUNK)
	followers = (stats[key] ||= {})
	followers[successor] = followers.fetch(successor, 0) + 1
end

# Marshal output is binary: write it in binary mode, and let binwrite close
# the file handle instead of leaving it open until the interpreter exits.
File.binwrite(output_path, Marshal.dump(stats))