aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xmarkov-analyse.rb34
-rwxr-xr-xmarkov-serialise.rb27
2 files changed, 34 insertions, 27 deletions
diff --git a/markov-analyse.rb b/markov-analyse.rb
new file mode 100755
index 0000000..6c306ca
--- /dev/null
+++ b/markov-analyse.rb
@@ -0,0 +1,34 @@
module Markov
  # Collects word-level Markov chain statistics from a text file and
  # serialises them with Marshal.
  #
  # The statistics hash maps each Array of +chunklength+ consecutive words to
  # a Hash of { following_word => occurrence_count }. The String key
  # "__SETTINGS__" additionally stores the chunk length that was used.
  class Analyser
    # Reads +inputfile+ and splits it into whitespace-separated words.
    #
    # @param inputfile [String] path of the text file to analyse
    # @param chunklength [Integer] number of consecutive words that form a key
    def initialize(inputfile, chunklength)
      @chunklength = chunklength
      @stats = { "__SETTINGS__" => { "__CHUNKLENGTH__" => @chunklength } }
      @words = File.readlines(inputfile).flat_map(&:split)
    end

    # Counts, for every run of +chunklength+ consecutive words, how often each
    # word follows that run, accumulating the counts into the statistics hash.
    def analyse
      # each_cons visits every window of chunklength + 1 words, so the
      # transitions at the very end of the text are counted as well, whatever
      # the chunk length is. Inputs shorter than one window yield nothing.
      @words.each_cons(@chunklength + 1) do |window|
        *prefix, follower = window
        # Plain hashes only: a Hash with a default proc cannot be dumped by
        # Marshal, and prefixes are Arrays so they never clash with the
        # "__SETTINGS__" String key.
        followers = (@stats[prefix] ||= {})
        followers[follower] = followers.fetch(follower, 0) + 1
      end
    end

    # Writes the Marshal-serialised statistics to +outputfile+.
    #
    # The file is written in binary mode and is closed before this method
    # returns.
    #
    # @param outputfile [String] path of the file to (over)write
    # @return [nil]
    def save(outputfile)
      File.binwrite(outputfile, Marshal.dump(@stats))
      nil
    end
  end
end
diff --git a/markov-serialise.rb b/markov-serialise.rb
deleted file mode 100755
index 1199453..0000000
--- a/markov-serialise.rb
+++ /dev/null
@@ -1,27 +0,0 @@
#!/usr/bin/env ruby

# Builds word-level Markov chain statistics from a text file and writes them
# out with Marshal.
#
# Arguments as read from ARGV:
#   ARGV[0]  path of the input text file
#   ARGV[1]  chunk length (converted with to_i)
#   ARGV[2]  path of the output file
#
# The resulting hash maps each Array of CHUNK consecutive words to a Hash of
# { following_word => occurrence_count }; the String key "__SETTINGS__"
# records the chunk length.

CHUNK = ARGV[1].to_i

stats = { "__SETTINGS__" => { "__CHUNK__" => CHUNK } }

words = File.readlines(ARGV[0]).flat_map(&:split)

# each_cons visits every window of CHUNK + 1 words, so the transitions at the
# end of the text are counted too, whatever the chunk length is.
words.each_cons(CHUNK + 1) do |window|
  *prefix, follower = window
  # Plain hashes only: a Hash with a default proc cannot be dumped by Marshal.
  followers = (stats[prefix] ||= {})
  followers[follower] = followers.fetch(follower, 0) + 1
end

# Binary write that closes the file handle once the data is on disk.
File.binwrite(ARGV[2], Marshal.dump(stats))