# frozen_string_literal: true

# Source: markov-analyse.rb as added by the commit "Renames and refactored
# serialise", which also removes the older markov-serialise.rb script.

module Markov
  # Counts, for every run of +chunklength+ consecutive words in a text file,
  # how often each word follows that run, and serialises the resulting table
  # with Marshal.
  #
  # The table is a plain Hash. Each key is an Array of +chunklength+ words and
  # each value is a Hash mapping a following word to its occurrence count. One
  # extra entry, keyed by the String "__SETTINGS__", records the chunk length.
  class Analyser
    # Reads the input file and splits it into whitespace-separated words.
    #
    # @param inputfile [String] path of the text file to analyse
    # @param chunklength [Integer] number of consecutive words forming a key
    def initialize(inputfile, chunklength)
      @chunklength = chunklength
      @stats = { "__SETTINGS__" => { "__CHUNKLENGTH__" => @chunklength } }
      @words = File.readlines(inputfile).flat_map(&:split)
    end

    # Tallies every (chunk, following word) pair found in the input.
    #
    # Slides a window of +chunklength + 1+ words over the text, so every chunk
    # that has a successor is counted, including the ones at the very end of
    # the input. Input shorter than one window adds nothing. Counts accumulate
    # if this method is called more than once.
    #
    # @return [Hash] the statistics table built so far
    def analyse
      @words.each_cons(@chunklength + 1) do |window|
        *chunk, successor = window
        # Deliberately a plain Hash: Marshal cannot dump a Hash that carries a
        # default proc, so missing entries are created explicitly.
        followers = (@stats[chunk] ||= {})
        followers[successor] = followers.fetch(successor, 0) + 1
      end
      @stats
    end

    # Writes the Marshal-serialised statistics table to +outputfile+.
    #
    # The file is opened in binary mode, since Marshal output is binary data,
    # and the block form closes (and therefore flushes) the handle on return.
    #
    # @param outputfile [String] path of the file to create or overwrite
    # @return [Integer] number of bytes written
    def save(outputfile)
      File.open(outputfile, "wb") { |file| file.write(Marshal.dump(@stats)) }
    end
  end
end