From f091c51626c1ef62c9e81e0b1207b46a0666f5bf Mon Sep 17 00:00:00 2001
From: Nathan Lasseter
Date: Sun, 19 Jan 2014 12:05:49 +0000
Subject: Changed the serialisation to use single word follow-ons.

Added the number of generations argument to markov-run.
Changed the length argument from number of achunks to the total words in the generation.
Removed the "Starts with a capital" requirement.
---
 markov-run.rb       | 26 +++++++++++++++-----------
 markov-serialise.rb |  4 ++--
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/markov-run.rb b/markov-run.rb
index ef81315..991c5e7 100755
--- a/markov-run.rb
+++ b/markov-run.rb
@@ -2,7 +2,11 @@
 
 PRNG = Random.new(Time.now.to_i)
 
-stats = Marshal::load(File.open(ARGV[0]).read)
+STATS = Marshal::load(File.open(ARGV[0]).read)
+
+CHUNK = STATS["__SETTINGS__"]["__CHUNK__"]
+
+NUM = (ARGV[2] or "1").to_i
 
 def cfd(hash)
   tot = 0
@@ -19,17 +23,17 @@ def sel(hash)
   }
 end
 
-current = stats.keys.select{|k|k.first.match(/^[A-Z]/)}.sample
-
-print "#{current.join(" ")} "
-
-def achunk(chunk, hash)
+def aword(chunk, hash)
   sel(cfd(hash[chunk]))
 end
 
-(ARGV[1].to_i - 1).times do
-  current = achunk(current, stats)
-  print "#{current.join(" ")} "
-end
+NUM.times do
+  current = STATS.keys.select{|k| k != "__SETTINGS__" }.sample
+
+  (ARGV[1].to_i - CHUNK).times do
+    current << aword(current[-CHUNK..-1], STATS)
+  end
 
-puts
+  puts current.join(" ")
+  puts
+end
diff --git a/markov-serialise.rb b/markov-serialise.rb
index 00e459e..1199453 100755
--- a/markov-serialise.rb
+++ b/markov-serialise.rb
@@ -2,7 +2,7 @@
 
 CHUNK = ARGV[1].to_i
 
-stats = Hash.new
+stats = {"__SETTINGS__" => {"__CHUNK__" => CHUNK}}
 
 words = File.readlines(ARGV[0])
   .map(&:split)
@@ -10,7 +10,7 @@ words = File.readlines(ARGV[0])
 
 (0 .. (words.length - CHUNK - CHUNK)).each do |i|
   k = words[i...(i+CHUNK)]
-  v = words[(i+CHUNK)...(i+CHUNK+CHUNK)]
+  v = words[(i+CHUNK)]
   if stats.include?(k) then
     t = stats[k]
     if t.include?(v) then
-- 
cgit v1.2.1