diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | README.TEXTILE | 6 | ||||
-rwxr-xr-x | markov-run.rb | 35 | ||||
-rwxr-xr-x | markov-serialise.rb | 27 |
4 files changed, 70 insertions, 0 deletions
# Ruby Markov Chain Generator
#
# Reconstructed listing of the files added by this commit.
#
# .gitignore
#   *.txt
#   *.ser
#
# README.TEXTILE
#   h1. Ruby Markov Chain Generator
#
#   h2. Usage
#
#   # Run markov-serialise.rb <input text file> <chunksize> <output file>
#   # Run markov-run.rb <input serialised file> <length>
#
# Both scripts are listed together below. The dispatcher at the very bottom
# runs the one whose name matches the program name this file is invoked under
# (markov-run.rb or markov-serialise.rb); under any other name only the
# methods are defined.

# ---------------------------------------------------------------------------
# markov-run.rb
# ---------------------------------------------------------------------------

# Shared random source used by sel. Random.new picks its own seed; seeding
# from Time.now.to_i made two runs within the same second print identical text.
PRNG = Random.new

# Builds the cumulative frequency distribution of a follower-count table.
#
# hash - Hash mapping follower chunk => occurrence count.
#
# Returns a NEW Hash with the same keys in the same order, each mapped to the
# running total of the counts up to and including that key. The input is left
# untouched: rewriting it in place would make every later visit to the same
# chunk accumulate already-cumulative values and skew the distribution.
def cfd(hash)
  running = 0
  hash.each_with_object({}) do |(follower, count), cumulative|
    running += count
    cumulative[follower] = running
  end
end

# Draws one key from a cumulative table as produced by cfd.
#
# hash - Hash mapping key => cumulative count (positive Integers).
#
# Returns a key, chosen with probability proportional to its share of the
# total, or nil when the table is empty.
def sel(hash)
  return nil if hash.empty?

  total = hash.values.max
  r = PRNG.rand(total) # uniform Integer in 0...total
  hash.each_pair { |key, bound| return key if r < bound }
  nil
end

# Picks the chunk that follows +chunk+ according to the statistics table.
#
# chunk - Array of words (the current chunk).
# hash  - Hash mapping chunk => { follower chunk => count }.
#
# Returns the chosen follower chunk, or nil when +chunk+ has no recorded
# follower (e.g. it only occurs at the very end of the source text).
def achunk(chunk, hash)
  followers = hash[chunk]
  return nil if followers.nil? || followers.empty?

  sel(cfd(followers))
end

# Chooses a chunk to start (or restart) generation from.
#
# Prefers chunks whose first word begins with an ASCII capital letter and
# falls back to any chunk when there is none.
#
# Returns a chunk (Array of words), or nil when +stats+ has no keys.
def start_chunk(stats)
  keys = stats.keys
  capitalised = keys.select { |key| key.first.to_s =~ /\A[A-Z]/ }
  (capitalised.empty? ? keys : capitalised).sample
end

# Reads a statistics table written by save_stats.
#
# NOTE(security): Marshal.load can instantiate arbitrary objects; only feed it
# files produced by markov-serialise.rb, never untrusted input.
def load_stats(path)
  # Block form closes the handle; "rb" because Marshal data is binary.
  File.open(path, 'rb') { |io| Marshal.load(io.read) }
end

# Entry point of markov-run.rb.
#
# argv - [<input serialised file>, <length>]; a missing or non-numeric length
#        is treated as 0, and at least one chunk is always printed.
#
# Prints the generated chunks separated by single spaces, then a newline.
def markov_run(argv)
  abort 'usage: markov-run.rb <input serialised file> <length>' if argv.empty?

  stats = load_stats(argv[0])
  current = start_chunk(stats)
  abort "#{argv[0]} contains no chunks" if current.nil?

  print "#{current.join(' ')} "
  (argv[1].to_i - 1).times do
    # A chunk without followers is a dead end: start over from a fresh chunk
    # instead of crashing.
    current = achunk(current, stats) || start_chunk(stats)
    print "#{current.join(' ')} "
  end
  puts
end

# ---------------------------------------------------------------------------
# markov-serialise.rb
# ---------------------------------------------------------------------------

# Counts, for every run of +chunk+ consecutive words, which run of +chunk+
# words immediately follows it.
#
# words - Array of Strings in source order.
# chunk - Integer number of words per chunk; must be at least 1.
#
# Returns a plain Hash mapping chunk => { follower chunk => count }. Plain
# hashes (no default procs) are used so the result can be passed to
# Marshal.dump.
# Raises ArgumentError when +chunk+ is smaller than 1.
def build_stats(words, chunk)
  raise ArgumentError, "chunk size must be >= 1 (got #{chunk})" if chunk < 1

  stats = {}
  # Each window holds a chunk followed directly by its follower chunk.
  words.each_cons(2 * chunk) do |window|
    followers = (stats[window.first(chunk)] ||= {})
    follower = window.last(chunk)
    followers[follower] = followers.fetch(follower, 0) + 1
  end
  stats
end

# Writes the statistics table to +path+ in Marshal format.
def save_stats(path, stats)
  # Block form flushes and closes the file; "wb" keeps the bytes untranslated.
  File.open(path, 'wb') { |io| io.write(Marshal.dump(stats)) }
end

# Entry point of markov-serialise.rb.
#
# argv - [<input text file>, <chunksize>, <output file>].
def markov_serialise(argv)
  if argv.length < 3
    abort 'usage: markov-serialise.rb <input text file> <chunksize> <output file>'
  end

  chunk = argv[1].to_i
  abort "chunksize must be a positive integer (got #{argv[1].inspect})" if chunk < 1

  # String#split without arguments splits on any whitespace, newlines included.
  words = File.read(argv[0]).split
  save_stats(argv[2], build_stats(words, chunk))
end

# ---------------------------------------------------------------------------
# Dispatcher: run the script this file was invoked as.
# ---------------------------------------------------------------------------
case File.basename($PROGRAM_NAME)
when 'markov-run.rb' then markov_run(ARGV)
when 'markov-serialise.rb' then markov_serialise(ARGV)
end