about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nathan Lasseter <nathan@4574.co.uk> 2014-01-14 17:55:47 +0000
committer Nathan Lasseter <nathan@4574.co.uk> 2014-01-14 17:55:47 +0000
commit 79a5cf84254c2aeed244861197fdc5fe3282a940 (patch)
tree c9b3436e145dffb33904760d4a80fb723242c12c
First Commit
-rw-r--r-- .gitignore 2
-rw-r--r-- README.TEXTILE 6
-rwxr-xr-x markov-run.rb 35
-rwxr-xr-x markov-serialise.rb 27
4 files changed, 70 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..81f4387
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.txt
+*.ser
diff --git a/README.TEXTILE b/README.TEXTILE
new file mode 100644
index 0000000..110494a
--- /dev/null
+++ b/README.TEXTILE
@@ -0,0 +1,6 @@
+h1. Ruby Markov Chain Generator
+
+h2. Usage
+
+# Run markov-serialise.rb <input text file> <chunksize> <output file>
+# Run markov-run.rb <input serialised file> <length>
diff --git a/markov-run.rb b/markov-run.rb
new file mode 100755
index 0000000..ef81315
--- /dev/null
+++ b/markov-run.rb
@@ -0,0 +1,35 @@
+#!/usr/bin/env ruby
+
+PRNG = Random.new(Time.now.to_i)
+
+stats = Marshal::load(File.open(ARGV[0]).read)
+
+def cfd(hash)
+ tot = 0
+ hash.each_pair {|k, v|
+ hash[k] = tot += v
+ }
+end
+
+def sel(hash)
+ max = hash.values.max
+ r = PRNG.rand(max*100000) % max
+ hash.each_pair {|k, v|
+ return k if r < v
+ }
+end
+
+current = stats.keys.select{|k|k.first.match(/^[A-Z]/)}.sample
+
+print "#{current.join(" ")} "
+
+def achunk(chunk, hash)
+ sel(cfd(hash[chunk]))
+end
+
+(ARGV[1].to_i - 1).times do
+ current = achunk(current, stats)
+ print "#{current.join(" ")} "
+end
+
+puts
diff --git a/markov-serialise.rb b/markov-serialise.rb
new file mode 100755
index 0000000..00e459e
--- /dev/null
+++ b/markov-serialise.rb
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+
+CHUNK = ARGV[1].to_i
+
+stats = Hash.new
+
+words = File.readlines(ARGV[0])
+ .map(&:split)
+ .flatten
+
+(0 .. (words.length - CHUNK - CHUNK)).each do |i|
+ k = words[i...(i+CHUNK)]
+ v = words[(i+CHUNK)...(i+CHUNK+CHUNK)]
+ if stats.include?(k) then
+ t = stats[k]
+ if t.include?(v) then
+ t[v] += 1
+ else
+ t[v] = 1
+ end
+ else
+ stats[k] = Hash.new
+ stats[k][v] = 1
+ end
+end
+
+File.new(ARGV[2], "w").print(Marshal::dump(stats))