author     Patrick J Cherry <patrick@bytemark.co.uk>  2014-12-09 14:08:38 +0000
committer  Patrick J Cherry <patrick@bytemark.co.uk>  2014-12-09 14:08:38 +0000
commit     f88dd2535ca5367c89baa7131c2b94565a6a4cd7 (patch)
tree       49b5efa9d05e1752dab462ddb1b9699ccbc482d6
parent     5129820f2f2535086c0634cb0b6ea14f5e3d5d1c (diff)
Reworked sorting of backups by importance. Added "tests"
-rw-r--r--  lib/byteback/backup_directory.rb  117
-rw-r--r--  test/tc_byteback_snapshot.rb      163
2 files changed, 249 insertions, 31 deletions
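
The reworked sort_by_importance returns snapshots ordered most-important-first, so a pruner can trim an over-full store by repeatedly discarding the tail. A minimal sketch of that pruning loop, mirroring the new tests (the /store layout and the limit of 10 are illustrative assumptions, not part of this commit):

    require 'time'
    require 'byteback/backup_directory'

    # Hypothetical store layout, as in the tests: /store/<host>/<iso8601 time>
    backups = Dir.glob("/store/*/*").map do |path|
      Byteback::Snapshot.new(File.dirname(path), path)
    end

    # Most important first; drop from the tail until at most 10 remain
    sorted = Byteback::Snapshot.sort_by_importance(backups, Time.now)
    sorted.pop while sorted.length > 10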
diff --git a/lib/byteback/backup_directory.rb b/lib/byteback/backup_directory.rb
index a31c290..bdfc29c 100644
--- a/lib/byteback/backup_directory.rb
+++ b/lib/byteback/backup_directory.rb
@@ -11,35 +11,86 @@ module Byteback
BACKUP_IMPORTANCE = [1, 2, 7, 14, 21, 28, 56, 112]
def sort_by_importance(snapshots_unsorted, now=Time.now)
- snapshots_sorted = []
- scores = Array.new{|h,k| h[k] = []}
- times = snapshots_unsorted.map(&:time)
-
- BACKUP_IMPORTANCE.each_with_index do |days, backup_idx|
- target_time = now.to_i - (days*86400)
- weight = days.to_f - (backup_idx == 0 ? 0 : BACKUP_IMPORTANCE[backup_idx-1])
- scores << times.map{|t| (t.to_i - target_time).abs/weight }
- end
-
- #
- # Find the index of the lowest score from the list of BACKUP_IMPORTANCE
- #
- nearest_target = scores.transpose.map{|s| s.find_index(s.min)}
-
- BACKUP_IMPORTANCE.each_index do |backup_idx|
- #
- # Find the indicies of the snapshots that match the current BACKUP_IMPORTANCE index, and sort them according to their score.
- best_snapshot_idxs = nearest_target.each_index.
- select{|i| nearest_target[i] == backup_idx}.
- sort{|a,b| scores[backup_idx][a] <=> scores[backup_idx][b]}
-
- #
- # Append them to the array.
- #
- snapshots_sorted += snapshots_unsorted.values_at(*best_snapshot_idxs)
- end
-
- snapshots_sorted
+ #
+ # Start with the snapshots sorted newest-first
+ #
+ snapshots_sorted = []
+ snapshots_unsorted = snapshots_unsorted.sort_by(&:time).reverse
+
+ #
+ # Group snapshots by host
+ #
+ snapshots_by_host = Hash.new{|h,k| h[k] = []}
+
+ snapshots_unsorted.each do |snapshot|
+ snapshots_by_host[snapshot.host] << snapshot
+ end
+
+ #
+ # We want the snapshot nearest to the middle of the day each day.
+ #
+ today_midday = Time.mktime(*([0,0,12]+now.utc.to_a.last(7)))
+
+ #
+ # We want seven daily targets: today and the six days before it
+ #
+ targets = [today_midday]
+ targets += 6.times.map{ today_midday -= 86400 }
+
+ #
+ # Now the previous four Sundays (if today is a Sunday we must go back a full week, not zero days)
+ #
+ today_midday -= (today_midday.wday == 0 ? 7 : today_midday.wday )*86400
+ targets << today_midday
+ targets += 3.times.map{ today_midday -= 7*86400 }
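+ #
+ # (For example, if today is a Wednesday, the targets so far are
+ # midday today, the six previous middays, and then the four
+ # previous Sundays at midday.)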
+
+ #
+ # Our 28 day periods are anchored on Time.at(0). However this was a
+ # Thursday, so we have to add 3 days to get it to Sunday.
+ #
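+ # (Epoch day 0, 1 Jan 1970, was a Thursday, so epoch days with
+ # day_number % 28 == 3 fall on Sundays; subtracting
+ # (day_number % 28 - 3) days snaps today_midday onto the nearest
+ # Sunday-aligned 28-day boundary.)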
+ targets << (today_midday -= ((today_midday.to_i / 86400.0).floor % 28 - 3)*86400)
+
+ #
+ # Continue removing 28 day periods until we get beyond the oldest backup time.
+ #
+ targets << (today_midday -= 28*86400) while today_midday > snapshots_unsorted.last.time
+
+ #
+ # This records the most recently chosen snapshot for each host, so we only ever pick progressively older ones
+ #
+ last_nearest = {}
+
+ #
+ # For each target, and for each host, find the nearest snapshot
+ #
+ targets.each do |target|
+ snapshots_by_host.each do |host, snapshots|
+ next if snapshots.empty?
+
+ nearest = snapshots.sort{|a,b| (a.time - target).abs <=> (b.time - target).abs }.first
+
+ #
+ # Don't process any more if the last snapshot for this host was
+ # more recent, i.e. we've reached the oldest and are bouncing
+ # back again.
+ #
+ if last_nearest[host].nil? or last_nearest[host].time > nearest.time
+ last_nearest[host] = nearest
+ snapshots_by_host[host] -= [nearest]
+ snapshots_sorted << nearest
+ end
+
+ end
+
+ end
+
+ #
+ # Remove any snapshots we've already sorted and add in the remaining snapshots
+ #
+ snapshots_unsorted -= snapshots_sorted
+ snapshots_sorted += snapshots_unsorted
+
+ snapshots_sorted
end
end
@@ -48,14 +99,18 @@ module Byteback
def initialize(backup_directory, snapshot_path)
@backup_directory = backup_directory
@path = snapshot_path
- time # throws ArgumentError if it can't parse
+ @time = Time.parse(File.basename(path)) # throws ArgumentError if it can't parse
nil
end
def time
- Time.parse(File.basename(path))
+ @time
end
+ def host
+ File.basename(File.dirname(path))
+ end
+
def <=>(b)
time <=> b.time
end
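
Snapshot now parses its timestamp once at construction (still raising ArgumentError on an unparseable basename) and derives its host from the parent directory. A quick illustration of the two accessors, using a made-up /store path (an assumption, not from this commit):

    require 'time'
    require 'byteback/backup_directory'

    snap = Byteback::Snapshot.new("/store/host1",
                                  "/store/host1/2014-12-09T14:08:38+00:00")
    snap.host  # => "host1" (basename of the snapshot's parent directory)
    snap.time  # => 2014-12-09 14:08:38 +0000 (parsed once, then memoised)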
diff --git a/test/tc_byteback_snapshot.rb b/test/tc_byteback_snapshot.rb
new file mode 100644
index 0000000..0c1e181
--- /dev/null
+++ b/test/tc_byteback_snapshot.rb
@@ -0,0 +1,163 @@
+$: << File.dirname(__FILE__)+"/../lib"
+
+require 'pp'
+require 'test/unit'
+require 'byteback/backup_directory'
+require 'time'
+# require 'mocha/test_unit'
+
+class SnapshotTest < Test::Unit::TestCase
+
+ def setup
+
+ end
+
+ def teardown
+
+ end
+
+ #
+ # sort_by_importance is supposed to work out which snapshots get pruned first
+ #
+ def test_sort_by_importance
+
+ start = Time.now
+
+ 15.times do |limit|
+ limit += 1
+ biggest_offset = nil
+ backups = []
+ offsets = []
+ now = Time.at(start.to_i) + rand(7)*86400
+ day = 0
+
+ #
+ # Do this test until we reach a maximum age
+ #
+ while true do
+
+ backups << Byteback::Snapshot.new("/tmp", File.join("/tmp/",now.iso8601))
+
+ while backups.length > limit do
+ sorted_backups = Byteback::Snapshot.sort_by_importance(backups, now)
+ backups.delete(sorted_backups.last)
+ end
+
+ offsets = backups.collect{|x| ((now - x.time)/86400.0).round }
+
+ #
+ # The retained backups should cover the last 7 days, then the previous four Sundays, and then the next 28-day boundary beyond that
+ #
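+ # (The mod_28 term below mirrors the Sunday-aligned 28-day anchor
+ # in sort_by_importance: epoch day 0 was a Thursday, so boundaries
+ # fall on epoch days where day_number % 28 == 3.)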
+ mod_28 = ((now.to_i / 86400.0).floor % 28 - 3)
+
+ targets = ((0..6).to_a +
+ [7,14,21,28].to_a.collect{|i| i + now.wday} +
+ [2*28 + mod_28]).select{|t| t < day}.first(limit).reverse
+
+ assert_equal(targets - offsets, [], "Failed after day #{day} (#{now.wday}) for a limit of #{limit} backups")
+
+ if biggest_offset.nil? or offsets.max > biggest_offset
+ biggest_offset = offsets.max
+ else
+ puts "Oldest backup with space for #{limit} backups is #{offsets.max} days: #{offsets.join(", ")}"
+ break
+ end
+
+ # Move on a day
+ day += 1
+ now += 86400
+ end
+ end
+ end
+
+ #
+ # This runs the same test as above, except with 3 hosts all competing for space
+ #
+ def test_sort_by_importance_with_multiple_hosts
+ start = Time.now
+
+ 40.times do |limit|
+ limit += 6
+ biggest_offset = nil
+ backups = []
+ offsets = []
+ now = Time.at(start.to_i) + rand(7)*86400
+ day = 0
+
+ #
+ # Do this test until we reach a maximum age
+ #
+ while true do
+
+ %w(host1 host2 host3).each do |host|
+ backups << Byteback::Snapshot.new("/tmp/#{host}", File.join("/tmp/#{host}/",(now+rand(3600)).iso8601 ))
+ end
+
+ while backups.length > limit do
+ sorted_backups = Byteback::Snapshot.sort_by_importance(backups, now)
+ backups.delete(sorted_backups.last)
+ end
+
+ offsets = backups.collect{|x| ((now - x.time)/86400.0).round }
+
+ #
+ # TODO test me!
+ #
+
+ if biggest_offset.nil? or offsets.max > biggest_offset
+ biggest_offset = offsets.max
+ else
+ puts "Oldest backup with space for #{limit} backups and 3 hosts is #{offsets.max} days: #{offsets.join(", ")}"
+ break
+ end
+
+ # Move on a day
+ day += 1
+ now += 86400
+ end
+ end
+ end
+
+ #
+ # This test is the same as the previous two, except with random failures added in.
+ #
+ def test_sort_by_importance_with_random_failures
+
+ start = Time.now
+
+ 15.times do
+ limit = 15
+ backups = []
+ offsets = []
+ now = Time.at(start.to_i) + rand(7)*86400
+ day = 0
+
+ #
+ # Run this test over 120 days
+ #
+ 120.times do
+
+ # Take a backup on roughly 3 days out of 7, i.e. fail the rest
+ if rand(7) < 3
+ backups << Byteback::Snapshot.new("/tmp", File.join("/tmp/",now.iso8601))
+ end
+
+ while backups.length > limit do
+ sorted_backups = Byteback::Snapshot.sort_by_importance(backups, now)
+ backups.delete(sorted_backups.last)
+ end
+
+ offsets = backups.collect{|x| ((now - x.time)/86400.0).round }
+
+ # TODO test!
+
+ # Move on a day
+ day += 1
+ now += 86400
+ end
+ puts "Oldest backup with space for #{limit} backups is #{offsets.max} days: #{offsets.join(", ")}"
+
+ end
+ end
+end
+
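
Since the new test file pushes ../lib onto the load path itself, it should be runnable straight from a checkout with something like ruby test/tc_byteback_snapshot.rb; the commented-out mocha require suggests mocking was considered but is not yet needed.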