diff options
| author | Patrick J Cherry <patrick@bytemark.co.uk> | 2014-12-09 14:08:38 +0000 | 
|---|---|---|
| committer | Patrick J Cherry <patrick@bytemark.co.uk> | 2014-12-09 14:08:38 +0000 | 
| commit | f88dd2535ca5367c89baa7131c2b94565a6a4cd7 (patch) | |
| tree | 49b5efa9d05e1752dab462ddb1b9699ccbc482d6 | |
| parent | 5129820f2f2535086c0634cb0b6ea14f5e3d5d1c (diff) | |
Reworked sorting of backups by importance.  Added "tests"
| -rw-r--r-- | lib/byteback/backup_directory.rb | 117 | ||||
| -rw-r--r-- | test/tc_byteback_snapshot.rb | 163 | 
2 files changed, 249 insertions, 31 deletions
| diff --git a/lib/byteback/backup_directory.rb b/lib/byteback/backup_directory.rb index a31c290..bdfc29c 100644 --- a/lib/byteback/backup_directory.rb +++ b/lib/byteback/backup_directory.rb @@ -11,35 +11,86 @@ module Byteback  			BACKUP_IMPORTANCE = [1, 2, 7, 14, 21, 28, 56, 112]  			def sort_by_importance(snapshots_unsorted, now=Time.now) -				snapshots_sorted = [] -				scores = Array.new{|h,k| h[k] = []} -				times  = snapshots_unsorted.map(&:time) - -				BACKUP_IMPORTANCE.each_with_index do |days, backup_idx| -						target_time = now.to_i - (days*86400) -						weight = days.to_f - (backup_idx == 0 ? 0 : BACKUP_IMPORTANCE[backup_idx-1]) -						scores << times.map{|t| (t.to_i - target_time).abs/weight } -				end - -				# -				# Find the index of the lowest score from the list of BACKUP_IMPORTANCE -				# -				nearest_target = scores.transpose.map{|s| s.find_index(s.min)} - -				BACKUP_IMPORTANCE.each_index do |backup_idx| -					# -					# Find the indicies of the snapshots that match the current BACKUP_IMPORTANCE index, and sort them according to their score. -					best_snapshot_idxs = nearest_target.each_index. -						select{|i| nearest_target[i] == backup_idx}. -						sort{|a,b| scores[backup_idx][a] <=> scores[backup_idx][b]} - -					# -					# Append them to the array. -					# -					snapshots_sorted += snapshots_unsorted.values_at(*best_snapshot_idxs) -				end - -				snapshots_sorted +        #  +        # Keep the last 7 days backups +        # +        snapshots_sorted   = [] +        snapshots_unsorted = snapshots_unsorted.sort_by(&:time).reverse +         +        # +        # Group snapshots by host +        # +        snapshots_by_host = Hash.new{|h,k| h[k] = []} + +        snapshots_unsorted.each do |snapshot| +          snapshots_by_host[snapshot.host] << snapshot +        end + +        # +        # We want the snapshot nearest to the middle of the day each day. +        # +        today_midday = Time.mktime(*([0,0,12]+now.utc.to_a.last(7))) + +        # +        # We want today, and the previous seven days +        # +        targets = [today_midday] +        targets += 6.times.map{ today_midday -= 86400 } + +        # +        # Now the previous four Sundays (we should bump on a week if today is a Sunday!) +        # +        today_midday -= (today_midday.wday == 0 ? 7 : today_midday.wday )*86400 +        targets << today_midday +        targets += 3.times.map{ today_midday -= 7*86400 } + +        # +        # Our 28 day periods are anchored on Time.at(0).  However this was a +        # Thursday, so we have to add 3 days to get it to Sunday. +        # +        targets << (today_midday -= ((today_midday.to_i / 86400.0).floor % 28 - 3)*86400)   + +        # +        # Continue removing 28 day periods until we get beyond the oldest backup time. +        # +        targets << (today_midday -= 28*86400) while today_midday > snapshots_unsorted.last.time + +        # +        # This has records the last nearest snapshot for each host +        # +        last_nearest = {} + +        # +        # For each target, and for each host, find the nearest snapshot +        # +        targets.each do |target| +          snapshots_by_host.each do |host, snapshots| +            next if snapshots.empty? + +            nearest = snapshots.sort{|a,b| (a.time - target).abs <=> (b.time - target).abs }.first + +            # +            # Don't process any more if the last snapshot for this for this +            # host was more recent, i.e. we've reached the oldest, and are +            # bouncing back again. +            # +            if last_nearest[host].nil? or last_nearest[host].time > nearest.time +              last_nearest[host] = nearest +              snapshots_by_host[host]  -= [nearest] +              snapshots_sorted         << nearest +            end + +          end + +        end + +        # +        # Remove any snapshots we've already sorted and add in the remaining snapshots +        # +        snapshots_unsorted -= snapshots_sorted +        snapshots_sorted   += snapshots_unsorted + +			  snapshots_sorted   			end  		end @@ -48,14 +99,18 @@ module Byteback  		def initialize(backup_directory, snapshot_path)  			@backup_directory = backup_directory  			@path = snapshot_path -			time # throws ArgumentError if it can't parse +			@time = Time.parse(File.basename(path)) # throws ArgumentError if it can't parse  			nil  		end  		def time -			Time.parse(File.basename(path)) +      @time  		end +    def host +      File.basename(File.dirname(path)) +    end +  		def <=>(b)  			time <=> b.time  		end diff --git a/test/tc_byteback_snapshot.rb b/test/tc_byteback_snapshot.rb new file mode 100644 index 0000000..0c1e181 --- /dev/null +++ b/test/tc_byteback_snapshot.rb @@ -0,0 +1,163 @@ +$: << File.dirname(__FILE__)+"/../lib" + +require 'pp' +require 'test/unit' +require 'byteback/backup_directory' +require 'time' +# require 'mocha/test_unit' + +class SnapshotTest < Test::Unit::TestCase + +  def setup + +  end + +  def teardown + +  end + +  # +  # This class is supposed to work out which bits get pruned first +  # +  def test_sort_by_importance + +    start = Time.now +       +    15.times do |limit| +      limit += 1 +      biggest_offset = nil  +      backups = [] +      offsets = [] +      now   = Time.at(start.to_i) + rand(7)*86400 +      day   = 0 + +      # +      # Do this test until we reach a maximum age +      # +      while true do + +        backups << Byteback::Snapshot.new("/tmp", File.join("/tmp/",now.iso8601)) + +        while backups.length > limit do +          sorted_backups = Byteback::Snapshot.sort_by_importance(backups, now) +          backups.delete(sorted_backups.last) +        end + +        offsets = backups.collect{|x| ((now - x.time)/86400.0).round } +  +        # +        # Each backup should have backups for the last 7 days, then the first four Sundays, and then the next mod 28 day after that +        # +        mod_28 = ((now.to_i / 86400.0).floor % 28 - 3) + +        targets = ((0..6).to_a +  +          [7,14,21,28].to_a.collect{|i| i + now.wday} + +          [2*28 + mod_28]).select{|t| t < day}.first(limit).reverse + +        assert_equal(targets - offsets, [], "Failed after day #{day} (#{now.wday}) for a limit of #{limit} backups") + +        if biggest_offset.nil? or offsets.max > biggest_offset +          biggest_offset = offsets.max +        else +          puts "Oldest backup with space for #{limit} backups is #{offsets.max} days: #{offsets.join(", ")}" +          break +        end + +        # Move on a day +        day += 1 +        now += 86400 +      end +    end +  end + +  # +  # This run the same test as above, execpt with 3 hosts all competeing for space +  # +  def test_sort_by_importance_with_multiple_hosts +    start = Time.now +       +    40.times do |limit| +      limit += 6 +      biggest_offset = nil  +      backups = [] +      offsets = [] +      now   = Time.at(start.to_i) + rand(7)*86400 +      day   = 0 + +      # +      # Do this test until we reach a maximum age +      # +      while true do + +        %w(host1 host2 host3).each do |host| +           backups << Byteback::Snapshot.new("/tmp/#{host}", File.join("/tmp/#{host}/",(now+rand(3600)).iso8601 )) +        end + +        while backups.length > limit do +          sorted_backups = Byteback::Snapshot.sort_by_importance(backups, now) +          backups.delete(sorted_backups.last) +        end + +        offsets = backups.collect{|x| ((now - x.time)/86400.0).round } + +        # +        # TODO test me! +        # +  +        if biggest_offset.nil? or offsets.max > biggest_offset +          biggest_offset = offsets.max +        else +          puts "Oldest backup with space for #{limit} backups and 3 hosts is #{offsets.max} days: #{offsets.join(", ")}" +          break +        end + +        # Move on a day +        day += 1 +        now += 86400 +      end +    end +  end + +  # +  # This test is the same as the previous two, except with random failures added in. +  # +  def test_sort_by_importance_with_random_failures + +    start = Time.now +       +    15.times do  +      limit = 15 +      backups = [] +      offsets = [] +      now   = Time.at(start.to_i) + rand(7)*86400 +      day   = 0 + +      # +      # Run this test over 120 days +      # +      120.times do + +        # Fail on 3 days out of four +        if rand(7) < 3  +          backups << Byteback::Snapshot.new("/tmp", File.join("/tmp/",now.iso8601)) +        end + +        while backups.length > limit do +          sorted_backups = Byteback::Snapshot.sort_by_importance(backups, now) +          backups.delete(sorted_backups.last) +        end + +        offsets = backups.collect{|x| ((now - x.time)/86400.0).round } + +        # TODO test!  + +        # Move on a day +        day += 1 +        now += 86400 +      end +      puts "Oldest backup with space for #{limit} backups is #{offsets.max} days: #{offsets.join(", ")}" + +    end +  end +end + | 
