summaryrefslogtreecommitdiff
path: root/bin/byteback-backup
blob: bb9b72109228e9e2d28a38544af153cea253e753 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/usr/bin/ruby
#
# Back up this system to a byteback-enabled server (just some command line
# tools and SSH setup).  We aim to make sure these backups are easy, complete
# and safe for most types of hosting customer.
#
# See 'man byteback' for more information.

require 'resolv'

$LOAD_PATH.unshift('/usr/lib/byteback')

require 'trollop'
require 'byteback/util'
require 'byteback/log'
include Byteback::Util
include Byteback::Log

# Name this script was invoked under (final path component), shown in the
# usage banner.
ME = $PROGRAM_NAME.split('/')[-1]

#
# Command-line interface.  Every option is declared as name, help text and
# then its type / default / short flag where it has one.  The trailing
# banner is printed after the option list in the help output.
#
opts = Trollop.options do
  banner "#{ME}: Back up this system to a byteback-enabled server\n "

  opt :destination, 'Backup destination (i.e. user@host:/path)', :type => :string
  opt :source, 'Source paths', :type => :strings, :default => ['/']
  opt :exclude, 'Paths to exclude', :type => :strings, :short => 'x'
  opt :verbose, 'Show debugging messages'
  opt :retry_number, 'Number of retries on error', :type => :integer, :default => 3
  opt :io_timeout, 'Number of seconds to allow I/O timeout for', :type => :integer, :default => 10800
  opt :retry_delay, 'Number of seconds between retries after an error', :type => :integer, :default => 300
  opt :ssh_key, 'SSH key filename', :type => :string, :default => '/etc/byteback/key', :short => 'k'
  opt :help, 'Show this message', :short => 'h'

  banner "\nAdditional excludes can be specified using /etc/byteback/rsync_filter, which is an rsync filter file.  See the rsync man page for information on how this works.\n"
end

# NOTE(review): presumably refuses to run alongside another
# byteback-backup; the helper is defined outside this file -- confirm.
lock_out_other_processes('byteback-backup')

#
# Settings taken from the command line.  @verbose holds the literal flag
# forwarded to byteback-receive, or nil when --verbose was not given.
#
@ssh_key = opts[:ssh_key]
@verbose = opts[:verbose] ? '--verbose' : nil
@sources = opts[:source] if opts[:source]
@excludes = opts[:exclude] if opts[:exclude]
@destination = opts[:destination]
@retry_number = opts[:retry_number]
@retry_delay = opts[:retry_delay]
@io_timeout = opts[:io_timeout] if opts[:io_timeout]

#
# Settings taken from files in /etc/byteback.  These are defaults: a value
# given explicitly on the command line always takes precedence over them.
#

# Read the default destination, but only if --destination was not given.
if @destination.nil? && File.exist?('/etc/byteback/destination')
  @destination = File.read('/etc/byteback/destination').chomp
end

# No file fallback is needed for the SSH key: --ssh-key already defaults
# to /etc/byteback/key, and a key chosen with -k must not be replaced by it.

# If we have a local timeout-file then use that, unless --io-timeout was
# given explicitly (Trollop records that under the :io_timeout_given key).
if !opts[:io_timeout_given] && File.exist?('/etc/byteback/io_timeout')
  @io_timeout = File.foreach('/etc/byteback/io_timeout').first.to_i
end


#
# Check our destination
#
# A destination must be known by this point, either from --destination or
# from /etc/byteback/destination.  Checking for that first gives the user
# the relevant message instead of the "must be a remote path" one below.
#
fatal('Must supply --destination or put it into /etc/byteback/destination') unless @destination

# Split [user@]host:[path] into its parts.  \A and \z anchor the pattern to
# the whole string, so stray extra lines in the value cannot slip through.
# The user and path groups are optional and are left nil when absent.
if (remote = /\A(?:(.+)@)?([^@:]+):(.+)?\z/.match(@destination))
  @destination_user, @destination_host, @destination_path = remote.captures
else
  fatal('Destination must be a remote path, e.g. ssh@host.com:/store/backups')
end

#
# Validate & normalise source directories
#
# With no sources set at all, back up everything from the root down.
@sources ||= ['/']

fatal('No sources specified') if @sources.empty?

# Collapse runs of slashes in each path, then insist the result is readable.
@sources = @sources.map do |path|
  tidied = path.squeeze('/')
  fatal("Can't read directory #{tidied}") unless File.readable?(tidied)
  tidied
end

# Automatically exclude anything mounted on a non-local filesystem, plus
# various cache and temporary directories common on Bytemark & Debian
# systems
#

# Decode the three-digit octal escapes used in /proc/mounts fields (for
# example \040 for a space) so that the result is the real path.
#
# field:: one whitespace-separated field from a /proc/mounts line
#
# Returns a new string with every \NNN sequence replaced by that character.
def unescape_mount_field(field)
  field.gsub(/\\([0-7]{3})/) { Regexp.last_match(1).oct.chr }
end

# Only build the automatic list when no excludes were given explicitly.
if @excludes.nil?

  # Filesystem types whose mounts are kept in the backup; a mount of any
  # other type is excluded.
  PROBABLY_LOCAL = %w(
    btrfs
    ext2
    ext3
    ext4
    reiserfs
    xfs
    nilfs
    jfs
    reiser4
    zfs
    rootfs
  )

  # Paths that are excluded whenever they exist on this system.
  COMMON_JUNK = %w(
    /swap.file
    /tmp
    /var/backups/localhost
    /var/cache/apt/archives
    /var/lib/php5
    /var/tmp
  )

  # Names given to the whitespace-separated columns of /proc/mounts.
  MOUNT_HEADINGS = %w( spec file vfstype mntops freq passno ).map(&:to_sym)

  # One hash per mounted filesystem, keyed by MOUNT_HEADINGS.
  mounts = File.read('/proc/mounts').split("\n").map do |line|
    Hash[MOUNT_HEADINGS.zip(line.split(' '))]
  end

  foreign_mounts = mounts.reject { |m| PROBABLY_LOCAL.include?(m[:vfstype]) }

  # Mount points are stored escaped, so decode them; otherwise an exclude
  # for a mount point containing a space would never match the real path.
  foreign_mount_points = foreign_mounts.map { |m| unescape_mount_field(m[:file]) }

  @excludes = foreign_mount_points + COMMON_JUNK.select { |f| File.exist?(f) }

end

# Collapse runs of slashes in every exclude, explicit or automatic.
@excludes = @excludes.map do |e|
  e.gsub(/\/+/, '/')
end

# The destination comes from --destination or, failing that, from the file
# named in the message (the same path that is read earlier in this script).
fatal('Must supply --destination or put it into /etc/byteback/destination') unless @destination

#
# Test ssh connection is good before we start
#
# The key has to be readable locally before a connection is attempted.
fatal("Could not read ssh key #{@ssh_key}") unless File.readable?(@ssh_key)

#
# Run a command on the destination host over ssh, non-interactively.
#
# ssh_args:: the words of the remote command; a nil entry (such as an unset
#            @verbose) is passed as an empty string
#
# Reads @ssh_key, @destination_user and @destination_host.  Returns whatever
# log_system returns for the command; callers in this file compare it to 0.
#
def ssh(*ssh_args)
  args = ['ssh',
          '-o', 'BatchMode=yes',
          '-o', 'ConnectionAttempts=5',
          '-o', 'ConnectTimeout=30',
          '-o', 'Compression=yes',
          '-o', 'CompressionLevel=3',
          '-o', 'ServerAliveInterval=60',
          '-o', 'TCPKeepAlive=yes',
          '-x', '-a',
          '-i', @ssh_key]

  # The destination may omit "user@", in which case @destination_user is
  # nil.  Leave -l out then, rather than handing a nil argument to
  # log_system, and let ssh choose the login name itself.
  args += ['-l', @destination_user] if @destination_user

  args << @destination_host
  args += ssh_args.map { |a| a || '' }

  log_system(*args)
end

# Ask byteback-receive on the destination to answer a ping; anything other
# than a zero status means we cannot proceed.
ping_status = ssh('byteback-receive', '--ping', @verbose)
fatal("Could not connect to #{@destination}") unless ping_status == 0

#
# Call rsync to copy certain sources, returns exit status (see man rsync)
#
# sources:: the local paths to copy to @destination
#
# Reads @io_timeout, @ssh_key, @destination_user, @verbose, @excludes and
# @destination, plus these optional files when they exist:
#
#   /etc/byteback/excludes      passed via --exclude-from
#   /etc/byteback/rsync_filter  merged in as an rsync filter file
#   /etc/byteback/rsync_flags   extra rsync arguments, one per line
#
# Returns whatever log_system returns for the rsync command.
#
def rsync(*sources)
  # Default options include --inplace because we only care about consistency
  # at the end of the job, and rsync will do more work for big files without
  # it.
  #
  # rsync can spend a long time at the far end checking over its files
  # without transferring anything, so we want to wait as long as possible
  # without jeopardising the timing of the next backup run.
  #
  args = %w( rsync --archive --numeric-ids --delete-delay --inplace --relative )

  #
  # Add on the I/O-timeout.  It is held as an Integer, but every element of
  # a command line must be a String, so convert it here.
  #
  args += ['--timeout', @io_timeout.to_s] unless @io_timeout.nil?

  #
  # The remote shell command.  -l is only added when the destination names
  # a user; a bare "-l" would make ssh take its next word as the login name.
  #
  remote_shell = "ssh -o BatchMode=yes -x -a -i #{@ssh_key}"
  remote_shell += " -l #{@destination_user}" if @destination_user
  args += ['--rsh', remote_shell]

  args << '--verbose' if @verbose
  args += @excludes.map { |x| ['--exclude', x] }.flatten

  #
  # Add in the rsync excludes file, if present.
  #
  if File.exist?('/etc/byteback/excludes')
    args += ['--exclude-from', '/etc/byteback/excludes']
  end

  #
  # Add in an rsync_filter if required.
  #
  if File.exist?('/etc/byteback/rsync_filter')
    args += ['--filter', 'merge /etc/byteback/rsync_filter']
  end

  #
  # To add extra rsync flags, a file can be used.  Each non-empty line is
  # passed to rsync as ONE argument, exactly as written (it is not split on
  # spaces).  Blank lines are skipped so that they do not turn into empty
  # arguments.
  #
  if File.exist?('/etc/byteback/rsync_flags')
    extra_flags = File.readlines('/etc/byteback/rsync_flags').map(&:chomp)
    args += extra_flags.reject(&:empty?)
  end

  args += ['--rsync-path', 'rsync --fake-super']

  args += sources
  args << @destination

  log_system(*args)
end

#
# rsync exit statuses and what we do about them.  0 is "Success" and 24 is
# "Partial transfer due to vanished source files"; both count as success,
# because otherwise the backup process never finishes on some hosts.  The
# second list holds the statuses that are worth another attempt.  Anything
# in neither list is fatal.
#
RSYNC_EXIT_STATUSES_TO_ACCEPT   = [0, 24]
RSYNC_EXIT_STATUSES_TO_RETRY_ON = [10, 11, 20, 21, 22, 23, 30]

# Run the file copy, going round again for as long as the status is
# retryable and @retry_number has not run out.
#
loop do
  status = rsync(*@sources)

  case status
  when *RSYNC_EXIT_STATUSES_TO_ACCEPT
    break
  when *RSYNC_EXIT_STATUSES_TO_RETRY_ON
    warn "rsync exited with status #{status}"

    if @retry_number <= 0
      fatal('Maximum number of rsync retries reached')
    else
      warn "rsync will retry #{@retry_number} more times, sleeping #{@retry_delay}s"
      @retry_number -= 1
      sleep @retry_delay
    end
  else
    fatal("Fatal rsync error occurred (#{status})")
  end
end

info('Backup completed, requesting snapshot')

# Tell byteback-receive on the other end that this backup is complete; a
# non-zero status from that call is fatal.
#
completion_status = ssh('byteback-receive', '--complete', @verbose)
fatal('Backup could not be marked complete') unless completion_status == 0

info('Finished')