GUI CSS: Removed snapin styles from py modules and added a _snapins.scss for the...
[check_mk.git] / bin / mkbackup
blob96e2399582b9ef173fc1dfad8c65ef77777db80f
1 #!/usr/bin/env python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 import ast
28 import errno
29 import fcntl
30 import fnmatch
31 import getopt
32 import glob
33 import grp
34 import os
35 import pwd
36 import re
37 import shutil
38 import socket
39 import signal
40 import subprocess
41 import sys
42 import syslog
43 import tempfile
44 import textwrap
45 import time
46 import threading
47 import traceback
48 import json
49 from tarfile import TarFile, ReadError
50 from hashlib import md5
52 from OpenSSL import crypto
53 from Cryptodome.Cipher import AES, PKCS1_OAEP
54 from Cryptodome.PublicKey import RSA
56 import cmk.utils.daemon as daemon
57 import cmk.utils.render as render
58 import cmk.utils.schedule as schedule
59 import cmk.utils.store as store
60 from cmk.utils.exceptions import MKTerminate, MKGeneralException
# Version of the Check_MK release this mkbackup tool is shipped with.
VERSION = "1.4.0i1"
# Is used to duplicate output from stdout/stderr to the job log. This
# is e.g. used during "mkbackup backup" to store the output.
class Log(object):
    """Tee-like hook for sys.stdout / sys.stderr.

    fd 1 hooks sys.stdout, any other fd hooks sys.stderr. Everything
    written is forwarded to the original stream and - with ANSI color
    sequences stripped - handed to add_output() for the job log.
    """

    def __init__(self, fd):
        self.fd = fd

        if self.fd == 1:
            self.orig = sys.stdout
            sys.stdout = self
        else:
            self.orig = sys.stderr
            sys.stderr = self

        # Matches ANSI SGR sequences like "\033[31m" so color codes are
        # not persisted into the job log.
        self.color_replace = re.compile("\033\\[\\d{1,2}m", re.UNICODE)

    def __del__(self):
        # Restore the hooked stream once the logger is dropped
        if self.fd == 1:
            sys.stdout = self.orig
        else:
            sys.stderr = self.orig

    def write(self, data):
        self.orig.write(data)
        try:
            add_output(self.color_replace.sub('', data))
        except Exception as e:
            # Never let job log bookkeeping break the real output path
            self.orig.write("Failed to add output: %s\n" % e)

    def flush(self):
        self.orig.flush()
# Active Log hooks tee-ing stdout/stderr into the job log (see Log)
g_stdout_log = None
g_stderr_log = None


def start_logging():
    """Hook sys.stdout/sys.stderr so all output is copied to the job log."""
    global g_stdout_log, g_stderr_log
    g_stdout_log = Log(1)
    g_stderr_log = Log(2)


def stop_logging():
    """Remove the logging hooks installed by start_logging().

    Restores the original streams explicitly: sys.stdout/sys.stderr still
    reference the Log objects, so just dropping our references would never
    trigger Log.__del__ and the hooks would stay active forever.
    """
    global g_stdout_log, g_stderr_log
    if g_stderr_log is not None:
        sys.stderr = g_stderr_log.orig
    if g_stdout_log is not None:
        sys.stdout = g_stdout_log.orig
    g_stderr_log = None
    g_stdout_log = None
def log(s):
    """Write a timestamped line to stdout; forward to syslog on the appliance."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    sys.stdout.write("%s %s\n" % (timestamp, s))
    if is_cma():
        syslog.syslog(s)
def verbose(s):
    # Log *s* only when verbose output was requested on the command line.
    # opt_verbose is a module level option flag set during argument parsing
    # (defined elsewhere in this file, not visible in this chunk).
    if opt_verbose > 0:
        log(s)
def hostname():
    """Return the hostname of the system we are running on."""
    name = socket.gethostname()
    return name
def is_root():
    """Whether mkbackup is executed as root (system backup context)."""
    uid = os.getuid()
    return uid == 0
def is_cma():
    """Whether this host is a Check_MK appliance (detected via its config file)."""
    marker_file = "/etc/cma/cma.conf"
    return os.path.exists(marker_file)
def current_site_id():
    """ID of the OMD site we run in, or None outside of a site context."""
    return os.environ.get("OMD_SITE", None)
def site_version(site_id):
    """Return the version string the site's "version" symlink points to."""
    link_target = os.readlink("/omd/sites/%s/version" % site_id)
    return os.path.basename(link_target)
def system_config_path():
    """Path of the system wide (appliance) backup configuration file."""
    path = "/etc/cma/backup.conf"
    return path
def site_config_path():
    """Path of the site specific backup configuration file.

    Raises an Exception when not executed within an OMD site context.
    """
    site = current_site_id()
    if not site:
        raise Exception("Not executed in OMD environment!")
    site_root = os.environ["OMD_ROOT"]
    return "%s/etc/check_mk/backup.mk" % site_root
g_backup_lock_f = None


# There is one global backup lock that is taken for all modifying
# operations: Only a single backup or restore may be executed on the
# whole system at any point in time.
def acquire_backup_lock():
    """Take the system wide exclusive backup/restore lock (non blocking).

    Creates /tmp/mkbackup.lock (group "omd", mode 0660) when missing and
    acquires an exclusive flock on it. Raises MKGeneralException when
    another backup/restore already holds the lock.
    """
    global g_backup_lock_f
    lock_file_path = "/tmp/mkbackup.lock"
    if not os.path.exists(lock_file_path):
        try:
            # Create the lock file atomically: prepare a temporary file with
            # the final ownership/permissions, then rename it into place.
            g_backup_lock_f = tempfile.NamedTemporaryFile(mode="a+", dir="/tmp", delete=False)
            set_permissions(g_backup_lock_f.name, -1, grp.getgrnam("omd").gr_gid, 0o660)
            os.rename(g_backup_lock_f.name, lock_file_path)
        except (IOError, OSError) as e:
            raise MKGeneralException("Failed to open lock file \"%s\": %s" % (lock_file_path, e))
    else:
        g_backup_lock_f = open(lock_file_path, "a")

    try:
        fcntl.flock(g_backup_lock_f, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError as e:
        raise MKGeneralException("Failed to get the exclusive backup lock. "
                                 "Another backup/restore seems to be running (%s)." % e)

    # Ensure that the lock is not inherited to subprocessess
    try:
        cloexec_flag = fcntl.FD_CLOEXEC
    except AttributeError:
        cloexec_flag = 1

    fd = g_backup_lock_f.fileno()
    fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.fcntl(fd, fcntl.F_GETFD) | cloexec_flag)
def set_permissions(path, uid, gid, mode):
    """Best effort chown/chmod of *path*.

    Permission errors (EACCES/EPERM) are skipped on purpose: On CIFS
    mounts with "uid=0,forceuid,gid=1000,forcegid" mount options and on
    some NFS mounts changing ownership or mode is not possible. All other
    errors are raised. A *mode* of None skips the chmod step entirely.
    """
    def _apply_if_permitted(func, *args):
        # Shared error handling for os.chown() and os.chmod()
        try:
            func(*args)
        except OSError as e:
            if e.errno in (errno.EACCES, errno.EPERM):
                pass  # Not possible on some CIFS/NFS mounts. So skip over.
            else:
                raise

    _apply_if_permitted(os.chown, path, uid, gid)
    if mode is not None:
        _apply_if_permitted(os.chmod, path, mode)
# TODO: Move to cmklib?
def makedirs(path, user=None, group=None, mode=None):
    """Recursively create *path* (like os.makedirs) with ownership/mode.

    Every created directory gets the given user/group/mode applied via
    makedir(). Mirrors the structure of the stdlib os.makedirs().
    """
    head, tail = os.path.split(path)
    if not tail:
        head, tail = os.path.split(head)

    if head and tail and not os.path.exists(head):
        try:
            makedirs(head, user, group, mode)
        except OSError as e:
            # be happy if someone already created the path
            if e.errno != errno.EEXIST:
                raise
        if tail == ".":  # xxx/newdir/. exists if xxx/newdir exists
            return
    makedir(path, user, group, mode)
# TODO: Move to cmklib?
def makedir(path, user=None, group=None, mode=None):
    """Create a single directory and apply user/group/mode to it.

    Does nothing when the directory already exists. User and group are
    resolved by name; unset values keep the current owner (-1).
    """
    if os.path.exists(path):
        return

    os.mkdir(path)

    uid = pwd.getpwnam(user).pw_uid if user is not None else -1
    gid = grp.getgrnam(group).gr_gid if group is not None else -1

    set_permissions(path, uid, gid, mode)
# When executed as root:
# - Load the system configuration
# When executed as site user:
# - Load the backup targets from the system configuration
# - Load the site configuration
def load_config():
    """Load the backup configuration of the current user context.

    Site users get their site configuration plus all non conflicting
    backup targets of the system wide configuration. A missing system
    wide configuration is OK in that case.
    """
    def load_file(path):
        # literal_eval: the config files must contain plain data literals.
        # Use a context manager so the file handle is closed right away.
        with open(path) as f:
            return ast.literal_eval(f.read())

    if is_root():
        config = load_file(system_config_path())
    else:
        config = load_file(site_config_path())

        try:
            system_targets = load_file(system_config_path())["targets"]

            # only load non conflicting targets
            for target_ident, target_config in system_targets.items():
                if target_ident not in config["targets"]:
                    config["targets"][target_ident] = target_config
        except IOError:
            # Not existing system wide config is OK. In this case there
            # are only backup targets from site config available.
            pass

    return config
# TODO: Duplicate code with htdocs/backup.py
def load_backup_info(path):
    """Load a mkbackup.info file and derive the backup_id from its location."""
    # Close the file right after reading instead of leaking the handle
    with open(path) as f:
        info = json.load(f)

    # Load the backup_id from the second right path component. This is the
    # base directory of the mkbackup.info file. The user might have moved
    # the directory, e.g. for having multiple backups. Allow that.
    # Maybe we need to changed this later when we allow multiple generations
    # of backups.
    info["backup_id"] = os.path.basename(os.path.dirname(path))

    return info
def get_site_ids_of_backup(info):
    """Extract the site IDs from the "site-*" archive entries of *info*."""
    site_ids = []
    for file_entry in info["files"]:
        file_name = file_entry[0]
        if file_name.startswith("site-"):
            # "site-<id>.<suffix>" -> "<id>"
            site_ids.append(file_name.split(".", 1)[0][5:])
    return site_ids
def save_backup_info(info):
    """Write *info* as pretty printed JSON into the backup's mkbackup.info."""
    info_path = backup_info_path()
    with open(info_path, "w") as f:
        json.dump(info, f, sort_keys=True, indent=4, separators=(',', ': '))
def create_backup_info():
    """Collect the meta information of the just finished backup.

    Contains the job identification and config, hostname, the archive
    files (with size and checksum) and site respectively appliance
    specific details.
    """
    files = get_files_for_backup_info()

    info = {
        "type": "Check_MK" if not is_root() else "Appliance",
        "job_id": g_local_job_id,
        "config": g_job_config,
        "hostname": hostname(),
        # Reuse the already gathered file list instead of scanning (and
        # checksumming) the whole backup directory a second time, which
        # could also yield a list inconsistent with "size" below.
        "files": files,
        "finished": time.time(),
        "size": sum(f[1] for f in files),
    }

    if not is_root():
        add_site_info_to_backup_info(info)
    else:
        add_system_info_to_backup_info(info)

    return info
def add_site_info_to_backup_info(info):
    """Enrich *info* with the ID and version of the backed up site."""
    site_id = current_site_id()
    info["site_id"] = site_id
    info["site_version"] = site_version(site_id)
def add_system_info_to_backup_info(info):
    """Enrich *info* with appliance version and cluster state information."""
    # The cma module is only available on the appliance itself
    import cma  # pylint: disable=import-error
    cma.load_config()
    info["cma_version"] = cma.version()

    if not cma.is_clustered():
        return

    cluster_cfg = cma.cfg("cluster")

    if cluster_cfg:
        partner_name = cma.other_node_name(cluster_cfg)
    else:
        partner_name = None

    info["cma_cluster"] = {
        "clustered": True,
        "partner_name": partner_name,
        "is_inactive": is_inactive_cluster_node(),
    }
def get_files_for_backup_info():
    """Return (name, size, md5) tuples for all files in the backup directory."""
    backup_path = job_backup_path_during_backup()
    entries = []
    for name in sorted(os.listdir(backup_path)):
        full_path = backup_path + "/" + name
        entries.append((name, os.path.getsize(full_path), file_checksum(full_path)))
    return entries
def file_checksum(path):
    """Return the hex MD5 digest of the file at *path*, read in 4 KiB chunks."""
    digest = md5()
    with open(path, "rb") as f:
        while True:
            block = f.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
# Wrapper to workaround different issues during system restore and improved logging
class MKTarFile(TarFile):
    """TarFile subclass that tolerates conflicting pre-existing filesystem entries.

    During a system restore the target path may already contain files,
    directories or symlinks whose type conflicts with the archive member.
    These overrides remove such entries before extraction and log each
    extracted path in verbose mode.
    """

    def _extract_member(self, tarinfo, targetpath):
        # Log each extracted path before delegating to the stock extraction
        verbose("Extracting %s" % targetpath)
        super(MKTarFile, self)._extract_member(tarinfo, targetpath)

    def makedir(self, tarinfo, targetpath):
        # Remove an existing entry when its symlink-ness differs from the
        # member to create, or when it is not a directory at all.
        if os.path.lexists(targetpath):
            if os.path.islink(targetpath) != (tarinfo.islnk() or tarinfo.issym()):
                os.remove(targetpath)

            elif not os.path.isdir(targetpath):
                os.remove(targetpath)

        super(MKTarFile, self).makedir(tarinfo, targetpath)

    def makelink(self, tarinfo, targetpath):
        # A regular file or directory in the way of a link is removed first
        if os.path.lexists(targetpath) and not os.path.islink(targetpath):
            if os.path.isdir(targetpath):
                shutil.rmtree(targetpath)
            else:
                os.remove(targetpath)

        super(MKTarFile, self).makelink(tarinfo, targetpath)

    def makefile(self, tarinfo, targetpath):
        # Remove a symlink in the way of a regular file, or a directory in
        # the way of any file member.
        if os.path.lexists(targetpath):
            was_link = tarinfo.islnk() or tarinfo.issym()
            if os.path.islink(targetpath) and not was_link:
                os.remove(targetpath)

            elif os.path.isdir(targetpath):
                shutil.rmtree(targetpath)

        try:
            super(MKTarFile, self).makefile(tarinfo, targetpath)
        except IOError, e:
            if e.errno == errno.EISDIR:
                # Handle "IOError: [Errno 21] Is a directory"
                # Happens e.g. when a dir is being replaced by a file during restore
                if os.path.islink(targetpath):
                    os.remove(targetpath)
                else:
                    shutil.rmtree(targetpath)
                super(MKTarFile, self).makefile(tarinfo, targetpath)

            elif e.errno == errno.ETXTBSY:
                # Fix "IOError: [Errno 26] Text file busy" when replacing a file
                os.remove(targetpath)
                super(MKTarFile, self).makefile(tarinfo, targetpath)
            else:
                raise

    def makefifo(self, tarinfo, targetpath):
        # The nullmailer trigger FIFO may exist on the live system - clean
        # it up so the extraction does not fail.
        if os.path.exists(targetpath) and targetpath in ["/rw/var/spool/nullmailer/trigger"]:
            verbose("Cleaning up %s" % targetpath)
            os.remove(targetpath)

        super(MKTarFile, self).makefifo(tarinfo, targetpath)
# List: list all backups
# As site user only the site backups are visible (including those
# created by the system backups)
# - Job-ID
#
# Example commands:
# # lists all backups the user is allowed to see
# mkbackup list nfs
#
# # lists all backups the user is allowed to see that belong to this job
# mkbackup list nfs --job=xxx
#
# Restore:
# - Job-ID
# - Backup-ID
# - As site user the backup ID of a site backup has to be given
#
# Example commands:
# # lists all backups the user is allowed to see
# mkbackup restore nfs backup-id-20
#
# Show: displays meta information of a backup
# - Job-ID
# - Backup-ID
#
# Example commands:
# mkbackup show nfs backup-id-20

# Declaration of the command line modes: description, positional arguments
# ("args") and options ("opts"/"root_opts") of each mode.
modes = {
    "backup": {
        "description":
            "Starts creating a new backup. When executed as Check_MK site user, a backup of the "
            "current site is executed to the target of the given backup job. When executed as "
            "root user on the Check_MK Appliance, a backup of the whole system is created.",
        "args": [{
            "id": "Job-ID",
            "description": "The ID of the backup job to work with",
        },],
        "opts": {
            "background": {
                "description": "Fork and execute the program in the background.",
            },
        },
        "root_opts": {
            "without-sites": {
                "description": "Exclude the Check_MK site files during backup.",
            },
        },
    },
    "restore": {
        "description":
            "Starts the restore of a backup. In case you want to restore an encrypted backup, "
            "you have to provide the passphrase of the used backup key via the environment "
            "variable 'MKBACKUP_PASSPHRASE'. For example: MKBACKUP_PASSPHRASE='secret' mkbackup "
            "restore ARGS.",
        "args": [
            {
                "id": "Target-ID",
                "description": "The ID of the backup target to work with",
            },
            {
                "id": "Backup-ID",
                "description": "The ID of the backup to restore",
            },
        ],
        "opts": {
            "background": {
                "description": "Fork and execute the program in the background.",
            },
            "no-verify": {
                "description": "Disable verification of the backup files to restore from.",
            },
            "no-reboot": {
                "description": "Don't trigger a system reboot after succeeded restore.",
            },
        },
    },
    "jobs": {
        "description": "Lists all configured backup jobs of the current user context.",
    },
    "targets": {
        "description": "Lists all configured backup targets of the current user context.",
    },
    "list": {
        "description": "Output the list of all backups found on the given backup target",
        "args": [{
            "id": "Target-ID",
            "description": "The ID of the backup target to work with",
        },],
    },
}
# Runtime identification/config state of the currently processed backup.
# Presumably set by load_job() and the mode implementations (defined
# elsewhere in this file) - the assignments are not visible in this chunk.
g_job_id = None        # ID of the backup job being executed
g_local_job_id = None  # job ID as given on the command line
g_job_config = None    # configuration dict of the loaded job
g_target_id = None     # ID of the backup target (restore/list modes)
g_backup_id = None     # ID of the backup to restore/show
def mode_backup(local_job_id, opts):
    """Implements "mkbackup backup": execute the job given by *local_job_id*.

    Acquires the global backup lock, optionally daemonizes ("background"
    option) and tracks the job state while running.
    """
    acquire_backup_lock()
    load_job(local_job_id)
    target_ident = g_job_config["target"]
    verify_target_is_ready(target_ident)

    init_new_run()
    save_next_run()

    if "background" in opts:
        daemon.daemonize()
        save_state({"pid": os.getpid()})

    start_logging()
    log("--- Starting backup (%s to %s) ---" % (g_job_id, target_ident))

    success = False
    try:
        cleanup_previous_incomplete_backup()

        # NOTE(review): the closing of this save_state() call (and possibly
        # further state entries) appears to have been dropped in this
        # rendering of the file - verify against the original source.
        save_state({
            "state": "running",

        do_backup(opts)
        success = True

    except KeyboardInterrupt:
        raise

    except MKGeneralException, e:
        sys.stderr.write("%s\n" % e)
        if opt_debug:
            raise

    except:
        # Catch-all: report unexpected errors but keep the finally-block
        # state bookkeeping intact (re-raised in debug mode).
        if not opt_debug:
            sys.stderr.write("An exception occured:\n")
            sys.stderr.write(traceback.format_exc())
        else:
            raise

    finally:
        stop_logging()
        # NOTE(review): the closing of this save_state() call appears to
        # have been dropped in this rendering of the file as well.
        save_state({
            "state": "finished",
            "finished": time.time(),
            "success": success,
def do_backup(opts):
    """Dispatch to the site or system backup depending on the user context."""
    if is_root():
        # Whole-system backups are only possible on the appliance
        if not is_cma():
            raise MKGeneralException("System backup not supported.")
        do_system_backup(opts)
    else:
        do_site_backup(opts)
    complete_backup()
def do_site_backup(opts, site=None, try_stop=True):
    """Backup one site by streaming "omd backup" output into the archive.

    As site user the current site is backed up; as root the *site*
    argument selects it. When the site's version only supports offline
    backups and *try_stop* is set, the site is stopped for the backup and
    started again afterwards.
    """
    cmd = ["omd", "backup"]

    if not compress_archives():
        cmd.append("--no-compression")

    if backup_without_history():
        cmd.append("--no-past")

    # When executed as site user, "omd backup" is executed without the site
    # name and always performing backup for the current site. When executed
    # as root, the site argument has to be given and must be handed over to
    # "omd backup".
    if site is None:
        site = current_site_id()
    else:
        if not is_root():
            raise MKGeneralException("Requested backup of site %s, "
                                     "but not running as root." % site)
        cmd.append(site)

    # "-": let "omd backup" write the tar stream to stdout
    cmd.append("-")

    backup_path = site_backup_archive_path(site)

    # Create missing directories. Ensure group permissions and mode.
    makedirs(os.path.dirname(backup_path), group="omd", mode=0775)

    verbose("Command: %s" % " ".join(cmd))
    p = subprocess.Popen(
        cmd, close_fds=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open(os.devnull))

    # Pipe the stdout of "omd backup" through the (optionally encrypting)
    # BackupStream into the archive file.
    with open(backup_path, "w") as backup_file:
        s = BackupStream(
            stream=p.stdout, is_alive=lambda: p.poll() is None, key_ident=g_job_config["encrypt"])
        for chunk in s.process():
            backup_file.write(chunk)

    if p.returncode != 0:
        err = p.stderr.read()
        if not compress_archives() and "Invalid option '--no-compression'" in err:
            # Older site versions do not know --no-compression
            err = "The Check_MK version of this site does not support uncompressed backups. " \
                  "you can either re-enable the compression or update your site to version " \
                  "1.2.8p5 or later."

        elif "The site needs to be stopped" in err:
            if try_stop:
                log("The Check_MK version of this site does not support online backups. The site "
                    "seems to be at least partially running. Stopping the site during backup and "
                    "starting it again after completion.")

                log("Stopping site")
                stop_site(site)
                try:
                    log("Start offline site backup")
                    # Retry exactly once with the site stopped
                    return do_site_backup(opts, site, try_stop=False)
                finally:
                    log("Starting site again")
                    start_site(site)

            else:
                raise MKGeneralException("Failed to backup site that only supports "
                                         "offline backup.")

        raise MKGeneralException("Site backup failed: %s" % err)
def stop_site(site):
    """Stop the given site (or the current one as site user) and wait for it.

    Polls "omd status --bare" up to five times; exit code 1 is treated as
    "stopped". Raises MKGeneralException when the site is still running
    after the last poll.
    """
    site_arg = [site] if is_root() else []

    omd_command("stop", *site_arg)

    # NOTE(review): there is no delay between the status polls in this
    # rendering of the file - confirm against the original whether a
    # sleep was lost here.
    for c in range(5):
        if subprocess.call(
                ["omd", "status", "--bare"] + site_arg, stdout=open(os.devnull, "w")) == 1:
            break
        elif c == 4:
            raise MKGeneralException("Failed to stop site")
def start_site(site):
    """Start the given site (or the current one as site user) and wait for it.

    Polls "omd status --bare" up to five times; exit code 0 is treated as
    "running". Raises MKGeneralException when the site did not come up.
    """
    site_arg = [site] if is_root() else []

    omd_command("start", *site_arg)

    # NOTE(review): the opening bracket of the status command list was lost
    # in this rendering of the file and has been restored here to match the
    # closing bracket; no delay between polls either - verify both against
    # the original source.
    for c in range(5):
        if subprocess.call([
                "omd",
                "status",
                "--bare",
        ] + site_arg, stdout=open(os.devnull, "w")) == 0:
            break
        elif c == 4:
            raise MKGeneralException("Failed to start site")
def omd_command(*args):
    """Run an "omd" command, log its output and raise on non zero exit."""
    command = ["omd"]
    command.extend(args)
    verbose("Command: %s" % " ".join(command))
    proc = subprocess.Popen(
        command,
        close_fds=True,
        stdin=open(os.devnull),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    output = proc.communicate()[0]
    verbose(output)
    if proc.returncode != 0:
        raise MKGeneralException("Failed to run <tt>%s</tt>: %s" % (" ".join(command), output))
# Using RSA directly to encrypt the whole backup is a bad idea. So we use the RSA
# public key to generate and encrypt a shared secret which is then used to encrypt
# the backup with AES.
#
# When encryption is active, this function uses the configured RSA public key to
# a) create a random secret key which is encrypted with the RSA public key
# b) the encrypted key is used written to the backup file
# c) the unencrypted random key is used as AES key for encrypting the backup stream
class MKBackupStream(object):
    """Base class for processing (en-/decrypting) a backup data stream.

    Reads chunks from *stream* while the producer reported by *is_alive*
    is running and yields the processed chunks. Subclasses implement
    reading and per-chunk processing. *key_ident* selects the configured
    backup key; None disables encryption.
    """

    def __init__(self, stream, is_alive, key_ident):
        self._stream = stream
        self._is_alive = is_alive
        self._cipher = None
        self._key_ident = key_ident

        # Throughput bookkeeping for the save_state() progress updates
        self._last_state_update = time.time()
        self._last_bps = None
        self._bytes_copied = 0

        # The iv is an initialization vector for the CBC mode of operation. It
        # needs to be unique per key per message. Normally, it's sent alongside
        # the data in cleartext. Here, since the key is only ever used once,
        # you can use a known IV.
        self._iv = '\x00' * AES.block_size

    def process(self):
        """Generator yielding the processed backup stream chunk by chunk."""
        head = self._init_processing()
        if head is not None:
            yield head

        self._next_chunk = None

        while True:
            chunk, finished = self._read_chunk()
            self._bytes_copied += len(chunk)
            yield self._process_chunk(chunk)

            if finished and not self._is_alive():
                break  # end of stream reached

            self._update_state()

    def _encrypt(self):
        # Encryption is active when a backup key was configured for the job
        return self._key_ident is not None

    def _init_processing(self):
        raise NotImplementedError()

    def _read_from_stream(self, size):
        # Read up to *size* bytes; a closed stream is reported as EOF
        try:
            return self._stream.read(size)
        except ValueError:
            if self._stream.closed:
                return ""  # handle EOF transparently
            else:
                raise

    def _read_chunk(self):
        raise NotImplementedError()

    def _process_chunk(self, chunk):
        raise NotImplementedError()

    def _update_state(self):
        """Persist an exponentially smoothed bytes/second value about once a second."""
        timedif = time.time() - self._last_state_update
        if timedif >= 1:
            this_bps = float(self._bytes_copied) / timedif

            if self._last_bps is None:
                bps = this_bps  # initialize the value
            else:
                # Exponential smoothing over roughly backlog_sec seconds
                percentile, backlog_sec = 0.50, 10
                weight_per_sec = (1 - percentile)**(1.0 / backlog_sec)
                weight = weight_per_sec**timedif
                bps = self._last_bps * weight + this_bps * (1 - weight)

            save_state({"bytes_per_second": bps})
            self._last_state_update, self._last_bps, self._bytes_copied = time.time(), bps, 0

    def _get_key_spec(self, key_id):
        """Return the configured key whose certificate MD5 digest matches *key_id*."""
        keys = self._load_backup_keys()

        for key in keys.values():
            cert = crypto.load_certificate(crypto.FILETYPE_PEM, key["certificate"])
            if key_id == cert.digest("md5"):
                return key

        raise MKGeneralException("Failed to load the configured backup key: %s" % key_id)

    def _load_backup_keys(self):
        """Load the backup key store of the current context (system or site)."""
        if is_root():
            path = "/etc/cma/backup_keys.conf"
        else:
            path = "%s/etc/check_mk/backup_keys.mk" % os.environ["OMD_ROOT"]

        variables = {"keys": {}}
        if os.path.exists(path):
            execfile(path, variables, variables)
        return variables["keys"]
class BackupStream(MKBackupStream):
    """Stream processor used while writing a backup (optionally encrypting)."""

    def _init_processing(self):
        if self._encrypt():
            # NOTE(review): the continuation of this _derive_key() call (its
            # second argument, the AES key length) was dropped in this
            # rendering of the file - verify against the original source.
            secret_key, encrypted_secret_key = self._derive_key(self._get_encryption_public_key(),
            self._cipher = AES.new(secret_key, AES.MODE_CBC, self._iv)

            # Write out a file version marker and the encrypted secret key, preceded by
            # a length indication. All separated by \0.
            # Version 1: Encrypted secret key written with pubkey.encrypt(). Worked with
            #            early versions of 1.4 until moving from PyCryto to PyCryptodome
            # Version 2: Use PKCS1_OAEP for encrypting the encrypted_secret_key.
            return "%d\0%d\0%s\0" % (2, len(encrypted_secret_key), encrypted_secret_key)

    def _read_chunk(self):
        finished = False
        if self._encrypt():
            chunk = self._read_from_stream(1024 * AES.block_size)

            # Detect end of file and add padding to fill up to block size
            if chunk == "" or len(chunk) % AES.block_size != 0:
                padding_length = (AES.block_size - len(chunk) % AES.block_size) or AES.block_size
                chunk += padding_length * chr(padding_length)
                finished = True
        else:
            chunk = self._read_from_stream(1024 * 1024)

            if chunk == "":
                finished = True

        return chunk, finished

    def _process_chunk(self, chunk):
        # Encrypt when a backup key is configured, pass through otherwise
        if self._encrypt():
            return self._cipher.encrypt(chunk)
        return chunk

    def _get_encryption_public_key(self):
        """Extract the RSA public key from the configured backup certificate."""
        key = self._get_key_spec(self._key_ident)

        # First extract the public key part from the certificate
        cert = crypto.load_certificate(crypto.FILETYPE_PEM, key["certificate"])
        pub = cert.get_pubkey()
        pub_pem = crypto.dump_publickey(crypto.FILETYPE_PEM, pub)

        # Now constuct the public key object
        return RSA.importKey(pub_pem)

    # logic from http://stackoverflow.com/questions/6309958/encrypting-a-file-with-rsa-in-python
    # Since our packages moved from PyCrypto to PyCryptodome we need to change this to use PKCS1_OAEP.
    def _derive_key(self, pubkey, key_length):
        """Create a random AES secret key and its RSA (PKCS1_OAEP) encrypted form."""
        secret_key = os.urandom(key_length)

        # Encrypt the secret key with the RSA public key
        cipher_rsa = PKCS1_OAEP.new(pubkey)
        encrypted_secret_key = cipher_rsa.encrypt(secret_key)

        return secret_key, encrypted_secret_key
class RestoreStream(MKBackupStream):
    """Stream processor used while restoring (optionally decrypting) a backup."""

    def _init_processing(self):
        if not self._encrypt():
            return

        # The header written by BackupStream carries the file version and
        # the RSA encrypted AES secret key.
        file_version, encrypted_secret_key = self._read_encrypted_secret_key()
        secret_key = self._decrypt_secret_key(file_version, encrypted_secret_key)
        self._cipher = AES.new(secret_key, AES.MODE_CBC, self._iv)

    def _read_chunk(self):
        if not self._encrypt():
            # process unencrypted backup
            chunk = self._read_from_stream(1024 * 1024)
            return chunk, chunk == ""

        # Decryption works one chunk ahead: the padding of the final block
        # can only be stripped once EOF is known.
        this_chunk = self._cipher.decrypt(self._read_from_stream(1024 * AES.block_size))

        if self._next_chunk is None:
            # First chunk. Only store for next loop
            self._next_chunk = this_chunk
            return "", False

        elif len(this_chunk) == 0:
            # Processing last chunk. Strip off padding.
            padding_length = ord(self._next_chunk[-1])
            chunk = self._next_chunk[:-padding_length]
            return chunk, True

        # Processing regular chunk
        chunk = self._next_chunk
        self._next_chunk = this_chunk
        return chunk, False

    def _process_chunk(self, chunk):
        # Decryption already happened in _read_chunk()
        return chunk

    def _read_encrypted_secret_key(self):
        """Parse the backup header: version, key length and encrypted key."""
        def read_field():
            # Read a \0 terminated field byte by byte
            buf = ""
            while True:
                c = self._stream.read(1)
                if c == "\0":
                    break
                else:
                    buf += c
            return buf

        file_version = read_field()
        if file_version not in ["1", "2"]:
            raise MKGeneralException(
                "Failed to process backup file (invalid version %r)" % file_version)

        try:
            key_len = int(read_field())
        except ValueError:
            raise MKGeneralException("Failed to parse the encrypted backup file (key length)")

        if int(key_len) > 256:
            raise MKGeneralException("Failed to process backup file (invalid key length)")

        encrypted_secret_key = self._stream.read(int(key_len))

        if self._stream.read(1) != "\0":
            raise MKGeneralException("Failed to parse the encrypted backup file (header broken)")

        return file_version, encrypted_secret_key

    def _get_encryption_private_key(self):
        """Load and decrypt the configured RSA private key.

        The passphrase has to be provided via the MKBACKUP_PASSPHRASE
        environment variable.
        """
        key = self._get_key_spec(self._key_ident)

        try:
            passphrase = os.environ["MKBACKUP_PASSPHRASE"]
        except KeyError:
            raise MKGeneralException("Failed to get passphrase for decryption the backup. "
                                     "It needs to be given as environment variable "
                                     "\"MKBACKUP_PASSPHRASE\".")

        # First decrypt the private key using PyOpenSSL (was unable to archieve
        # this with RSA.importKey(). :-(
        key = crypto.load_privatekey(crypto.FILETYPE_PEM, key["private_key"], passphrase)
        priv_pem = crypto.dump_privatekey(crypto.FILETYPE_PEM, key)

        try:
            return RSA.importKey(priv_pem)
        except (ValueError, IndexError, TypeError):
            if opt_debug:
                raise
            raise MKGeneralException("Failed to load private key (wrong passphrase?)")

    def _decrypt_secret_key(self, file_version, encrypted_secret_key):
        """Decrypt the AES secret key according to the header file version."""
        private_key = self._get_encryption_private_key()

        if file_version == "1":
            # Version 1 headers were written with plain PyCrypto encryption
            # and can not be processed with PyCryptodome anymore.
            raise MKGeneralException("You can not restore this backup using your current Check_MK "
                                     "version. You need to use a Check_MK 1.4 version that has "
                                     "been released before 2017-03-24. The last compatible "
                                     "release is 1.4.0b4.")
        else:
            cipher_rsa = PKCS1_OAEP.new(private_key)
            return cipher_rsa.decrypt(encrypted_secret_key)
# Returns the base path for the backup to work with. In backup mode, this is
# the directory of the target+job. In restore mode it is the target+backup path.
def backup_base_path():
    """Base directory of the backup currently being written or restored."""
    if g_job_id is None:
        return existing_backup_path()
    return job_backup_path_during_backup()
def existing_backup_path():
    """Directory of an existing backup below the selected target path."""
    base = target_path(g_target_id)
    return "%s/%s" % (base, g_backup_id)
def backup_info_path():
    """Path of the mkbackup.info meta data file of the current backup."""
    return backup_base_path() + "/mkbackup.info"
def site_backup_archive_path(site_id):
    """Path of the archive for the given site inside the backup directory."""
    filename = "site-%s%s" % (site_id, archive_suffix())
    return "%s/%s" % (backup_base_path(), filename)
def system_backup_archive_path():
    """Path of the system (/rw) archive inside the backup directory."""
    filename = "system%s" % archive_suffix()
    return "%s/%s" % (backup_base_path(), filename)
def system_data_backup_archive_path():
    """Path of the system data archive inside the backup directory."""
    filename = "system-data%s" % archive_suffix()
    return "%s/%s" % (backup_base_path(), filename)
def job_backup_path_during_backup():
    """Backup directory while the backup is running ("-incomplete" suffix)."""
    return job_backup_path() + "-incomplete"
def job_backup_path_complete():
    """Backup directory of a finished backup ("-complete" suffix)."""
    return job_backup_path() + "-complete"
def job_backup_path():
    """Base backup directory of the current job below the target path."""
    base = target_path(g_job_config["target"])
    return "%s/%s" % (base, g_job_id)
def archive_suffix():
    """File name suffix of the job's archives: .tar[.gz][.enc]."""
    parts = [".tar"]
    if compress_archives():
        parts.append(".gz")
    if encrypt_archives():
        parts.append(".enc")
    return "".join(parts)
def needed_backup_archive_files(info):
    """Names of the archive files that must exist for a consistent backup."""
    if is_root():
        needed = ["system"]

        if not is_inactive_cluster_backup(info):
            needed.append("system-data")

        # Sites may have been deleted or new sites added. Site files archives are optional.
        #needed_files += [ "site-%s" % s for s in existing_sites() ]
    else:
        # Care about restore from a backup made in a site with another site_id
        needed = ["site-%s" % info.get("site_id", current_site_id())]

    return [name + archive_suffix() for name in needed]
def compress_archives():
    """Whether the job is configured to gzip the archives."""
    job_cfg = g_job_config
    return job_cfg["compress"]
def backup_without_history():
    """Whether history files are excluded ("no_history" job option)."""
    job_cfg = g_job_config
    return job_cfg.get("no_history", False)
def encrypt_archives():
    """Whether the job is configured to encrypt the archives with a backup key."""
    key_ident = g_job_config["encrypt"]
    return key_ident is not None
def target_cfg(target_ident):
    """Configuration dict of a backup target; raises KeyError when unknown."""
    targets = g_config["targets"]
    return targets[target_ident]
def target_path(target_ident):
    """Local filesystem path of the given backup target.

    Only "local" targets are supported by now.
    """
    remote = target_cfg(target_ident)["remote"]
    if remote[0] != "local":
        raise NotImplementedError()

    return remote[1]["path"]
# TODO: Duplicate code with htdocs/backup.py
def verify_target_is_ready(target_ident):
    """Ensure the backup target exists and its path is ready for use."""
    try:
        cfg = target_cfg(target_ident)
    except KeyError:
        raise MKGeneralException("The backup target \"%s\" does not exist." % target_ident)

    remote = cfg["remote"]
    if remote[0] != "local":
        raise NotImplementedError()

    params = remote[1]
    if params["is_mountpoint"] and not os.path.ismount(params["path"]):
        raise MKGeneralException("The backup target path is configured to be a mountpoint, "
                                 "but nothing is mounted.")
def verify_backup_exists():
    """Raise when the selected backup directory or its info file is missing."""
    base = backup_base_path()
    if not (os.path.exists(base) and os.path.exists(backup_info_path())):
        raise MKGeneralException("This backup does not exist (Use \"mkbackup list %s\" to "
                                 "show a list of available backups)." % g_target_id)
def verify_backup_consistency(info):
    """Check sizes/checksums of all archives referenced by the backup info."""
    log("Verifying backup consistency")
    needed_files = needed_backup_archive_files(info)

    # Everything listed in the info file that is not required is optional
    optional_files = []
    for entry in info["files"]:
        if entry[0] not in needed_files:
            optional_files.append(entry[0])

    verify_backup_files(info, needed_files, needed=True)
    verify_backup_files(info, optional_files, needed=False)
def verify_backup_files(info, files, needed):
    """Verify presence and checksum of the given archive files.

    Missing archives raise for needed files and are skipped for optional
    ones; a checksum mismatch always raises.
    """
    # Map archive name -> (size, checksum), keeping the first entry per name
    catalog = {}
    for entry in info["files"]:
        if entry[0] not in catalog:
            catalog[entry[0]] = entry[1:]

    for archive_file in files:
        if archive_file not in catalog:
            if needed:
                raise MKGeneralException(
                    "The backup is missing the needed archive %s." % archive_file)
            continue  # missing optional files are OK

        size, checksum = catalog[archive_file]

        archive_path = "%s/%s" % (backup_base_path(), archive_file)
        this_checksum = file_checksum(archive_path)
        if this_checksum != checksum:
            raise MKGeneralException("The backup seems to be damaged and can not be restored. "
                                     "The checksum of the archive %s is wrong (got %s but "
                                     "expected %s)." % (archive_path, this_checksum, checksum))
def do_system_backup(opts):
    """Perform a full appliance backup: the /rw volume, the system data
    volume (/omd, skipped on inactive cluster nodes) and all sites (unless
    disabled via option or job config)."""
    # Create missing directories. Ensure group permissions and mode.
    try:
        # 0o775 instead of legacy 0775 literal; "except ... as" instead of the
        # old comma syntax (both valid since Python 2.6/2.7).
        makedirs(os.path.dirname(system_backup_archive_path()), group="omd", mode=0o775)
    except OSError as e:
        if e.errno == errno.EACCES:
            raise MKGeneralException("Failed to create the backup directory: %s" % e)
        else:
            raise

    # Perform backup of the /rw volume on all devices
    log("Performing system backup (system%s)" % archive_suffix())
    do_system_rw_backup(opts)

    # The data volume (/omd) is not backed up on slave cluster nodes
    if is_inactive_cluster_node():
        log("Skipping system data backup (inactive cluster node)")
        log("Skipping site backup (inactive cluster node)")
        return

    log("Performing system data backup (system-data%s)" % archive_suffix())
    do_system_data_backup(opts)

    def exclude_sites(opts):
        # Site backups can be disabled per call or per job configuration
        return "without-sites" in opts or g_job_config.get("without_sites", False)

    # Now run the site backup for all sites
    if not exclude_sites(opts):
        for site_id in existing_sites():
            log("Performing site backup: %s" % site_id)
            do_site_backup(opts, site=site_id)
    else:
        log("Skipping site backup (disabled)")
def do_system_rw_backup(opts):
    """Backup the /rw volume into the system backup archive.

    A dedicated thread tars /rw (excluding mnt/*/*) into the write end of a
    pipe while this thread reads the pipe through BackupStream (which applies
    compression/encryption per job config) and writes it to the archive."""
    with open(system_backup_archive_path(), "w") as backup_file:
        pipein_fd, pipeout_fd = os.pipe()
        pipein = os.fdopen(pipein_fd)

        # Write to buffer in dedicated thread
        t = threading.Thread(
            target=lambda: write_to_tarfile_threaded(pipeout_fd, "/rw", ["mnt/*/*"]))
        t.daemon = True
        t.start()

        # Process backup stream and write to destination file
        s = BackupStream(stream=pipein, is_alive=t.is_alive, key_ident=g_job_config["encrypt"])
        for chunk in s.process():
            backup_file.write(chunk)
def do_system_data_backup(opts):
    """Backup the data volume (/omd, excluding sites/*) into the system data
    archive. Same pipe/thread scheme as do_system_rw_backup()."""
    # open() instead of the deprecated file() builtin, consistent with
    # do_system_rw_backup().
    with open(system_data_backup_archive_path(), "w") as backup_file:
        pipein_fd, pipeout_fd = os.pipe()
        pipein = os.fdopen(pipein_fd)

        # Write to buffer in dedicated thread
        t = threading.Thread(
            target=lambda: write_to_tarfile_threaded(pipeout_fd, "/omd", ["sites/*"]))
        t.daemon = True
        t.start()

        # Process backup stream and write to destination file
        s = BackupStream(stream=pipein, is_alive=t.is_alive, key_ident=g_job_config["encrypt"])
        for chunk in s.process():
            backup_file.write(chunk)
def write_to_tarfile_threaded(pipeout_fd, base_path, exclude_patterns):
    """Thread target: tar base_path into the write end of a pipe and close it
    afterwards so the reading side sees EOF."""
    sink = os.fdopen(pipeout_fd, "w")
    backup_files_to_tarfile(sink, base_path, exclude_patterns)
    sink.close()
# Whether or not the data filesystem is mounted (-> on active cluster nodes)
def is_inactive_cluster_node():
    # The cma module is only importable on the Check_MK appliance itself,
    # which is why it is imported locally instead of at the top of the file.
    import cma  # pylint: disable=import-error
    cma.load_config()
    return cma.inactive_cluster_node()
def is_cluster_backup(info):
    """Return whether the backup info declares a clustered appliance setup."""
    cluster_info = info.get("cma_cluster", {})
    return cluster_info.get("clustered", False)
def is_inactive_cluster_backup(info):
    """Return whether the backup was taken on an inactive cluster node."""
    if "cma_cluster" not in info:
        return False
    return info["cma_cluster"]["is_inactive"]
def existing_sites():
    """Return the sorted names of all OMD sites (directories below /omd/sites)."""
    base = "/omd/sites"
    site_names = [
        entry for entry in os.listdir(base) if os.path.isdir(os.path.join(base + "/", entry))
    ]
    return sorted(site_names)
def backup_files_to_tarfile(fobj, base_path, exclude_patterns=None):
    """Write the contents of base_path as (optionally gzipped) tar stream
    to the given file object.

    exclude_patterns is an optional list of fnmatch globs interpreted
    relative to base_path. The base_path directory entry itself is not
    added to the archive, only its children."""
    if exclude_patterns:

        def filter_files(filename):
            for glob_pattern in exclude_patterns:
                # patterns are relative to base_path, filename is full path.
                # strip of the base_path prefix from full path
                if fnmatch.fnmatch(filename[len(base_path.rstrip("/")) + 1:], glob_pattern):
                    return True  # exclude this file
            return False

    else:
        filter_files = lambda x: False

    tar_mode = "w|gz" if compress_archives() else "w|"
    try:
        tar = TarFile.open(fileobj=fobj, mode=tar_mode)
    except IOError as e:
        # ESPIPE: the reading end of the pipe went away already. Treat as a
        # soft failure unless debugging ("except ... as" instead of Py2-only
        # comma syntax).
        if not opt_debug and e.errno == errno.ESPIPE:
            log("Failed to init backup to tarfile: %s" % e)
            return
        else:
            raise

    # Don't add base path itself
    for f in os.listdir(base_path):
        tar.add(base_path + "/" + f, exclude=filter_files)
    tar.close()
def complete_backup():
    """Finalize a successful backup run.

    Writes the info file, records the total size in the state file, verifies
    the consistency of what was written and only then atomically replaces a
    previously completed backup with the new one."""
    info = create_backup_info()
    save_backup_info(info)

    save_state({
        "size": info["size"],
    })

    verify_backup_consistency(info)

    # Now we can be sure this new backup is a good one. Remove eventual old
    # backup and move from "incomplete" to "complete".

    if os.path.exists(job_backup_path_complete()):
        log("Cleaning up previously completed backup")
        shutil.rmtree(job_backup_path_complete())

    os.rename(job_backup_path_during_backup(), job_backup_path_complete())

    state = load_state()
    duration = time.time() - state["started"]

    log("--- Backup completed (Duration: %s, Size: %s, IO: %s/s) ---" %
        (render.timespan(duration), render.fmt_bytes(info["size"]),
         render.fmt_bytes(state["bytes_per_second"])))
def cleanup_previous_incomplete_backup():
    """Remove leftovers of an aborted previous backup run, if any exist.

    Raises MKGeneralException when the backup directory is not writable;
    other OSErrors are re-raised unchanged."""
    if os.path.exists(job_backup_path_during_backup()):
        log("Found previous incomplete backup. Cleaning up those files.")
        try:
            shutil.rmtree(job_backup_path_during_backup())
        # "except ... as" instead of the Python-2-only comma syntax
        except OSError as e:
            if e.errno == errno.EACCES:
                raise MKGeneralException("Failed to write the backup directory: %s" % e)
            else:
                raise
def load_job(local_job_id):
    """Select the backup job to work with and initialize the job globals
    (g_job_id, g_local_job_id, g_job_config).

    Raises MKGeneralException for unknown job IDs. Validation happens before
    any global is touched, so a failed call leaves the globals unchanged
    (previously g_job_id/g_local_job_id were already set at that point)."""
    if local_job_id not in g_config["jobs"]:
        raise MKGeneralException("This backup job does not exist.")

    global g_job_id, g_local_job_id, g_job_config
    g_job_id = globalize_job_id(local_job_id)
    g_local_job_id = local_job_id
    g_job_config = g_config["jobs"][local_job_id]
def globalize_job_id(local_job_id):
    """Construct the globally unique job ID.

    The ID is composed of a product prefix, the host name, optionally the
    site ID and the local job ID. Dashes in the components are escaped as
    '+' so that '-' can safely act as the separator."""
    site = current_site_id()

    components = ["Check_MK" if site else "Check_MK_Appliance", hostname()]
    if site:
        components.append(site)
    components.append(local_job_id)

    return "-".join(part.replace("-", "+") for part in components)
def init_new_run():
    """Replace the state file with a fresh "started" record for this run
    (update=False discards any previous state)."""
    save_state({
        "state": "started",
        "pid": os.getpid(),
        "started": time.time(),
        "output": "",
        "bytes_per_second": 0,
    },
        update=False)
def save_next_run():
    """Compute and persist the time stamp of the next scheduled run.

    The persisted value is None when no schedule is configured, the string
    "disabled" when scheduling is switched off, otherwise the earliest of
    all configured times of day."""
    schedule_cfg = g_job_config["schedule"]

    if not schedule_cfg:
        next_schedule = None
    elif schedule_cfg["disabled"]:
        next_schedule = "disabled"
    else:
        # find the next time of all configured times
        period = schedule_cfg["period"]
        next_schedule = min(
            schedule.next_scheduled_time(period, timespec)
            for timespec in schedule_cfg["timeofday"])

    save_state({"next_schedule": next_schedule})
# Cached state of the current run (lazily loaded by load_state())
g_state = None


# The state file is in JSON format because it is 1:1 transfered
# to the Check_MK server through the Check_MK agent.
def load_state():
    """Load (and cache in g_state) the state file of the current run."""
    global g_state
    if g_state is None:
        # open() instead of the deprecated file() builtin; the context
        # manager makes sure the handle is closed again (previously leaked).
        with open(state_path()) as state_file:
            g_state = json.load(state_file)

    return g_state
def save_state(new_attrs, update=True):
    """Persist state attributes as pretty printed JSON.

    With update=True the attributes are merged into the currently loaded
    state; otherwise the state is replaced by new_attrs entirely."""
    state = load_state() if update else {}
    state.update(new_attrs)

    store.save_file(state_path(), json.dumps(
        state, sort_keys=True, indent=4, separators=(',', ': ')))
def state_path():
    """Compute the path of the state file for the current operation.

    Backups (g_job_id set) use a per-job file, restores use a fixed name;
    root operations live below /var/lib/mkbackup, site-user backups below
    the site's var directory and site-user restores below /tmp."""
    if g_job_id:
        # backup: one state file per local job
        name = g_local_job_id
        if is_root():
            path = "/var/lib/mkbackup"
        else:
            path = "%s/var/check_mk/backup" % os.environ["OMD_ROOT"]
    else:
        # restore
        if is_root():
            name = "restore"
            path = "/var/lib/mkbackup"
        else:
            name = "restore-%s" % current_site_id()
            path = "/tmp"

    return "%s/%s.state" % (path, name)
def cleanup_backup_job_states():
    """Delete all backup job state files while keeping the restore related
    state files (restore.state / restore-*.state)."""
    if is_root():
        path = "/var/lib/mkbackup"
    else:
        path = "%s/var/check_mk/backup" % os.environ["OMD_ROOT"]

    for state_file in glob.glob("%s/*.state" % path):
        base_name = os.path.basename(state_file)
        is_restore_state = base_name == "restore.state" or base_name.startswith("restore-")
        if not is_restore_state:
            os.unlink(state_file)
def add_output(s):
    """Append process output to the persisted state of the current run."""
    state = load_state()
    state["output"] = state["output"] + s
    save_state(state, update=False)
def mode_restore(target_id, backup_id, opts):
    """Entry point of the "restore" mode.

    Acquires the backup lock, verifies target and backup, optionally checks
    consistency, daemonizes when requested and runs do_restore() with state
    bookkeeping and logging around it."""
    acquire_backup_lock()

    global g_target_id, g_backup_id
    g_target_id, g_backup_id = target_id, backup_id

    verify_target_is_ready(target_id)
    verify_backup_exists()

    info = load_backup_info(backup_info_path())
    # The restore re-uses the job configuration stored with the backup
    global g_job_config
    g_job_config = info["config"]

    if "no-verify" not in opts:
        verify_backup_consistency(info)

    init_new_run()

    if "background" in opts:
        daemon.daemonize()
        # The PID changed during daemonization; record the new one
        save_state({"pid": os.getpid()})

    start_logging()
    log("--- Starting restore (%s) ---" % g_backup_id)

    success = False
    try:
        save_state({
            "state": "running",
        })

        do_restore(opts, info)
        success = True

    except KeyboardInterrupt:
        raise

    except MKGeneralException, e:
        sys.stderr.write("%s\n" % e)
        if opt_debug:
            raise

    except:
        # deliberately broad: record unexpected errors instead of crashing
        # silently; re-raised in debug mode
        if not opt_debug:
            sys.stderr.write("An exception occured:\n")
            sys.stderr.write(traceback.format_exc())
        else:
            raise

    finally:
        stop_logging()
        save_state({
            "state": "finished",
            "finished": time.time(),
            "success": success,
        })
def do_restore(opts, info):
    """Dispatch to the site or system restore and perform the common
    completion steps. Root restores reboot the device unless --no-reboot
    was given."""
    if not is_root():
        do_site_restore(opts, info)
    else:
        if not is_cma():
            raise MKGeneralException("System backup not supported.")
        do_system_restore(opts, info)
    complete_restore()

    if "no-reboot" not in opts and is_root():
        log("--- Rebooting device now ---")
        do_system_restart()
def do_system_restart():
    # Reboot the appliance after a completed system restore
    os.system("reboot")
def do_site_restore(opts, info, site=None):
    """Restore one site from its backup archive by piping the decoded
    stream into "omd restore -".

    Without a site argument the current site (or the site ID stored in the
    backup) is restored; with a site argument the caller must be root."""
    cmd = ["omd", "restore", "--kill"]

    # When executed as site user, "omd restore" is executed without the site
    # name and always performing restore for the current site. When executed
    # as root, the site argument has to be given and must be handed over to
    # "omd restore".
    if site is None:
        # Care about restore from a backup made in a site with another site_id
        site = info.get("site_id", current_site_id())
    else:
        if not is_root():
            raise MKGeneralException("Requested restore of site %s, "
                                     "but not running as root." % site)
        cmd.append("--reuse")
        cmd.append(site)

        # Ensure the site directory exists before "omd restore --reuse" runs.
        # NOTE(review): assumes the site user/group already exist at this
        # point (pwd/grp lookups would raise KeyError otherwise) — confirm.
        omd_root = "/omd/sites/%s" % site
        if not os.path.exists(omd_root):
            os.mkdir(omd_root)
            set_permissions(omd_root, pwd.getpwnam(site).pw_uid, grp.getgrnam(site).gr_gid, 0775)

    cmd.append("-")

    backup_path = site_backup_archive_path(site)

    p = subprocess.Popen(cmd, close_fds=True, stderr=subprocess.PIPE, stdin=subprocess.PIPE)

    with open(backup_path, "r") as backup_file:
        s = RestoreStream(
            stream=backup_file, is_alive=lambda: False, key_ident=g_job_config["encrypt"])
        try:
            archive_started = False
            for chunk in s.process():
                # Hack for all handling of site backups created with Check_MK
                # versions < 2016-03-16 and till 1.2.8p6:
                # When a site was just stopped, the next "omd backup" unmounts the sites
                # tmpfs. In this case the output "Unmounting temporary filesystem...OK" was
                # produced which broke the restore of the site on the local system. Skip to
                # the beginning of the archive
                if not archive_started:
                    if chunk.startswith("Unmounting temporary filesystem"):
                        # 37 = len("Unmounting temporary filesystem...OK\n")
                        chunk = chunk[37:]
                    archive_started = True

                p.stdin.write(chunk)
        except IOError, e:
            log("Error while sending data to restore process: %s" % e)

    # s.process() ends when backup_file is processed. Then end the restore process.
    p.stdin.close()

    if p.wait() != 0:
        log(p.stderr.read())
        raise MKGeneralException("Site restore failed")

    if not is_root():
        # NOTE(review): inside this branch is_root() is always False, so
        # site_arg is always [] — the conditional looks redundant.
        site_arg = [site] if is_root() else []
        if subprocess.call(["omd", "start"] + site_arg) != 0:
            raise MKGeneralException("Failed to start the site after restore")
def do_system_restore(opts, info):
    """Restore a full appliance system backup.

    Order matters here: version check, process/cluster cleanup, /rw volume
    restore, cluster or standalone volume setup, then (unless the backup was
    taken on an inactive cluster node) the /omd data volume and all sites."""
    verify_cma_version_compatible(info)
    prepare_system_restore()
    prepare_cluster_environment(info)

    # Perform restore of the /rw volume
    log("Performing system restore (system%s)" % archive_suffix())
    system_rw_files_before = get_system_rw_files()
    system_rw_files_restored = do_system_rw_restore(opts, info)
    # Remove files that existed before but are not part of the backup
    cleanup_system_rw_files(system_rw_files_before, system_rw_files_restored)
    log("Finished system restore")

    if is_cluster_backup(info):
        setup_cluster_environment(info)
    else:
        setup_standalone_environment(info)

    # In case this backup was taken from an inactive cluster node, the restore is complete
    if is_inactive_cluster_backup(info):
        log("Skipping system data restore (inactive cluster node)")
        log("Skipping site restore (inactive cluster node)")
        return

    verify_data_volume_is_mounted()

    log("Performing system data restore (system-data%s)" % archive_suffix())
    cleanup_directory_contents("/omd", excludes=["/omd/lost+found"])
    do_system_data_restore(opts, info)
    log("Finished system data restore")

    def exclude_sites(opts):
        # Site restore can be disabled per call or per job configuration
        return "without-sites" in opts or g_job_config.get("without_sites", False)

    if exclude_sites(opts):
        log("Skipping site restore (disabled)")
        return

    # Now run the site restore for all sites found in the backup
    for site_id in get_site_ids_of_backup(info):
        log("Performing site restore: %s" % site_id)
        do_site_restore(opts, info, site=site_id)
    log("Finished site restore")
def verify_data_volume_is_mounted():
    """Wait up to roughly 10 seconds for /omd to be mounted, then give up
    with an MKGeneralException."""
    import cma  # pylint: disable=import-error

    for _attempt in range(10):
        if cma.is_mounted("/omd"):
            return
        time.sleep(1)

    # One final check after the last wait interval
    if not cma.is_mounted("/omd"):
        raise MKGeneralException("The data volume is not mounted")
def prepare_cluster_environment(info):
    """Bring the cluster related state into shape before the restore starts.

    Wipes DRBD metadata when a non-cluster backup is restored on a node that
    is currently clustered and unmounts /omd before restoring cluster
    backups."""
    import cma  # pylint: disable=import-error
    cma.load_config()

    if cma.is_cluster_configured() and not is_cluster_backup(info):
        # Is it currently set-up as cluster node and backup is not clustered: Erase drbd metadata
        log("Erasing DRBD metadata (will restore non-cluster backup)")
        # os.system() returns the raw wait status; >> 8 extracts the exit code
        if os.system("yes yes | drbdadm wipe-md omd >/dev/null") >> 8 != 0:
            raise MKGeneralException("Failed to erase DRBD metadata")

    if is_cluster_backup(info) and cma.is_mounted("/omd"):
        log("Unmounting the data volume")
        if not cma.execute("umount -f /omd"):
            raise MKGeneralException("Failed to free the data volume")
def setup_cluster_environment(info):
    """Initialize DRBD and mount the data volume after restoring a cluster
    backup (volume is not mounted for inactive-node backups)."""
    log("Setting up cluster environment")

    import cma  # pylint: disable=import-error
    cma.load_config()

    # To be able to start the DRBD volume we need to have the IP
    # addresses configured in the DRBD config active. Simply activate
    # all IP addresses of the host on the primary network interface.
    # This will be cleaned up by reboot.
    enable_drbd_ip_addresses()

    cma.initialize_drbd()

    if not is_inactive_cluster_backup(info):
        cma.drbd_make_primary()
        cma.execute('mount /dev/drbd/by-res/omd /omd')
def setup_standalone_environment(info):
    """Mount the data volume directly (non-clustered appliance)."""
    log("Setting up standalone device environment")
    os.system('mount /omd')
def enable_drbd_ip_addresses():
    """Temporarily activate the IP address of the DRBD interface so the DRBD
    volume can be started; the address assignment is reverted by the reboot
    at the end of the restore."""
    # These modules are only available when mkbackup is executed on OS level on the appliance
    import cma  # pylint: disable=import-error
    import cma_net  # pylint: disable=import-error
    cma.load_config()
    cma_net.load_config()

    drbd_if = cma.cfg("cluster")['drbd_if']
    config = cma_net.current_interface_config(drbd_if)
    if cma_net.is_vlan_config(config):
        # NOTE(review): VLAN configs are skipped entirely here while VLAN
        # interfaces below only unwrap the "ip" sub-config — confirm this
        # asymmetry is intended.
        return None
    elif cma_net.is_vlan_interface(drbd_if):
        config = config["ip"]

    address, netmask = config["ipaddress"], config["netmask"]

    log("Enabling DRBD network address (%s: %s/%s)" % (drbd_if, address, netmask))

    subprocess.call(
        ["ip", "a", "a",
         "%s/%s" % (address, netmask), "dev",
         cma_net.get_simple_interface_name()])
def verify_cma_version_compatible(info):
    """Refuse to restore a system backup that was created with a different
    appliance firmware version than the one currently installed."""
    # These modules are only available when mkbackup is executed on OS level on the appliance
    import cma  # pylint: disable=import-error

    installed_version = cma.version()
    if info["cma_version"] == installed_version:
        return

    raise MKGeneralException(
        "The backup can not be restored because the version of the "
        "backup (%s) and the currently installed firmware (%s) are not the same. You "
        "have to install the exact same version to be able to restore the backup." %
        (info["cma_version"], installed_version))
def get_system_rw_files():
    """Return the full paths of all files currently present below /rw."""
    return [
        "%s/%s" % (base_dir, name)
        for base_dir, _unused_dir_names, file_names in os.walk("/rw")
        for name in file_names
    ]
def cleanup_system_rw_files(files_before, files_restored):
    """Delete /rw files that existed before the restore but were not part of
    the restored archive, keeping mkbackup's own restore state files."""
    for path in files_before:
        if path in files_restored:
            continue

        # Never delete our own restore bookkeeping files
        if path.startswith("/rw/var/lib/mkbackup/restore.state_tmp") \
           or path == "/rw/var/lib/mkbackup/restore.state":
            continue

        if os.path.lexists(path):
            log("Cleaning up %s" % path)
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            else:
                os.remove(path)
def prepare_system_restore():
    """Stop everything that could interfere with the restore: site processes
    and tmpfs mounts, cluster processes and selected system services."""
    import cma  # pylint: disable=import-error
    log("Cleaning up Check_MK processess and temporary filesystems")
    cma.free_omd_ressources(graceful=False)

    log("Cleaning up (eventual running) cluster processess")
    cma.cleanup_cluster_processes(graceful=False)

    log("Cleaning up system processess")
    cleanup_system_processes()
def cleanup_directory_contents(base_path, excludes=None):
    """Remove everything below base_path except the full paths listed in
    excludes (the base_path directory itself is kept)."""
    excluded_paths = excludes or []
    for name in os.listdir(base_path):
        path = base_path + "/" + name
        if path in excluded_paths:
            continue

        log("Cleaning up %s" % path)
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
def cleanup_system_processes():
    # Stop system services that may hold files open during the restore
    os.system("/etc/init.d/nullmailer stop")
def do_system_rw_restore(opts, info):
    # Restore the /rw volume archive; returns the list of restored paths
    return restore_system_backup(system_backup_archive_path())
def do_system_data_restore(opts, info):
    # Restore the /omd data volume archive; returns the list of restored paths
    return restore_system_backup(system_data_backup_archive_path())
def restore_system_backup(backup_path):
    """Extract a system archive to / and return the restored paths.

    A dedicated thread decodes the archive (decryption/decompression via
    RestoreStream) into a pipe while this thread untars from the pipe."""
    with open(backup_path, "r") as backup_file:
        s = RestoreStream(
            stream=backup_file, is_alive=lambda: False, key_ident=g_job_config["encrypt"])

        pipein_fd, pipeout_fd = os.pipe()
        pipein = os.fdopen(pipein_fd)

        # Write to buffer in dedicated thread
        t = threading.Thread(target=lambda: read_from_tarfile_threaded(s, pipeout_fd))
        t.daemon = True
        t.start()

        try:
            # "r|*" reads the (possibly compressed) tar as a stream from the pipe
            with MKTarFile.open(fileobj=pipein, mode="r|*") as tar:
                tar.extractall("/")
                return ["/%s" % name for name in tar.getnames()]
        except ReadError:
            if opt_debug:
                raise
            raise MKGeneralException("Failed to read data from backup")
def read_from_tarfile_threaded(s, pipeout_fd):
    """Thread target: feed the decoded restore stream into the write end of
    the pipe and close it afterwards so the reader sees EOF."""
    pipeout = os.fdopen(pipeout_fd, "w")

    try:
        for chunk in s.process():
            pipeout.write(chunk)
    # "except ... as" instead of the Python-2-only comma syntax
    except IOError as e:
        log("Error while sending data to restore process: %s" % e)

    # s.process() ends when backup_file is processed. Then end the restore process.
    pipeout.close()
def complete_restore():
    """Final bookkeeping after a successful restore: remove stale job state
    files and log the summary line."""
    cleanup_backup_job_states()

    state = load_state()
    elapsed = time.time() - state["started"]
    log("--- Restore completed (Duration: %s, IO: %s/s) ---" %
        (render.timespan(elapsed), render.fmt_bytes(state["bytes_per_second"])))
def mode_list(target_id, opts):
    """Entry point of the "list" mode: print all backups stored on the given
    target together with their meta information."""
    if target_id not in g_config["targets"]:
        raise MKGeneralException("This backup target does not exist. Choose one of: %s" % ", ".join(
            g_config["targets"].keys()))

    verify_target_is_ready(target_id)

    fmt = "%-9s %-20s %-16s %52s\n"
    fmt_detail = (" " * 30) + " %-20s %48s\n"
    sys.stdout.write(fmt % ("Type", "Job", "Details", ""))
    sys.stdout.write("%s\n" % ("-" * 100))
    # One mkbackup.info per backup directory below the target path
    for path in sorted(glob.glob("%s/*/mkbackup.info" % target_path(target_id))):
        info = load_backup_info(path)
        from_info = info["hostname"]
        if "site_id" in info:
            from_info += " (Site: %s)" % info["site_id"]
        sys.stdout.write(fmt % (info["type"], info["job_id"], "Backup-ID:", info["backup_id"]))

        sys.stdout.write(fmt_detail % ("From:", from_info))
        sys.stdout.write(fmt_detail % ("Finished:", render.date_and_time(info["finished"])))
        sys.stdout.write(fmt_detail % ("Size:", render.fmt_bytes(info["size"])))
        if info["config"]["encrypt"] is not None:
            sys.stdout.write(fmt_detail % ("Encrypted:", info["config"]["encrypt"]))
        else:
            sys.stdout.write(fmt_detail % ("Encrypted:", "No"))
        sys.stdout.write("\n")
    sys.stdout.write("\n")
def mode_jobs(opts):
    """Entry point of the "jobs" mode: print a table of configured jobs."""
    fmt = "%-29s %-30s\n"
    sys.stdout.write(fmt % ("Job-ID", "Title"))
    sys.stdout.write("%s\n" % ("-" * 60))
    for job_id in sorted(g_config["jobs"]):
        job_cfg = g_config["jobs"][job_id]
        sys.stdout.write(fmt % (job_id, job_cfg["title"].encode("utf-8")))
def mode_targets(opts):
    """Entry point of the "targets" mode: print a table of configured targets."""
    fmt = "%-29s %-30s\n"
    sys.stdout.write(fmt % ("Target-ID", "Title"))
    sys.stdout.write("%s\n" % ("-" * 60))
    for target_id in sorted(g_config["targets"]):
        target_cfg_entry = g_config["targets"][target_id]
        sys.stdout.write(fmt % (target_id, target_cfg_entry["title"].encode("utf-8")))
def usage(error=None):
    """Print usage information (optionally preceded by an error message on
    stderr) and terminate with exit code 3."""
    if error:
        sys.stderr.write("ERROR: %s\n" % error)
    sys.stdout.write("Usage: mkbackup [OPTIONS] MODE [MODE_ARGUMENTS...] [MODE_OPTIONS...]\n")
    sys.stdout.write("\n")
    sys.stdout.write("OPTIONS:\n")
    sys.stdout.write("\n")
    sys.stdout.write(" --verbose Enable verbose output, twice for more details\n")
    sys.stdout.write(" --debug Let Python exceptions come through\n")
    sys.stdout.write(" --version Print the version of the program\n")
    sys.stdout.write("\n")
    sys.stdout.write("MODES:\n")
    sys.stdout.write("\n")

    # One indented paragraph per mode, followed by its arguments and options
    for mode_name, mode in sorted(modes.items()):
        mode_indent = " " * 18
        wrapped_descr = textwrap.fill(
            mode["description"],
            width=82,
            initial_indent=" %-13s " % mode_name,
            subsequent_indent=mode_indent)
        sys.stdout.write(wrapped_descr + "\n")
        sys.stdout.write("\n")
        if "args" in mode:
            sys.stdout.write("%sMODE ARGUMENTS:\n" % mode_indent)
            sys.stdout.write("\n")
            for arg in mode["args"]:
                sys.stdout.write("%s %-10s %s\n" % (mode_indent, arg["id"], arg["description"]))
            sys.stdout.write("\n")

        opts = mode_options(mode)
        if opts:
            sys.stdout.write("%sMODE OPTIONS:\n" % mode_indent)
            sys.stdout.write("\n")

            for opt_id, opt in sorted(opts.items(), key=lambda k_v: k_v[0]):
                sys.stdout.write("%s --%-13s %s\n" % (mode_indent, opt_id, opt["description"]))
            sys.stdout.write("\n")

    sys.stdout.write("\n")
    sys.exit(3)
def mode_options(mode):
    """Return the option spec of a mode; root-only options are included when
    running as root."""
    available = dict(mode.get("opts", {}))
    if is_root():
        available.update(mode.get("root_opts", {}))
    return available
def interrupt_handler(signum, frame):
    # Convert the signal into an exception handled by the __main__ guard
    raise MKTerminate("Caught signal: %d" % signum)
def register_signal_handlers():
    # Translate SIGTERM into a MKTerminate exception for clean shutdown
    signal.signal(signal.SIGTERM, interrupt_handler)
def init_logging():
    # On the appliance log via syslog; sites log through their own channels
    if is_cma():
        syslog.openlog("mkbackup")
g_config = {}  # loaded mkbackup configuration (jobs, targets), set in main()
opt_verbose = 0  # verbosity level; --verbose may be given multiple times
opt_debug = False  # --debug: let Python exceptions come through
def main():
    """Entry point: parse the global options, load the configuration and
    dispatch to the mode_*() function selected on the command line."""
    global opt_debug, opt_verbose, g_config

    register_signal_handlers()
    init_logging()

    short_options = "h"
    long_options = ["help", "version", "verbose", "debug"]

    try:
        opts, args = getopt.getopt(sys.argv[1:], short_options, long_options)
    # "except ... as" instead of the Python-2-only comma syntax
    except getopt.GetoptError as e:
        usage("%s" % e)

    for o, _unused_a in opts:
        if o in ["-h", "--help"]:
            usage()
        elif o == "--version":
            sys.stdout.write("mkbackup %s\n" % VERSION)
            sys.exit(0)
        elif o == "--verbose":
            opt_verbose += 1
        elif o == "--debug":
            opt_debug = True

    try:
        mode_name = args.pop(0)
    except IndexError:
        usage("Missing operation mode")

    try:
        mode = modes[mode_name]
    except KeyError:
        usage("Invalid operation mode")

    try:
        g_config = load_config()
    except IOError:
        if opt_debug:
            raise
        raise MKGeneralException("mkbackup is not configured yet.")

    # Load the mode specific options. There are no mode specific short
    # options, so pass the empty string getopt expects instead of a list.
    try:
        mode_opts, mode_args = getopt.getopt(args, "", list(mode_options(mode).keys()))
    except getopt.GetoptError as e:
        usage("%s" % e)

    # Validate arguments
    if len(mode_args) != len(mode.get("args", [])):
        usage("Invalid number of arguments for this mode")

    # Merge global and mode options into one dict, stripping leading dashes
    opt_dict = dict([(k.lstrip("-"), v) for k, v in opts + mode_opts])

    globals()["mode_%s" % mode_name](*mode_args, opts=opt_dict)
if __name__ == "__main__":
    try:
        main()
    # "except ... as" instead of the Python-2-only comma syntax
    except MKTerminate as e:
        # Raised by the SIGTERM handler
        sys.stderr.write("%s\n" % e)
        sys.exit(1)

    except KeyboardInterrupt:
        sys.stderr.write("Terminated.\n")
        sys.exit(0)

    except MKGeneralException as e:
        sys.stderr.write("%s\n" % e)
        if opt_debug:
            raise
        sys.exit(3)