KVM test: introduce VM exceptions
[autotest-zwu.git] / server / crashcollect.py
blob1f28861dcdf60f57fd181453cb4125790eb1a758
1 import os, time, pickle, logging, shutil
3 from autotest_lib.server import utils
# Look up optional site-specific hooks for crashdump and crashinfo
# collection. The trailing lambda is the fallback handed to
# import_site_function; both fallbacks accept (host, test_start_time) and
# do nothing.
_SITE_CRASHCOLLECT_MODULE = "autotest_lib.server.site_crashcollect"

get_site_crashdumps = utils.import_site_function(
    __file__,
    _SITE_CRASHCOLLECT_MODULE,
    "get_site_crashdumps",
    lambda host, test_start_time: None)

get_site_crashinfo = utils.import_site_function(
    __file__,
    _SITE_CRASHCOLLECT_MODULE,
    "get_site_crashinfo",
    lambda host, test_start_time: None)
def get_crashdumps(host, test_start_time):
    """Collect crashdumps from a host by delegating to the site hook.

    @param host: The host object handed through to get_site_crashdumps
    @param test_start_time: Handed through unchanged to get_site_crashdumps
    """
    get_site_crashdumps(host, test_start_time)
def get_crashinfo(host, test_start_time):
    """Collect crashdumps and general crash information from a host.

    Crashdumps are always requested first. The remaining collection steps
    only run if the host is (or becomes) reachable.

    @param host: The host object to collect crash information from
    @param test_start_time: Handed through to the crashdump/crashinfo hooks
    """
    logging.info("Collecting crash information...")

    # include crashdumps as part of the general crashinfo
    get_crashdumps(host, test_start_time)

    # nothing else can be gathered from a machine that never came back
    if not wait_for_machine_to_recover(host):
        return

    # run any site-specific collection
    get_site_crashinfo(host, test_start_time)

    crashinfo_dir = get_crashinfo_dir(host)
    dmesg_path = os.path.join(crashinfo_dir, "dmesg")
    collect_messages(host)
    collect_log_file(host, "/var/log/monitor-ssh-reboots", crashinfo_dir)
    collect_command(host, "dmesg", dmesg_path)
    collect_uncollected_logs(host)
def wait_for_machine_to_recover(host, hours_to_wait=4.0):
    """Wait for a machine (possibly down) to become accessible again.

    @param host: A RemoteHost instance to wait on
    @param hours_to_wait: Number of hours to wait before giving up

    @returns: True if the machine comes back up, False otherwise
    """
    # timestamp is taken before probing the host so the log shows when the
    # wait was started
    current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
    if host.is_up():
        logging.info("%s already up, collecting crash info", host.hostname)
        return True

    # Log the timeout actually in effect; callers may override the default,
    # so a hard-coded "four hours" would be misleading.
    logging.info("Waiting %g hours for %s to come up (%s)",
                 hours_to_wait, host.hostname, current_time)
    if not host.wait_up(timeout=hours_to_wait * 3600):
        logging.warning("%s down, unable to collect crash info",
                        host.hostname)
        return False

    logging.info("%s is back up, collecting crash info", host.hostname)
    return True
def get_crashinfo_dir(host):
    """Return the crashinfo directory for a host, creating it if missing.

    The directory is named "crashinfo.<hostname>" and lives under the
    resultdir of the host's job when one is available, otherwise under the
    current working directory.

    @param host: The RemoteHost object that crashinfo will be collected from

    @returns: The path to an existing directory for writing crashinfo into
    """
    job = getattr(host, "job", None)
    # a missing job, a missing resultdir and an empty resultdir all fall
    # back to the current working directory
    base_dir = getattr(job, "resultdir", None) or os.path.abspath(os.getcwd())
    crashinfo_path = os.path.join(base_dir, "crashinfo.%s" % host.hostname)
    if os.path.exists(crashinfo_path):
        return crashinfo_path
    os.mkdir(crashinfo_path)
    return crashinfo_path
def collect_log_file(host, log_path, dest_path):
    """Collects a log file from the remote machine.

    Log files are collected from the remote machine and written into the
    destination path. If dest_path is a directory, the log file will be named
    using the basename of the remote log path.

    Collection is best-effort: any failure is logged as a warning and is
    not propagated to the caller.

    @param host: The RemoteHost to collect logs from
    @param log_path: The remote path to collect the log file from
    @param dest_path: A path (file or directory) to write the copied logs into
    """
    logging.info("Collecting %s...", log_path)
    try:
        host.get_file(log_path, dest_path, preserve_perm=False)
    except Exception as e:
        # include the reason so a failed collection can be diagnosed from
        # the log, matching what collect_command reports
        logging.warning("Collection of %s failed:\n%s", log_path, e)
def collect_command(host, command, dest_path):
    """Collects the result of a command on the remote machine.

    The standard output of the command will be collected and written into the
    destination path. The destination path is assumed to be a filename and
    not a directory.

    Collection is best-effort: any failure while running the command or
    writing its output is logged as a warning and is not propagated.

    @param host: The RemoteHost to collect from
    @param command: A shell command to run on the remote machine and capture
            the output from.
    @param dest_path: A file path to write the results of the log into
    """
    logging.info("Collecting '%s' ...", command)
    # stdout is tee'd into the null device; the captured output is taken
    # from the result object instead
    with open(os.devnull, "w") as devnull:
        try:
            result = host.run(command, stdout_tee=devnull).stdout
            utils.open_write_close(dest_path, result)
        except Exception as e:
            logging.warning("Collection of '%s' failed:\n%s", command, e)
def collect_uncollected_logs(host):
    """Collects any leftover uncollected logs from the client.

    Asks the host's job for its list of client logs and fetches every entry
    recorded for this host's hostname. Does nothing when the host has no
    job. Failures are logged as a warning and are not propagated.

    @param host: The RemoteHost to collect from
    """
    # tolerate hosts with no job attribute at all, as get_crashinfo_dir does
    job = getattr(host, "job", None)
    if not job:
        return

    try:
        for hostname, remote_path, local_path in job.get_client_logs():
            if hostname != host.hostname:
                continue
            logging.info("Retrieving logs from %s:%s into %s",
                         hostname, remote_path, local_path)
            host.get_file(remote_path + "/", local_path + "/")
    except Exception as e:
        logging.warning("Error while trying to collect stranded "
                        "Autotest client logs: %s", e)
def collect_messages(host):
    """Collects the 'new' contents of /var/log/messages.

    If host.VAR_LOG_MESSAGES_COPY_PATH is on the remote machine, collects
    the contents of /var/log/messages excluding whatever initial contents
    are already present in host.VAR_LOG_MESSAGES_COPY_PATH. If it is not
    present, simply collects the entire contents of /var/log/messages.

    The result is written to "messages" inside the host's crashinfo
    directory. Failures are logged as a warning and are not propagated.

    @param host: The RemoteHost to collect from
    """
    crashinfo_dir = get_crashinfo_dir(host)

    try:
        # paths to the messages files
        messages = os.path.join(crashinfo_dir, "messages")
        messages_raw = os.path.join(crashinfo_dir, "messages.raw")
        messages_at_start = os.path.join(crashinfo_dir, "messages.at_start")

        # grab the files from the remote host
        collect_log_file(host, host.VAR_LOG_MESSAGES_COPY_PATH,
                         messages_at_start)
        collect_log_file(host, "/var/log/messages", messages_raw)

        # figure out how much of messages.raw to skip: the first line of
        # the messages at start should match the first line of the current
        # messages; if it doesn't then messages has been erased or rotated
        # and we just grab all of it
        size_at_start = 0
        if os.path.exists(messages_at_start):
            first_line_at_start = utils.read_one_line(messages_at_start)
            first_line_now = utils.read_one_line(messages_raw)
            if first_line_at_start == first_line_now:
                size_at_start = os.path.getsize(messages_at_start)

        # size_at_start is a byte count, so seek and copy in binary mode;
        # the context managers close both files even if the copy fails
        with open(messages_raw, "rb") as raw_messages_file:
            with open(messages, "wb") as messages_file:
                raw_messages_file.seek(size_at_start)
                shutil.copyfileobj(raw_messages_file, messages_file)

        # get rid of the "raw" versions of messages
        os.remove(messages_raw)
        if os.path.exists(messages_at_start):
            os.remove(messages_at_start)
    except Exception as e:
        logging.warning("Error while collecting /var/log/messages: %s", e)