1 import os
, time
, pickle
, logging
, shutil
3 from autotest_lib
.client
.common_lib
import global_config
4 from autotest_lib
.server
import utils
# Optional site-specific hooks for crashdump and crashinfo collection. Both
# names are looked up in autotest_lib.server.site_crashcollect through
# utils.import_site_function; the trailing lambda takes the same
# (host, test_start_time) arguments and returns None.
# NOTE(review): the lambda is presumably the fallback used when the site
# module does not provide the hook -- confirm against import_site_function.
get_site_crashdumps = utils.import_site_function(
    __file__,
    "autotest_lib.server.site_crashcollect",
    "get_site_crashdumps",
    lambda host, test_start_time: None)
get_site_crashinfo = utils.import_site_function(
    __file__,
    "autotest_lib.server.site_crashcollect",
    "get_site_crashinfo",
    lambda host, test_start_time: None)
def get_crashdumps(host, test_start_time):
    """Collect crashdumps by delegating to the site-specific hook.

    @param host: Passed through unchanged to get_site_crashdumps
    @param test_start_time: Passed through unchanged to get_site_crashdumps
    """
    get_site_crashdumps(host, test_start_time)
def get_crashinfo(host, test_start_time):
    """Gather crash information from a host.

    Crashdumps are always requested first. The remaining collection steps
    only run when wait_for_machine_to_recover() reports success: the
    site-specific crashinfo hook, /var/log/messages,
    /var/log/monitor-ssh-reboots, the output of dmesg, and finally any
    uncollected client logs.

    @param host: The host to collect crash information from
    @param test_start_time: Passed through to the crashdump/crashinfo hooks
    """
    logging.info("Collecting crash information...")

    # crashdumps are treated as part of the general crashinfo
    get_crashdumps(host, test_start_time)

    if not wait_for_machine_to_recover(host):
        return

    # site-specific collection runs before the generic steps
    get_site_crashinfo(host, test_start_time)

    dest_dir = get_crashinfo_dir(host)
    collect_messages(host)
    collect_log_file(host, "/var/log/monitor-ssh-reboots", dest_dir)
    dmesg_path = os.path.join(dest_dir, "dmesg")
    collect_command(host, "dmesg", dmesg_path)
    collect_uncollected_logs(host)
# Default number of hours to wait for a host before giving up on crash
# collection; read from the SERVER section of the global config as a float,
# with 4.0 as the default.
HOURS_TO_WAIT = global_config.global_config.get_config_value(
    'SERVER',
    'crash_collection_hours_to_wait',
    type=float,
    default=4.0)
def wait_for_machine_to_recover(host, hours_to_wait=HOURS_TO_WAIT):
    """Wait for a machine (possibly down) to become accessible again.

    @param host: A RemoteHost instance to wait on
    @param hours_to_wait: Number of hours to wait before giving up

    @returns: True if the machine comes back up, False otherwise
    """
    # NOTE(review): the "already up" probe is done with host.is_up();
    # confirm this matches the host API in use.
    if host.is_up():
        logging.info("%s already up, collecting crash info", host.hostname)
        return True

    # The timestamp is only reported in the "waiting" message, so it is
    # taken right before the wait starts.
    current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
    logging.info("Waiting %s hours for %s to come up (%s)",
                 hours_to_wait, host.hostname, current_time)

    # wait_up takes its timeout in seconds
    if not host.wait_up(timeout=hours_to_wait * 3600):
        logging.warning("%s down, unable to collect crash info",
                        host.hostname)
        return False

    logging.info("%s is back up, collecting crash info", host.hostname)
    return True
def get_crashinfo_dir(host):
    """Find and if necessary create a directory to store crashinfo in.

    The directory is named "crashinfo.<hostname>" and lives under
    host.job.resultdir when that is set, otherwise under the current
    working directory.

    @param host: The RemoteHost object that crashinfo will be collected from

    @returns: The path to an existing directory for writing crashinfo into
    """
    # host.job (or its resultdir) may be missing or empty; fall back to cwd
    host_resultdir = getattr(getattr(host, "job", None), "resultdir", None)
    base_dir = host_resultdir or os.path.abspath(os.getcwd())
    infodir = os.path.join(base_dir, "crashinfo.%s" % host.hostname)

    # Attempt the mkdir directly instead of checking first, so a concurrent
    # creator cannot make this fail; only re-raise if the path still does
    # not exist afterwards.
    try:
        os.mkdir(infodir)
    except OSError:
        if not os.path.exists(infodir):
            raise
    return infodir
def collect_log_file(host, log_path, dest_path):
    """Collects a log file from the remote machine.

    Log files are collected from the remote machine and written into the
    destination path. If dest_path is a directory, the log file will be named
    using the basename of the remote log path.

    Collection is best-effort: a failure is logged as a warning and not
    propagated to the caller.

    @param host: The RemoteHost to collect logs from
    @param log_path: The remote path to collect the log file from
    @param dest_path: A path (file or directory) to write the copied logs into
    """
    logging.info("Collecting %s...", log_path)
    try:
        host.get_file(log_path, dest_path, preserve_perm=False)
    except Exception:
        logging.warning("Collection of %s failed", log_path)
def collect_command(host, command, dest_path):
    """Collects the result of a command on the remote machine.

    The standard output of the command will be collected and written into the
    destination path. The destination path is assumed to be a filename; the
    output is written with utils.open_write_close.

    Collection is best-effort: a failure while running the command or
    writing the result is logged as a warning and not propagated.

    @param host: The RemoteHost to collect from
    @param command: A shell command to run on the remote machine and capture
            the output from
    @param dest_path: A file path to write the results of the log into
    """
    logging.info("Collecting '%s' ...", command)
    # The command's stdout is tee'd into /dev/null; the with-block ensures
    # the handle is closed on every path.
    with open("/dev/null", "w") as devnull:
        try:
            result = host.run(command, stdout_tee=devnull).stdout
            utils.open_write_close(dest_path, result)
        except Exception as e:
            logging.warning("Collection of '%s' failed:\n%s", command, e)
def collect_uncollected_logs(host):
    """Collects any leftover uncollected logs from the client.

    Only entries of host.job.get_client_logs() whose hostname equals
    host.hostname are fetched. Nothing is done when the host has no job.
    Failures are logged as a warning and not propagated.

    @param host: The RemoteHost to collect from
    """
    # Same tolerant lookup get_crashinfo_dir uses: job may be absent or None
    job = getattr(host, "job", None)
    if not job:
        return

    try:
        for hostname, remote_path, local_path in job.get_client_logs():
            if hostname != host.hostname:
                continue
            logging.info("Retrieving logs from %s:%s into %s",
                         hostname, remote_path, local_path)
            # NOTE(review): the trailing "/" on both paths presumably makes
            # get_file copy directory contents -- confirm against get_file.
            host.get_file(remote_path + "/", local_path + "/")
    except Exception as e:
        logging.warning("Error while trying to collect stranded "
                        "Autotest client logs: %s", e)
def collect_messages(host):
    """Collects the 'new' contents of /var/log/messages.

    If host.VAR_LOG_MESSAGES_COPY_PATH is on the remote machine, collects
    the contents of /var/log/messages excluding whatever initial contents
    are already present in host.VAR_LOG_MESSAGES_COPY_PATH. If it is not
    present, simply collects the entire contents of /var/log/messages.

    The result is written to "messages" inside the crashinfo directory.
    Failures are logged as a warning and not propagated.

    @param host: The RemoteHost to collect from
    """
    crashinfo_dir = get_crashinfo_dir(host)

    try:
        # paths to the messages files
        messages = os.path.join(crashinfo_dir, "messages")
        messages_raw = os.path.join(crashinfo_dir, "messages.raw")
        messages_at_start = os.path.join(crashinfo_dir, "messages.at_start")

        # grab the files from the remote host
        collect_log_file(host, host.VAR_LOG_MESSAGES_COPY_PATH,
                         messages_at_start)
        collect_log_file(host, "/var/log/messages", messages_raw)

        # Figure out how much of messages.raw to skip. The first line of the
        # snapshot taken at start should match the first line of the current
        # messages; if it doesn't, messages has been erased or rotated and we
        # just grab all of it.
        size_at_start = 0
        if os.path.exists(messages_at_start):
            first_line_at_start = utils.read_one_line(messages_at_start)
            first_line_now = utils.read_one_line(messages_raw)
            if first_line_at_start == first_line_now:
                size_at_start = os.path.getsize(messages_at_start)

        # Copy in binary mode: size_at_start is a byte count, so it is only
        # a valid seek offset on a binary stream. The with-blocks close both
        # files even if the copy fails.
        with open(messages_raw, "rb") as raw_messages_file:
            with open(messages, "wb") as messages_file:
                raw_messages_file.seek(size_at_start)
                shutil.copyfileobj(raw_messages_file, messages_file)

        # get rid of the "raw" versions of messages
        os.remove(messages_raw)
        if os.path.exists(messages_at_start):
            os.remove(messages_at_start)
    except Exception as e:
        logging.warning("Error while collecting /var/log/messages: %s", e)