// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/process_util.h"

#import <Cocoa/Cocoa.h>
#include <crt_externs.h>
#include <dlfcn.h>
#include <errno.h>
#include <mach/mach.h>
#include <mach/mach_init.h>
#include <mach/mach_vm.h>
#include <mach/shared_region.h>
#include <mach/task.h>
#include <mach-o/nlist.h>
#include <malloc/malloc.h>
#import <objc/runtime.h>
#include <signal.h>
#include <sys/event.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <algorithm>
#include <new>
#include <string>

#include "base/debug/debugger.h"
#include "base/file_util.h"
#include "base/hash_tables.h"
#include "base/lazy_instance.h"
#include "base/logging.h"
#include "base/mac/mac_util.h"
#include "base/mac/scoped_mach_port.h"
#include "base/posix/eintr_wrapper.h"
#include "base/string_util.h"
#include "base/sys_info.h"
#include "base/threading/thread_local.h"
#include "base/time.h"
#include "third_party/apple_apsl/CFBase.h"
#include "third_party/apple_apsl/malloc.h"
#include "third_party/mach_override/mach_override.h"

namespace base {

void RestoreDefaultExceptionHandler() {
  // This function is tailored to remove the Breakpad exception handler.
  // exception_mask matches s_exception_mask in
  // breakpad/src/client/mac/handler/exception_handler.cc
  const exception_mask_t exception_mask = EXC_MASK_BAD_ACCESS |
                                          EXC_MASK_BAD_INSTRUCTION |
                                          EXC_MASK_ARITHMETIC |
                                          EXC_MASK_BREAKPOINT;

  // Setting the exception port to MACH_PORT_NULL may not be entirely
  // kosher to restore the default exception handler, but in practice,
  // it results in the exception port being set to Apple Crash Reporter,
  // the desired behavior.
  task_set_exception_ports(mach_task_self(), exception_mask, MACH_PORT_NULL,
                           EXCEPTION_DEFAULT, THREAD_STATE_NONE);
}

ProcessIterator::ProcessIterator(const ProcessFilter* filter)
    : index_of_kinfo_proc_(0),
      filter_(filter) {
  // Get a snapshot of all of my processes (yes, as we loop it can go stale,
  // but trying to find where we were in a constantly changing list is
  // basically impossible).

  int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_UID, geteuid() };

  // Since more processes could start between when we get the size and when
  // we get the list, we do a loop to keep trying until we get it.
  bool done = false;
  int try_num = 1;
  const int max_tries = 10;
  do {
    // Get the size of the buffer.
    size_t len = 0;
    if (sysctl(mib, arraysize(mib), NULL, &len, NULL, 0) < 0) {
      DLOG(ERROR) << "failed to get the size needed for the process list";
      kinfo_procs_.resize(0);
      done = true;
    } else {
      size_t num_of_kinfo_proc = len / sizeof(struct kinfo_proc);
      // Leave some spare room for process table growth (more could show up
      // between when we check and now).
      num_of_kinfo_proc += 16;
      kinfo_procs_.resize(num_of_kinfo_proc);
      len = num_of_kinfo_proc * sizeof(struct kinfo_proc);
      // Load the list of processes.
      if (sysctl(mib, arraysize(mib), &kinfo_procs_[0], &len, NULL, 0) < 0) {
        // If we get a mem error, it just means we need a bigger buffer, so
        // loop around again. Anything else is a real error, so give up.
        if (errno != ENOMEM) {
          DLOG(ERROR) << "failed to get the process list";
          kinfo_procs_.resize(0);
          done = true;
        }
      } else {
        // Got the list, just make sure we're sized exactly right.
        size_t num_of_kinfo_proc = len / sizeof(struct kinfo_proc);
        kinfo_procs_.resize(num_of_kinfo_proc);
        done = true;
      }
    }
  } while (!done && (try_num++ < max_tries));

  if (!done) {
    DLOG(ERROR) << "failed to collect the process list in a few tries";
    kinfo_procs_.resize(0);
  }
}

ProcessIterator::~ProcessIterator() {
}

bool ProcessIterator::CheckForNextProcess() {
  std::string data;
  for (; index_of_kinfo_proc_ < kinfo_procs_.size(); ++index_of_kinfo_proc_) {
    kinfo_proc& kinfo = kinfo_procs_[index_of_kinfo_proc_];

    // Skip processes just awaiting collection.
    if ((kinfo.kp_proc.p_pid > 0) && (kinfo.kp_proc.p_stat == SZOMB))
      continue;

    int mib[] = { CTL_KERN, KERN_PROCARGS, kinfo.kp_proc.p_pid };

    // Find out what size buffer we need.
    size_t data_len = 0;
    if (sysctl(mib, arraysize(mib), NULL, &data_len, NULL, 0) < 0) {
      DVPLOG(1) << "failed to figure out the buffer size for a commandline";
      continue;
    }

    data.resize(data_len);
    if (sysctl(mib, arraysize(mib), &data[0], &data_len, NULL, 0) < 0) {
      DVPLOG(1) << "failed to fetch a commandline";
      continue;
    }

    // |data| contains all the command line parameters of the process,
    // separated by blocks of one or more null characters. We tokenize |data|
    // into a vector of strings using '\0' as a delimiter and populate
    // |entry_.cmd_line_args_|.
    std::string delimiters;
    delimiters.push_back('\0');
    Tokenize(data, delimiters, &entry_.cmd_line_args_);

    // |data| starts with the full executable path followed by a null
    // character. We search for the first instance of '\0' and extract
    // everything before it to populate |entry_.exe_file_|.
    size_t exec_name_end = data.find('\0');
    if (exec_name_end == std::string::npos) {
      DLOG(ERROR) << "command line data didn't match expected format";
      continue;
    }

    entry_.pid_ = kinfo.kp_proc.p_pid;
    entry_.ppid_ = kinfo.kp_eproc.e_ppid;
    entry_.gid_ = kinfo.kp_eproc.e_pgid;
    size_t last_slash = data.rfind('/', exec_name_end);
    if (last_slash == std::string::npos)
      entry_.exe_file_.assign(data, 0, exec_name_end);
    else
      entry_.exe_file_.assign(data, last_slash + 1,
                              exec_name_end - last_slash - 1);
    // Start with the next entry next time through.
    ++index_of_kinfo_proc_;
    return true;
  }
  return false;
}

bool NamedProcessIterator::IncludeEntry() {
  return (executable_name_ == entry().exe_file() &&
          ProcessIterator::IncludeEntry());
}
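
// Usage sketch (illustrative only; the iterator interface is declared in
// base/process_util.h):
//   NamedProcessIterator iter("Google Chrome Helper", NULL);
//   while (const ProcessEntry* entry = iter.NextProcessEntry())
//     VLOG(1) << entry->pid() << " " << entry->exe_file();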

// ------------------------------------------------------------------------
// NOTE: about ProcessMetrics
//
// Getting a mach task from a pid for another process requires permissions in
// general, so there doesn't really seem to be a way to do these (and spinning
// up ps to fetch each stat seems dangerous to put in a base API for anyone to
// call). Child processes IPC their Mach task port to the parent, so return
// something if a port is available; otherwise return 0.
// ------------------------------------------------------------------------

ProcessMetrics::ProcessMetrics(ProcessHandle process,
                               ProcessMetrics::PortProvider* port_provider)
    : process_(process),
      last_time_(0),
      last_system_time_(0),
      port_provider_(port_provider) {
  processor_count_ = SysInfo::NumberOfProcessors();
}

// static
ProcessMetrics* ProcessMetrics::CreateProcessMetrics(
    ProcessHandle process,
    ProcessMetrics::PortProvider* port_provider) {
  return new ProcessMetrics(process, port_provider);
}

bool ProcessMetrics::GetIOCounters(IoCounters* io_counters) const {
  return false;
}
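
// Usage sketch (illustrative): the caller owns the returned ProcessMetrics.
//   scoped_ptr<ProcessMetrics> metrics(
//       ProcessMetrics::CreateProcessMetrics(handle, NULL));
//   size_t resident_bytes = metrics->GetWorkingSetSize();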

static bool GetTaskInfo(mach_port_t task, task_basic_info_64* task_info_data) {
  if (task == MACH_PORT_NULL)
    return false;
  mach_msg_type_number_t count = TASK_BASIC_INFO_64_COUNT;
  kern_return_t kr = task_info(task,
                               TASK_BASIC_INFO_64,
                               reinterpret_cast<task_info_t>(task_info_data),
                               &count);
  // Most likely cause for failure: |task| is a zombie.
  return kr == KERN_SUCCESS;
}

size_t ProcessMetrics::GetPagefileUsage() const {
  task_basic_info_64 task_info_data;
  if (!GetTaskInfo(TaskForPid(process_), &task_info_data))
    return 0;
  return task_info_data.virtual_size;
}

size_t ProcessMetrics::GetPeakPagefileUsage() const {
  return 0;
}

size_t ProcessMetrics::GetWorkingSetSize() const {
  task_basic_info_64 task_info_data;
  if (!GetTaskInfo(TaskForPid(process_), &task_info_data))
    return 0;
  return task_info_data.resident_size;
}

size_t ProcessMetrics::GetPeakWorkingSetSize() const {
  return 0;
}

static bool GetCPUTypeForProcess(pid_t pid, cpu_type_t* cpu_type) {
  size_t len = sizeof(*cpu_type);
  int result = sysctlbyname("sysctl.proc_cputype",
                            cpu_type,
                            &len,
                            NULL,
                            0);
  if (result != 0) {
    DPLOG(ERROR) << "sysctlbyname(\"sysctl.proc_cputype\")";
    return false;
  }
  return true;
}

static bool IsAddressInSharedRegion(mach_vm_address_t addr, cpu_type_t type) {
  if (type == CPU_TYPE_I386) {
    return addr >= SHARED_REGION_BASE_I386 &&
           addr < (SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386);
  } else if (type == CPU_TYPE_X86_64) {
    return addr >= SHARED_REGION_BASE_X86_64 &&
           addr < (SHARED_REGION_BASE_X86_64 + SHARED_REGION_SIZE_X86_64);
  } else {
    return false;
  }
}

// This is a rough approximation of the algorithm that libtop uses.
// private_bytes is the size of private resident memory.
// shared_bytes is the size of shared resident memory.
bool ProcessMetrics::GetMemoryBytes(size_t* private_bytes,
                                    size_t* shared_bytes) {
  kern_return_t kr;
  size_t private_pages_count = 0;
  size_t shared_pages_count = 0;

  if (!private_bytes && !shared_bytes)
    return true;

  mach_port_t task = TaskForPid(process_);
  if (task == MACH_PORT_NULL) {
    DLOG(ERROR) << "Invalid process";
    return false;
  }

  cpu_type_t cpu_type;
  if (!GetCPUTypeForProcess(process_, &cpu_type))
    return false;

  // The same region can be referenced multiple times. To avoid double counting
  // we need to keep track of which regions we've already counted.
  base::hash_set<int> seen_objects;

  // We iterate through each VM region in the task's address map. For shared
  // memory we add up all the pages that are marked as shared. Like libtop we
  // try to avoid counting pages that are also referenced by other tasks. Since
  // we don't have access to the VM regions of other tasks the only hint we
  // have is if the address is in the shared region area.
  //
  // Private memory is much simpler. We simply count the pages that are marked
  // as private or copy on write (COW).
  //
  // See libtop_update_vm_regions in
  // http://www.opensource.apple.com/source/top/top-67/libtop.c
  mach_vm_size_t size = 0;
  for (mach_vm_address_t address = MACH_VM_MIN_ADDRESS;; address += size) {
    vm_region_top_info_data_t info;
    mach_msg_type_number_t info_count = VM_REGION_TOP_INFO_COUNT;
    mach_port_t object_name;
    kr = mach_vm_region(task,
                        &address,
                        &size,
                        VM_REGION_TOP_INFO,
                        (vm_region_info_t)&info,
                        &info_count,
                        &object_name);
    if (kr == KERN_INVALID_ADDRESS) {
      // We're at the end of the address space.
      break;
    } else if (kr != KERN_SUCCESS) {
      DLOG(ERROR) << "Calling mach_vm_region failed with error: "
                  << mach_error_string(kr);
      return false;
    }

    if (IsAddressInSharedRegion(address, cpu_type) &&
        info.share_mode != SM_PRIVATE)
      continue;

    if (info.share_mode == SM_COW && info.ref_count == 1)
      info.share_mode = SM_PRIVATE;

    switch (info.share_mode) {
      case SM_PRIVATE:
        private_pages_count += info.private_pages_resident;
        private_pages_count += info.shared_pages_resident;
        break;
      case SM_COW:
        private_pages_count += info.private_pages_resident;
        // Fall through
      case SM_SHARED:
        if (seen_objects.count(info.obj_id) == 0) {
          // Only count the first reference to this region.
          seen_objects.insert(info.obj_id);
          shared_pages_count += info.shared_pages_resident;
        }
        break;
      default:
        break;
    }
  }

  vm_size_t page_size;
  kr = host_page_size(task, &page_size);
  if (kr != KERN_SUCCESS) {
    DLOG(ERROR) << "Failed to fetch host page size, error: "
                << mach_error_string(kr);
    return false;
  }

  if (private_bytes)
    *private_bytes = private_pages_count * page_size;
  if (shared_bytes)
    *shared_bytes = shared_pages_count * page_size;

  return true;
}
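
// Usage sketch (illustrative):
//   size_t private_bytes = 0;
//   size_t shared_bytes = 0;
//   if (metrics->GetMemoryBytes(&private_bytes, &shared_bytes))
//     VLOG(1) << private_bytes << " private, " << shared_bytes << " shared";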

void ProcessMetrics::GetCommittedKBytes(CommittedKBytes* usage) const {
}

bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
  size_t priv = GetWorkingSetSize();
  if (!priv)
    return false;
  ws_usage->priv = priv / 1024;
  ws_usage->shareable = 0;
  ws_usage->shared = 0;
  return true;
}

#define TIME_VALUE_TO_TIMEVAL(a, r) do {  \
  (r)->tv_sec = (a)->seconds;             \
  (r)->tv_usec = (a)->microseconds;       \
} while (0)
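
// TIME_VALUE_TO_TIMEVAL converts a Mach time_value_t (seconds and
// microseconds) into a BSD struct timeval, so the timeradd() calls in
// GetCPUUsage() below can accumulate live and terminated thread times.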

double ProcessMetrics::GetCPUUsage() {
  mach_port_t task = TaskForPid(process_);
  if (task == MACH_PORT_NULL)
    return 0;

  kern_return_t kr;

  // Libtop explicitly loops over the threads (libtop_pinfo_update_cpu_usage()
  // in libtop.c), but this is more concise and gives the same results:
  task_thread_times_info thread_info_data;
  mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT;
  kr = task_info(task,
                 TASK_THREAD_TIMES_INFO,
                 reinterpret_cast<task_info_t>(&thread_info_data),
                 &thread_info_count);
  if (kr != KERN_SUCCESS) {
    // Most likely cause: |task| is a zombie.
    return 0;
  }

  task_basic_info_64 task_info_data;
  if (!GetTaskInfo(task, &task_info_data))
    return 0;

  // Set total_time: thread info contains live time...
  struct timeval user_timeval, system_timeval, task_timeval;
  TIME_VALUE_TO_TIMEVAL(&thread_info_data.user_time, &user_timeval);
  TIME_VALUE_TO_TIMEVAL(&thread_info_data.system_time, &system_timeval);
  timeradd(&user_timeval, &system_timeval, &task_timeval);

  // ... task info contains terminated time.
  TIME_VALUE_TO_TIMEVAL(&task_info_data.user_time, &user_timeval);
  TIME_VALUE_TO_TIMEVAL(&task_info_data.system_time, &system_timeval);
  timeradd(&user_timeval, &task_timeval, &task_timeval);
  timeradd(&system_timeval, &task_timeval, &task_timeval);

  struct timeval now;
  int retval = gettimeofday(&now, NULL);
  if (retval)
    return 0;

  int64 time = TimeValToMicroseconds(now);
  int64 task_time = TimeValToMicroseconds(task_timeval);

  if ((last_system_time_ == 0) || (last_time_ == 0)) {
    // First call, just set the last values.
    last_system_time_ = task_time;
    last_time_ = time;
    return 0;
  }

  int64 system_time_delta = task_time - last_system_time_;
  int64 time_delta = time - last_time_;
  DCHECK_NE(0U, time_delta);
  if (time_delta == 0)
    return 0;

  last_system_time_ = task_time;
  last_time_ = time;

  return static_cast<double>(system_time_delta * 100.0) / time_delta;
}
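
// The value returned by GetCPUUsage() above is the percentage of one CPU
// consumed between consecutive calls; the first call only seeds
// last_system_time_ and last_time_ and returns 0.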

mach_port_t ProcessMetrics::TaskForPid(ProcessHandle process) const {
  mach_port_t task = MACH_PORT_NULL;
  if (port_provider_)
    task = port_provider_->TaskForPid(process_);
  if (task == MACH_PORT_NULL && process_ == getpid())
    task = mach_task_self();
  return task;
}

// ------------------------------------------------------------------------

// Bytes committed by the system.
size_t GetSystemCommitCharge() {
  base::mac::ScopedMachPort host(mach_host_self());
  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
  vm_statistics_data_t data;
  kern_return_t kr = host_statistics(host, HOST_VM_INFO,
                                     reinterpret_cast<host_info_t>(&data),
                                     &count);
  if (kr) {
    DLOG(WARNING) << "Failed to fetch host statistics.";
    return 0;
  }

  vm_size_t page_size;
  kr = host_page_size(host, &page_size);
  if (kr) {
    DLOG(ERROR) << "Failed to fetch host page size.";
    return 0;
  }

  return (data.active_count * page_size) / 1024;
}
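
// Note that GetSystemCommitCharge() counts only active pages and reports
// kilobytes, not bytes.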

namespace {

// Finds the library path for malloc() and thus the libC part of libSystem,
// which in Lion is in a separate image.
const char* LookUpLibCPath() {
  const void* addr = reinterpret_cast<void*>(&malloc);

  Dl_info info;
  if (dladdr(addr, &info))
    return info.dli_fname;

  DLOG(WARNING) << "Could not find image path for malloc()";
  return NULL;
}

typedef void(*malloc_error_break_t)(void);
malloc_error_break_t g_original_malloc_error_break = NULL;

// Returns the function pointer for malloc_error_break. This symbol is declared
// as __private_extern__ and cannot be dlsym()ed. Instead, use nlist() to
// get it.
malloc_error_break_t LookUpMallocErrorBreak() {
#if ARCH_CPU_32_BITS
  const char* lib_c_path = LookUpLibCPath();
  if (!lib_c_path)
    return NULL;

  // Only need to look up two symbols, but nlist() requires a NULL-terminated
  // array and takes no count.
  struct nlist nl[3];
  bzero(&nl, sizeof(nl));

  // The symbol to find.
  nl[0].n_un.n_name = const_cast<char*>("_malloc_error_break");

  // A reference symbol by which the address of the desired symbol will be
  // calculated.
  nl[1].n_un.n_name = const_cast<char*>("_malloc");

  int rv = nlist(lib_c_path, nl);
  if (rv != 0 || nl[0].n_type == N_UNDF || nl[1].n_type == N_UNDF) {
    return NULL;
  }

  // nlist() returns addresses as offsets in the image, not the instruction
  // pointer in memory. Use the known in-memory address of malloc()
  // to compute the offset for malloc_error_break().
  uintptr_t reference_addr = reinterpret_cast<uintptr_t>(&malloc);
  reference_addr -= nl[1].n_value;
  reference_addr += nl[0].n_value;

  return reinterpret_cast<malloc_error_break_t>(reference_addr);
#endif  // ARCH_CPU_32_BITS

  return NULL;
}

// Simple scoper that saves the current value of errno, resets it to 0, and on
// destruction puts the old value back. This is so that CrMallocErrorBreak can
// safely test errno free from the effects of other routines.
class ScopedClearErrno {
 public:
  ScopedClearErrno() : old_errno_(errno) {
    errno = 0;
  }
  ~ScopedClearErrno() {
    if (errno == 0)
      errno = old_errno_;
  }

 private:
  const int old_errno_;

  DISALLOW_COPY_AND_ASSIGN(ScopedClearErrno);
};

// Combines ThreadLocalBoolean with AutoReset. It would be convenient
// to compose ThreadLocalPointer<bool> with base::AutoReset<bool>, but that
// would require allocating some storage for the bool.
class ThreadLocalBooleanAutoReset {
 public:
  ThreadLocalBooleanAutoReset(ThreadLocalBoolean* tlb, bool new_value)
      : scoped_tlb_(tlb),
        original_value_(tlb->Get()) {
    scoped_tlb_->Set(new_value);
  }
  ~ThreadLocalBooleanAutoReset() {
    scoped_tlb_->Set(original_value_);
  }

 private:
  ThreadLocalBoolean* scoped_tlb_;
  bool original_value_;

  DISALLOW_COPY_AND_ASSIGN(ThreadLocalBooleanAutoReset);
};

base::LazyInstance<ThreadLocalBoolean>::Leaky
    g_unchecked_malloc = LAZY_INSTANCE_INITIALIZER;
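
// g_unchecked_malloc is a thread-local flag. While it is set (see
// UncheckedMalloc() below), CrMallocErrorBreak() treats an EBADF errno as
// benign (failed ASL logging under the sandbox) instead of crashing.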

// NOTE(shess): This is called when the malloc library noticed that the heap
// is fubar. Avoid calls which will re-enter the malloc library.
void CrMallocErrorBreak() {
  g_original_malloc_error_break();

  // Out of memory is certainly not heap corruption, and not necessarily
  // something for which the process should be terminated. Leave that decision
  // to the OOM killer. The EBADF case comes up because the malloc library
  // attempts to log to ASL (syslog) before calling this code, which fails
  // when accessing a Unix-domain socket because of sandboxing.
  if (errno == ENOMEM || (errno == EBADF && g_unchecked_malloc.Get().Get()))
    return;

  // A unit test checks this error message, so it needs to be in release
  // builds.
  char buf[1024] =
      "Terminating process due to a potential for future heap corruption: "
      "errno=";
  char errnobuf[] = {
    '0' + ((errno / 100) % 10),
    '0' + ((errno / 10) % 10),
    '0' + (errno % 10),
    '\000'
  };
  COMPILE_ASSERT(ELAST <= 999, errno_too_large_to_encode);
  strlcat(buf, errnobuf, sizeof(buf));
  RAW_LOG(ERROR, buf);

  // Crash by writing to NULL+errno to allow analyzing errno from
  // crash dump info (setting a breakpad key would re-enter the malloc
  // library). Max documented errno in intro(2) is actually 102, but
  // it really just needs to be "small" to stay on the right vm page.
  const int kMaxErrno = 256;
  char* volatile death_ptr = NULL;
  death_ptr += std::min(errno, kMaxErrno);
  *death_ptr = '!';
}

}  // namespace
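
// EnableTerminationOnHeapCorruption() below interposes CrMallocErrorBreak()
// over the system's malloc_error_break() using mach_override, saving the
// original implementation in g_original_malloc_error_break so it runs first.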

void EnableTerminationOnHeapCorruption() {
#ifdef ADDRESS_SANITIZER
  // Don't do anything special on heap corruption, because it should be handled
  // by AddressSanitizer.
  return;
#endif

  // Only override once, otherwise CrMallocErrorBreak() will recurse
  // into itself.
  if (g_original_malloc_error_break)
    return;

  malloc_error_break_t malloc_error_break = LookUpMallocErrorBreak();
  if (!malloc_error_break) {
    DLOG(WARNING) << "Could not find malloc_error_break";
    return;
  }

  mach_error_t err = mach_override_ptr(
      (void*)malloc_error_break,
      (void*)&CrMallocErrorBreak,
      (void**)&g_original_malloc_error_break);
  if (err != err_none)
    DLOG(WARNING) << "Could not override malloc_error_break; error = " << err;
}

// ------------------------------------------------------------------------

namespace {

bool g_oom_killer_enabled;

// Starting with Mac OS X 10.7, the zone allocators set up by the system are
// read-only, to prevent them from being overwritten in an attack. However,
// blindly unprotecting and reprotecting the zone allocators fails with
// GuardMalloc because GuardMalloc sets up its zone allocator using a block of
// memory in its bss. Explicit saving/restoring of the protection is required.
//
// This function takes a pointer to a malloc zone, de-protects it if necessary,
// and returns (in the out parameters) a region of memory (if any) to be
// re-protected when modifications are complete. This approach assumes that
// there is no contention for the protection of this memory.
void DeprotectMallocZone(ChromeMallocZone* default_zone,
                         mach_vm_address_t* reprotection_start,
                         mach_vm_size_t* reprotection_length,
                         vm_prot_t* reprotection_value) {
  mach_port_t unused;
  *reprotection_start = reinterpret_cast<mach_vm_address_t>(default_zone);
  struct vm_region_basic_info_64 info;
  mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
  kern_return_t result =
      mach_vm_region(mach_task_self(),
                     reprotection_start,
                     reprotection_length,
                     VM_REGION_BASIC_INFO_64,
                     reinterpret_cast<vm_region_info_t>(&info),
                     &count,
                     &unused);
  CHECK(result == KERN_SUCCESS);

  result = mach_port_deallocate(mach_task_self(), unused);
  CHECK(result == KERN_SUCCESS);

  // Does the region fully enclose the zone pointers? Possibly unwarranted
  // simplification used: using the size of a full version 8 malloc zone rather
  // than the actual smaller size if the passed-in zone is not version 8.
  CHECK(*reprotection_start <=
        reinterpret_cast<mach_vm_address_t>(default_zone));
  mach_vm_size_t zone_offset = reinterpret_cast<mach_vm_size_t>(default_zone) -
      reinterpret_cast<mach_vm_size_t>(*reprotection_start);
  CHECK(zone_offset + sizeof(ChromeMallocZone) <= *reprotection_length);

  if (info.protection & VM_PROT_WRITE) {
    // No change needed; the zone is already writable.
    *reprotection_start = 0;
    *reprotection_length = 0;
    *reprotection_value = VM_PROT_NONE;
  } else {
    *reprotection_value = info.protection;
    result = mach_vm_protect(mach_task_self(),
                             *reprotection_start,
                             *reprotection_length,
                             false,
                             info.protection | VM_PROT_WRITE);
    CHECK(result == KERN_SUCCESS);
  }
}

// === C malloc/calloc/valloc/realloc/posix_memalign ===

typedef void* (*malloc_type)(struct _malloc_zone_t* zone,
                             size_t size);
typedef void* (*calloc_type)(struct _malloc_zone_t* zone,
                             size_t num_items,
                             size_t size);
typedef void* (*valloc_type)(struct _malloc_zone_t* zone,
                             size_t size);
typedef void (*free_type)(struct _malloc_zone_t* zone,
                          void* ptr);
typedef void* (*realloc_type)(struct _malloc_zone_t* zone,
                              void* ptr,
                              size_t size);
typedef void* (*memalign_type)(struct _malloc_zone_t* zone,
                               size_t alignment,
                               size_t size);

malloc_type g_old_malloc;
calloc_type g_old_calloc;
valloc_type g_old_valloc;
free_type g_old_free;
realloc_type g_old_realloc;
memalign_type g_old_memalign;

malloc_type g_old_malloc_purgeable;
calloc_type g_old_calloc_purgeable;
valloc_type g_old_valloc_purgeable;
free_type g_old_free_purgeable;
realloc_type g_old_realloc_purgeable;
memalign_type g_old_memalign_purgeable;
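
// The g_old_* pointers above capture each zone's original entry points; the
// oom_killer_* wrappers below delegate to them and terminate the process via
// debug::BreakDebugger() on genuine out-of-memory failures.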

void* oom_killer_malloc(struct _malloc_zone_t* zone,
                        size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_malloc(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_calloc(struct _malloc_zone_t* zone,
                        size_t num_items,
                        size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_calloc(zone, num_items, size);
  if (!result && num_items && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_valloc(struct _malloc_zone_t* zone,
                        size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_valloc(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void oom_killer_free(struct _malloc_zone_t* zone,
                     void* ptr) {
  ScopedClearErrno clear_errno;
  g_old_free(zone, ptr);
}

void* oom_killer_realloc(struct _malloc_zone_t* zone,
                         void* ptr,
                         size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_realloc(zone, ptr, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_memalign(struct _malloc_zone_t* zone,
                          size_t alignment,
                          size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_memalign(zone, alignment, size);
  // Only die if posix_memalign would have returned ENOMEM, since there are
  // other reasons why NULL might be returned (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
  if (!result && size && alignment >= sizeof(void*)
      && (alignment & (alignment - 1)) == 0) {
    debug::BreakDebugger();
  }
  return result;
}

void* oom_killer_malloc_purgeable(struct _malloc_zone_t* zone,
                                  size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_malloc_purgeable(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_calloc_purgeable(struct _malloc_zone_t* zone,
                                  size_t num_items,
                                  size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_calloc_purgeable(zone, num_items, size);
  if (!result && num_items && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_valloc_purgeable(struct _malloc_zone_t* zone,
                                  size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_valloc_purgeable(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void oom_killer_free_purgeable(struct _malloc_zone_t* zone,
                               void* ptr) {
  ScopedClearErrno clear_errno;
  g_old_free_purgeable(zone, ptr);
}

void* oom_killer_realloc_purgeable(struct _malloc_zone_t* zone,
                                   void* ptr,
                                   size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_realloc_purgeable(zone, ptr, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_memalign_purgeable(struct _malloc_zone_t* zone,
                                    size_t alignment,
                                    size_t size) {
  ScopedClearErrno clear_errno;
  void* result = g_old_memalign_purgeable(zone, alignment, size);
  // Only die if posix_memalign would have returned ENOMEM, since there are
  // other reasons why NULL might be returned (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
  if (!result && size && alignment >= sizeof(void*)
      && (alignment & (alignment - 1)) == 0) {
    debug::BreakDebugger();
  }
  return result;
}

// === C++ operator new ===

void oom_killer_new() {
  debug::BreakDebugger();
}

// === Core Foundation CFAllocators ===

bool CanGetContextForCFAllocator() {
  return !base::mac::IsOSLaterThanMountainLion_DontCallThis();
}

CFAllocatorContext* ContextForCFAllocator(CFAllocatorRef allocator) {
  if (base::mac::IsOSSnowLeopard()) {
    ChromeCFAllocatorLeopards* our_allocator =
        const_cast<ChromeCFAllocatorLeopards*>(
            reinterpret_cast<const ChromeCFAllocatorLeopards*>(allocator));
    return &our_allocator->_context;
  } else if (base::mac::IsOSLion() || base::mac::IsOSMountainLion()) {
    ChromeCFAllocatorLions* our_allocator =
        const_cast<ChromeCFAllocatorLions*>(
            reinterpret_cast<const ChromeCFAllocatorLions*>(allocator));
    return &our_allocator->_context;
  } else {
    return NULL;
  }
}

CFAllocatorAllocateCallBack g_old_cfallocator_system_default;
CFAllocatorAllocateCallBack g_old_cfallocator_malloc;
CFAllocatorAllocateCallBack g_old_cfallocator_malloc_zone;

void* oom_killer_cfallocator_system_default(CFIndex alloc_size,
                                            CFOptionFlags hint,
                                            void* info) {
  void* result = g_old_cfallocator_system_default(alloc_size, hint, info);
  if (!result)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_cfallocator_malloc(CFIndex alloc_size,
                                    CFOptionFlags hint,
                                    void* info) {
  void* result = g_old_cfallocator_malloc(alloc_size, hint, info);
  if (!result)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_cfallocator_malloc_zone(CFIndex alloc_size,
                                         CFOptionFlags hint,
                                         void* info) {
  void* result = g_old_cfallocator_malloc_zone(alloc_size, hint, info);
  if (!result)
    debug::BreakDebugger();
  return result;
}

// === Cocoa NSObject allocation ===

typedef id (*allocWithZone_t)(id, SEL, NSZone*);
allocWithZone_t g_old_allocWithZone;

id oom_killer_allocWithZone(id self, SEL _cmd, NSZone* zone) {
  id result = g_old_allocWithZone(self, _cmd, zone);
  if (!result)
    debug::BreakDebugger();
  return result;
}

}  // namespace

void* UncheckedMalloc(size_t size) {
  if (g_old_malloc) {
    ScopedClearErrno clear_errno;
    ThreadLocalBooleanAutoReset flag(g_unchecked_malloc.Pointer(), true);
    return g_old_malloc(malloc_default_zone(), size);
  }
  return malloc(size);
}
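
// Usage sketch (illustrative; kHugeSize and FallBackToSmallerRequest are
// hypothetical): UncheckedMalloc() lets a caller probe for memory without
// tripping the OOM killer installed below.
//   void* buf = UncheckedMalloc(kHugeSize);
//   if (!buf)
//     FallBackToSmallerRequest();
//   else
//     free(buf);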

void EnableTerminationOnOutOfMemory() {
  if (g_oom_killer_enabled)
    return;

  g_oom_killer_enabled = true;

  // === C malloc/calloc/valloc/realloc/posix_memalign ===

  // This approach is not perfect, as requests for amounts of memory larger
  // than MALLOC_ABSOLUTE_MAX_SIZE (currently SIZE_T_MAX - (2 * PAGE_SIZE))
  // will still fail with a NULL rather than dying (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c for
  // details). Unfortunately, it's the best we can do. Also note that this
  // does not affect allocations from non-default zones.

  CHECK(!g_old_malloc && !g_old_calloc && !g_old_valloc && !g_old_realloc &&
        !g_old_memalign) << "Old allocators unexpectedly non-null";

  CHECK(!g_old_malloc_purgeable && !g_old_calloc_purgeable &&
        !g_old_valloc_purgeable && !g_old_realloc_purgeable &&
        !g_old_memalign_purgeable) << "Old allocators unexpectedly non-null";

#if !defined(ADDRESS_SANITIZER)
  // Don't do anything special on OOM for the malloc zones replaced by
  // AddressSanitizer, as modifying or protecting them may not work correctly.

  ChromeMallocZone* default_zone =
      reinterpret_cast<ChromeMallocZone*>(malloc_default_zone());
  ChromeMallocZone* purgeable_zone =
      reinterpret_cast<ChromeMallocZone*>(malloc_default_purgeable_zone());

  mach_vm_address_t default_reprotection_start = 0;
  mach_vm_size_t default_reprotection_length = 0;
  vm_prot_t default_reprotection_value = VM_PROT_NONE;
  DeprotectMallocZone(default_zone,
                      &default_reprotection_start,
                      &default_reprotection_length,
                      &default_reprotection_value);

  mach_vm_address_t purgeable_reprotection_start = 0;
  mach_vm_size_t purgeable_reprotection_length = 0;
  vm_prot_t purgeable_reprotection_value = VM_PROT_NONE;
  if (purgeable_zone) {
    DeprotectMallocZone(purgeable_zone,
                        &purgeable_reprotection_start,
                        &purgeable_reprotection_length,
                        &purgeable_reprotection_value);
  }

  // Default zone

  g_old_malloc = default_zone->malloc;
  g_old_calloc = default_zone->calloc;
  g_old_valloc = default_zone->valloc;
  g_old_free = default_zone->free;
  g_old_realloc = default_zone->realloc;
  CHECK(g_old_malloc && g_old_calloc && g_old_valloc && g_old_free &&
        g_old_realloc)
      << "Failed to get system allocation functions.";

  default_zone->malloc = oom_killer_malloc;
  default_zone->calloc = oom_killer_calloc;
  default_zone->valloc = oom_killer_valloc;
  default_zone->free = oom_killer_free;
  default_zone->realloc = oom_killer_realloc;

  if (default_zone->version >= 5) {
    g_old_memalign = default_zone->memalign;
    if (g_old_memalign)
      default_zone->memalign = oom_killer_memalign;
  }

  // Purgeable zone (if it exists)

  if (purgeable_zone) {
    g_old_malloc_purgeable = purgeable_zone->malloc;
    g_old_calloc_purgeable = purgeable_zone->calloc;
    g_old_valloc_purgeable = purgeable_zone->valloc;
    g_old_free_purgeable = purgeable_zone->free;
    g_old_realloc_purgeable = purgeable_zone->realloc;
    CHECK(g_old_malloc_purgeable && g_old_calloc_purgeable &&
          g_old_valloc_purgeable && g_old_free_purgeable &&
          g_old_realloc_purgeable)
        << "Failed to get system allocation functions.";

    purgeable_zone->malloc = oom_killer_malloc_purgeable;
    purgeable_zone->calloc = oom_killer_calloc_purgeable;
    purgeable_zone->valloc = oom_killer_valloc_purgeable;
    purgeable_zone->free = oom_killer_free_purgeable;
    purgeable_zone->realloc = oom_killer_realloc_purgeable;

    if (purgeable_zone->version >= 5) {
      g_old_memalign_purgeable = purgeable_zone->memalign;
      if (g_old_memalign_purgeable)
        purgeable_zone->memalign = oom_killer_memalign_purgeable;
    }
  }

  // Restore protection if it was active.

  if (default_reprotection_start) {
    kern_return_t result = mach_vm_protect(mach_task_self(),
                                           default_reprotection_start,
                                           default_reprotection_length,
                                           false,
                                           default_reprotection_value);
    CHECK(result == KERN_SUCCESS);
  }

  if (purgeable_reprotection_start) {
    kern_return_t result = mach_vm_protect(mach_task_self(),
                                           purgeable_reprotection_start,
                                           purgeable_reprotection_length,
                                           false,
                                           purgeable_reprotection_value);
    CHECK(result == KERN_SUCCESS);
  }
#endif  // !defined(ADDRESS_SANITIZER)

  // === C malloc_zone_batch_malloc ===

  // batch_malloc is omitted because the default malloc zone's implementation
  // only supports batch_malloc for "tiny" allocations from the free list. It
  // will fail for allocations larger than "tiny", and will only allocate as
  // many blocks as it's able to from the free list. These factors mean that it
  // can return less than the requested memory even in a non-out-of-memory
  // situation. There's no good way to detect whether a batch_malloc failure is
  // due to these other factors, or due to genuine memory or address space
  // exhaustion. The fact that it only allocates space from the "tiny" free
  // list means that it's likely that a failure will not be due to memory
  // exhaustion. Similarly, these constraints on batch_malloc mean that callers
  // must always be expecting to receive less memory than was requested, even
  // in situations where memory pressure is not a concern. Finally, the only
  // public interface to batch_malloc is malloc_zone_batch_malloc, which is
  // specific to the system's malloc implementation. It's unlikely that anyone
  // has ever heard of it.

  // === C++ operator new ===

  // Yes, operator new does call through to malloc, but this will catch
  // failures that our imperfect handling of malloc cannot.

  std::set_new_handler(oom_killer_new);
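
  // std::set_new_handler() applies process-wide; because oom_killer_new()
  // crashes via debug::BreakDebugger() rather than freeing memory, a failed
  // operator new terminates immediately instead of retrying.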

#ifndef ADDRESS_SANITIZER
  // === Core Foundation CFAllocators ===

  // This will not catch allocation done by custom allocators, but will catch
  // all allocation done by system-provided ones.

  CHECK(!g_old_cfallocator_system_default && !g_old_cfallocator_malloc &&
        !g_old_cfallocator_malloc_zone)
      << "Old allocators unexpectedly non-null";

  bool cf_allocator_internals_known = CanGetContextForCFAllocator();

  if (cf_allocator_internals_known) {
    CFAllocatorContext* context =
        ContextForCFAllocator(kCFAllocatorSystemDefault);
    CHECK(context) << "Failed to get context for kCFAllocatorSystemDefault.";
    g_old_cfallocator_system_default = context->allocate;
    CHECK(g_old_cfallocator_system_default)
        << "Failed to get kCFAllocatorSystemDefault allocation function.";
    context->allocate = oom_killer_cfallocator_system_default;

    context = ContextForCFAllocator(kCFAllocatorMalloc);
    CHECK(context) << "Failed to get context for kCFAllocatorMalloc.";
    g_old_cfallocator_malloc = context->allocate;
    CHECK(g_old_cfallocator_malloc)
        << "Failed to get kCFAllocatorMalloc allocation function.";
    context->allocate = oom_killer_cfallocator_malloc;

    context = ContextForCFAllocator(kCFAllocatorMallocZone);
    CHECK(context) << "Failed to get context for kCFAllocatorMallocZone.";
    g_old_cfallocator_malloc_zone = context->allocate;
    CHECK(g_old_cfallocator_malloc_zone)
        << "Failed to get kCFAllocatorMallocZone allocation function.";
    context->allocate = oom_killer_cfallocator_malloc_zone;
  } else {
    NSLog(@"Internals of CFAllocator not known; out-of-memory failures via "
          "CFAllocator will not result in termination. http://crbug.com/45650");
  }
#endif  // !defined(ADDRESS_SANITIZER)

  // === Cocoa NSObject allocation ===

  // Note that both +[NSObject new] and +[NSObject alloc] call through to
  // +[NSObject allocWithZone:].

  CHECK(!g_old_allocWithZone)
      << "Old allocator unexpectedly non-null";

  Class nsobject_class = [NSObject class];
  Method orig_method = class_getClassMethod(nsobject_class,
                                            @selector(allocWithZone:));
  g_old_allocWithZone = reinterpret_cast<allocWithZone_t>(
      method_getImplementation(orig_method));
  CHECK(g_old_allocWithZone)
      << "Failed to get allocWithZone allocation function.";
  method_setImplementation(orig_method,
                           reinterpret_cast<IMP>(oom_killer_allocWithZone));
}

ProcessId GetParentProcessId(ProcessHandle process) {
  struct kinfo_proc info;
  size_t length = sizeof(struct kinfo_proc);
  int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, process };
  if (sysctl(mib, 4, &info, &length, NULL, 0) < 0) {
    DPLOG(ERROR) << "sysctl";
    return -1;
  }
  if (length == 0)
    return -1;
  return info.kp_eproc.e_ppid;
}
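
// Usage sketch (illustrative):
//   ProcessId parent = GetParentProcessId(GetCurrentProcessHandle());
//   if (parent == -1)
//     DLOG(WARNING) << "Could not determine parent process";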

namespace {

const int kWaitBeforeKillSeconds = 2;

// Reap |child| process. This call blocks until completion.
void BlockingReap(pid_t child) {
  const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));
  if (result == -1) {
    DPLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
  }
}

// Waits for |timeout| seconds for the given |child| to exit and reap it. If
// the child doesn't exit within the time specified, kills it.
//
// This function takes two approaches: first, it tries to use kqueue to
// observe when the process exits. kevent can monitor a kqueue with a
// timeout, so this method is preferred to wait for a specified period of
// time. Once the kqueue indicates the process has exited, waitpid will reap
// the exited child. If the kqueue doesn't provide an exit event notification
// before the timeout expires, or if the kqueue fails or misbehaves, the
// process will be mercilessly killed and reaped.
//
// A child process passed to this function may be in one of several states:
// running, terminated and not yet reaped, and (apparently, and unfortunately)
// terminated and already reaped. Normally, a process will at least have been
// asked to exit before this function is called, but this is not required.
// If a process is terminating and unreaped, there may be a window between the
// time that kqueue will no longer recognize it and when it becomes an actual
// zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
// detected when kqueue indicates that the process is not running and a
// non-blocking waitpid fails to reap the process but indicates that it is
// still running. In this event, a blocking attempt to reap the process
// collects the known-dying child, preventing zombies from congregating.
//
// In the event that the kqueue misbehaves entirely, as it might under an
// EMFILE condition ("too many open files", or out of file descriptors), this
// function will forcibly kill and reap the child without delay. This
// eliminates another potential zombie vector. (If you're out of file
// descriptors, you're probably deep into something else, but that doesn't
// mean that zombies should be allowed to kick you while you're down.)
//
// The fact that this function seemingly can be called to wait on a child
// that's not only already terminated but already reaped is a bit of a
// problem: a reaped child's pid can be reclaimed and may refer to a distinct
// process in that case. The fact that this function can seemingly be called
// to wait on a process that's not even a child is also a problem: kqueue will
// work in that case, but waitpid won't, and killing a non-child might not be
// the best approach.
void WaitForChildToDie(pid_t child, int timeout) {
  DCHECK(child > 0);
  DCHECK(timeout > 0);

  // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
  // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
  // call fails, this function should fall back to the last resort of trying
  // to kill and reap the process. Not observing this rule will resurrect
  // zombies.

  int result;

  int kq = HANDLE_EINTR(kqueue());
  if (kq == -1) {
    DPLOG(ERROR) << "kqueue()";
  } else {
    file_util::ScopedFD auto_close_kq(&kq);

    struct kevent change = {0};
    EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
    result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL));

    if (result == -1) {
      if (errno != ESRCH) {
        DPLOG(ERROR) << "kevent (setup " << child << ")";
      } else {
        // At this point, one of the following has occurred:
        // 1. The process has died but has not yet been reaped.
        // 2. The process has died and has already been reaped.
        // 3. The process is in the process of dying. It's no longer
        //    kqueueable, but it may not be waitable yet either. Mark calls
        //    this case the "zombie death race".

        result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

        if (result != 0) {
          // A positive result indicates case 1. waitpid succeeded and reaped
          // the child. A result of -1 indicates case 2. The child has already
          // been reaped. In both of these cases, no further action is
          // necessary.
          return;
        }

        // |result| is 0, indicating case 3. The process will be waitable in
        // short order. Fall back out of the kqueue code to kill it (for good
        // measure) and reap it.
      }
    } else {
      // Keep track of the elapsed time to be able to restart kevent if it's
      // interrupted.
      TimeDelta remaining_delta = TimeDelta::FromSeconds(timeout);
      TimeTicks deadline = TimeTicks::Now() + remaining_delta;
      result = -1;
      struct kevent event = {0};
      while (remaining_delta.InMilliseconds() > 0) {
        const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();
        result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec);
        if (result == -1 && errno == EINTR) {
          remaining_delta = deadline - TimeTicks::Now();
          result = 0;
        } else {
          break;
        }
      }

      if (result == -1) {
        DPLOG(ERROR) << "kevent (wait " << child << ")";
      } else if (result > 1) {
        DLOG(ERROR) << "kevent (wait " << child << "): unexpected result "
                    << result;
      } else if (result == 1) {
        if ((event.fflags & NOTE_EXIT) &&
            (event.ident == static_cast<uintptr_t>(child))) {
          // The process is dead or dying. This won't block for long, if at
          // all.
          BlockingReap(child);
          return;
        } else {
          DLOG(ERROR) << "kevent (wait " << child
                      << "): unexpected event: fflags=" << event.fflags
                      << ", ident=" << event.ident;
        }
      }
    }
  }

  // The child is still alive, or is very freshly dead. Be sure by sending it
  // a signal. This is safe even if it's freshly dead, because it will be a
  // zombie (or on the way to zombiedom) and kill will return 0 even if the
  // signal is not delivered to a live process.
  result = kill(child, SIGKILL);
  if (result == -1) {
    DPLOG(ERROR) << "kill(" << child << ", SIGKILL)";
  } else {
    // The child is definitely on the way out now. BlockingReap won't need to
    // wait for long, if at all.
    BlockingReap(child);
  }
}

}  // namespace

void EnsureProcessTerminated(ProcessHandle process) {
  WaitForChildToDie(process, kWaitBeforeKillSeconds);
}

}  // namespace base