From fa83523f45fbb403eba4ebc5704bf98aa4da0163 Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Fri, 25 May 2012 16:12:46 -0500 Subject: [PATCH] x86/mm/pat: Improve scaling of pat_pagerange_is_ram() Function pat_pagerange_is_ram() scales poorly to large address ranges, because it probes the resource tree for each page. On a 2.6 GHz Opteron, this function consumes 34 ms for a 1 GB range. It is called twice during untrack_pfn_vma(), slowing process cleanup and handicapping the OOM killer. This replacement consumes less than 1ms, under the same conditions. Signed-off-by: John Dykstra on behalf of Cray Inc. Acked-by: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1337980366.1979.6.camel@redwood [ Small stylistic cleanups and renames ] Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 56 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f6ff57b7efa..bea6e573e02 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -158,31 +158,47 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) return req_type; } +struct pagerange_state { + unsigned long cur_pfn; + int ram; + int not_ram; +}; + +static int +pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg) +{ + struct pagerange_state *state = arg; + + state->not_ram |= initial_pfn > state->cur_pfn; + state->ram |= total_nr_pages > 0; + state->cur_pfn = initial_pfn + total_nr_pages; + + return state->ram && state->not_ram; +} + static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) { - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; + int ret = 0; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct pagerange_state state = {start_pfn, 0, 0}; - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - /* - * For legacy reasons, physical address range in the legacy ISA - * region is tracked as non-RAM. This will allow users of - * /dev/mem to map portions of legacy ISA region, even when - * some of those portions are listed(or not even listed) with - * different e820 types(RAM/reserved/..) - */ - if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && - page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT) + start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT; + + if (start_pfn < end_pfn) { + ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, + &state, pagerange_is_ram_callback); } - return ram_page; + return (ret > 0) ? -1 : (state.ram ? 1 : 0); } /* -- 2.11.4.GIT