1 diff -Nur linux-4.4.46.orig/arch/arm/include/asm/switch_to.h linux-4.4.46/arch/arm/include/asm/switch_to.h
2 --- linux-4.4.46.orig/arch/arm/include/asm/switch_to.h 2017-02-01 08:31:11.000000000 +0100
3 +++ linux-4.4.46/arch/arm/include/asm/switch_to.h 2017-02-03 17:18:05.627414322 +0100
6 #include <linux/thread_info.h>
8 +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
9 +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
12 +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
16 * For v7 SMP cores running a preemptible kernel we may be pre-empted
17 * during a TLB maintenance operation, so execute an inner-shareable dsb
19 #define switch_to(prev,next,last) \
21 __complete_pending_tlbi(); \
22 + switch_kmaps(prev, next); \
23 last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
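The hunk above adds an RT-only kmap hand-off to the ARM context switch: when both CONFIG_PREEMPT_RT_FULL and CONFIG_HIGHMEM are set, switch_kmaps() is declared and called just before __switch_to(); in every other configuration it collapses to an empty inline and the call costs nothing. A minimal sketch of the resulting header logic, assuming only what the hunk itself shows (the real body is added to arch/arm/mm/highmem.c later in this patch):

#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
#else
static inline void
switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
#endif
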
26 diff -Nur linux-4.4.46.orig/arch/arm/include/asm/thread_info.h linux-4.4.46/arch/arm/include/asm/thread_info.h
27 --- linux-4.4.46.orig/arch/arm/include/asm/thread_info.h 2017-02-01 08:31:11.000000000 +0100
28 +++ linux-4.4.46/arch/arm/include/asm/thread_info.h 2017-02-03 17:18:05.627414322 +0100
31 unsigned long flags; /* low level flags */
32 int preempt_count; /* 0 => preemptable, <0 => bug */
33 + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
34 mm_segment_t addr_limit; /* address limit */
35 struct task_struct *task; /* main task structure */
38 #define TIF_SYSCALL_TRACE 4 /* syscall trace active */
39 #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
40 #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
41 -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
42 +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
43 +#define TIF_NEED_RESCHED_LAZY 7
45 #define TIF_NOHZ 12 /* in adaptive nohz mode */
46 #define TIF_USING_IWMMXT 17
48 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
49 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
50 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
51 +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
52 #define _TIF_UPROBE (1 << TIF_UPROBE)
53 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
54 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
56 * Change these and you break ASM code in entry-common.S
58 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
59 - _TIF_NOTIFY_RESUME | _TIF_UPROBE)
60 + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
61 + _TIF_NEED_RESCHED_LAZY)
63 #endif /* __KERNEL__ */
64 #endif /* __ASM_ARM_THREAD_INFO_H */
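TIF_NEED_RESCHED_LAZY follows the usual thread-flag pattern: the TIF_* value is a bit number (7, with TIF_SECCOMP bumped to 8 to make room), the matching _TIF_* constant is the shifted mask, and that mask is OR-ed into _TIF_WORK_MASK so the return-to-user path notices it. A hedged C sketch of testing such a flag; test_ti_thread_flag() is the stock helper from <linux/thread_info.h>, the wrapper name is invented for illustration:

#define TIF_NEED_RESCHED_LAZY  7
#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)

/* illustrative helper: true when a lazy reschedule has been requested */
static inline int need_resched_lazy(struct thread_info *ti)
{
        return test_ti_thread_flag(ti, TIF_NEED_RESCHED_LAZY);
}
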
65 diff -Nur linux-4.4.46.orig/arch/arm/Kconfig linux-4.4.46/arch/arm/Kconfig
66 --- linux-4.4.46.orig/arch/arm/Kconfig 2017-02-01 08:31:11.000000000 +0100
67 +++ linux-4.4.46/arch/arm/Kconfig 2017-02-03 17:18:05.627414322 +0100
69 select HARDIRQS_SW_RESEND
70 select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
71 select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
72 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
73 + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !PREEMPT_RT_BASE
74 select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
75 select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
76 select HAVE_ARCH_TRACEHOOK
78 select HAVE_PERF_EVENTS
80 select HAVE_PERF_USER_STACK_DUMP
81 + select HAVE_PREEMPT_LAZY
82 select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
83 select HAVE_REGS_AND_STACK_ACCESS_API
84 select HAVE_SYSCALL_TRACEPOINTS
85 diff -Nur linux-4.4.46.orig/arch/arm/kernel/asm-offsets.c linux-4.4.46/arch/arm/kernel/asm-offsets.c
86 --- linux-4.4.46.orig/arch/arm/kernel/asm-offsets.c 2017-02-01 08:31:11.000000000 +0100
87 +++ linux-4.4.46/arch/arm/kernel/asm-offsets.c 2017-02-03 17:18:05.627414322 +0100
90 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
91 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
92 + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
93 DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
94 DEFINE(TI_TASK, offsetof(struct thread_info, task));
95 DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
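TI_PREEMPT_LAZY must exist as an assembler-visible constant because entry-armv.S (next hunk) loads the field directly with ldr. asm-offsets.c produces those constants through the kbuild DEFINE() helper, which emits the offsetof() value into the generated assembly so kbuild can turn it into a #define in asm-offsets.h. Shown for context only — this is the mainline include/linux/kbuild.h pattern as I recall it, not part of the patch:

/* include/linux/kbuild.h */
#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))

/* the added line therefore ends up as roughly
 *   #define TI_PREEMPT_LAZY <byte offset of preempt_lazy_count>
 * in include/generated/asm-offsets.h, usable from assembly.
 */
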
96 diff -Nur linux-4.4.46.orig/arch/arm/kernel/entry-armv.S linux-4.4.46/arch/arm/kernel/entry-armv.S
97 --- linux-4.4.46.orig/arch/arm/kernel/entry-armv.S 2017-02-01 08:31:11.000000000 +0100
98 +++ linux-4.4.46/arch/arm/kernel/entry-armv.S 2017-02-03 17:18:05.627414322 +0100
100 #ifdef CONFIG_PREEMPT
102 ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
103 - ldr r0, [tsk, #TI_FLAGS] @ get flags
104 teq r8, #0 @ if preempt count != 0
105 + bne 1f @ return from exception

106 + ldr r0, [tsk, #TI_FLAGS] @ get flags
107 + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
108 + blne svc_preempt @ preempt!
110 + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
111 + teq r8, #0 @ if preempt lazy count != 0
112 movne r0, #0 @ force flags to 0
113 - tst r0, #_TIF_NEED_RESCHED
114 + tst r0, #_TIF_NEED_RESCHED_LAZY
119 svc_exit r5, irq = 1 @ return from exception
121 1: bl preempt_schedule_irq @ irq en/disable is done inside
122 ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
123 tst r0, #_TIF_NEED_RESCHED
125 + tst r0, #_TIF_NEED_RESCHED_LAZY
128 + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
129 + teq r0, #0 @ if preempt lazy count != 0
136 diff -Nur linux-4.4.46.orig/arch/arm/kernel/entry-common.S linux-4.4.46/arch/arm/kernel/entry-common.S
137 --- linux-4.4.46.orig/arch/arm/kernel/entry-common.S 2017-02-01 08:31:11.000000000 +0100
138 +++ linux-4.4.46/arch/arm/kernel/entry-common.S 2017-02-03 17:18:05.627414322 +0100
141 disable_irq_notrace @ disable interrupts
142 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
143 - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
144 + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
145 + bne fast_work_pending
146 + tst r1, #_TIF_SECCOMP
147 bne fast_work_pending
149 /* perform architecture specific actions before user return */
151 str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
152 disable_irq_notrace @ disable interrupts
153 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
154 - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
155 + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
157 + tst r1, #_TIF_SECCOMP
161 ENDPROC(ret_fast_syscall)
163 diff -Nur linux-4.4.46.orig/arch/arm/kernel/process.c linux-4.4.46/arch/arm/kernel/process.c
164 --- linux-4.4.46.orig/arch/arm/kernel/process.c 2017-02-01 08:31:11.000000000 +0100
165 +++ linux-4.4.46/arch/arm/kernel/process.c 2017-02-03 17:18:05.627414322 +0100
171 + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
172 + * initialized by pgtable_page_ctor() then a coredump of the vector page will
175 +static int __init vectors_user_mapping_init_page(void)
178 + unsigned long addr = 0xffff0000;
183 + pgd = pgd_offset_k(addr);
184 + pud = pud_offset(pgd, addr);
185 + pmd = pmd_offset(pud, addr);
186 + page = pmd_page(*(pmd));
188 + pgtable_page_ctor(page);
192 +late_initcall(vectors_user_mapping_init_page);
194 #ifdef CONFIG_KUSER_HELPERS
196 * The vectors page is always readable from user space for the
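The initcall added in this hunk exists because CONFIG_SPLIT_PTLOCK_CPUS gives every page-table page its own page->ptl lock, which must be set up by pgtable_page_ctor(); the statically created vector-page mapping at 0xffff0000 never went through that constructor, so coredumping the vector page could trip over the uninitialized lock. Several lines of the hunk are elided above; a hedged reconstruction of the helper's overall shape (the local declarations and trailing return are my additions, not quoted from the patch):

static int __init vectors_user_mapping_init_page(void)
{
        struct page *page;
        unsigned long addr = 0xffff0000;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        /* walk the kernel page tables down to the vector page */
        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
        pmd = pmd_offset(pud, addr);
        page = pmd_page(*(pmd));

        pgtable_page_ctor(page);        /* initialize the split ptl lock */

        return 0;
}
late_initcall(vectors_user_mapping_init_page);
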
197 diff -Nur linux-4.4.46.orig/arch/arm/kernel/signal.c linux-4.4.46/arch/arm/kernel/signal.c
198 --- linux-4.4.46.orig/arch/arm/kernel/signal.c 2017-02-01 08:31:11.000000000 +0100
199 +++ linux-4.4.46/arch/arm/kernel/signal.c 2017-02-03 17:18:05.627414322 +0100
202 trace_hardirqs_off();
204 - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
205 + if (likely(thread_flags & (_TIF_NEED_RESCHED |
206 + _TIF_NEED_RESCHED_LAZY))) {
209 if (unlikely(!user_mode(regs)))
210 diff -Nur linux-4.4.46.orig/arch/arm/kernel/smp.c linux-4.4.46/arch/arm/kernel/smp.c
211 --- linux-4.4.46.orig/arch/arm/kernel/smp.c 2017-02-01 08:31:11.000000000 +0100
212 +++ linux-4.4.46/arch/arm/kernel/smp.c 2017-02-03 17:18:05.627414322 +0100
215 local_flush_tlb_all();
217 - clear_tasks_mm_cpumask(cpu);
223 pr_err("CPU%u: cpu didn't die\n", cpu);
227 + clear_tasks_mm_cpumask(cpu);
229 pr_notice("CPU%u: shutdown\n", cpu);
232 diff -Nur linux-4.4.46.orig/arch/arm/kernel/unwind.c linux-4.4.46/arch/arm/kernel/unwind.c
233 --- linux-4.4.46.orig/arch/arm/kernel/unwind.c 2017-02-01 08:31:11.000000000 +0100
234 +++ linux-4.4.46/arch/arm/kernel/unwind.c 2017-02-03 17:18:05.627414322 +0100
236 static const struct unwind_idx *__origin_unwind_idx;
237 extern const struct unwind_idx __stop_unwind_idx[];
239 -static DEFINE_SPINLOCK(unwind_lock);
240 +static DEFINE_RAW_SPINLOCK(unwind_lock);
241 static LIST_HEAD(unwind_tables);
243 /* Convert a prel31 symbol to an absolute address */
245 /* module unwind tables */
246 struct unwind_table *table;
248 - spin_lock_irqsave(&unwind_lock, flags);
249 + raw_spin_lock_irqsave(&unwind_lock, flags);
250 list_for_each_entry(table, &unwind_tables, list) {
251 if (addr >= table->begin_addr &&
252 addr < table->end_addr) {
257 - spin_unlock_irqrestore(&unwind_lock, flags);
258 + raw_spin_unlock_irqrestore(&unwind_lock, flags);
261 pr_debug("%s: idx = %p\n", __func__, idx);
263 tab->begin_addr = text_addr;
264 tab->end_addr = text_addr + text_size;
266 - spin_lock_irqsave(&unwind_lock, flags);
267 + raw_spin_lock_irqsave(&unwind_lock, flags);
268 list_add_tail(&tab->list, &unwind_tables);
269 - spin_unlock_irqrestore(&unwind_lock, flags);
270 + raw_spin_unlock_irqrestore(&unwind_lock, flags);
278 - spin_lock_irqsave(&unwind_lock, flags);
279 + raw_spin_lock_irqsave(&unwind_lock, flags);
280 list_del(&tab->list);
281 - spin_unlock_irqrestore(&unwind_lock, flags);
282 + raw_spin_unlock_irqrestore(&unwind_lock, flags);
286 diff -Nur linux-4.4.46.orig/arch/arm/kvm/arm.c linux-4.4.46/arch/arm/kvm/arm.c
287 --- linux-4.4.46.orig/arch/arm/kvm/arm.c 2017-02-01 08:31:11.000000000 +0100
288 +++ linux-4.4.46/arch/arm/kvm/arm.c 2017-02-03 17:18:05.627414322 +0100
289 @@ -496,18 +496,18 @@
290 struct kvm_vcpu *vcpu;
292 kvm_for_each_vcpu(i, vcpu, kvm) {
293 - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
294 + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
296 vcpu->arch.pause = false;
297 - wake_up_interruptible(wq);
302 static void vcpu_sleep(struct kvm_vcpu *vcpu)
304 - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
305 + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
307 - wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
308 + swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
309 (!vcpu->arch.pause)));
313 * involves poking the GIC, which must be done in a
314 * non-preemptible context.
318 kvm_timer_flush_hwstate(vcpu);
319 kvm_vgic_flush_hwstate(vcpu);
323 kvm_timer_sync_hwstate(vcpu);
324 kvm_vgic_sync_hwstate(vcpu);
332 kvm_vgic_sync_hwstate(vcpu);
337 ret = handle_exit(vcpu, run, ret);
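The KVM hunks here and below swap the per-vcpu wait queue from wait_queue_head_t to the simple-wait (swait) variant, whose wake path is short and safe from the constrained contexts RT cares about: wake_up_interruptible() becomes swake_up(), wait_event_interruptible() becomes swait_event_interruptible(). A hedged usage sketch built only from the calls visible in these hunks; the queue and flag are stand-ins for the per-vcpu fields:

#include <linux/swait.h>

static struct swait_queue_head vcpu_wq;         /* per-vcpu in the real code */
static bool vcpu_runnable;

static void example_init(void)
{
        init_swait_queue_head(&vcpu_wq);
}

/* sleeper side: block until the condition becomes true */
static void vcpu_sleep_example(void)
{
        int ret = swait_event_interruptible(vcpu_wq, vcpu_runnable);
        (void)ret;                              /* -ERESTARTSYS on signal */
}

/* waker side: make the condition true, then wake a waiter */
static void vcpu_kick_example(void)
{
        vcpu_runnable = true;
        if (swait_active(&vcpu_wq))
                swake_up(&vcpu_wq);
}
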
339 diff -Nur linux-4.4.46.orig/arch/arm/kvm/psci.c linux-4.4.46/arch/arm/kvm/psci.c
340 --- linux-4.4.46.orig/arch/arm/kvm/psci.c 2017-02-01 08:31:11.000000000 +0100
341 +++ linux-4.4.46/arch/arm/kvm/psci.c 2017-02-03 17:18:05.627414322 +0100
344 struct kvm *kvm = source_vcpu->kvm;
345 struct kvm_vcpu *vcpu = NULL;
346 - wait_queue_head_t *wq;
347 + struct swait_queue_head *wq;
348 unsigned long cpu_id;
349 unsigned long context_id;
350 phys_addr_t target_pc;
352 smp_mb(); /* Make sure the above is visible */
354 wq = kvm_arch_vcpu_wq(vcpu);
355 - wake_up_interruptible(wq);
358 return PSCI_RET_SUCCESS;
360 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/at91rm9200.c linux-4.4.46/arch/arm/mach-at91/at91rm9200.c
361 --- linux-4.4.46.orig/arch/arm/mach-at91/at91rm9200.c 2017-02-01 08:31:11.000000000 +0100
362 +++ linux-4.4.46/arch/arm/mach-at91/at91rm9200.c 2017-02-03 17:18:05.627414322 +0100
364 #include <linux/of_platform.h>
366 #include <asm/mach/arch.h>
367 -#include <asm/system_misc.h>
373 of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
375 - arm_pm_idle = at91rm9200_idle;
376 at91rm9200_pm_init();
379 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/at91sam9.c linux-4.4.46/arch/arm/mach-at91/at91sam9.c
380 --- linux-4.4.46.orig/arch/arm/mach-at91/at91sam9.c 2017-02-01 08:31:11.000000000 +0100
381 +++ linux-4.4.46/arch/arm/mach-at91/at91sam9.c 2017-02-03 17:18:05.627414322 +0100
383 soc_dev = soc_device_to_device(soc);
385 of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
387 - arm_pm_idle = at91sam9_idle;
390 static void __init at91sam9_dt_device_init(void)
391 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/generic.h linux-4.4.46/arch/arm/mach-at91/generic.h
392 --- linux-4.4.46.orig/arch/arm/mach-at91/generic.h 2017-02-01 08:31:11.000000000 +0100
393 +++ linux-4.4.46/arch/arm/mach-at91/generic.h 2017-02-03 17:18:05.627414322 +0100
395 #ifndef _AT91_GENERIC_H
396 #define _AT91_GENERIC_H
398 -#include <linux/of.h>
399 -#include <linux/reboot.h>
402 -extern void __init at91_map_io(void);
403 -extern void __init at91_alt_map_io(void);
406 -extern void at91rm9200_idle(void);
407 -extern void at91sam9_idle(void);
410 extern void __init at91rm9200_pm_init(void);
411 extern void __init at91sam9260_pm_init(void);
412 extern void __init at91sam9g45_pm_init(void);
413 extern void __init at91sam9x5_pm_init(void);
414 +extern void __init sama5_pm_init(void);
416 static inline void __init at91rm9200_pm_init(void) { }
417 static inline void __init at91sam9260_pm_init(void) { }
418 static inline void __init at91sam9g45_pm_init(void) { }
419 static inline void __init at91sam9x5_pm_init(void) { }
420 +static inline void __init sama5_pm_init(void) { }
423 #endif /* _AT91_GENERIC_H */
424 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/Kconfig linux-4.4.46/arch/arm/mach-at91/Kconfig
425 --- linux-4.4.46.orig/arch/arm/mach-at91/Kconfig 2017-02-01 08:31:11.000000000 +0100
426 +++ linux-4.4.46/arch/arm/mach-at91/Kconfig 2017-02-03 17:18:05.627414322 +0100
428 config COMMON_CLK_AT91
435 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/pm.c linux-4.4.46/arch/arm/mach-at91/pm.c
436 --- linux-4.4.46.orig/arch/arm/mach-at91/pm.c 2017-02-01 08:31:11.000000000 +0100
437 +++ linux-4.4.46/arch/arm/mach-at91/pm.c 2017-02-03 17:18:05.627414322 +0100
439 #include <asm/mach/irq.h>
440 #include <asm/fncpy.h>
441 #include <asm/cacheflush.h>
442 +#include <asm/system_misc.h>
447 +static void __iomem *pmc;
450 * FIXME: this is needed to communicate between the pinctrl driver and
451 * the PM implementation in the machine. Possibly part of the PM
456 - scsr = at91_pmc_read(AT91_PMC_SCSR);
457 + scsr = readl(pmc + AT91_PMC_SCSR);
459 /* USB must not be using PLLB */
460 if ((scsr & at91_pm_data.uhp_udp_mask) != 0) {
463 if ((scsr & (AT91_PMC_PCK0 << i)) == 0)
466 - css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
467 + css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
468 if (css != AT91_PMC_CSS_SLOW) {
469 pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
475 - at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0],
476 - at91_ramc_base[1], pm_data);
477 + at91_suspend_sram_fn(pmc, at91_ramc_base[0],
478 + at91_ramc_base[1], pm_data);
483 at91_pm_set_standby(standby);
486 +void at91rm9200_idle(void)
489 + * Disable the processor clock. The processor will be automatically
490 + * re-enabled by an interrupt or by a reset.
492 + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
495 +void at91sam9_idle(void)
497 + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
501 static void __init at91_pm_sram_init(void)
503 struct gen_pool *sram_pool;
504 @@ -399,13 +416,36 @@
505 &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz);
508 -static void __init at91_pm_init(void)
509 +static const struct of_device_id atmel_pmc_ids[] __initconst = {
510 + { .compatible = "atmel,at91rm9200-pmc" },
511 + { .compatible = "atmel,at91sam9260-pmc" },
512 + { .compatible = "atmel,at91sam9g45-pmc" },
513 + { .compatible = "atmel,at91sam9n12-pmc" },
514 + { .compatible = "atmel,at91sam9x5-pmc" },
515 + { .compatible = "atmel,sama5d3-pmc" },
516 + { .compatible = "atmel,sama5d2-pmc" },
517 + { /* sentinel */ },
520 +static void __init at91_pm_init(void (*pm_idle)(void))
522 - at91_pm_sram_init();
523 + struct device_node *pmc_np;
525 if (at91_cpuidle_device.dev.platform_data)
526 platform_device_register(&at91_cpuidle_device);
528 + pmc_np = of_find_matching_node(NULL, atmel_pmc_ids);
529 + pmc = of_iomap(pmc_np, 0);
531 + pr_err("AT91: PM not supported, PMC not found\n");
536 + arm_pm_idle = pm_idle;
538 + at91_pm_sram_init();
540 if (at91_suspend_sram_fn)
541 suspend_set_ops(&at91_pm_ops);
544 at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP;
545 at91_pm_data.memctrl = AT91_MEMCTRL_MC;
548 + at91_pm_init(at91rm9200_idle);
551 void __init at91sam9260_pm_init(void)
554 at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC;
555 at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
556 - return at91_pm_init();
557 + at91_pm_init(at91sam9_idle);
560 void __init at91sam9g45_pm_init(void)
563 at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP;
564 at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
565 - return at91_pm_init();
566 + at91_pm_init(at91sam9_idle);
569 void __init at91sam9x5_pm_init(void)
572 at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
573 at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
574 - return at91_pm_init();
575 + at91_pm_init(at91sam9_idle);
578 +void __init sama5_pm_init(void)
581 + at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
582 + at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
583 + at91_pm_init(NULL);
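Besides registering the idle callback, at91_pm_init() now locates the PMC through the device tree and maps it itself instead of relying on the old at91_pmc_base global; the readl()/writel() accesses above then go through that mapping. A hedged sketch of the lookup pattern the hunk uses (error handling simplified, names taken from the hunk):

static void __iomem *pmc;

static const struct of_device_id atmel_pmc_ids[] __initconst = {
        { .compatible = "atmel,at91rm9200-pmc" },
        { /* sentinel */ },
};

static int __init map_pmc_example(void)
{
        struct device_node *np = of_find_matching_node(NULL, atmel_pmc_ids);

        if (!np)
                return -ENODEV;
        pmc = of_iomap(np, 0);          /* map "reg" entry 0 of the PMC node */
        return pmc ? 0 : -ENOMEM;
}
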
585 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/sama5.c linux-4.4.46/arch/arm/mach-at91/sama5.c
586 --- linux-4.4.46.orig/arch/arm/mach-at91/sama5.c 2017-02-01 08:31:11.000000000 +0100
587 +++ linux-4.4.46/arch/arm/mach-at91/sama5.c 2017-02-03 17:18:05.627414322 +0100
589 soc_dev = soc_device_to_device(soc);
591 of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
592 - at91sam9x5_pm_init();
596 static const char *const sama5_dt_board_compat[] __initconst = {
597 diff -Nur linux-4.4.46.orig/arch/arm/mach-exynos/platsmp.c linux-4.4.46/arch/arm/mach-exynos/platsmp.c
598 --- linux-4.4.46.orig/arch/arm/mach-exynos/platsmp.c 2017-02-01 08:31:11.000000000 +0100
599 +++ linux-4.4.46/arch/arm/mach-exynos/platsmp.c 2017-02-03 17:18:05.627414322 +0100
601 return (void __iomem *)(S5P_VA_SCU);
604 -static DEFINE_SPINLOCK(boot_lock);
605 +static DEFINE_RAW_SPINLOCK(boot_lock);
607 static void exynos_secondary_init(unsigned int cpu)
611 * Synchronise with the boot thread.
613 - spin_lock(&boot_lock);
614 - spin_unlock(&boot_lock);
615 + raw_spin_lock(&boot_lock);
616 + raw_spin_unlock(&boot_lock);
619 int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
621 * Set synchronisation state between this boot processor
622 * and the secondary one
624 - spin_lock(&boot_lock);
625 + raw_spin_lock(&boot_lock);
628 * The secondary processor is waiting to be released from
632 printk(KERN_ERR "cpu1 power enable failed");
633 - spin_unlock(&boot_lock);
634 + raw_spin_unlock(&boot_lock);
639 * calibrations, then wait for it to finish
642 - spin_unlock(&boot_lock);
643 + raw_spin_unlock(&boot_lock);
645 return pen_release != -1 ? ret : 0;
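This exynos hunk and the hisi, omap2, prima2, qcom, spear, sti and plat-versatile hunks that follow all make the identical change: boot_lock, taken while bringing up a secondary CPU, becomes a raw spinlock. On PREEMPT_RT a plain spinlock_t turns into a sleeping rtmutex, which cannot be taken on a CPU that is not yet able to schedule, so the lock must keep spinning. A hedged sketch of the shared pattern (function names are placeholders):

static DEFINE_RAW_SPINLOCK(boot_lock);

static void secondary_init_example(unsigned int cpu)
{
        /* the boot CPU holds boot_lock while it releases us; taking and
         * dropping it here just synchronises with that sequence */
        raw_spin_lock(&boot_lock);
        raw_spin_unlock(&boot_lock);
}

static int boot_secondary_example(unsigned int cpu)
{
        raw_spin_lock(&boot_lock);
        /* ... kick the secondary CPU and wait for it to come up ... */
        raw_spin_unlock(&boot_lock);
        return 0;
}
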
647 diff -Nur linux-4.4.46.orig/arch/arm/mach-hisi/platmcpm.c linux-4.4.46/arch/arm/mach-hisi/platmcpm.c
648 --- linux-4.4.46.orig/arch/arm/mach-hisi/platmcpm.c 2017-02-01 08:31:11.000000000 +0100
649 +++ linux-4.4.46/arch/arm/mach-hisi/platmcpm.c 2017-02-03 17:18:05.627414322 +0100
652 static void __iomem *sysctrl, *fabric;
653 static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
654 -static DEFINE_SPINLOCK(boot_lock);
655 +static DEFINE_RAW_SPINLOCK(boot_lock);
656 static u32 fabric_phys_addr;
658 * [0]: bootwrapper physical address
660 if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
663 - spin_lock_irq(&boot_lock);
664 + raw_spin_lock_irq(&boot_lock);
666 if (hip04_cpu_table[cluster][cpu])
671 hip04_cpu_table[cluster][cpu]++;
672 - spin_unlock_irq(&boot_lock);
673 + raw_spin_unlock_irq(&boot_lock);
677 @@ -162,11 +162,11 @@
678 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
679 cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
681 - spin_lock(&boot_lock);
682 + raw_spin_lock(&boot_lock);
683 hip04_cpu_table[cluster][cpu]--;
684 if (hip04_cpu_table[cluster][cpu] == 1) {
685 /* A power_up request went ahead of us. */
686 - spin_unlock(&boot_lock);
687 + raw_spin_unlock(&boot_lock);
689 } else if (hip04_cpu_table[cluster][cpu] > 1) {
690 pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
694 last_man = hip04_cluster_is_down(cluster);
695 - spin_unlock(&boot_lock);
696 + raw_spin_unlock(&boot_lock);
698 /* Since it's Cortex A15, disable L2 prefetching. */
701 cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
703 count = TIMEOUT_MSEC / POLL_MSEC;
704 - spin_lock_irq(&boot_lock);
705 + raw_spin_lock_irq(&boot_lock);
706 for (tries = 0; tries < count; tries++) {
707 if (hip04_cpu_table[cluster][cpu])
709 @@ -211,10 +211,10 @@
710 data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
711 if (data & CORE_WFI_STATUS(cpu))
713 - spin_unlock_irq(&boot_lock);
714 + raw_spin_unlock_irq(&boot_lock);
715 /* Wait for clean L2 when the whole cluster is down. */
717 - spin_lock_irq(&boot_lock);
718 + raw_spin_lock_irq(&boot_lock);
722 @@ -231,10 +231,10 @@
724 if (hip04_cluster_is_down(cluster))
725 hip04_set_snoop_filter(cluster, 0);
726 - spin_unlock_irq(&boot_lock);
727 + raw_spin_unlock_irq(&boot_lock);
730 - spin_unlock_irq(&boot_lock);
731 + raw_spin_unlock_irq(&boot_lock);
735 diff -Nur linux-4.4.46.orig/arch/arm/mach-imx/Kconfig linux-4.4.46/arch/arm/mach-imx/Kconfig
736 --- linux-4.4.46.orig/arch/arm/mach-imx/Kconfig 2017-02-01 08:31:11.000000000 +0100
737 +++ linux-4.4.46/arch/arm/mach-imx/Kconfig 2017-02-03 17:18:05.627414322 +0100
739 bool "i.MX6 Quad/DualLite support"
740 select ARM_ERRATA_764369 if SMP
741 select HAVE_ARM_SCU if SMP
742 - select HAVE_ARM_TWD if SMP
743 + select HAVE_ARM_TWD
744 select PCI_DOMAINS if PCI
747 diff -Nur linux-4.4.46.orig/arch/arm/mach-omap2/omap-smp.c linux-4.4.46/arch/arm/mach-omap2/omap-smp.c
748 --- linux-4.4.46.orig/arch/arm/mach-omap2/omap-smp.c 2017-02-01 08:31:11.000000000 +0100
749 +++ linux-4.4.46/arch/arm/mach-omap2/omap-smp.c 2017-02-03 17:18:05.631414475 +0100
751 /* SCU base address */
752 static void __iomem *scu_base;
754 -static DEFINE_SPINLOCK(boot_lock);
755 +static DEFINE_RAW_SPINLOCK(boot_lock);
757 void __iomem *omap4_get_scu_base(void)
761 * Synchronise with the boot thread.
763 - spin_lock(&boot_lock);
764 - spin_unlock(&boot_lock);
765 + raw_spin_lock(&boot_lock);
766 + raw_spin_unlock(&boot_lock);
769 static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
771 * Set synchronisation state between this boot processor
772 * and the secondary one
774 - spin_lock(&boot_lock);
775 + raw_spin_lock(&boot_lock);
778 * Update the AuxCoreBoot0 with boot state for secondary core.
780 * Now the secondary core is starting up let it run its
781 * calibrations, then wait for it to finish
783 - spin_unlock(&boot_lock);
784 + raw_spin_unlock(&boot_lock);
788 diff -Nur linux-4.4.46.orig/arch/arm/mach-prima2/platsmp.c linux-4.4.46/arch/arm/mach-prima2/platsmp.c
789 --- linux-4.4.46.orig/arch/arm/mach-prima2/platsmp.c 2017-02-01 08:31:11.000000000 +0100
790 +++ linux-4.4.46/arch/arm/mach-prima2/platsmp.c 2017-02-03 17:18:05.631414475 +0100
793 static void __iomem *clk_base;
795 -static DEFINE_SPINLOCK(boot_lock);
796 +static DEFINE_RAW_SPINLOCK(boot_lock);
798 static void sirfsoc_secondary_init(unsigned int cpu)
802 * Synchronise with the boot thread.
804 - spin_lock(&boot_lock);
805 - spin_unlock(&boot_lock);
806 + raw_spin_lock(&boot_lock);
807 + raw_spin_unlock(&boot_lock);
810 static const struct of_device_id clk_ids[] = {
812 /* make sure write buffer is drained */
815 - spin_lock(&boot_lock);
816 + raw_spin_lock(&boot_lock);
819 * The secondary processor is waiting to be released from
821 * now the secondary core is starting up let it run its
822 * calibrations, then wait for it to finish
824 - spin_unlock(&boot_lock);
825 + raw_spin_unlock(&boot_lock);
827 return pen_release != -1 ? -ENOSYS : 0;
829 diff -Nur linux-4.4.46.orig/arch/arm/mach-qcom/platsmp.c linux-4.4.46/arch/arm/mach-qcom/platsmp.c
830 --- linux-4.4.46.orig/arch/arm/mach-qcom/platsmp.c 2017-02-01 08:31:11.000000000 +0100
831 +++ linux-4.4.46/arch/arm/mach-qcom/platsmp.c 2017-02-03 17:18:05.631414475 +0100
834 extern void secondary_startup_arm(void);
836 -static DEFINE_SPINLOCK(boot_lock);
837 +static DEFINE_RAW_SPINLOCK(boot_lock);
839 #ifdef CONFIG_HOTPLUG_CPU
840 static void qcom_cpu_die(unsigned int cpu)
843 * Synchronise with the boot thread.
845 - spin_lock(&boot_lock);
846 - spin_unlock(&boot_lock);
847 + raw_spin_lock(&boot_lock);
848 + raw_spin_unlock(&boot_lock);
851 static int scss_release_secondary(unsigned int cpu)
853 * set synchronisation state between this boot processor
854 * and the secondary one
856 - spin_lock(&boot_lock);
857 + raw_spin_lock(&boot_lock);
860 * Send the secondary CPU a soft interrupt, thereby causing
862 * now the secondary core is starting up let it run its
863 * calibrations, then wait for it to finish
865 - spin_unlock(&boot_lock);
866 + raw_spin_unlock(&boot_lock);
870 diff -Nur linux-4.4.46.orig/arch/arm/mach-spear/platsmp.c linux-4.4.46/arch/arm/mach-spear/platsmp.c
871 --- linux-4.4.46.orig/arch/arm/mach-spear/platsmp.c 2017-02-01 08:31:11.000000000 +0100
872 +++ linux-4.4.46/arch/arm/mach-spear/platsmp.c 2017-02-03 17:18:05.631414475 +0100
874 sync_cache_w(&pen_release);
877 -static DEFINE_SPINLOCK(boot_lock);
878 +static DEFINE_RAW_SPINLOCK(boot_lock);
880 static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
884 * Synchronise with the boot thread.
886 - spin_lock(&boot_lock);
887 - spin_unlock(&boot_lock);
888 + raw_spin_lock(&boot_lock);
889 + raw_spin_unlock(&boot_lock);
892 static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
894 * set synchronisation state between this boot processor
895 * and the secondary one
897 - spin_lock(&boot_lock);
898 + raw_spin_lock(&boot_lock);
901 * The secondary processor is waiting to be released from
903 * now the secondary core is starting up let it run its
904 * calibrations, then wait for it to finish
906 - spin_unlock(&boot_lock);
907 + raw_spin_unlock(&boot_lock);
909 return pen_release != -1 ? -ENOSYS : 0;
911 diff -Nur linux-4.4.46.orig/arch/arm/mach-sti/platsmp.c linux-4.4.46/arch/arm/mach-sti/platsmp.c
912 --- linux-4.4.46.orig/arch/arm/mach-sti/platsmp.c 2017-02-01 08:31:11.000000000 +0100
913 +++ linux-4.4.46/arch/arm/mach-sti/platsmp.c 2017-02-03 17:18:05.631414475 +0100
915 sync_cache_w(&pen_release);
918 -static DEFINE_SPINLOCK(boot_lock);
919 +static DEFINE_RAW_SPINLOCK(boot_lock);
921 static void sti_secondary_init(unsigned int cpu)
925 * Synchronise with the boot thread.
927 - spin_lock(&boot_lock);
928 - spin_unlock(&boot_lock);
929 + raw_spin_lock(&boot_lock);
930 + raw_spin_unlock(&boot_lock);
933 static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
935 * set synchronisation state between this boot processor
936 * and the secondary one
938 - spin_lock(&boot_lock);
939 + raw_spin_lock(&boot_lock);
942 * The secondary processor is waiting to be released from
944 * now the secondary core is starting up let it run its
945 * calibrations, then wait for it to finish
947 - spin_unlock(&boot_lock);
948 + raw_spin_unlock(&boot_lock);
950 return pen_release != -1 ? -ENOSYS : 0;
952 diff -Nur linux-4.4.46.orig/arch/arm/mm/fault.c linux-4.4.46/arch/arm/mm/fault.c
953 --- linux-4.4.46.orig/arch/arm/mm/fault.c 2017-02-01 08:31:11.000000000 +0100
954 +++ linux-4.4.46/arch/arm/mm/fault.c 2017-02-03 17:18:05.631414475 +0100
956 if (addr < TASK_SIZE)
957 return do_page_fault(addr, fsr, regs);
959 + if (interrupts_enabled(regs))
960 + local_irq_enable();
967 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
969 + if (interrupts_enabled(regs))
970 + local_irq_enable();
972 do_bad_area(addr, fsr, regs);
975 diff -Nur linux-4.4.46.orig/arch/arm/mm/highmem.c linux-4.4.46/arch/arm/mm/highmem.c
976 --- linux-4.4.46.orig/arch/arm/mm/highmem.c 2017-02-01 08:31:11.000000000 +0100
977 +++ linux-4.4.46/arch/arm/mm/highmem.c 2017-02-03 17:18:05.631414475 +0100
982 +static unsigned int fixmap_idx(int type)
984 + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
987 void *kmap(struct page *page)
992 void *kmap_atomic(struct page *page)
994 + pte_t pte = mk_pte(page, kmap_prot);
1000 - preempt_disable();
1001 + preempt_disable_nort();
1002 pagefault_disable();
1003 if (!PageHighMem(page))
1004 return page_address(page);
1007 type = kmap_atomic_idx_push();
1009 - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
1010 + idx = fixmap_idx(type);
1011 vaddr = __fix_to_virt(idx);
1012 #ifdef CONFIG_DEBUG_HIGHMEM
1015 * in place, so the contained TLB flush ensures the TLB is updated
1016 * with the new mapping.
1018 - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
1019 +#ifdef CONFIG_PREEMPT_RT_FULL
1020 + current->kmap_pte[type] = pte;
1022 + set_fixmap_pte(idx, pte);
1024 return (void *)vaddr;
1026 @@ -106,44 +115,75 @@
1028 if (kvaddr >= (void *)FIXADDR_START) {
1029 type = kmap_atomic_idx();
1030 - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
1031 + idx = fixmap_idx(type);
1033 if (cache_is_vivt())
1034 __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
1035 +#ifdef CONFIG_PREEMPT_RT_FULL
1036 + current->kmap_pte[type] = __pte(0);
1038 #ifdef CONFIG_DEBUG_HIGHMEM
1039 BUG_ON(vaddr != __fix_to_virt(idx));
1040 - set_fixmap_pte(idx, __pte(0));
1042 (void) idx; /* to kill a warning */
1044 + set_fixmap_pte(idx, __pte(0));
1045 kmap_atomic_idx_pop();
1046 } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
1047 /* this address was obtained through kmap_high_get() */
1048 kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
1052 + preempt_enable_nort();
1054 EXPORT_SYMBOL(__kunmap_atomic);
1056 void *kmap_atomic_pfn(unsigned long pfn)
1058 + pte_t pte = pfn_pte(pfn, kmap_prot);
1059 unsigned long vaddr;
1061 struct page *page = pfn_to_page(pfn);
1063 - preempt_disable();
1064 + preempt_disable_nort();
1065 pagefault_disable();
1066 if (!PageHighMem(page))
1067 return page_address(page);
1069 type = kmap_atomic_idx_push();
1070 - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
1071 + idx = fixmap_idx(type);
1072 vaddr = __fix_to_virt(idx);
1073 #ifdef CONFIG_DEBUG_HIGHMEM
1074 BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
1076 - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
1077 +#ifdef CONFIG_PREEMPT_RT_FULL
1078 + current->kmap_pte[type] = pte;
1080 + set_fixmap_pte(idx, pte);
1082 return (void *)vaddr;
1084 +#if defined CONFIG_PREEMPT_RT_FULL
1085 +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
1090 + * Clear @prev's kmap_atomic mappings
1092 + for (i = 0; i < prev_p->kmap_idx; i++) {
1093 + int idx = fixmap_idx(i);
1095 + set_fixmap_pte(idx, __pte(0));
1098 + * Restore @next_p's kmap_atomic mappings
1100 + for (i = 0; i < next_p->kmap_idx; i++) {
1101 + int idx = fixmap_idx(i);
1103 + if (!pte_none(next_p->kmap_pte[i]))
1104 + set_fixmap_pte(idx, next_p->kmap_pte[i]);
1108 diff -Nur linux-4.4.46.orig/arch/arm/plat-versatile/platsmp.c linux-4.4.46/arch/arm/plat-versatile/platsmp.c
1109 --- linux-4.4.46.orig/arch/arm/plat-versatile/platsmp.c 2017-02-01 08:31:11.000000000 +0100
1110 +++ linux-4.4.46/arch/arm/plat-versatile/platsmp.c 2017-02-03 17:18:05.631414475 +0100
1112 sync_cache_w(&pen_release);
1115 -static DEFINE_SPINLOCK(boot_lock);
1116 +static DEFINE_RAW_SPINLOCK(boot_lock);
1118 void versatile_secondary_init(unsigned int cpu)
1122 * Synchronise with the boot thread.
1124 - spin_lock(&boot_lock);
1125 - spin_unlock(&boot_lock);
1126 + raw_spin_lock(&boot_lock);
1127 + raw_spin_unlock(&boot_lock);
1130 int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
1132 * Set synchronisation state between this boot processor
1133 * and the secondary one
1135 - spin_lock(&boot_lock);
1136 + raw_spin_lock(&boot_lock);
1139 * This is really belt and braces; we hold unintended secondary
1141 * now the secondary core is starting up let it run its
1142 * calibrations, then wait for it to finish
1144 - spin_unlock(&boot_lock);
1145 + raw_spin_unlock(&boot_lock);
1147 return pen_release != -1 ? -ENOSYS : 0;
1149 diff -Nur linux-4.4.46.orig/arch/arm64/include/asm/thread_info.h linux-4.4.46/arch/arm64/include/asm/thread_info.h
1150 --- linux-4.4.46.orig/arch/arm64/include/asm/thread_info.h 2017-02-01 08:31:11.000000000 +0100
1151 +++ linux-4.4.46/arch/arm64/include/asm/thread_info.h 2017-02-03 17:18:05.631414475 +0100
1153 mm_segment_t addr_limit; /* address limit */
1154 struct task_struct *task; /* main task structure */
1155 int preempt_count; /* 0 => preemptable, <0 => bug */
1156 + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
1161 #define TIF_NEED_RESCHED 1
1162 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
1163 #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
1164 +#define TIF_NEED_RESCHED_LAZY 4
1166 #define TIF_SYSCALL_TRACE 8
1167 #define TIF_SYSCALL_AUDIT 9
1169 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
1170 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
1171 #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
1172 +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
1173 #define _TIF_NOHZ (1 << TIF_NOHZ)
1174 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
1175 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
1177 #define _TIF_32BIT (1 << TIF_32BIT)
1179 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
1180 - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
1181 + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
1182 + _TIF_NEED_RESCHED_LAZY)
1184 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
1185 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
1186 diff -Nur linux-4.4.46.orig/arch/arm64/Kconfig linux-4.4.46/arch/arm64/Kconfig
1187 --- linux-4.4.46.orig/arch/arm64/Kconfig 2017-02-01 08:31:11.000000000 +0100
1188 +++ linux-4.4.46/arch/arm64/Kconfig 2017-02-03 17:18:05.631414475 +0100
1190 select HAVE_PERF_REGS
1191 select HAVE_PERF_USER_STACK_DUMP
1192 select HAVE_RCU_TABLE_FREE
1193 + select HAVE_PREEMPT_LAZY
1194 select HAVE_SYSCALL_TRACEPOINTS
1195 select IOMMU_DMA if IOMMU_SUPPORT
1200 bool "Xen guest support on ARM64"
1201 - depends on ARM64 && OF
1202 + depends on ARM64 && OF && !PREEMPT_RT_FULL
1205 Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
1206 diff -Nur linux-4.4.46.orig/arch/arm64/kernel/asm-offsets.c linux-4.4.46/arch/arm64/kernel/asm-offsets.c
1207 --- linux-4.4.46.orig/arch/arm64/kernel/asm-offsets.c 2017-02-01 08:31:11.000000000 +0100
1208 +++ linux-4.4.46/arch/arm64/kernel/asm-offsets.c 2017-02-03 17:18:05.639414785 +0100
1211 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
1212 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
1213 + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
1214 DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
1215 DEFINE(TI_TASK, offsetof(struct thread_info, task));
1216 DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
1217 diff -Nur linux-4.4.46.orig/arch/arm64/kernel/entry.S linux-4.4.46/arch/arm64/kernel/entry.S
1218 --- linux-4.4.46.orig/arch/arm64/kernel/entry.S 2017-02-01 08:31:11.000000000 +0100
1219 +++ linux-4.4.46/arch/arm64/kernel/entry.S 2017-02-03 17:18:05.639414785 +0100
1220 @@ -376,11 +376,16 @@
1221 #ifdef CONFIG_PREEMPT
1223 ldr w24, [tsk, #TI_PREEMPT] // get preempt count
1224 - cbnz w24, 1f // preempt count != 0
1225 + cbnz w24, 2f // preempt count != 0
1226 ldr x0, [tsk, #TI_FLAGS] // get flags
1227 - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
1229 + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
1231 + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
1232 + cbnz w24, 2f // preempt lazy count != 0
1233 + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
1238 #ifdef CONFIG_TRACE_IRQFLAGS
1239 bl trace_hardirqs_on
1241 1: bl preempt_schedule_irq // irq en/disable is done inside
1242 ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
1243 tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
1244 + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
1251 tbnz x1, #TIF_NEED_RESCHED, work_resched
1252 + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
1253 /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
1254 ldr x2, [sp, #S_PSTATE]
1255 mov x0, sp // 'regs'
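The arm64 entry changes mirror the 32-bit ARM hunk earlier: on return from an interrupt into the kernel, preempt immediately if TIF_NEED_RESCHED is set, and preempt on TIF_NEED_RESCHED_LAZY only when the task's preempt_lazy_count is zero. The same decision, written as a hedged C sketch (the helper is invented for illustration; the real logic is the assembly above):

/* illustrative only; mirrors the assembly in the hunk above */
static bool should_preempt_on_irq_exit(struct thread_info *ti)
{
        if (ti->preempt_count != 0)
                return false;                   /* preemption disabled */
        if (ti->flags & _TIF_NEED_RESCHED)
                return true;                    /* ordinary resched request */
        if (ti->preempt_lazy_count != 0)
                return false;                   /* lazy preemption held off */
        return !!(ti->flags & _TIF_NEED_RESCHED_LAZY);
}
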
1256 diff -Nur linux-4.4.46.orig/arch/Kconfig linux-4.4.46/arch/Kconfig
1257 --- linux-4.4.46.orig/arch/Kconfig 2017-02-01 08:31:11.000000000 +0100
1258 +++ linux-4.4.46/arch/Kconfig 2017-02-03 17:18:05.627414322 +0100
1260 tristate "OProfile system profiling"
1261 depends on PROFILING
1262 depends on HAVE_OPROFILE
1263 + depends on !PREEMPT_RT_FULL
1265 select RING_BUFFER_ALLOW_SWAP
1269 bool "Optimize very unlikely/likely branches"
1270 depends on HAVE_ARCH_JUMP_LABEL
1271 + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
1273 This option enables a transparent branch optimization that
1274 makes certain almost-always-true or almost-always-false branch
1275 diff -Nur linux-4.4.46.orig/arch/mips/Kconfig linux-4.4.46/arch/mips/Kconfig
1276 --- linux-4.4.46.orig/arch/mips/Kconfig 2017-02-01 08:31:11.000000000 +0100
1277 +++ linux-4.4.46/arch/mips/Kconfig 2017-02-03 17:18:05.639414785 +0100
1278 @@ -2410,7 +2410,7 @@
1281 bool "High Memory Support"
1282 - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
1283 + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
1285 config CPU_SUPPORTS_HIGHMEM
1287 diff -Nur linux-4.4.46.orig/arch/mips/kvm/mips.c linux-4.4.46/arch/mips/kvm/mips.c
1288 --- linux-4.4.46.orig/arch/mips/kvm/mips.c 2017-02-01 08:31:11.000000000 +0100
1289 +++ linux-4.4.46/arch/mips/kvm/mips.c 2017-02-03 17:18:05.639414785 +0100
1292 dvcpu->arch.wait = 0;
1294 - if (waitqueue_active(&dvcpu->wq))
1295 - wake_up_interruptible(&dvcpu->wq);
1296 + if (swait_active(&dvcpu->wq))
1297 + swake_up(&dvcpu->wq);
1301 @@ -1183,8 +1183,8 @@
1302 kvm_mips_callbacks->queue_timer_int(vcpu);
1304 vcpu->arch.wait = 0;
1305 - if (waitqueue_active(&vcpu->wq))
1306 - wake_up_interruptible(&vcpu->wq);
1307 + if (swait_active(&vcpu->wq))
1308 + swake_up(&vcpu->wq);
1311 /* low level hrtimer wake routine */
1312 diff -Nur linux-4.4.46.orig/arch/powerpc/include/asm/kvm_host.h linux-4.4.46/arch/powerpc/include/asm/kvm_host.h
1313 --- linux-4.4.46.orig/arch/powerpc/include/asm/kvm_host.h 2017-02-01 08:31:11.000000000 +0100
1314 +++ linux-4.4.46/arch/powerpc/include/asm/kvm_host.h 2017-02-03 17:18:05.639414785 +0100
1316 struct list_head runnable_threads;
1317 struct list_head preempt_list;
1319 - wait_queue_head_t wq;
1320 + struct swait_queue_head wq;
1321 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
1328 - wait_queue_head_t *wqp;
1329 + struct swait_queue_head *wqp;
1330 struct kvmppc_vcore *vcore;
1333 diff -Nur linux-4.4.46.orig/arch/powerpc/include/asm/thread_info.h linux-4.4.46/arch/powerpc/include/asm/thread_info.h
1334 --- linux-4.4.46.orig/arch/powerpc/include/asm/thread_info.h 2017-02-01 08:31:11.000000000 +0100
1335 +++ linux-4.4.46/arch/powerpc/include/asm/thread_info.h 2017-02-03 17:18:05.639414785 +0100
1337 int cpu; /* cpu we're on */
1338 int preempt_count; /* 0 => preemptable,
1340 + int preempt_lazy_count; /* 0 => preemptable,
1342 unsigned long local_flags; /* private flags for thread */
1344 /* low level flags - has atomic operations done on it */
1346 #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
1347 #define TIF_SIGPENDING 1 /* signal pending */
1348 #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
1349 -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
1350 - TIF_NEED_RESCHED */
1351 +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
1352 #define TIF_32BIT 4 /* 32 bit binary */
1353 #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
1354 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
1356 #if defined(CONFIG_PPC64)
1357 #define TIF_ELF2ABI 18 /* function descriptors must die! */
1359 +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
1360 + TIF_NEED_RESCHED */
1362 /* as above, but as bit values */
1363 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1364 @@ -119,14 +122,16 @@
1365 #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
1366 #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
1367 #define _TIF_NOHZ (1<<TIF_NOHZ)
1368 +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
1369 #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
1370 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
1373 #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
1374 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
1376 + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
1377 #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
1378 +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
1380 /* Bits in local_flags */
1381 /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
1382 diff -Nur linux-4.4.46.orig/arch/powerpc/Kconfig linux-4.4.46/arch/powerpc/Kconfig
1383 --- linux-4.4.46.orig/arch/powerpc/Kconfig 2017-02-01 08:31:11.000000000 +0100
1384 +++ linux-4.4.46/arch/powerpc/Kconfig 2017-02-03 17:18:05.639414785 +0100
1387 config RWSEM_GENERIC_SPINLOCK
1389 + default y if PREEMPT_RT_FULL
1391 config RWSEM_XCHGADD_ALGORITHM
1394 + default y if !PREEMPT_RT_FULL
1396 config GENERIC_LOCKBREAK
1399 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
1400 select GENERIC_STRNCPY_FROM_USER
1401 select GENERIC_STRNLEN_USER
1402 + select HAVE_PREEMPT_LAZY
1403 select HAVE_MOD_ARCH_SPECIFIC
1404 select MODULES_USE_ELF_RELA
1405 select CLONE_BACKWARDS
1409 bool "High memory support"
1411 + depends on PPC32 && !PREEMPT_RT_FULL
1413 source kernel/Kconfig.hz
1414 source kernel/Kconfig.preempt
1415 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/asm-offsets.c linux-4.4.46/arch/powerpc/kernel/asm-offsets.c
1416 --- linux-4.4.46.orig/arch/powerpc/kernel/asm-offsets.c 2017-02-01 08:31:11.000000000 +0100
1417 +++ linux-4.4.46/arch/powerpc/kernel/asm-offsets.c 2017-02-03 17:18:05.643414939 +0100
1419 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
1420 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
1421 DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
1422 + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
1423 DEFINE(TI_TASK, offsetof(struct thread_info, task));
1424 DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
1426 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/entry_32.S linux-4.4.46/arch/powerpc/kernel/entry_32.S
1427 --- linux-4.4.46.orig/arch/powerpc/kernel/entry_32.S 2017-02-01 08:31:11.000000000 +0100
1428 +++ linux-4.4.46/arch/powerpc/kernel/entry_32.S 2017-02-03 17:18:05.643414939 +0100
1429 @@ -818,7 +818,14 @@
1430 cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
1432 andi. r8,r8,_TIF_NEED_RESCHED
1434 + lwz r0,TI_PREEMPT_LAZY(r9)
1435 + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
1437 + lwz r0,TI_FLAGS(r9)
1438 + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
1442 andi. r0,r3,MSR_EE /* interrupts off? */
1443 beq restore /* don't schedule if so */
1444 @@ -829,11 +836,11 @@
1446 bl trace_hardirqs_off
1448 -1: bl preempt_schedule_irq
1449 +2: bl preempt_schedule_irq
1450 CURRENT_THREAD_INFO(r9, r1)
1452 - andi. r0,r3,_TIF_NEED_RESCHED
1454 + andi. r0,r3,_TIF_NEED_RESCHED_MASK
1456 #ifdef CONFIG_TRACE_IRQFLAGS
1457 /* And now, to properly rebalance the above, we tell lockdep they
1458 * are being turned back on, which will happen when we return
1459 @@ -1154,7 +1161,7 @@
1460 #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
1462 do_work: /* r10 contains MSR_KERNEL here */
1463 - andi. r0,r9,_TIF_NEED_RESCHED
1464 + andi. r0,r9,_TIF_NEED_RESCHED_MASK
1467 do_resched: /* r10 contains MSR_KERNEL here */
1468 @@ -1175,7 +1182,7 @@
1469 MTMSRD(r10) /* disable interrupts */
1470 CURRENT_THREAD_INFO(r9, r1)
1472 - andi. r0,r9,_TIF_NEED_RESCHED
1473 + andi. r0,r9,_TIF_NEED_RESCHED_MASK
1475 andi. r0,r9,_TIF_USER_WORK_MASK
1477 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/entry_64.S linux-4.4.46/arch/powerpc/kernel/entry_64.S
1478 --- linux-4.4.46.orig/arch/powerpc/kernel/entry_64.S 2017-02-01 08:31:11.000000000 +0100
1479 +++ linux-4.4.46/arch/powerpc/kernel/entry_64.S 2017-02-03 17:18:05.643414939 +0100
1484 -1: andi. r0,r4,_TIF_NEED_RESCHED
1485 +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
1487 bl restore_interrupts
1489 @@ -745,10 +745,18 @@
1491 #ifdef CONFIG_PREEMPT
1492 /* Check if we need to preempt */
1493 + lwz r8,TI_PREEMPT(r9)
1494 + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
1496 andi. r0,r4,_TIF_NEED_RESCHED
1499 + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
1501 + lwz r8,TI_PREEMPT_LAZY(r9)
1503 /* Check that preempt_count() == 0 and interrupts are enabled */
1504 - lwz r8,TI_PREEMPT(r9)
1510 /* Re-test flags and eventually loop */
1511 CURRENT_THREAD_INFO(r9, r1)
1513 - andi. r0,r4,_TIF_NEED_RESCHED
1514 + andi. r0,r4,_TIF_NEED_RESCHED_MASK
1518 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/irq.c linux-4.4.46/arch/powerpc/kernel/irq.c
1519 --- linux-4.4.46.orig/arch/powerpc/kernel/irq.c 2017-02-01 08:31:11.000000000 +0100
1520 +++ linux-4.4.46/arch/powerpc/kernel/irq.c 2017-02-03 17:18:05.643414939 +0100
1525 +#ifndef CONFIG_PREEMPT_RT_FULL
1526 void do_softirq_own_stack(void)
1528 struct thread_info *curtp, *irqtp;
1531 set_bits(irqtp->flags, &curtp->flags);
1535 irq_hw_number_t virq_to_hw(unsigned int virq)
1537 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/misc_32.S linux-4.4.46/arch/powerpc/kernel/misc_32.S
1538 --- linux-4.4.46.orig/arch/powerpc/kernel/misc_32.S 2017-02-01 08:31:11.000000000 +0100
1539 +++ linux-4.4.46/arch/powerpc/kernel/misc_32.S 2017-02-03 17:18:05.643414939 +0100
1541 * We store the saved ksp_limit in the unused part
1542 * of the STACK_FRAME_OVERHEAD
1544 +#ifndef CONFIG_PREEMPT_RT_FULL
1545 _GLOBAL(call_do_softirq)
1549 stw r10,THREAD+KSP_LIMIT(r2)
1555 * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
1556 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/misc_64.S linux-4.4.46/arch/powerpc/kernel/misc_64.S
1557 --- linux-4.4.46.orig/arch/powerpc/kernel/misc_64.S 2017-02-01 08:31:11.000000000 +0100
1558 +++ linux-4.4.46/arch/powerpc/kernel/misc_64.S 2017-02-03 17:18:05.643414939 +0100
1563 +#ifndef CONFIG_PREEMPT_RT_FULL
1564 _GLOBAL(call_do_softirq)
1573 _GLOBAL(call_do_irq)
1575 diff -Nur linux-4.4.46.orig/arch/powerpc/kvm/book3s_hv.c linux-4.4.46/arch/powerpc/kvm/book3s_hv.c
1576 --- linux-4.4.46.orig/arch/powerpc/kvm/book3s_hv.c 2017-02-01 08:31:11.000000000 +0100
1577 +++ linux-4.4.46/arch/powerpc/kvm/book3s_hv.c 2017-02-03 17:18:05.643414939 +0100
1578 @@ -114,11 +114,11 @@
1579 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
1582 - wait_queue_head_t *wqp;
1583 + struct swait_queue_head *wqp;
1585 wqp = kvm_arch_vcpu_wq(vcpu);
1586 - if (waitqueue_active(wqp)) {
1587 - wake_up_interruptible(wqp);
1588 + if (swait_active(wqp)) {
1590 ++vcpu->stat.halt_wakeup;
1594 tvcpu->arch.prodded = 1;
1596 if (vcpu->arch.ceded) {
1597 - if (waitqueue_active(&vcpu->wq)) {
1598 - wake_up_interruptible(&vcpu->wq);
1599 + if (swait_active(&vcpu->wq)) {
1600 + swake_up(&vcpu->wq);
1601 vcpu->stat.halt_wakeup++;
1604 @@ -1453,7 +1453,7 @@
1605 INIT_LIST_HEAD(&vcore->runnable_threads);
1606 spin_lock_init(&vcore->lock);
1607 spin_lock_init(&vcore->stoltb_lock);
1608 - init_waitqueue_head(&vcore->wq);
1609 + init_swait_queue_head(&vcore->wq);
1610 vcore->preempt_tb = TB_NIL;
1611 vcore->lpcr = kvm->arch.lpcr;
1612 vcore->first_vcpuid = core * threads_per_subcore;
1613 @@ -2525,10 +2525,9 @@
1615 struct kvm_vcpu *vcpu;
1617 + DECLARE_SWAITQUEUE(wait);
1619 - DEFINE_WAIT(wait);
1621 - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
1622 + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
1625 * Check one last time for pending exceptions and ceded state after
1626 @@ -2542,7 +2541,7 @@
1630 - finish_wait(&vc->wq, &wait);
1631 + finish_swait(&vc->wq, &wait);
1635 @@ -2550,7 +2549,7 @@
1636 trace_kvmppc_vcore_blocked(vc, 0);
1637 spin_unlock(&vc->lock);
1639 - finish_wait(&vc->wq, &wait);
1640 + finish_swait(&vc->wq, &wait);
1641 spin_lock(&vc->lock);
1642 vc->vcore_state = VCORE_INACTIVE;
1643 trace_kvmppc_vcore_blocked(vc, 1);
1644 @@ -2606,7 +2605,7 @@
1645 kvmppc_start_thread(vcpu, vc);
1646 trace_kvm_guest_enter(vcpu);
1647 } else if (vc->vcore_state == VCORE_SLEEPING) {
1649 + swake_up(&vc->wq);
1653 diff -Nur linux-4.4.46.orig/arch/powerpc/kvm/Kconfig linux-4.4.46/arch/powerpc/kvm/Kconfig
1654 --- linux-4.4.46.orig/arch/powerpc/kvm/Kconfig 2017-02-01 08:31:11.000000000 +0100
1655 +++ linux-4.4.46/arch/powerpc/kvm/Kconfig 2017-02-03 17:18:05.643414939 +0100
1658 bool "KVM in-kernel MPIC emulation"
1659 depends on KVM && E500
1660 + depends on !PREEMPT_RT_FULL
1661 select HAVE_KVM_IRQCHIP
1662 select HAVE_KVM_IRQFD
1663 select HAVE_KVM_IRQ_ROUTING
1664 diff -Nur linux-4.4.46.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.4.46/arch/powerpc/platforms/ps3/device-init.c
1665 --- linux-4.4.46.orig/arch/powerpc/platforms/ps3/device-init.c 2017-02-01 08:31:11.000000000 +0100
1666 +++ linux-4.4.46/arch/powerpc/platforms/ps3/device-init.c 2017-02-03 17:18:05.643414939 +0100
1669 pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
1671 - res = wait_event_interruptible(dev->done.wait,
1672 + res = swait_event_interruptible(dev->done.wait,
1673 dev->done.done || kthread_should_stop());
1674 if (kthread_should_stop())
1676 diff -Nur linux-4.4.46.orig/arch/s390/include/asm/kvm_host.h linux-4.4.46/arch/s390/include/asm/kvm_host.h
1677 --- linux-4.4.46.orig/arch/s390/include/asm/kvm_host.h 2017-02-01 08:31:11.000000000 +0100
1678 +++ linux-4.4.46/arch/s390/include/asm/kvm_host.h 2017-02-03 17:18:05.643414939 +0100
1680 struct kvm_s390_local_interrupt {
1682 struct kvm_s390_float_interrupt *float_int;
1683 - wait_queue_head_t *wq;
1684 + struct swait_queue_head *wq;
1686 DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
1687 struct kvm_s390_irq_payload irq;
1688 diff -Nur linux-4.4.46.orig/arch/s390/kvm/interrupt.c linux-4.4.46/arch/s390/kvm/interrupt.c
1689 --- linux-4.4.46.orig/arch/s390/kvm/interrupt.c 2017-02-01 08:31:11.000000000 +0100
1690 +++ linux-4.4.46/arch/s390/kvm/interrupt.c 2017-02-03 17:18:05.643414939 +0100
1691 @@ -868,13 +868,13 @@
1693 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
1695 - if (waitqueue_active(&vcpu->wq)) {
1696 + if (swait_active(&vcpu->wq)) {
1698 * The vcpu gave up the cpu voluntarily, mark it as a good
1701 vcpu->preempted = true;
1702 - wake_up_interruptible(&vcpu->wq);
1703 + swake_up(&vcpu->wq);
1704 vcpu->stat.halt_wakeup++;
1707 diff -Nur linux-4.4.46.orig/arch/sh/kernel/irq.c linux-4.4.46/arch/sh/kernel/irq.c
1708 --- linux-4.4.46.orig/arch/sh/kernel/irq.c 2017-02-01 08:31:11.000000000 +0100
1709 +++ linux-4.4.46/arch/sh/kernel/irq.c 2017-02-03 17:18:05.643414939 +0100
1711 hardirq_ctx[cpu] = NULL;
1714 +#ifndef CONFIG_PREEMPT_RT_FULL
1715 void do_softirq_own_stack(void)
1717 struct thread_info *curctx;
1719 "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
1724 static inline void handle_one_irq(unsigned int irq)
1726 diff -Nur linux-4.4.46.orig/arch/sparc/Kconfig linux-4.4.46/arch/sparc/Kconfig
1727 --- linux-4.4.46.orig/arch/sparc/Kconfig 2017-02-01 08:31:11.000000000 +0100
1728 +++ linux-4.4.46/arch/sparc/Kconfig 2017-02-03 17:18:05.643414939 +0100
1729 @@ -189,12 +189,10 @@
1730 source kernel/Kconfig.hz
1732 config RWSEM_GENERIC_SPINLOCK
1734 - default y if SPARC32
1735 + def_bool PREEMPT_RT_FULL
1737 config RWSEM_XCHGADD_ALGORITHM
1739 - default y if SPARC64
1740 + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
1742 config GENERIC_HWEIGHT
1744 diff -Nur linux-4.4.46.orig/arch/sparc/kernel/irq_64.c linux-4.4.46/arch/sparc/kernel/irq_64.c
1745 --- linux-4.4.46.orig/arch/sparc/kernel/irq_64.c 2017-02-01 08:31:11.000000000 +0100
1746 +++ linux-4.4.46/arch/sparc/kernel/irq_64.c 2017-02-03 17:18:05.643414939 +0100
1748 set_irq_regs(old_regs);
1751 +#ifndef CONFIG_PREEMPT_RT_FULL
1752 void do_softirq_own_stack(void)
1754 void *orig_sp, *sp = softirq_stack[smp_processor_id()];
1756 __asm__ __volatile__("mov %0, %%sp"
1761 #ifdef CONFIG_HOTPLUG_CPU
1762 void fixup_irqs(void)
1763 diff -Nur linux-4.4.46.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.4.46/arch/x86/crypto/aesni-intel_glue.c
1764 --- linux-4.4.46.orig/arch/x86/crypto/aesni-intel_glue.c 2017-02-01 08:31:11.000000000 +0100
1765 +++ linux-4.4.46/arch/x86/crypto/aesni-intel_glue.c 2017-02-03 17:18:05.643414939 +0100
1766 @@ -383,14 +383,14 @@
1767 err = blkcipher_walk_virt(desc, &walk);
1768 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1770 - kernel_fpu_begin();
1771 while ((nbytes = walk.nbytes)) {
1772 + kernel_fpu_begin();
1773 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1774 - nbytes & AES_BLOCK_MASK);
1775 + nbytes & AES_BLOCK_MASK);
1777 nbytes &= AES_BLOCK_SIZE - 1;
1778 err = blkcipher_walk_done(desc, &walk, nbytes);
1784 @@ -407,14 +407,14 @@
1785 err = blkcipher_walk_virt(desc, &walk);
1786 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1788 - kernel_fpu_begin();
1789 while ((nbytes = walk.nbytes)) {
1790 + kernel_fpu_begin();
1791 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1792 nbytes & AES_BLOCK_MASK);
1794 nbytes &= AES_BLOCK_SIZE - 1;
1795 err = blkcipher_walk_done(desc, &walk, nbytes);
1801 @@ -431,14 +431,14 @@
1802 err = blkcipher_walk_virt(desc, &walk);
1803 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1805 - kernel_fpu_begin();
1806 while ((nbytes = walk.nbytes)) {
1807 + kernel_fpu_begin();
1808 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1809 nbytes & AES_BLOCK_MASK, walk.iv);
1811 nbytes &= AES_BLOCK_SIZE - 1;
1812 err = blkcipher_walk_done(desc, &walk, nbytes);
1818 @@ -455,14 +455,14 @@
1819 err = blkcipher_walk_virt(desc, &walk);
1820 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1822 - kernel_fpu_begin();
1823 while ((nbytes = walk.nbytes)) {
1824 + kernel_fpu_begin();
1825 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1826 nbytes & AES_BLOCK_MASK, walk.iv);
1828 nbytes &= AES_BLOCK_SIZE - 1;
1829 err = blkcipher_walk_done(desc, &walk, nbytes);
1835 @@ -514,18 +514,20 @@
1836 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
1837 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1839 - kernel_fpu_begin();
1840 while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
1841 + kernel_fpu_begin();
1842 aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1843 nbytes & AES_BLOCK_MASK, walk.iv);
1845 nbytes &= AES_BLOCK_SIZE - 1;
1846 err = blkcipher_walk_done(desc, &walk, nbytes);
1849 + kernel_fpu_begin();
1850 ctr_crypt_final(ctx, &walk);
1852 err = blkcipher_walk_done(desc, &walk, 0);
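The x86 crypto hunks (aesni here, cast5 and glue_helper below) all shrink the FPU critical section: kernel_fpu_begin()/kernel_fpu_end() — or the fpu_enabled wrappers — move from around the whole blkcipher walk into each loop iteration, so the non-preemptible region covers one walk chunk rather than the entire request. A hedged before/after sketch of the loop shape; encrypt_chunk() stands in for aesni_ecb_enc() and friends:

/* before: one FPU section spans the whole request */
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
        encrypt_chunk(&walk, nbytes & AES_BLOCK_MASK);
        err = blkcipher_walk_done(desc, &walk, nbytes & (AES_BLOCK_SIZE - 1));
}
kernel_fpu_end();

/* after: begin/end per chunk, leaving a preemption point between chunks */
while ((nbytes = walk.nbytes)) {
        kernel_fpu_begin();
        encrypt_chunk(&walk, nbytes & AES_BLOCK_MASK);
        kernel_fpu_end();
        err = blkcipher_walk_done(desc, &walk, nbytes & (AES_BLOCK_SIZE - 1));
}
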
1858 diff -Nur linux-4.4.46.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.4.46/arch/x86/crypto/cast5_avx_glue.c
1859 --- linux-4.4.46.orig/arch/x86/crypto/cast5_avx_glue.c 2017-02-01 08:31:11.000000000 +0100
1860 +++ linux-4.4.46/arch/x86/crypto/cast5_avx_glue.c 2017-02-03 17:18:05.647415094 +0100
1862 static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
1865 - bool fpu_enabled = false;
1867 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1868 const unsigned int bsize = CAST5_BLOCK_SIZE;
1869 unsigned int nbytes;
1871 u8 *wsrc = walk->src.virt.addr;
1872 u8 *wdst = walk->dst.virt.addr;
1874 - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
1875 + fpu_enabled = cast5_fpu_begin(false, nbytes);
1877 /* Process multi-block batch */
1878 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
1879 @@ -103,10 +103,9 @@
1880 } while (nbytes >= bsize);
1883 + cast5_fpu_end(fpu_enabled);
1884 err = blkcipher_walk_done(desc, walk, nbytes);
1887 - cast5_fpu_end(fpu_enabled);
1892 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1893 struct scatterlist *src, unsigned int nbytes)
1895 - bool fpu_enabled = false;
1897 struct blkcipher_walk walk;
1900 @@ -236,12 +235,11 @@
1901 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1903 while ((nbytes = walk.nbytes)) {
1904 - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
1905 + fpu_enabled = cast5_fpu_begin(false, nbytes);
1906 nbytes = __cbc_decrypt(desc, &walk);
1907 + cast5_fpu_end(fpu_enabled);
1908 err = blkcipher_walk_done(desc, &walk, nbytes);
1911 - cast5_fpu_end(fpu_enabled);
1916 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1917 struct scatterlist *src, unsigned int nbytes)
1919 - bool fpu_enabled = false;
1921 struct blkcipher_walk walk;
1924 @@ -320,13 +318,12 @@
1925 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1927 while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
1928 - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
1929 + fpu_enabled = cast5_fpu_begin(false, nbytes);
1930 nbytes = __ctr_crypt(desc, &walk);
1931 + cast5_fpu_end(fpu_enabled);
1932 err = blkcipher_walk_done(desc, &walk, nbytes);
1935 - cast5_fpu_end(fpu_enabled);
1938 ctr_crypt_final(desc, &walk);
1939 err = blkcipher_walk_done(desc, &walk, 0);
1940 diff -Nur linux-4.4.46.orig/arch/x86/crypto/glue_helper.c linux-4.4.46/arch/x86/crypto/glue_helper.c
1941 --- linux-4.4.46.orig/arch/x86/crypto/glue_helper.c 2017-02-01 08:31:11.000000000 +0100
1942 +++ linux-4.4.46/arch/x86/crypto/glue_helper.c 2017-02-03 17:18:05.647415094 +0100
1944 void *ctx = crypto_blkcipher_ctx(desc->tfm);
1945 const unsigned int bsize = 128 / 8;
1946 unsigned int nbytes, i, func_bytes;
1947 - bool fpu_enabled = false;
1951 err = blkcipher_walk_virt(desc, walk);
1953 u8 *wdst = walk->dst.virt.addr;
1955 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
1956 - desc, fpu_enabled, nbytes);
1957 + desc, false, nbytes);
1959 for (i = 0; i < gctx->num_funcs; i++) {
1960 func_bytes = bsize * gctx->funcs[i].num_blocks;
1965 + glue_fpu_end(fpu_enabled);
1966 err = blkcipher_walk_done(desc, walk, nbytes);
1969 - glue_fpu_end(fpu_enabled);
1974 struct scatterlist *src, unsigned int nbytes)
1976 const unsigned int bsize = 128 / 8;
1977 - bool fpu_enabled = false;
1979 struct blkcipher_walk walk;
1982 @@ -203,12 +203,12 @@
1984 while ((nbytes = walk.nbytes)) {
1985 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
1986 - desc, fpu_enabled, nbytes);
1987 + desc, false, nbytes);
1988 nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
1989 + glue_fpu_end(fpu_enabled);
1990 err = blkcipher_walk_done(desc, &walk, nbytes);
1993 - glue_fpu_end(fpu_enabled);
1996 EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
1998 struct scatterlist *src, unsigned int nbytes)
2000 const unsigned int bsize = 128 / 8;
2001 - bool fpu_enabled = false;
2003 struct blkcipher_walk walk;
2006 @@ -286,13 +286,12 @@
2008 while ((nbytes = walk.nbytes) >= bsize) {
2009 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
2010 - desc, fpu_enabled, nbytes);
2011 + desc, false, nbytes);
2012 nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
2013 + glue_fpu_end(fpu_enabled);
2014 err = blkcipher_walk_done(desc, &walk, nbytes);
2017 - glue_fpu_end(fpu_enabled);
2020 glue_ctr_crypt_final_128bit(
2021 gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
2023 void *tweak_ctx, void *crypt_ctx)
2025 const unsigned int bsize = 128 / 8;
2026 - bool fpu_enabled = false;
2028 struct blkcipher_walk walk;
2031 @@ -360,21 +359,21 @@
2033 /* set minimum length to bsize, for tweak_fn */
2034 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
2035 - desc, fpu_enabled,
2037 nbytes < bsize ? bsize : nbytes);
2039 /* calculate first value of T */
2040 tweak_fn(tweak_ctx, walk.iv, walk.iv);
2041 + glue_fpu_end(fpu_enabled);
2044 + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
2045 + desc, false, nbytes);
2046 nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
2048 + glue_fpu_end(fpu_enabled);
2049 err = blkcipher_walk_done(desc, &walk, nbytes);
2050 nbytes = walk.nbytes;
2053 - glue_fpu_end(fpu_enabled);
2057 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
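The aesni, cast5 and glue_helper hunks above all apply the same transformation: the FPU region is narrowed from one long section around the whole blkcipher walk to one section per loop iteration, so the non-preemptible kernel_fpu_begin()/kernel_fpu_end() window stays short and blkcipher_walk_done() runs with the FPU released. A minimal sketch of the resulting loop shape (a sketch only, reusing the variable names from the hunks; the matching kernel_fpu_end() movement sits in context that is not shown in these hunks):

	err = blkcipher_walk_virt(desc, &walk);
	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();	/* short, non-preemptible SIMD region */
		/* ... process nbytes & AES_BLOCK_MASK bytes with the SIMD routine ... */
		kernel_fpu_end();	/* drop the FPU before a call that may sleep */
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

The ctr case keeps one extra kernel_fpu_begin() in front of the final partial-block step, as the hunk above shows.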
2058 diff -Nur linux-4.4.46.orig/arch/x86/entry/common.c linux-4.4.46/arch/x86/entry/common.c
2059 --- linux-4.4.46.orig/arch/x86/entry/common.c 2017-02-01 08:31:11.000000000 +0100
2060 +++ linux-4.4.46/arch/x86/entry/common.c 2017-02-03 17:18:05.647415094 +0100
2063 #define EXIT_TO_USERMODE_LOOP_FLAGS \
2064 (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
2065 - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
2066 + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
2068 static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
2070 @@ -236,9 +236,16 @@
2071 /* We have work to do. */
2074 - if (cached_flags & _TIF_NEED_RESCHED)
2075 + if (cached_flags & _TIF_NEED_RESCHED_MASK)
2078 +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
2079 + if (unlikely(current->forced_info.si_signo)) {
2080 + struct task_struct *t = current;
2081 + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
2082 + t->forced_info.si_signo = 0;
2085 if (cached_flags & _TIF_UPROBE)
2086 uprobe_notify_resume(regs);
2088 diff -Nur linux-4.4.46.orig/arch/x86/entry/entry_32.S linux-4.4.46/arch/x86/entry/entry_32.S
2089 --- linux-4.4.46.orig/arch/x86/entry/entry_32.S 2017-02-01 08:31:11.000000000 +0100
2090 +++ linux-4.4.46/arch/x86/entry/entry_32.S 2017-02-03 17:18:05.647415094 +0100
2091 @@ -278,8 +278,24 @@
2092 ENTRY(resume_kernel)
2093 DISABLE_INTERRUPTS(CLBR_ANY)
2095 + # preempt count == 0 + NEED_RS set?
2096 cmpl $0, PER_CPU_VAR(__preempt_count)
2097 +#ifndef CONFIG_PREEMPT_LAZY
2102 +	# at least preempt count == 0 ?
2103 + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
2106 + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
2109 + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
2113 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
2115 call preempt_schedule_irq
2116 diff -Nur linux-4.4.46.orig/arch/x86/entry/entry_64.S linux-4.4.46/arch/x86/entry/entry_64.S
2117 --- linux-4.4.46.orig/arch/x86/entry/entry_64.S 2017-02-01 08:31:11.000000000 +0100
2118 +++ linux-4.4.46/arch/x86/entry/entry_64.S 2017-02-03 17:18:05.647415094 +0100
2119 @@ -579,7 +579,23 @@
2120 bt $9, EFLAGS(%rsp) /* were interrupts off? */
2122 0: cmpl $0, PER_CPU_VAR(__preempt_count)
2123 +#ifndef CONFIG_PREEMPT_LAZY
2126 + jz do_preempt_schedule_irq
2128 +	# at least preempt count == 0 ?
2129 + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
2132 + GET_THREAD_INFO(%rcx)
2133 + cmpl $0, TI_preempt_lazy_count(%rcx)
2136 + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
2138 +do_preempt_schedule_irq:
2140 call preempt_schedule_irq
2147 +#ifndef CONFIG_PREEMPT_RT_FULL
2148 /* Call softirq on interrupt stack. Interrupts are off. */
2149 ENTRY(do_softirq_own_stack)
2152 decl PER_CPU_VAR(irq_count)
2154 END(do_softirq_own_stack)
2158 idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
2159 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/preempt.h linux-4.4.46/arch/x86/include/asm/preempt.h
2160 --- linux-4.4.46.orig/arch/x86/include/asm/preempt.h 2017-02-01 08:31:11.000000000 +0100
2161 +++ linux-4.4.46/arch/x86/include/asm/preempt.h 2017-02-03 17:18:05.647415094 +0100
2163 * a decrement which hits zero means we have no preempt_count and should
2166 -static __always_inline bool __preempt_count_dec_and_test(void)
2167 +static __always_inline bool ____preempt_count_dec_and_test(void)
2169 GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
2172 +static __always_inline bool __preempt_count_dec_and_test(void)
2174 + if (____preempt_count_dec_and_test())
2176 +#ifdef CONFIG_PREEMPT_LAZY
2177 + if (current_thread_info()->preempt_lazy_count)
2179 + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
2186 * Returns true when we need to resched and can (barring IRQ state).
2188 static __always_inline bool should_resched(int preempt_offset)
2190 +#ifdef CONFIG_PREEMPT_LAZY
2193 + tmp = raw_cpu_read_4(__preempt_count);
2194 + if (tmp == preempt_offset)
2197 + /* preempt count == 0 ? */
2198 + tmp &= ~PREEMPT_NEED_RESCHED;
2201 + if (current_thread_info()->preempt_lazy_count)
2203 + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
2205 return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
2209 #ifdef CONFIG_PREEMPT
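The preempt.h hunk is shown with several lines elided; the intent is that under CONFIG_PREEMPT_LAZY a zero preempt count alone no longer forces a reschedule: the lazy counter must also be zero and TIF_NEED_RESCHED_LAZY must be set. Reassembled from the fragments above, the CONFIG_PREEMPT_LAZY variant of should_resched() reads roughly as follows (a reconstruction, not the literal patch text; without CONFIG_PREEMPT_LAZY the function keeps the original single comparison):

	static __always_inline bool should_resched(int preempt_offset)
	{
		u32 tmp = raw_cpu_read_4(__preempt_count);

		/* count matches and an immediate reschedule is already pending */
		if (tmp == preempt_offset)
			return true;

		/* preempt count == 0 once the folded NEED_RESCHED bit is masked off? */
		tmp &= ~PREEMPT_NEED_RESCHED;
		if (tmp)
			return false;

		/* only do a lazy reschedule if no lazy sections are held */
		if (current_thread_info()->preempt_lazy_count)
			return false;
		return test_thread_flag(TIF_NEED_RESCHED_LAZY);
	}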
2210 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/signal.h linux-4.4.46/arch/x86/include/asm/signal.h
2211 --- linux-4.4.46.orig/arch/x86/include/asm/signal.h 2017-02-01 08:31:11.000000000 +0100
2212 +++ linux-4.4.46/arch/x86/include/asm/signal.h 2017-02-03 17:18:05.647415094 +0100
2214 unsigned long sig[_NSIG_WORDS];
2218 + * Because some traps use the IST stack, we must keep preemption
2219 + * disabled while calling do_trap(), but do_trap() may call
2220 + * force_sig_info() which will grab the signal spin_locks for the
2221 + * task, which in PREEMPT_RT_FULL are mutexes. By defining
2222 + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
2223 + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
2226 +#if defined(CONFIG_PREEMPT_RT_FULL)
2227 +#define ARCH_RT_DELAYS_SIGNAL_SEND
2230 #ifndef CONFIG_COMPAT
2231 typedef sigset_t compat_sigset_t;
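The comment added above describes the producer side; the consumer side is the exit_to_usermode_loop() hunk earlier in this patch. Put together, the intended flow is: with ARCH_RT_DELAYS_SIGNAL_SEND defined, force_sig_info() called from the non-preemptible trap path only records the siginfo in current->forced_info and flags TIF_NOTIFY_RESUME, and the recorded signal is re-issued on the way back to user space, where taking the (sleeping, on RT) signal locks is safe. A sketch of that delivery step, mirroring the entry/common.c hunk rather than adding new patch text:

	#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
		if (unlikely(current->forced_info.si_signo)) {
			struct task_struct *t = current;

			/* deliver the signal deferred by force_sig_info() in the trap */
			force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
			t->forced_info.si_signo = 0;
		}
	#endif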
2233 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/stackprotector.h linux-4.4.46/arch/x86/include/asm/stackprotector.h
2234 --- linux-4.4.46.orig/arch/x86/include/asm/stackprotector.h 2017-02-01 08:31:11.000000000 +0100
2235 +++ linux-4.4.46/arch/x86/include/asm/stackprotector.h 2017-02-03 17:18:05.647415094 +0100
2238 static __always_inline void boot_init_stack_canary(void)
2241 + u64 uninitialized_var(canary);
2244 #ifdef CONFIG_X86_64
2246 * of randomness. The TSC only matters for very early init,
2247 * there it already has some randomness on most systems. Later
2248 * on during the bootup the random pool has true entropy too.
2250 + * For preempt-rt we need to weaken the randomness a bit, as
2251 + * we can't call into the random generator from atomic context
2252 + * due to locking constraints. We just leave canary
2253 + * uninitialized and use the TSC based randomness on top of it.
2255 +#ifndef CONFIG_PREEMPT_RT_FULL
2256 get_random_bytes(&canary, sizeof(canary));
2259 canary += tsc + (tsc << 32UL);
2261 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/thread_info.h linux-4.4.46/arch/x86/include/asm/thread_info.h
2262 --- linux-4.4.46.orig/arch/x86/include/asm/thread_info.h 2017-02-01 08:31:11.000000000 +0100
2263 +++ linux-4.4.46/arch/x86/include/asm/thread_info.h 2017-02-03 17:18:05.647415094 +0100
2265 __u32 status; /* thread synchronous flags */
2266 __u32 cpu; /* current CPU */
2267 mm_segment_t addr_limit;
2268 + int preempt_lazy_count; /* 0 => lazy preemptable
2270 unsigned int sig_on_uaccess_error:1;
2271 unsigned int uaccess_err:1; /* uaccess failed */
2274 #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
2275 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
2276 #define TIF_SECCOMP 8 /* secure computing */
2277 +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
2278 #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
2279 #define TIF_UPROBE 12 /* breakpointed or singlestepping */
2280 #define TIF_NOTSC 16 /* TSC is not accessible in userland */
2282 #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
2283 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
2284 #define _TIF_SECCOMP (1 << TIF_SECCOMP)
2285 +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
2286 #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
2287 #define _TIF_UPROBE (1 << TIF_UPROBE)
2288 #define _TIF_NOTSC (1 << TIF_NOTSC)
2290 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
2291 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
2293 +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
2295 #define STACK_WARN (THREAD_SIZE/8)
2298 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.4.46/arch/x86/include/asm/uv/uv_bau.h
2299 --- linux-4.4.46.orig/arch/x86/include/asm/uv/uv_bau.h 2017-02-01 08:31:11.000000000 +0100
2300 +++ linux-4.4.46/arch/x86/include/asm/uv/uv_bau.h 2017-02-03 17:18:05.647415094 +0100
2302 cycles_t send_message;
2303 cycles_t period_end;
2304 cycles_t period_time;
2305 - spinlock_t uvhub_lock;
2306 - spinlock_t queue_lock;
2307 - spinlock_t disable_lock;
2308 + raw_spinlock_t uvhub_lock;
2309 + raw_spinlock_t queue_lock;
2310 + raw_spinlock_t disable_lock;
2313 int max_concurr_const;
2314 @@ -776,15 +776,15 @@
2315 * to be lowered below the current 'v'. atomic_add_unless can only stop
2318 -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
2319 +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
2322 + raw_spin_lock(lock);
2323 if (atomic_read(v) >= u) {
2324 - spin_unlock(lock);
2325 + raw_spin_unlock(lock);
2329 - spin_unlock(lock);
2330 + raw_spin_unlock(lock);
2334 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/uv/uv_hub.h linux-4.4.46/arch/x86/include/asm/uv/uv_hub.h
2335 --- linux-4.4.46.orig/arch/x86/include/asm/uv/uv_hub.h 2017-02-01 08:31:11.000000000 +0100
2336 +++ linux-4.4.46/arch/x86/include/asm/uv/uv_hub.h 2017-02-03 17:18:05.647415094 +0100
2338 unsigned short nr_online_cpus;
2339 unsigned short pnode;
2341 - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
2342 + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
2343 unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
2345 extern struct uv_blade_info *uv_blade_info;
2346 diff -Nur linux-4.4.46.orig/arch/x86/Kconfig linux-4.4.46/arch/x86/Kconfig
2347 --- linux-4.4.46.orig/arch/x86/Kconfig 2017-02-01 08:31:11.000000000 +0100
2348 +++ linux-4.4.46/arch/x86/Kconfig 2017-02-03 17:18:05.643414939 +0100
2353 + select HAVE_PREEMPT_LAZY
2354 select ACPI_LEGACY_TABLES_LOOKUP if ACPI
2355 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
2357 @@ -212,8 +213,11 @@
2359 depends on ISA_DMA_API
2361 +config RWSEM_GENERIC_SPINLOCK
2362 + def_bool PREEMPT_RT_FULL
2364 config RWSEM_XCHGADD_ALGORITHM
2366 + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
2368 config GENERIC_CALIBRATE_DELAY
2372 bool "Enable Maximum number of SMP Processors and NUMA Nodes"
2373 depends on X86_64 && SMP && DEBUG_KERNEL
2374 - select CPUMASK_OFFSTACK
2375 + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
2377 Enable maximum number of CPUS and NUMA Nodes for this architecture.
2379 diff -Nur linux-4.4.46.orig/arch/x86/kernel/acpi/boot.c linux-4.4.46/arch/x86/kernel/acpi/boot.c
2380 --- linux-4.4.46.orig/arch/x86/kernel/acpi/boot.c 2017-02-01 08:31:11.000000000 +0100
2381 +++ linux-4.4.46/arch/x86/kernel/acpi/boot.c 2017-02-03 17:18:05.655415404 +0100
2386 +#ifdef CONFIG_X86_IO_APIC
2387 static DEFINE_MUTEX(acpi_ioapic_lock);
2390 /* --------------------------------------------------------------------------
2391 Boot-time Configuration
2392 diff -Nur linux-4.4.46.orig/arch/x86/kernel/apic/io_apic.c linux-4.4.46/arch/x86/kernel/apic/io_apic.c
2393 --- linux-4.4.46.orig/arch/x86/kernel/apic/io_apic.c 2017-02-01 08:31:11.000000000 +0100
2394 +++ linux-4.4.46/arch/x86/kernel/apic/io_apic.c 2017-02-03 17:18:05.655415404 +0100
2395 @@ -1711,7 +1711,8 @@
2396 static inline bool ioapic_irqd_mask(struct irq_data *data)
2398 /* If we are moving the irq we need to mask it */
2399 - if (unlikely(irqd_is_setaffinity_pending(data))) {
2400 + if (unlikely(irqd_is_setaffinity_pending(data) &&
2401 + !irqd_irq_inprogress(data))) {
2402 mask_ioapic_irq(data);
2405 diff -Nur linux-4.4.46.orig/arch/x86/kernel/apic/x2apic_uv_x.c linux-4.4.46/arch/x86/kernel/apic/x2apic_uv_x.c
2406 --- linux-4.4.46.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2017-02-01 08:31:11.000000000 +0100
2407 +++ linux-4.4.46/arch/x86/kernel/apic/x2apic_uv_x.c 2017-02-03 17:18:05.655415404 +0100
2409 uv_blade_info[blade].pnode = pnode;
2410 uv_blade_info[blade].nr_possible_cpus = 0;
2411 uv_blade_info[blade].nr_online_cpus = 0;
2412 - spin_lock_init(&uv_blade_info[blade].nmi_lock);
2413 + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
2414 min_pnode = min(pnode, min_pnode);
2415 max_pnode = max(pnode, max_pnode);
2417 diff -Nur linux-4.4.46.orig/arch/x86/kernel/asm-offsets.c linux-4.4.46/arch/x86/kernel/asm-offsets.c
2418 --- linux-4.4.46.orig/arch/x86/kernel/asm-offsets.c 2017-02-01 08:31:11.000000000 +0100
2419 +++ linux-4.4.46/arch/x86/kernel/asm-offsets.c 2017-02-03 17:18:05.655415404 +0100
2421 OFFSET(TI_flags, thread_info, flags);
2422 OFFSET(TI_status, thread_info, status);
2423 OFFSET(TI_addr_limit, thread_info, addr_limit);
2424 + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
2427 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
2431 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
2432 + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
2434 diff -Nur linux-4.4.46.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-4.4.46/arch/x86/kernel/cpu/mcheck/mce.c
2435 --- linux-4.4.46.orig/arch/x86/kernel/cpu/mcheck/mce.c 2017-02-01 08:31:11.000000000 +0100
2436 +++ linux-4.4.46/arch/x86/kernel/cpu/mcheck/mce.c 2017-02-03 17:18:05.659415557 +0100
2438 #include <linux/debugfs.h>
2439 #include <linux/irq_work.h>
2440 #include <linux/export.h>
2441 +#include <linux/jiffies.h>
2442 +#include <linux/swork.h>
2444 #include <asm/processor.h>
2445 #include <asm/traps.h>
2446 @@ -1236,7 +1238,7 @@
2447 static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
2449 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
2450 -static DEFINE_PER_CPU(struct timer_list, mce_timer);
2451 +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
2453 static unsigned long mce_adjust_timer_default(unsigned long interval)
2455 @@ -1245,32 +1247,18 @@
2457 static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
2459 -static void __restart_timer(struct timer_list *t, unsigned long interval)
2460 +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
2462 - unsigned long when = jiffies + interval;
2463 - unsigned long flags;
2465 - local_irq_save(flags);
2467 - if (timer_pending(t)) {
2468 - if (time_before(when, t->expires))
2469 - mod_timer_pinned(t, when);
2471 - t->expires = round_jiffies(when);
2472 - add_timer_on(t, smp_processor_id());
2475 - local_irq_restore(flags);
2477 + return HRTIMER_NORESTART;
2478 + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
2479 + return HRTIMER_RESTART;
2482 -static void mce_timer_fn(unsigned long data)
2483 +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
2485 - struct timer_list *t = this_cpu_ptr(&mce_timer);
2486 - int cpu = smp_processor_id();
2489 - WARN_ON(cpu != data);
2491 iv = __this_cpu_read(mce_next_interval);
2493 if (mce_available(this_cpu_ptr(&cpu_info))) {
2494 @@ -1293,7 +1281,7 @@
2497 __this_cpu_write(mce_next_interval, iv);
2498 - __restart_timer(t, iv);
2499 + return __restart_timer(timer, iv);
2503 @@ -1301,7 +1289,7 @@
2505 void mce_timer_kick(unsigned long interval)
2507 - struct timer_list *t = this_cpu_ptr(&mce_timer);
2508 + struct hrtimer *t = this_cpu_ptr(&mce_timer);
2509 unsigned long iv = __this_cpu_read(mce_next_interval);
2511 __restart_timer(t, interval);
2512 @@ -1316,7 +1304,7 @@
2515 for_each_online_cpu(cpu)
2516 - del_timer_sync(&per_cpu(mce_timer, cpu));
2517 + hrtimer_cancel(&per_cpu(mce_timer, cpu));
2520 static void mce_do_trigger(struct work_struct *work)
2521 @@ -1326,6 +1314,56 @@
2523 static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
2525 +static void __mce_notify_work(struct swork_event *event)
2527 + /* Not more than two messages every minute */
2528 + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
2530 + /* wake processes polling /dev/mcelog */
2531 + wake_up_interruptible(&mce_chrdev_wait);
2534 + * There is no risk of missing notifications because
2535 + * work_pending is always cleared before the function is
2538 + if (mce_helper[0] && !work_pending(&mce_trigger_work))
2539 + schedule_work(&mce_trigger_work);
2541 + if (__ratelimit(&ratelimit))
2542 + pr_info(HW_ERR "Machine check events logged\n");
2545 +#ifdef CONFIG_PREEMPT_RT_FULL
2546 +static bool notify_work_ready __read_mostly;
2547 +static struct swork_event notify_work;
2549 +static int mce_notify_work_init(void)
2553 + err = swork_get();
2557 +	INIT_SWORK(&notify_work, __mce_notify_work);
2558 + notify_work_ready = true;
2562 +static void mce_notify_work(void)
2564 + if (notify_work_ready)
2565 +		swork_queue(&notify_work);
2568 +static void mce_notify_work(void)
2570 + __mce_notify_work(NULL);
2572 +static inline int mce_notify_work_init(void) { return 0; }
2576 * Notify the user(s) about new machine check events.
2577 * Can be called from interrupt context, but not from machine check/NMI
2578 @@ -1333,19 +1371,8 @@
2580 int mce_notify_irq(void)
2582 - /* Not more than two messages every minute */
2583 - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
2585 if (test_and_clear_bit(0, &mce_need_notify)) {
2586 - /* wake processes polling /dev/mcelog */
2587 - wake_up_interruptible(&mce_chrdev_wait);
2589 - if (mce_helper[0])
2590 - schedule_work(&mce_trigger_work);
2592 - if (__ratelimit(&ratelimit))
2593 - pr_info(HW_ERR "Machine check events logged\n");
2595 + mce_notify_work();
2599 @@ -1639,7 +1666,7 @@
2603 -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
2604 +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
2606 unsigned long iv = check_interval * HZ;
2608 @@ -1648,16 +1675,17 @@
2610 per_cpu(mce_next_interval, cpu) = iv;
2612 - t->expires = round_jiffies(jiffies + iv);
2613 - add_timer_on(t, cpu);
2614 + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
2615 + 0, HRTIMER_MODE_REL_PINNED);
2618 static void __mcheck_cpu_init_timer(void)
2620 - struct timer_list *t = this_cpu_ptr(&mce_timer);
2621 + struct hrtimer *t = this_cpu_ptr(&mce_timer);
2622 unsigned int cpu = smp_processor_id();
2624 - setup_timer(t, mce_timer_fn, cpu);
2625 + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2626 + t->function = mce_timer_fn;
2627 mce_start_timer(cpu, t);
2630 @@ -2376,6 +2404,8 @@
2631 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2634 + hrtimer_cancel(this_cpu_ptr(&mce_timer));
2636 if (!(action & CPU_TASKS_FROZEN))
2639 @@ -2398,6 +2428,7 @@
2641 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
2643 + __mcheck_cpu_init_timer();
2646 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
2647 @@ -2405,7 +2436,6 @@
2648 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2650 unsigned int cpu = (unsigned long)hcpu;
2651 - struct timer_list *t = &per_cpu(mce_timer, cpu);
2653 switch (action & ~CPU_TASKS_FROZEN) {
2655 @@ -2425,11 +2455,9 @@
2657 case CPU_DOWN_PREPARE:
2658 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
2659 - del_timer_sync(t);
2661 case CPU_DOWN_FAILED:
2662 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2663 - mce_start_timer(cpu, t);
2667 @@ -2468,6 +2496,10 @@
2671 + err = mce_notify_work_init();
2675 if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
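Two RT adaptations are combined in the mce.c hunks above: the per-CPU polling timer becomes an hrtimer (its handler runs in hard-irq context, where sleeping locks are not allowed), and the user-visible notification work is pushed out of that context through the RT patch's "swork" (simple work) facility. Reduced to its skeleton, the deferral pattern used here looks like this (the function and variable names are illustrative; swork_get(), INIT_SWORK() and swork_queue() are the calls used in the hunk):

	static struct swork_event notify_work;
	static bool notify_work_ready __read_mostly;

	static void __notify_work_fn(struct swork_event *event)
	{
		/* runs in the swork kernel thread: may sleep and take normal locks */
	}

	static int notify_work_init(void)
	{
		int err = swork_get();		/* make sure the worker thread exists */

		if (err)
			return err;
		INIT_SWORK(&notify_work, __notify_work_fn);
		notify_work_ready = true;
		return 0;
	}

	static void notify(void)
	{
		if (notify_work_ready)
			swork_queue(&notify_work);	/* safe from contexts that must not sleep */
	}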
2678 diff -Nur linux-4.4.46.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c linux-4.4.46/arch/x86/kernel/cpu/perf_event_intel_rapl.c
2679 --- linux-4.4.46.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c 2017-02-01 08:31:11.000000000 +0100
2680 +++ linux-4.4.46/arch/x86/kernel/cpu/perf_event_intel_rapl.c 2017-02-03 17:18:05.663415712 +0100
2686 + raw_spinlock_t lock;
2687 int n_active; /* number of active events */
2688 struct list_head active_list;
2689 struct pmu *pmu; /* pointer to rapl_pmu_class */
2690 @@ -220,13 +220,13 @@
2692 return HRTIMER_NORESTART;
2694 - spin_lock_irqsave(&pmu->lock, flags);
2695 + raw_spin_lock_irqsave(&pmu->lock, flags);
2697 list_for_each_entry(event, &pmu->active_list, active_entry) {
2698 rapl_event_update(event);
2701 - spin_unlock_irqrestore(&pmu->lock, flags);
2702 + raw_spin_unlock_irqrestore(&pmu->lock, flags);
2704 hrtimer_forward_now(hrtimer, pmu->timer_interval);
2707 struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
2708 unsigned long flags;
2710 - spin_lock_irqsave(&pmu->lock, flags);
2711 + raw_spin_lock_irqsave(&pmu->lock, flags);
2712 __rapl_pmu_event_start(pmu, event);
2713 - spin_unlock_irqrestore(&pmu->lock, flags);
2714 + raw_spin_unlock_irqrestore(&pmu->lock, flags);
2717 static void rapl_pmu_event_stop(struct perf_event *event, int mode)
2719 struct hw_perf_event *hwc = &event->hw;
2720 unsigned long flags;
2722 - spin_lock_irqsave(&pmu->lock, flags);
2723 + raw_spin_lock_irqsave(&pmu->lock, flags);
2725 /* mark event as deactivated and stopped */
2726 if (!(hwc->state & PERF_HES_STOPPED)) {
2728 hwc->state |= PERF_HES_UPTODATE;
2731 - spin_unlock_irqrestore(&pmu->lock, flags);
2732 + raw_spin_unlock_irqrestore(&pmu->lock, flags);
2735 static int rapl_pmu_event_add(struct perf_event *event, int mode)
2736 @@ -308,14 +308,14 @@
2737 struct hw_perf_event *hwc = &event->hw;
2738 unsigned long flags;
2740 - spin_lock_irqsave(&pmu->lock, flags);
2741 + raw_spin_lock_irqsave(&pmu->lock, flags);
2743 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
2745 if (mode & PERF_EF_START)
2746 __rapl_pmu_event_start(pmu, event);
2748 - spin_unlock_irqrestore(&pmu->lock, flags);
2749 + raw_spin_unlock_irqrestore(&pmu->lock, flags);
2754 pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
2757 - spin_lock_init(&pmu->lock);
2758 + raw_spin_lock_init(&pmu->lock);
2760 INIT_LIST_HEAD(&pmu->active_list);
2762 diff -Nur linux-4.4.46.orig/arch/x86/kernel/dumpstack_32.c linux-4.4.46/arch/x86/kernel/dumpstack_32.c
2763 --- linux-4.4.46.orig/arch/x86/kernel/dumpstack_32.c 2017-02-01 08:31:11.000000000 +0100
2764 +++ linux-4.4.46/arch/x86/kernel/dumpstack_32.c 2017-02-03 17:18:05.663415712 +0100
2766 unsigned long *stack, unsigned long bp,
2767 const struct stacktrace_ops *ops, void *data)
2769 - const unsigned cpu = get_cpu();
2770 + const unsigned cpu = get_cpu_light();
2776 touch_nmi_watchdog();
2781 EXPORT_SYMBOL(dump_trace);
2783 diff -Nur linux-4.4.46.orig/arch/x86/kernel/dumpstack_64.c linux-4.4.46/arch/x86/kernel/dumpstack_64.c
2784 --- linux-4.4.46.orig/arch/x86/kernel/dumpstack_64.c 2017-02-01 08:31:11.000000000 +0100
2785 +++ linux-4.4.46/arch/x86/kernel/dumpstack_64.c 2017-02-03 17:18:05.663415712 +0100
2787 unsigned long *stack, unsigned long bp,
2788 const struct stacktrace_ops *ops, void *data)
2790 - const unsigned cpu = get_cpu();
2791 + const unsigned cpu = get_cpu_light();
2792 struct thread_info *tinfo;
2793 unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
2794 unsigned long dummy;
2796 * This handles the process stack:
2798 bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
2802 EXPORT_SYMBOL(dump_trace);
2808 - preempt_disable();
2809 + migrate_disable();
2810 cpu = smp_processor_id();
2812 irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
2814 pr_cont(" %016lx", *stack++);
2815 touch_nmi_watchdog();
2821 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
2822 diff -Nur linux-4.4.46.orig/arch/x86/kernel/irq_32.c linux-4.4.46/arch/x86/kernel/irq_32.c
2823 --- linux-4.4.46.orig/arch/x86/kernel/irq_32.c 2017-02-01 08:31:11.000000000 +0100
2824 +++ linux-4.4.46/arch/x86/kernel/irq_32.c 2017-02-03 17:18:05.663415712 +0100
2826 cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
2829 +#ifndef CONFIG_PREEMPT_RT_FULL
2830 void do_softirq_own_stack(void)
2832 struct thread_info *curstk;
2835 call_on_stack(__do_softirq, isp);
2839 bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
2841 diff -Nur linux-4.4.46.orig/arch/x86/kernel/kvm.c linux-4.4.46/arch/x86/kernel/kvm.c
2842 --- linux-4.4.46.orig/arch/x86/kernel/kvm.c 2017-02-01 08:31:11.000000000 +0100
2843 +++ linux-4.4.46/arch/x86/kernel/kvm.c 2017-02-03 17:18:05.663415712 +0100
2845 #include <linux/kprobes.h>
2846 #include <linux/debugfs.h>
2847 #include <linux/nmi.h>
2848 +#include <linux/swait.h>
2849 #include <asm/timer.h>
2850 #include <asm/cpu.h>
2851 #include <asm/traps.h>
2854 struct kvm_task_sleep_node {
2855 struct hlist_node link;
2856 - wait_queue_head_t wq;
2857 + struct swait_queue_head wq;
2863 static struct kvm_task_sleep_head {
2865 + raw_spinlock_t lock;
2866 struct hlist_head list;
2867 } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
2869 @@ -122,17 +123,17 @@
2870 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
2871 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
2872 struct kvm_task_sleep_node n, *e;
2873 - DEFINE_WAIT(wait);
2874 + DECLARE_SWAITQUEUE(wait);
2878 - spin_lock(&b->lock);
2879 + raw_spin_lock(&b->lock);
2880 e = _find_apf_task(b, token);
2882 /* dummy entry exist -> wake up was delivered ahead of PF */
2883 hlist_del(&e->link);
2885 - spin_unlock(&b->lock);
2886 + raw_spin_unlock(&b->lock);
2890 @@ -141,13 +142,13 @@
2892 n.cpu = smp_processor_id();
2893 n.halted = is_idle_task(current) || preempt_count() > 1;
2894 - init_waitqueue_head(&n.wq);
2895 + init_swait_queue_head(&n.wq);
2896 hlist_add_head(&n.link, &b->list);
2897 - spin_unlock(&b->lock);
2898 + raw_spin_unlock(&b->lock);
2902 - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
2903 + prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
2904 if (hlist_unhashed(&n.link))
2911 - finish_wait(&n.wq, &wait);
2912 + finish_swait(&n.wq, &wait);
2917 hlist_del_init(&n->link);
2919 smp_send_reschedule(n->cpu);
2920 - else if (waitqueue_active(&n->wq))
2922 + else if (swait_active(&n->wq))
2926 static void apf_task_wake_all(void)
2927 @@ -189,14 +190,14 @@
2928 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
2929 struct hlist_node *p, *next;
2930 struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
2931 - spin_lock(&b->lock);
2932 + raw_spin_lock(&b->lock);
2933 hlist_for_each_safe(p, next, &b->list) {
2934 struct kvm_task_sleep_node *n =
2935 hlist_entry(p, typeof(*n), link);
2936 if (n->cpu == smp_processor_id())
2937 apf_task_wake_one(n);
2939 - spin_unlock(&b->lock);
2940 + raw_spin_unlock(&b->lock);
2948 - spin_lock(&b->lock);
2949 + raw_spin_lock(&b->lock);
2950 n = _find_apf_task(b, token);
2953 @@ -225,17 +226,17 @@
2954 * Allocation failed! Busy wait while other cpu
2957 - spin_unlock(&b->lock);
2958 + raw_spin_unlock(&b->lock);
2963 n->cpu = smp_processor_id();
2964 - init_waitqueue_head(&n->wq);
2965 + init_swait_queue_head(&n->wq);
2966 hlist_add_head(&n->link, &b->list);
2968 apf_task_wake_one(n);
2969 - spin_unlock(&b->lock);
2970 + raw_spin_unlock(&b->lock);
2973 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
2975 paravirt_ops_setup();
2976 register_reboot_notifier(&kvm_pv_reboot_nb);
2977 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
2978 - spin_lock_init(&async_pf_sleepers[i].lock);
2979 + raw_spin_lock_init(&async_pf_sleepers[i].lock);
2980 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
2981 x86_init.irqs.trap_init = kvm_apf_trap_init;
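The kvm.c hunk converts the async page-fault sleepers from regular waitqueues and spinlocks to simple waitqueues (swait) and raw spinlocks, because on RT the wakeup side may run from a context that cannot take sleeping locks. Stripped of the hash-table bookkeeping, the wait/wake pairing follows this pattern (names are illustrative; DECLARE_SWAITQUEUE(), prepare_to_swait(), finish_swait() and swait_active() are the calls visible in the hunk, and swake_up()/swake_up_all() are the matching wakeups, the latter appearing in the block-layer hunks further below):

	static struct swait_queue_head wq;	/* set up with init_swait_queue_head(&wq) */

	static void wait_for_event(bool (*done)(void))
	{
		DECLARE_SWAITQUEUE(wait);

		for (;;) {
			prepare_to_swait(&wq, &wait, TASK_UNINTERRUPTIBLE);
			if (done())
				break;
			schedule();
		}
		finish_swait(&wq, &wait);	/* restores TASK_RUNNING and unlinks */
	}

	static void wake_waiters(void)
	{
		if (swait_active(&wq))
			swake_up(&wq);		/* swake_up_all() wakes every waiter */
	}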
2983 diff -Nur linux-4.4.46.orig/arch/x86/kernel/nmi.c linux-4.4.46/arch/x86/kernel/nmi.c
2984 --- linux-4.4.46.orig/arch/x86/kernel/nmi.c 2017-02-01 08:31:11.000000000 +0100
2985 +++ linux-4.4.46/arch/x86/kernel/nmi.c 2017-02-03 17:18:05.663415712 +0100
2989 if (panic_on_unrecovered_nmi)
2990 - panic("NMI: Not continuing");
2991 + nmi_panic(regs, "NMI: Not continuing");
2993 pr_emerg("Dazed and confused, but trying to continue\n");
2995 @@ -255,8 +255,16 @@
2996 reason, smp_processor_id());
2999 - if (panic_on_io_nmi)
3000 - panic("NMI IOCK error: Not continuing");
3001 + if (panic_on_io_nmi) {
3002 + nmi_panic(regs, "NMI IOCK error: Not continuing");
3005 + * If we end up here, it means we have received an NMI while
3006 + * processing panic(). Simply return without delaying and
3007 + * re-enabling NMIs.
3012 /* Re-enable the IOCK line, wait for a few seconds */
3013 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
3016 pr_emerg("Do you have a strange power saving mode enabled?\n");
3017 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
3018 - panic("NMI: Not continuing");
3019 + nmi_panic(regs, "NMI: Not continuing");
3021 pr_emerg("Dazed and confused, but trying to continue\n");
3023 diff -Nur linux-4.4.46.orig/arch/x86/kernel/process_32.c linux-4.4.46/arch/x86/kernel/process_32.c
3024 --- linux-4.4.46.orig/arch/x86/kernel/process_32.c 2017-02-01 08:31:11.000000000 +0100
3025 +++ linux-4.4.46/arch/x86/kernel/process_32.c 2017-02-03 17:18:05.663415712 +0100
3027 #include <linux/uaccess.h>
3028 #include <linux/io.h>
3029 #include <linux/kdebug.h>
3030 +#include <linux/highmem.h>
3032 #include <asm/pgtable.h>
3033 #include <asm/ldt.h>
3034 @@ -210,6 +211,35 @@
3036 EXPORT_SYMBOL_GPL(start_thread);
3038 +#ifdef CONFIG_PREEMPT_RT_FULL
3039 +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
3044 + * Clear @prev's kmap_atomic mappings
3046 + for (i = 0; i < prev_p->kmap_idx; i++) {
3047 + int idx = i + KM_TYPE_NR * smp_processor_id();
3048 + pte_t *ptep = kmap_pte - idx;
3050 + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
3053 + * Restore @next_p's kmap_atomic mappings
3055 + for (i = 0; i < next_p->kmap_idx; i++) {
3056 + int idx = i + KM_TYPE_NR * smp_processor_id();
3058 + if (!pte_none(next_p->kmap_pte[i]))
3059 + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
3064 +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
3069 * switch_to(x,y) should switch tasks from x to y.
3071 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
3072 __switch_to_xtra(prev_p, next_p, tss);
3074 + switch_kmaps(prev_p, next_p);
3077 * Leave lazy mode, flushing any hypercalls made here.
3078 * This must be done before restoring TLS segments so
3079 diff -Nur linux-4.4.46.orig/arch/x86/kernel/reboot.c linux-4.4.46/arch/x86/kernel/reboot.c
3080 --- linux-4.4.46.orig/arch/x86/kernel/reboot.c 2017-02-01 08:31:11.000000000 +0100
3081 +++ linux-4.4.46/arch/x86/kernel/reboot.c 2017-02-03 17:18:05.663415712 +0100
3083 static nmi_shootdown_cb shootdown_callback;
3085 static atomic_t waiting_for_crash_ipi;
3086 +static int crash_ipi_issued;
3088 static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
3092 smp_send_nmi_allbutself();
3094 + /* Kick CPUs looping in NMI context. */
3095 + WRITE_ONCE(crash_ipi_issued, 1);
3097 msecs = 1000; /* Wait at most a second for the other cpus to stop */
3098 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
3100 @@ -796,6 +800,22 @@
3102 /* Leave the nmi callback set */
3105 +/* Override the weak function in kernel/panic.c */
3106 +void nmi_panic_self_stop(struct pt_regs *regs)
3110 + * Wait for the crash dumping IPI to be issued, and then
3111 + * call its callback directly.
3113 + if (READ_ONCE(crash_ipi_issued))
3114 + crash_nmi_callback(0, regs); /* Don't return */
3120 #else /* !CONFIG_SMP */
3121 void nmi_shootdown_cpus(nmi_shootdown_cb callback)
3123 diff -Nur linux-4.4.46.orig/arch/x86/kvm/lapic.c linux-4.4.46/arch/x86/kvm/lapic.c
3124 --- linux-4.4.46.orig/arch/x86/kvm/lapic.c 2017-02-01 08:31:11.000000000 +0100
3125 +++ linux-4.4.46/arch/x86/kvm/lapic.c 2017-02-03 17:18:05.663415712 +0100
3126 @@ -1195,7 +1195,7 @@
3127 static void apic_timer_expired(struct kvm_lapic *apic)
3129 struct kvm_vcpu *vcpu = apic->vcpu;
3130 - wait_queue_head_t *q = &vcpu->wq;
3131 + struct swait_queue_head *q = &vcpu->wq;
3132 struct kvm_timer *ktimer = &apic->lapic_timer;
3134 if (atomic_read(&apic->lapic_timer.pending))
3135 @@ -1204,8 +1204,8 @@
3136 atomic_inc(&apic->lapic_timer.pending);
3137 kvm_set_pending_timer(vcpu);
3139 - if (waitqueue_active(q))
3140 - wake_up_interruptible(q);
3141 + if (swait_active(q))
3144 if (apic_lvtt_tscdeadline(apic))
3145 ktimer->expired_tscdeadline = ktimer->tscdeadline;
3146 @@ -1801,6 +1801,7 @@
3147 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
3149 apic->lapic_timer.timer.function = apic_timer_fn;
3150 + apic->lapic_timer.timer.irqsafe = 1;
3153 * APIC is created enabled. This will prevent kvm_lapic_set_base from
3154 diff -Nur linux-4.4.46.orig/arch/x86/kvm/x86.c linux-4.4.46/arch/x86/kvm/x86.c
3155 --- linux-4.4.46.orig/arch/x86/kvm/x86.c 2017-02-01 08:31:11.000000000 +0100
3156 +++ linux-4.4.46/arch/x86/kvm/x86.c 2017-02-03 17:18:05.663415712 +0100
3157 @@ -5809,6 +5809,13 @@
3161 +#ifdef CONFIG_PREEMPT_RT_FULL
3162 + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3163 + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
3164 + return -EOPNOTSUPP;
3168 r = kvm_mmu_module_init();
3170 goto out_free_percpu;
3171 diff -Nur linux-4.4.46.orig/arch/x86/mm/highmem_32.c linux-4.4.46/arch/x86/mm/highmem_32.c
3172 --- linux-4.4.46.orig/arch/x86/mm/highmem_32.c 2017-02-01 08:31:11.000000000 +0100
3173 +++ linux-4.4.46/arch/x86/mm/highmem_32.c 2017-02-03 17:18:05.663415712 +0100
3176 void *kmap_atomic_prot(struct page *page, pgprot_t prot)
3178 + pte_t pte = mk_pte(page, prot);
3179 unsigned long vaddr;
3182 - preempt_disable();
3183 + preempt_disable_nort();
3184 pagefault_disable();
3186 if (!PageHighMem(page))
3188 idx = type + KM_TYPE_NR*smp_processor_id();
3189 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3190 BUG_ON(!pte_none(*(kmap_pte-idx)));
3191 - set_pte(kmap_pte-idx, mk_pte(page, prot));
3192 +#ifdef CONFIG_PREEMPT_RT_FULL
3193 + current->kmap_pte[type] = pte;
3195 + set_pte(kmap_pte-idx, pte);
3196 arch_flush_lazy_mmu_mode();
3198 return (void *)vaddr;
3200 * is a bad idea also, in case the page changes cacheability
3201 * attributes or becomes a protected page in a hypervisor.
3203 +#ifdef CONFIG_PREEMPT_RT_FULL
3204 + current->kmap_pte[type] = __pte(0);
3206 kpte_clear_flush(kmap_pte-idx, vaddr);
3207 kmap_atomic_idx_pop();
3208 arch_flush_lazy_mmu_mode();
3214 + preempt_enable_nort();
3216 EXPORT_SYMBOL(__kunmap_atomic);
3218 diff -Nur linux-4.4.46.orig/arch/x86/mm/iomap_32.c linux-4.4.46/arch/x86/mm/iomap_32.c
3219 --- linux-4.4.46.orig/arch/x86/mm/iomap_32.c 2017-02-01 08:31:11.000000000 +0100
3220 +++ linux-4.4.46/arch/x86/mm/iomap_32.c 2017-02-03 17:18:05.663415712 +0100
3223 void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
3225 + pte_t pte = pfn_pte(pfn, prot);
3226 unsigned long vaddr;
3230 type = kmap_atomic_idx_push();
3231 idx = type + KM_TYPE_NR * smp_processor_id();
3232 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3233 - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
3234 + WARN_ON(!pte_none(*(kmap_pte - idx)));
3236 +#ifdef CONFIG_PREEMPT_RT_FULL
3237 + current->kmap_pte[type] = pte;
3239 + set_pte(kmap_pte - idx, pte);
3240 arch_flush_lazy_mmu_mode();
3242 return (void *)vaddr;
3244 * is a bad idea also, in case the page changes cacheability
3245 * attributes or becomes a protected page in a hypervisor.
3247 +#ifdef CONFIG_PREEMPT_RT_FULL
3248 + current->kmap_pte[type] = __pte(0);
3250 kpte_clear_flush(kmap_pte-idx, vaddr);
3251 kmap_atomic_idx_pop();
3253 diff -Nur linux-4.4.46.orig/arch/x86/platform/uv/tlb_uv.c linux-4.4.46/arch/x86/platform/uv/tlb_uv.c
3254 --- linux-4.4.46.orig/arch/x86/platform/uv/tlb_uv.c 2017-02-01 08:31:11.000000000 +0100
3255 +++ linux-4.4.46/arch/x86/platform/uv/tlb_uv.c 2017-02-03 17:18:05.663415712 +0100
3258 quiesce_local_uvhub(hmaster);
3260 - spin_lock(&hmaster->queue_lock);
3261 + raw_spin_lock(&hmaster->queue_lock);
3262 reset_with_ipi(&bau_desc->distribution, bcp);
3263 - spin_unlock(&hmaster->queue_lock);
3264 + raw_spin_unlock(&hmaster->queue_lock);
3266 end_uvhub_quiesce(hmaster);
3270 quiesce_local_uvhub(hmaster);
3272 - spin_lock(&hmaster->queue_lock);
3273 + raw_spin_lock(&hmaster->queue_lock);
3274 reset_with_ipi(&bau_desc->distribution, bcp);
3275 - spin_unlock(&hmaster->queue_lock);
3276 + raw_spin_unlock(&hmaster->queue_lock);
3278 end_uvhub_quiesce(hmaster);
3283 hmaster = bcp->uvhub_master;
3284 - spin_lock(&hmaster->disable_lock);
3285 + raw_spin_lock(&hmaster->disable_lock);
3286 if (!bcp->baudisabled) {
3287 stat->s_bau_disabled++;
3293 - spin_unlock(&hmaster->disable_lock);
3294 + raw_spin_unlock(&hmaster->disable_lock);
3297 static void count_max_concurr(int stat, struct bau_control *bcp,
3300 static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
3302 - spinlock_t *lock = &hmaster->uvhub_lock;
3303 + raw_spinlock_t *lock = &hmaster->uvhub_lock;
3306 v = &hmaster->active_descriptor_count;
3308 struct bau_control *hmaster;
3310 hmaster = bcp->uvhub_master;
3311 - spin_lock(&hmaster->disable_lock);
3312 + raw_spin_lock(&hmaster->disable_lock);
3313 if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
3314 stat->s_bau_reenabled++;
3315 for_each_present_cpu(tcpu) {
3316 @@ -980,10 +980,10 @@
3317 tbcp->period_giveups = 0;
3320 - spin_unlock(&hmaster->disable_lock);
3321 + raw_spin_unlock(&hmaster->disable_lock);
3324 - spin_unlock(&hmaster->disable_lock);
3325 + raw_spin_unlock(&hmaster->disable_lock);
3329 @@ -1901,9 +1901,9 @@
3330 bcp->cong_reps = congested_reps;
3331 bcp->disabled_period = sec_2_cycles(disabled_period);
3332 bcp->giveup_limit = giveup_limit;
3333 - spin_lock_init(&bcp->queue_lock);
3334 - spin_lock_init(&bcp->uvhub_lock);
3335 - spin_lock_init(&bcp->disable_lock);
3336 + raw_spin_lock_init(&bcp->queue_lock);
3337 + raw_spin_lock_init(&bcp->uvhub_lock);
3338 + raw_spin_lock_init(&bcp->disable_lock);
3342 diff -Nur linux-4.4.46.orig/arch/x86/platform/uv/uv_time.c linux-4.4.46/arch/x86/platform/uv/uv_time.c
3343 --- linux-4.4.46.orig/arch/x86/platform/uv/uv_time.c 2017-02-01 08:31:11.000000000 +0100
3344 +++ linux-4.4.46/arch/x86/platform/uv/uv_time.c 2017-02-03 17:18:05.663415712 +0100
3347 /* There is one of these allocated per node */
3348 struct uv_rtc_timer_head {
3350 + raw_spinlock_t lock;
3351 /* next cpu waiting for timer, local node relative: */
3353 /* number of cpus on this node: */
3355 uv_rtc_deallocate_timers();
3358 - spin_lock_init(&head->lock);
3359 + raw_spin_lock_init(&head->lock);
3360 head->ncpus = uv_blade_nr_possible_cpus(bid);
3361 head->next_cpu = -1;
3362 blade_info[bid] = head;
3364 unsigned long flags;
3367 - spin_lock_irqsave(&head->lock, flags);
3368 + raw_spin_lock_irqsave(&head->lock, flags);
3370 next_cpu = head->next_cpu;
3372 @@ -243,12 +243,12 @@
3373 if (uv_setup_intr(cpu, expires)) {
3375 uv_rtc_find_next_timer(head, pnode);
3376 - spin_unlock_irqrestore(&head->lock, flags);
3377 + raw_spin_unlock_irqrestore(&head->lock, flags);
3382 - spin_unlock_irqrestore(&head->lock, flags);
3383 + raw_spin_unlock_irqrestore(&head->lock, flags);
3388 unsigned long flags;
3391 - spin_lock_irqsave(&head->lock, flags);
3392 + raw_spin_lock_irqsave(&head->lock, flags);
3394 if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
3397 uv_rtc_find_next_timer(head, pnode);
3400 - spin_unlock_irqrestore(&head->lock, flags);
3401 + raw_spin_unlock_irqrestore(&head->lock, flags);
3405 @@ -299,13 +299,18 @@
3406 static cycle_t uv_read_rtc(struct clocksource *cs)
3408 unsigned long offset;
3411 + preempt_disable();
3412 if (uv_get_min_hub_revision_id() == 1)
3415 offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
3417 - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
3418 + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
3425 diff -Nur linux-4.4.46.orig/block/blk-core.c linux-4.4.46/block/blk-core.c
3426 --- linux-4.4.46.orig/block/blk-core.c 2017-02-01 08:31:11.000000000 +0100
3427 +++ linux-4.4.46/block/blk-core.c 2017-02-03 17:18:05.667415866 +0100
3430 INIT_LIST_HEAD(&rq->queuelist);
3431 INIT_LIST_HEAD(&rq->timeout_list);
3432 +#ifdef CONFIG_PREEMPT_RT_FULL
3433 + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
3437 rq->__sector = (sector_t) -1;
3440 void blk_start_queue(struct request_queue *q)
3442 - WARN_ON(!irqs_disabled());
3443 + WARN_ON_NONRT(!irqs_disabled());
3445 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
3448 if (!gfpflags_allow_blocking(gfp))
3451 - ret = wait_event_interruptible(q->mq_freeze_wq,
3452 + ret = swait_event_interruptible(q->mq_freeze_wq,
3453 !atomic_read(&q->mq_freeze_depth) ||
3454 blk_queue_dying(q));
3455 if (blk_queue_dying(q))
3457 struct request_queue *q =
3458 container_of(ref, struct request_queue, q_usage_counter);
3460 - wake_up_all(&q->mq_freeze_wq);
3461 + swake_up_all(&q->mq_freeze_wq);
3464 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
3466 q->bypass_depth = 1;
3467 __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
3469 - init_waitqueue_head(&q->mq_freeze_wq);
3470 + init_swait_queue_head(&q->mq_freeze_wq);
3473 * Init percpu_ref in atomic mode so that it's faster to shutdown.
3474 @@ -3200,7 +3203,7 @@
3475 blk_run_queue_async(q);
3478 - spin_unlock(q->queue_lock);
3479 + spin_unlock_irq(q->queue_lock);
3482 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
3483 @@ -3248,7 +3251,6 @@
3484 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3486 struct request_queue *q;
3487 - unsigned long flags;
3491 @@ -3268,11 +3270,6 @@
3496 - * Save and disable interrupts here, to avoid doing it for every
3497 - * queue lock we have to take.
3499 - local_irq_save(flags);
3500 while (!list_empty(&list)) {
3501 rq = list_entry_rq(list.next);
3502 list_del_init(&rq->queuelist);
3503 @@ -3285,7 +3282,7 @@
3504 queue_unplugged(q, depth, from_schedule);
3507 - spin_lock(q->queue_lock);
3508 + spin_lock_irq(q->queue_lock);
3512 @@ -3312,8 +3309,6 @@
3515 queue_unplugged(q, depth, from_schedule);
3517 - local_irq_restore(flags);
3520 void blk_finish_plug(struct blk_plug *plug)
3521 diff -Nur linux-4.4.46.orig/block/blk-ioc.c linux-4.4.46/block/blk-ioc.c
3522 --- linux-4.4.46.orig/block/blk-ioc.c 2017-02-01 08:31:11.000000000 +0100
3523 +++ linux-4.4.46/block/blk-ioc.c 2017-02-03 17:18:05.667415866 +0100
3525 #include <linux/bio.h>
3526 #include <linux/blkdev.h>
3527 #include <linux/slab.h>
3528 +#include <linux/delay.h>
3533 spin_unlock(q->queue_lock);
3535 spin_unlock_irqrestore(&ioc->lock, flags);
3538 spin_lock_irqsave_nested(&ioc->lock, flags, 1);
3542 spin_unlock(icq->q->queue_lock);
3544 spin_unlock_irqrestore(&ioc->lock, flags);
3550 diff -Nur linux-4.4.46.orig/block/blk-iopoll.c linux-4.4.46/block/blk-iopoll.c
3551 --- linux-4.4.46.orig/block/blk-iopoll.c 2017-02-01 08:31:11.000000000 +0100
3552 +++ linux-4.4.46/block/blk-iopoll.c 2017-02-03 17:18:05.667415866 +0100
3554 list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
3555 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3556 local_irq_restore(flags);
3557 + preempt_check_resched_rt();
3559 EXPORT_SYMBOL(blk_iopoll_sched);
3562 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3565 + preempt_check_resched_rt();
3570 this_cpu_ptr(&blk_cpu_iopoll));
3571 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3573 + preempt_check_resched_rt();
3577 diff -Nur linux-4.4.46.orig/block/blk-mq.c linux-4.4.46/block/blk-mq.c
3578 --- linux-4.4.46.orig/block/blk-mq.c 2017-02-01 08:31:11.000000000 +0100
3579 +++ linux-4.4.46/block/blk-mq.c 2017-02-03 17:18:05.667415866 +0100
3582 static void blk_mq_freeze_queue_wait(struct request_queue *q)
3584 - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
3585 + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
3590 WARN_ON_ONCE(freeze_depth < 0);
3591 if (!freeze_depth) {
3592 percpu_ref_reinit(&q->q_usage_counter);
3593 - wake_up_all(&q->mq_freeze_wq);
3594 + swake_up_all(&q->mq_freeze_wq);
3597 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
3599 * dying, we need to ensure that processes currently waiting on
3600 * the queue are notified as well.
3602 - wake_up_all(&q->mq_freeze_wq);
3603 + swake_up_all(&q->mq_freeze_wq);
3606 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
3611 +#ifdef CONFIG_PREEMPT_RT_FULL
3612 + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
3614 INIT_LIST_HEAD(&rq->timeout_list);
3617 @@ -325,6 +328,17 @@
3619 EXPORT_SYMBOL(blk_mq_end_request);
3621 +#ifdef CONFIG_PREEMPT_RT_FULL
3623 +void __blk_mq_complete_request_remote_work(struct work_struct *work)
3625 + struct request *rq = container_of(work, struct request, work);
3627 + rq->q->softirq_done_fn(rq);
3632 static void __blk_mq_complete_request_remote(void *data)
3634 struct request *rq = data;
3636 rq->q->softirq_done_fn(rq);
3641 static void blk_mq_ipi_complete_request(struct request *rq)
3643 struct blk_mq_ctx *ctx = rq->mq_ctx;
3644 @@ -343,19 +359,23 @@
3649 + cpu = get_cpu_light();
3650 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
3651 shared = cpus_share_cache(cpu, ctx->cpu);
3653 if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
3654 +#ifdef CONFIG_PREEMPT_RT_FULL
3655 + schedule_work_on(ctx->cpu, &rq->work);
3657 rq->csd.func = __blk_mq_complete_request_remote;
3660 smp_call_function_single_async(ctx->cpu, &rq->csd);
3663 rq->q->softirq_done_fn(rq);
3669 static void __blk_mq_complete_request(struct request *rq)
3670 @@ -862,14 +882,14 @@
3674 - int cpu = get_cpu();
3675 + int cpu = get_cpu_light();
3676 if (cpumask_test_cpu(cpu, hctx->cpumask)) {
3677 __blk_mq_run_hw_queue(hctx);
3687 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
3688 @@ -1617,7 +1637,7 @@
3690 struct blk_mq_hw_ctx *hctx = data;
3692 - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3693 + if (action == CPU_POST_DEAD)
3694 return blk_mq_hctx_cpu_offline(hctx, cpu);
3697 diff -Nur linux-4.4.46.orig/block/blk-mq-cpu.c linux-4.4.46/block/blk-mq-cpu.c
3698 --- linux-4.4.46.orig/block/blk-mq-cpu.c 2017-02-01 08:31:11.000000000 +0100
3699 +++ linux-4.4.46/block/blk-mq-cpu.c 2017-02-03 17:18:05.667415866 +0100
3703 static LIST_HEAD(blk_mq_cpu_notify_list);
3704 -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
3705 +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
3707 static int blk_mq_main_cpu_notify(struct notifier_block *self,
3708 unsigned long action, void *hcpu)
3710 struct blk_mq_cpu_notifier *notify;
3711 int ret = NOTIFY_OK;
3713 - raw_spin_lock(&blk_mq_cpu_notify_lock);
3714 + if (action != CPU_POST_DEAD)
3717 + spin_lock(&blk_mq_cpu_notify_lock);
3719 list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
3720 ret = notify->notify(notify->data, action, cpu);
3725 - raw_spin_unlock(&blk_mq_cpu_notify_lock);
3726 + spin_unlock(&blk_mq_cpu_notify_lock);
3732 BUG_ON(!notifier->notify);
3734 - raw_spin_lock(&blk_mq_cpu_notify_lock);
3735 + spin_lock(&blk_mq_cpu_notify_lock);
3736 	list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
3737 - raw_spin_unlock(&blk_mq_cpu_notify_lock);
3738 + spin_unlock(&blk_mq_cpu_notify_lock);
3741 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
3743 - raw_spin_lock(&blk_mq_cpu_notify_lock);
3744 + spin_lock(&blk_mq_cpu_notify_lock);
3745 	list_del(&notifier->list);
3746 - raw_spin_unlock(&blk_mq_cpu_notify_lock);
3747 + spin_unlock(&blk_mq_cpu_notify_lock);
3750 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
3751 diff -Nur linux-4.4.46.orig/block/blk-mq.h linux-4.4.46/block/blk-mq.h
3752 --- linux-4.4.46.orig/block/blk-mq.h 2017-02-01 08:31:11.000000000 +0100
3753 +++ linux-4.4.46/block/blk-mq.h 2017-02-03 17:18:05.667415866 +0100
3755 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
3758 - return per_cpu_ptr(q->queue_ctx, cpu);
3759 + struct blk_mq_ctx *ctx;
3761 + ctx = per_cpu_ptr(q->queue_ctx, cpu);
3768 static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
3770 - return __blk_mq_get_ctx(q, get_cpu());
3771 + return __blk_mq_get_ctx(q, get_cpu_light());
3774 static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
3780 struct blk_mq_alloc_data {
3781 diff -Nur linux-4.4.46.orig/block/blk-softirq.c linux-4.4.46/block/blk-softirq.c
3782 --- linux-4.4.46.orig/block/blk-softirq.c 2017-02-01 08:31:11.000000000 +0100
3783 +++ linux-4.4.46/block/blk-softirq.c 2017-02-03 17:18:05.667415866 +0100
3785 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3787 local_irq_restore(flags);
3788 + preempt_check_resched_rt();
3793 this_cpu_ptr(&blk_cpu_done));
3794 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3796 + preempt_check_resched_rt();
3803 local_irq_restore(flags);
3804 + preempt_check_resched_rt();
3808 diff -Nur linux-4.4.46.orig/block/bounce.c linux-4.4.46/block/bounce.c
3809 --- linux-4.4.46.orig/block/bounce.c 2017-02-01 08:31:11.000000000 +0100
3810 +++ linux-4.4.46/block/bounce.c 2017-02-03 17:18:05.667415866 +0100
3812 unsigned long flags;
3815 - local_irq_save(flags);
3816 + local_irq_save_nort(flags);
3817 vto = kmap_atomic(to->bv_page);
3818 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
3820 - local_irq_restore(flags);
3821 + local_irq_restore_nort(flags);
3824 #else /* CONFIG_HIGHMEM */
3825 diff -Nur linux-4.4.46.orig/crypto/algapi.c linux-4.4.46/crypto/algapi.c
3826 --- linux-4.4.46.orig/crypto/algapi.c 2017-02-01 08:31:11.000000000 +0100
3827 +++ linux-4.4.46/crypto/algapi.c 2017-02-03 17:18:05.667415866 +0100
3828 @@ -719,13 +719,13 @@
3830 int crypto_register_notifier(struct notifier_block *nb)
3832 - return blocking_notifier_chain_register(&crypto_chain, nb);
3833 + return srcu_notifier_chain_register(&crypto_chain, nb);
3835 EXPORT_SYMBOL_GPL(crypto_register_notifier);
3837 int crypto_unregister_notifier(struct notifier_block *nb)
3839 - return blocking_notifier_chain_unregister(&crypto_chain, nb);
3840 + return srcu_notifier_chain_unregister(&crypto_chain, nb);
3842 EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
3844 diff -Nur linux-4.4.46.orig/crypto/api.c linux-4.4.46/crypto/api.c
3845 --- linux-4.4.46.orig/crypto/api.c 2017-02-01 08:31:11.000000000 +0100
3846 +++ linux-4.4.46/crypto/api.c 2017-02-03 17:18:05.671416021 +0100
3848 DECLARE_RWSEM(crypto_alg_sem);
3849 EXPORT_SYMBOL_GPL(crypto_alg_sem);
3851 -BLOCKING_NOTIFIER_HEAD(crypto_chain);
3852 +SRCU_NOTIFIER_HEAD(crypto_chain);
3853 EXPORT_SYMBOL_GPL(crypto_chain);
3855 static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
3856 @@ -236,10 +236,10 @@
3860 - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
3861 + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
3862 if (ok == NOTIFY_DONE) {
3863 request_module("cryptomgr");
3864 - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
3865 + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
3869 diff -Nur linux-4.4.46.orig/crypto/internal.h linux-4.4.46/crypto/internal.h
3870 --- linux-4.4.46.orig/crypto/internal.h 2017-02-01 08:31:11.000000000 +0100
3871 +++ linux-4.4.46/crypto/internal.h 2017-02-03 17:18:05.671416021 +0100
3874 extern struct list_head crypto_alg_list;
3875 extern struct rw_semaphore crypto_alg_sem;
3876 -extern struct blocking_notifier_head crypto_chain;
3877 +extern struct srcu_notifier_head crypto_chain;
3879 #ifdef CONFIG_PROC_FS
3880 void __init crypto_init_proc(void);
3883 static inline void crypto_notify(unsigned long val, void *v)
3885 - blocking_notifier_call_chain(&crypto_chain, val, v);
3886 + srcu_notifier_call_chain(&crypto_chain, val, v);
3889 #endif /* _CRYPTO_INTERNAL_H */
3890 diff -Nur linux-4.4.46.orig/Documentation/hwlat_detector.txt linux-4.4.46/Documentation/hwlat_detector.txt
3891 --- linux-4.4.46.orig/Documentation/hwlat_detector.txt 1970-01-01 01:00:00.000000000 +0100
3892 +++ linux-4.4.46/Documentation/hwlat_detector.txt 2017-02-03 17:18:05.623414168 +0100
3897 +The module hwlat_detector is a special purpose kernel module that is used to
3898 +detect large system latencies induced by the behavior of certain underlying
3899 +hardware or firmware, independent of Linux itself. The code was developed
3900 +originally to detect SMIs (System Management Interrupts) on x86 systems;
3901 +however, there is nothing x86-specific about this patchset. It was
3902 +originally written for use by the "RT" patch since the Real Time
3903 +kernel is highly latency sensitive.
3905 +SMIs are usually not serviced by the Linux kernel, which typically does not
3906 +even know that they are occurring. SMIs are instead set up by BIOS code
3907 +and are serviced by BIOS code, usually for "critical" events such as
3908 +management of thermal sensors and fans. Sometimes though, SMIs are used for
3909 +other tasks and those tasks can spend an inordinate amount of time in the
3910 +handler (sometimes measured in milliseconds). Obviously this is a problem if
3911 +you are trying to keep event service latencies down in the microsecond range.
3913 +The hardware latency detector works by hogging all of the cpus for configurable
3914 +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
3915 +for some period, then looking for gaps in the TSC data. Any gap indicates a
3916 +time when the polling was interrupted, and since the machine is stopped and
3917 +interrupts turned off, the only thing that could do that would be an SMI.
3919 +Note that the SMI detector should *NEVER* be used in a production environment.
3920 +It is intended to be run manually to determine if the hardware platform has a
3921 +problem with long system firmware service routines.
3926 +Loading the module hwlat_detector with the parameter "enabled=1" (or by
3927 +setting the "enable" entry in the "hwlat_detector" debugfs directory to 1) is the only
3928 +step required to start the hwlat_detector. It is possible to redefine the
3929 +threshold in microseconds (us) above which latency spikes will be taken
3930 +into account (parameter "threshold=").
3934 + # modprobe hwlat_detector enabled=1 threshold=100
3936 +After the module is loaded, it creates a directory named "hwlat_detector" under
3937 +the debugfs mountpoint ("/debug/hwlat_detector" throughout this text). It is necessary
3938 +to have debugfs mounted, which might be on /sys/debug on your system.
3940 +The /debug/hwlat_detector interface contains the following files:
3942 +count - number of latency spikes observed since last reset
3943 +enable - a global enable/disable toggle (0/1), resets count
3944 +max - maximum hardware latency actually observed (usecs)
3945 +sample - a pipe from which to read current raw sample data
3946 + in the format <timestamp> <latency observed usecs>
3947 + (can be opened O_NONBLOCK for a single sample)
3948 +threshold - minimum latency value to be considered (usecs)
3949 +width - time period to sample with CPUs held (usecs)
3950 + must be less than the total window size (enforced)
3951 +window - total period of sampling, width being inside (usecs)
3953 +By default we will set width to 500,000 and window to 1,000,000, meaning that
3954 +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
3955 +observe any latencies that exceed the threshold (initially 100 usecs),
3956 +then we write to a global sample ring buffer of 8K samples, which is
3957 +consumed by reading from the "sample" (pipe) debugfs file interface.
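
As a rough illustration of the sampling scheme described above (hog a CPU, read the timestamp counter in a tight loop, record the largest gap), the following sketch uses ktime_get_ns() as a stand-in for the raw TSC read and omits the stop_machine()/ring-buffer/debugfs plumbing of the real module; the variable names are invented:

static u64 sample_width_ns;	/* "width" above, in nanoseconds */
static u64 threshold_ns;	/* "threshold" above */

static void hwlat_sample_once(void)
{
	u64 start, t1, t2, max_gap = 0;

	start = t1 = ktime_get_ns();		/* stand-in for the TSC read */
	do {
		t2 = ktime_get_ns();
		if (t2 - t1 > max_gap)
			max_gap = t2 - t1;	/* largest gap between reads */
		t1 = t2;
	} while (t2 - start < sample_width_ns);

	if (max_gap > threshold_ns)
		pr_info("hardware latency spike: %llu ns\n",
			(unsigned long long)max_gap);
}
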
3958 diff -Nur linux-4.4.46.orig/Documentation/kernel-parameters.txt linux-4.4.46/Documentation/kernel-parameters.txt
3959 --- linux-4.4.46.orig/Documentation/kernel-parameters.txt 2017-02-01 08:31:11.000000000 +0100
3960 +++ linux-4.4.46/Documentation/kernel-parameters.txt 2017-02-03 17:18:05.623414168 +0100
3961 @@ -1636,6 +1636,15 @@
3963 See Documentation/filesystems/nfs/nfsroot.txt.
3965 + irqaffinity= [SMP] Set the default irq affinity mask
3967 + <cpu number>,...,<cpu number>
3969 + <cpu number>-<cpu number>
3970 + (must be a positive range in ascending order)
3972 + <cpu number>,...,<cpu number>-<cpu number>
3975 When an interrupt is not handled search all handlers
3976 for it. Intended to get systems with badly broken
3977 diff -Nur linux-4.4.46.orig/Documentation/sysrq.txt linux-4.4.46/Documentation/sysrq.txt
3978 --- linux-4.4.46.orig/Documentation/sysrq.txt 2017-02-01 08:31:11.000000000 +0100
3979 +++ linux-4.4.46/Documentation/sysrq.txt 2017-02-03 17:18:05.623414168 +0100
3981 On other - If you know of the key combos for other architectures, please
3982 let me know so I can add them to this section.
3984 -On all - write a character to /proc/sysrq-trigger. e.g.:
3986 +On all - write a character to /proc/sysrq-trigger, e.g.:
3987 echo t > /proc/sysrq-trigger
3989 +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
3990 + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
3991 + Send an ICMP echo request with this pattern plus the particular
3992 + SysRq command key. Example:
3993 + # ping -c1 -s57 -p0102030468
3994 + will trigger the SysRq-H (help) command.
3997 * What are the 'command' keys?
3998 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3999 'b' - Will immediately reboot the system without syncing or unmounting
4000 diff -Nur linux-4.4.46.orig/Documentation/trace/histograms.txt linux-4.4.46/Documentation/trace/histograms.txt
4001 --- linux-4.4.46.orig/Documentation/trace/histograms.txt 1970-01-01 01:00:00.000000000 +0100
4002 +++ linux-4.4.46/Documentation/trace/histograms.txt 2017-02-03 17:18:05.623414168 +0100
4004 + Using the Linux Kernel Latency Histograms
4007 +This document gives a short explanation of how to enable, configure and use
4008 +latency histograms. Latency histograms are primarily relevant in the
4009 +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
4010 +and are used in the quality management of the Linux real-time
4014 +* Purpose of latency histograms
4016 +A latency histogram continuously accumulates the frequencies of latency
4017 +data. There are two types of histograms:
4018 +- potential sources of latencies
4019 +- effective latencies
4022 +* Potential sources of latencies
4024 +Potential sources of latencies are code segments where interrupts,
4025 +preemption or both are disabled (aka critical sections). To create
4026 +histograms of potential sources of latency, the kernel stores the time
4027 +stamp at the start of a critical section, determines the time elapsed
4028 +when the end of the section is reached, and increments the frequency
4029 +counter of that latency value - irrespective of whether any concurrently
4030 +running process is affected by latency or not.
4031 +- Configuration items (in the Kernel hacking/Tracers submenu)
4032 + CONFIG_INTERRUPT_OFF_LATENCY
4033 + CONFIG_PREEMPT_OFF_LATENCY
4036 +* Effective latencies
4038 +Effective latencies are those actually occurring during wakeup of a process. To
4039 +determine effective latencies, the kernel stores the time stamp when a
4040 +process is scheduled to be woken up, and determines the duration of the
4041 +wakeup time shortly before control is passed over to this process. Note
4042 +that the apparent latency in user space may be somewhat longer, since the
4043 +process may be interrupted after control is passed over to it but before
4044 +the execution in user space takes place. Simply measuring the interval
4045 +between enqueuing and wakeup may also not be appropriate in cases where a
4046 +process is scheduled as a result of a timer expiration. The timer may have
4047 +missed its deadline, e.g. due to disabled interrupts, but this latency
4048 +would not be registered. Therefore, the offsets of missed timers are
4049 +recorded in a separate histogram. If both wakeup latency and missed timer
4050 +offsets are configured and enabled, a third histogram may be enabled that
4051 +records the overall latency as a sum of the timer latency, if any, and the
4052 +wakeup latency. This histogram is called "timerandwakeup".
4053 +- Configuration items (in the Kernel hacking/Tracers submenu)
4054 + CONFIG_WAKEUP_LATENCY
4055 + CONFIG_MISSED_TIMER_OFSETS
4060 +The interface to the administration of the latency histograms is located
4061 +in the debugfs file system. To mount it, either enter
4063 +mount -t sysfs nodev /sys
4064 +mount -t debugfs nodev /sys/kernel/debug
4066 +from the shell command line, or add
4068 +nodev /sys sysfs defaults 0 0
4069 +nodev /sys/kernel/debug debugfs defaults 0 0
4071 +to the file /etc/fstab. All latency histogram related files are then
4072 +available in the directory /sys/kernel/debug/tracing/latency_hist. A
4073 +particular histogram type is enabled by writing non-zero to the related
4074 +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
4075 +Select "preemptirqsoff" for the histograms of potential sources of
4076 +latencies and "wakeup" for histograms of effective latencies etc. The
4077 +histogram data - one per CPU - are available in the files
4079 +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
4080 +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
4081 +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
4082 +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
4083 +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
4084 +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
4085 +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
4087 +The histograms are reset by writing non-zero to the file "reset" in a
4088 +particular latency directory. To reset all latency data, use
4092 +TRACINGDIR=/sys/kernel/debug/tracing
4093 +HISTDIR=$TRACINGDIR/latency_hist
4095 +if test -d $HISTDIR
4098 + for i in `find . | grep /reset$`
4107 +Latency data are stored with a resolution of one microsecond. The
4108 +maximum latency is 10,240 microseconds. The data are only valid if the
4109 +overflow register is empty. Every output line contains the latency in
4110 +microseconds in the first row and the number of samples in the second
4111 +row. To display only lines with a positive latency count, use, for example,
4114 +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
4116 +#Minimum latency: 0 microseconds.
4117 +#Average latency: 0 microseconds.
4118 +#Maximum latency: 25 microseconds.
4119 +#Total samples: 3104770694
4120 +#There are 0 samples greater or equal than 10240 microseconds
4149 +* Wakeup latency of a selected process
4151 +To only collect wakeup latency data of a particular process, write the
4152 +PID of the requested process to
4154 +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
4156 +PIDs are not considered if this variable is set to 0.
4159 +* Details of the process with the highest wakeup latency so far
4161 +Selected data of the process that suffered from the highest wakeup
4162 +latency that occurred in a particular CPU are available in the file
4164 +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
4166 +In addition, other relevant system data at the time when the
4167 +latency occurred are given.
4169 +The format of the data is (all in one line):
4170 +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
4171 +<- <PID> <Priority> <Command> <Timestamp>
4173 +The value of <Timeroffset> is only relevant in the combined timer
4174 +and wakeup latency recording. In the wakeup recording, it is
4175 +always 0; in the missed_timer_offsets recording, it is the same
4178 +When retrospectively searching for the origin of a latency while
4179 +tracing was not enabled, it may be helpful to know the name and
4180 +some basic data of the task that (finally) was switching to the
4181 +late real-time task. In addition to the victim's data, also the
4182 +data of the possible culprit are therefore displayed after the
4185 +Finally, the timestamp of the time when the latency occurred
4186 +in <seconds>.<microseconds> after the most recent system boot
4189 +These data are also reset when the wakeup histogram is reset.
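
A compressed sketch of the bookkeeping described at the top of this file (store a timestamp when the critical section or wakeup starts, bump a per-latency counter when it ends); the names are invented, and the real code keeps one histogram per CPU and per histogram type:

#define HIST_BUCKETS	10240			/* 1 us resolution, see above */

static unsigned long latency_hist[HIST_BUCKETS];	/* per CPU in reality */
static u64 section_start;

static void hist_section_enter(void)
{
	section_start = ktime_get_ns();
}

static void hist_section_exit(void)
{
	u64 delta_us = (ktime_get_ns() - section_start) / NSEC_PER_USEC;

	if (delta_us >= HIST_BUCKETS)
		delta_us = HIST_BUCKETS - 1;	/* counted as overflow */
	latency_hist[delta_us]++;
}
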
4190 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/acglobal.h linux-4.4.46/drivers/acpi/acpica/acglobal.h
4191 --- linux-4.4.46.orig/drivers/acpi/acpica/acglobal.h 2017-02-01 08:31:11.000000000 +0100
4192 +++ linux-4.4.46/drivers/acpi/acpica/acglobal.h 2017-02-03 17:18:05.671416021 +0100
4196 ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
4197 -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
4198 +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
4199 ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
4201 /* Mutex for _OSI support */
4202 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/hwregs.c linux-4.4.46/drivers/acpi/acpica/hwregs.c
4203 --- linux-4.4.46.orig/drivers/acpi/acpica/hwregs.c 2017-02-01 08:31:11.000000000 +0100
4204 +++ linux-4.4.46/drivers/acpi/acpica/hwregs.c 2017-02-03 17:18:05.671416021 +0100
4205 @@ -269,14 +269,14 @@
4206 ACPI_BITMASK_ALL_FIXED_STATUS,
4207 ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
4209 - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
4210 + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
4212 /* Clear the fixed events in PM1 A/B */
4214 status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
4215 ACPI_BITMASK_ALL_FIXED_STATUS);
4217 - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
4218 + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
4220 if (ACPI_FAILURE(status)) {
4222 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/hwxface.c linux-4.4.46/drivers/acpi/acpica/hwxface.c
4223 --- linux-4.4.46.orig/drivers/acpi/acpica/hwxface.c 2017-02-01 08:31:11.000000000 +0100
4224 +++ linux-4.4.46/drivers/acpi/acpica/hwxface.c 2017-02-03 17:18:05.671416021 +0100
4226 return_ACPI_STATUS(AE_BAD_PARAMETER);
4229 - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
4230 + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
4233 * At this point, we know that the parent register is one of the
4238 - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
4239 + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
4240 return_ACPI_STATUS(status);
4243 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/utmutex.c linux-4.4.46/drivers/acpi/acpica/utmutex.c
4244 --- linux-4.4.46.orig/drivers/acpi/acpica/utmutex.c 2017-02-01 08:31:11.000000000 +0100
4245 +++ linux-4.4.46/drivers/acpi/acpica/utmutex.c 2017-02-03 17:18:05.671416021 +0100
4247 return_ACPI_STATUS (status);
4250 - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
4251 + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
4252 if (ACPI_FAILURE (status)) {
4253 return_ACPI_STATUS (status);
4256 /* Delete the spinlocks */
4258 acpi_os_delete_lock(acpi_gbl_gpe_lock);
4259 - acpi_os_delete_lock(acpi_gbl_hardware_lock);
4260 + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
4261 acpi_os_delete_lock(acpi_gbl_reference_count_lock);
4263 /* Delete the reader/writer lock */
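
Background for the four ACPI hunks above: with PREEMPT_RT_FULL a spinlock_t (and the acpi_spinlock wrapper built on it) becomes a sleeping lock, but acpi_gbl_hardware_lock is taken in contexts that must not sleep, so it is converted to a raw spinlock. The acpi_os_create_raw_lock()/acpi_os_delete_raw_lock() helpers are presumably added elsewhere in this series. The pattern, shown on a hypothetical lock:

static DEFINE_RAW_SPINLOCK(example_hw_lock);	/* hypothetical lock */

static void example_hw_access(void)
{
	unsigned long flags;

	/* stays a true spinning lock even with PREEMPT_RT_FULL */
	raw_spin_lock_irqsave(&example_hw_lock, flags);
	/* touch the hardware registers here */
	raw_spin_unlock_irqrestore(&example_hw_lock, flags);
}
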
4264 diff -Nur linux-4.4.46.orig/drivers/ata/libata-sff.c linux-4.4.46/drivers/ata/libata-sff.c
4265 --- linux-4.4.46.orig/drivers/ata/libata-sff.c 2017-02-01 08:31:11.000000000 +0100
4266 +++ linux-4.4.46/drivers/ata/libata-sff.c 2017-02-03 17:18:05.671416021 +0100
4268 unsigned long flags;
4269 unsigned int consumed;
4271 - local_irq_save(flags);
4272 + local_irq_save_nort(flags);
4273 consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
4274 - local_irq_restore(flags);
4275 + local_irq_restore_nort(flags);
4280 unsigned long flags;
4282 /* FIXME: use a bounce buffer */
4283 - local_irq_save(flags);
4284 + local_irq_save_nort(flags);
4285 buf = kmap_atomic(page);
4287 /* do the actual data transfer */
4292 - local_irq_restore(flags);
4293 + local_irq_restore_nort(flags);
4295 buf = page_address(page);
4296 ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
4298 unsigned long flags;
4300 /* FIXME: use bounce buffer */
4301 - local_irq_save(flags);
4302 + local_irq_save_nort(flags);
4303 buf = kmap_atomic(page);
4305 /* do the actual data transfer */
4310 - local_irq_restore(flags);
4311 + local_irq_restore_nort(flags);
4313 buf = page_address(page);
4314 consumed = ap->ops->sff_data_xfer(dev, buf + offset,
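
The _nort ("not on RT") variants used above keep interrupts disabled on a stock kernel but become near no-ops with PREEMPT_RT_FULL, where this PIO transfer code runs in a preemptible (threaded) context and takes too long to run with interrupts hard-disabled. Roughly, and subject to the exact definitions elsewhere in this series:

#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif
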
4315 diff -Nur linux-4.4.46.orig/drivers/block/zram/zram_drv.c linux-4.4.46/drivers/block/zram/zram_drv.c
4316 --- linux-4.4.46.orig/drivers/block/zram/zram_drv.c 2017-02-01 08:31:11.000000000 +0100
4317 +++ linux-4.4.46/drivers/block/zram/zram_drv.c 2017-02-03 17:18:05.671416021 +0100
4322 + zram_meta_init_table_locks(meta, disksize);
4327 @@ -568,12 +570,12 @@
4328 unsigned long handle;
4331 - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4332 + zram_lock_table(&meta->table[index]);
4333 handle = meta->table[index].handle;
4334 size = zram_get_obj_size(meta, index);
4336 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
4337 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4338 + zram_unlock_table(&meta->table[index]);
4344 ret = zcomp_decompress(zram->comp, cmem, size, mem);
4345 zs_unmap_object(meta->mem_pool, handle);
4346 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4347 + zram_unlock_table(&meta->table[index]);
4349 /* Should NEVER happen. Return bio error if it does. */
4350 if (unlikely(ret)) {
4351 @@ -604,14 +606,14 @@
4352 struct zram_meta *meta = zram->meta;
4353 page = bvec->bv_page;
4355 - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4356 + zram_lock_table(&meta->table[index]);
4357 if (unlikely(!meta->table[index].handle) ||
4358 zram_test_flag(meta, index, ZRAM_ZERO)) {
4359 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4360 + zram_unlock_table(&meta->table[index]);
4361 handle_zero_page(bvec);
4364 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4365 + zram_unlock_table(&meta->table[index]);
4367 if (is_partial_io(bvec))
4368 /* Use a temporary buffer to decompress the page */
4369 @@ -689,10 +691,10 @@
4371 kunmap_atomic(user_mem);
4372 /* Free memory associated with this sector now. */
4373 - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4374 + zram_lock_table(&meta->table[index]);
4375 zram_free_page(zram, index);
4376 zram_set_flag(meta, index, ZRAM_ZERO);
4377 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4378 + zram_unlock_table(&meta->table[index]);
4380 atomic64_inc(&zram->stats.zero_pages);
4382 @@ -752,12 +754,12 @@
4383 * Free memory associated with this sector
4384 * before overwriting unused sectors.
4386 - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4387 + zram_lock_table(&meta->table[index]);
4388 zram_free_page(zram, index);
4390 meta->table[index].handle = handle;
4391 zram_set_obj_size(meta, index, clen);
4392 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4393 + zram_unlock_table(&meta->table[index]);
4396 atomic64_add(clen, &zram->stats.compr_data_size);
4400 while (n >= PAGE_SIZE) {
4401 - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4402 + zram_lock_table(&meta->table[index]);
4403 zram_free_page(zram, index);
4404 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4405 + zram_unlock_table(&meta->table[index]);
4406 atomic64_inc(&zram->stats.notify_free);
4410 zram = bdev->bd_disk->private_data;
4413 - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4414 + zram_lock_table(&meta->table[index]);
4415 zram_free_page(zram, index);
4416 - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4417 + zram_unlock_table(&meta->table[index]);
4418 atomic64_inc(&zram->stats.notify_free);
4421 diff -Nur linux-4.4.46.orig/drivers/block/zram/zram_drv.h linux-4.4.46/drivers/block/zram/zram_drv.h
4422 --- linux-4.4.46.orig/drivers/block/zram/zram_drv.h 2017-02-01 08:31:11.000000000 +0100
4423 +++ linux-4.4.46/drivers/block/zram/zram_drv.h 2017-02-03 17:18:05.671416021 +0100
4425 struct zram_table_entry {
4426 unsigned long handle;
4427 unsigned long value;
4428 +#ifdef CONFIG_PREEMPT_RT_BASE
4434 @@ -119,4 +122,42 @@
4436 bool claim; /* Protected by bdev->bd_mutex */
4439 +#ifndef CONFIG_PREEMPT_RT_BASE
4440 +static inline void zram_lock_table(struct zram_table_entry *table)
4442 + bit_spin_lock(ZRAM_ACCESS, &table->value);
4445 +static inline void zram_unlock_table(struct zram_table_entry *table)
4447 + bit_spin_unlock(ZRAM_ACCESS, &table->value);
4450 +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
4451 +#else /* CONFIG_PREEMPT_RT_BASE */
4452 +static inline void zram_lock_table(struct zram_table_entry *table)
4454 + spin_lock(&table->lock);
4455 + __set_bit(ZRAM_ACCESS, &table->value);
4458 +static inline void zram_unlock_table(struct zram_table_entry *table)
4460 + __clear_bit(ZRAM_ACCESS, &table->value);
4461 + spin_unlock(&table->lock);
4464 +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
4466 + size_t num_pages = disksize >> PAGE_SHIFT;
4469 + for (index = 0; index < num_pages; index++) {
4470 + spinlock_t *lock = &meta->table[index].lock;
4471 + spin_lock_init(lock);
4474 +#endif /* CONFIG_PREEMPT_RT_BASE */
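
The reason for the per-entry lock above: bit_spin_lock() busy-waits with preemption disabled, roughly as sketched below, which is incompatible with PREEMPT_RT where the protected zram/zsmalloc paths may sleep. On RT each table entry therefore carries a real spinlock_t (a sleeping lock there), while the non-RT build keeps the space-saving bit lock.

/* Roughly what bit_spin_lock() does (see include/linux/bit_spinlock.h). */
static inline void bit_spin_lock_sketch(int bitnum, unsigned long *addr)
{
	preempt_disable();
	while (unlikely(test_and_set_bit_lock(bitnum, addr)))
		cpu_relax();		/* spin with preemption disabled */
}
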
4477 diff -Nur linux-4.4.46.orig/drivers/char/random.c linux-4.4.46/drivers/char/random.c
4478 --- linux-4.4.46.orig/drivers/char/random.c 2017-02-01 08:31:11.000000000 +0100
4479 +++ linux-4.4.46/drivers/char/random.c 2017-02-03 17:18:05.671416021 +0100
4482 long delta, delta2, delta3;
4484 - preempt_disable();
4486 sample.jiffies = jiffies;
4487 sample.cycles = random_get_entropy();
4491 credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
4496 void add_input_randomness(unsigned int type, unsigned int code,
4497 @@ -894,28 +891,27 @@
4498 return *(ptr + f->reg_idx++);
4501 -void add_interrupt_randomness(int irq, int irq_flags)
4502 +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
4504 struct entropy_store *r;
4505 struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
4506 - struct pt_regs *regs = get_irq_regs();
4507 unsigned long now = jiffies;
4508 cycles_t cycles = random_get_entropy();
4509 __u32 c_high, j_high;
4515 - cycles = get_reg(fast_pool, regs);
4516 + cycles = get_reg(fast_pool, NULL);
4517 c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
4518 j_high = (sizeof(now) > 4) ? now >> 32 : 0;
4519 fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
4520 fast_pool->pool[1] ^= now ^ c_high;
4521 - ip = regs ? instruction_pointer(regs) : _RET_IP_;
4524 fast_pool->pool[2] ^= ip;
4525 fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
4526 - get_reg(fast_pool, regs);
4527 + get_reg(fast_pool, NULL);
4529 fast_mix(fast_pool);
4530 add_interrupt_bench(cycles);
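
With forced-threaded interrupts on RT, add_interrupt_randomness() no longer runs in hard-irq context, so get_irq_regs() would not describe the interrupted context; the new __u64 ip parameter lets the caller capture the instruction pointer where it is still valid. A hypothetical caller sketch (the real caller changes live in the kernel/irq/ part of this series):

static void example_handle_irq(unsigned int irq)
{
	struct pt_regs *regs = get_irq_regs();
	u64 ip = regs ? instruction_pointer(regs) : _RET_IP_;

	/* ... run the (possibly threaded) handler ... */

	add_interrupt_randomness(irq, 0, ip);
}
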
4531 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-generated.c linux-4.4.46/drivers/clk/at91/clk-generated.c
4532 --- linux-4.4.46.orig/drivers/clk/at91/clk-generated.c 2017-02-01 08:31:11.000000000 +0100
4533 +++ linux-4.4.46/drivers/clk/at91/clk-generated.c 2017-02-03 17:18:05.671416021 +0100
4535 #include <linux/clkdev.h>
4536 #include <linux/clk/at91_pmc.h>
4537 #include <linux/of.h>
4538 -#include <linux/of_address.h>
4539 -#include <linux/io.h>
4540 +#include <linux/mfd/syscon.h>
4541 +#include <linux/regmap.h>
4547 struct clk_generated {
4549 - struct at91_pmc *pmc;
4550 + struct regmap *regmap;
4551 struct clk_range range;
4557 static int clk_generated_enable(struct clk_hw *hw)
4559 struct clk_generated *gck = to_clk_generated(hw);
4560 - struct at91_pmc *pmc = gck->pmc;
4562 + unsigned long flags;
4564 pr_debug("GCLK: %s, gckdiv = %d, parent id = %d\n",
4565 __func__, gck->gckdiv, gck->parent_id);
4568 - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4569 - tmp = pmc_read(pmc, AT91_PMC_PCR) &
4570 - ~(AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK);
4571 - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_GCKCSS(gck->parent_id)
4572 - | AT91_PMC_PCR_CMD
4573 - | AT91_PMC_PCR_GCKDIV(gck->gckdiv)
4574 - | AT91_PMC_PCR_GCKEN);
4576 + spin_lock_irqsave(gck->lock, flags);
4577 + regmap_write(gck->regmap, AT91_PMC_PCR,
4578 + (gck->id & AT91_PMC_PCR_PID_MASK));
4579 + regmap_update_bits(gck->regmap, AT91_PMC_PCR,
4580 + AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK |
4581 + AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
4582 + AT91_PMC_PCR_GCKCSS(gck->parent_id) |
4583 + AT91_PMC_PCR_CMD |
4584 + AT91_PMC_PCR_GCKDIV(gck->gckdiv) |
4585 + AT91_PMC_PCR_GCKEN);
4586 + spin_unlock_irqrestore(gck->lock, flags);
4590 static void clk_generated_disable(struct clk_hw *hw)
4592 struct clk_generated *gck = to_clk_generated(hw);
4593 - struct at91_pmc *pmc = gck->pmc;
4595 + unsigned long flags;
4598 - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4599 - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_GCKEN;
4600 - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
4602 + spin_lock_irqsave(gck->lock, flags);
4603 + regmap_write(gck->regmap, AT91_PMC_PCR,
4604 + (gck->id & AT91_PMC_PCR_PID_MASK));
4605 + regmap_update_bits(gck->regmap, AT91_PMC_PCR,
4606 + AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
4607 + AT91_PMC_PCR_CMD);
4608 + spin_unlock_irqrestore(gck->lock, flags);
4611 static int clk_generated_is_enabled(struct clk_hw *hw)
4613 struct clk_generated *gck = to_clk_generated(hw);
4614 - struct at91_pmc *pmc = gck->pmc;
4616 + unsigned long flags;
4617 + unsigned int status;
4620 - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4621 - ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_GCKEN);
4623 + spin_lock_irqsave(gck->lock, flags);
4624 + regmap_write(gck->regmap, AT91_PMC_PCR,
4625 + (gck->id & AT91_PMC_PCR_PID_MASK));
4626 + regmap_read(gck->regmap, AT91_PMC_PCR, &status);
4627 + spin_unlock_irqrestore(gck->lock, flags);
4630 + return status & AT91_PMC_PCR_GCKEN ? 1 : 0;
4633 static unsigned long
4634 @@ -214,13 +218,14 @@
4636 static void clk_generated_startup(struct clk_generated *gck)
4638 - struct at91_pmc *pmc = gck->pmc;
4640 + unsigned long flags;
4643 - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4644 - tmp = pmc_read(pmc, AT91_PMC_PCR);
4646 + spin_lock_irqsave(gck->lock, flags);
4647 + regmap_write(gck->regmap, AT91_PMC_PCR,
4648 + (gck->id & AT91_PMC_PCR_PID_MASK));
4649 + regmap_read(gck->regmap, AT91_PMC_PCR, &tmp);
4650 + spin_unlock_irqrestore(gck->lock, flags);
4652 gck->parent_id = (tmp & AT91_PMC_PCR_GCKCSS_MASK)
4653 >> AT91_PMC_PCR_GCKCSS_OFFSET;
4657 static struct clk * __init
4658 -at91_clk_register_generated(struct at91_pmc *pmc, const char *name,
4659 - const char **parent_names, u8 num_parents,
4660 +at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const char
4661 + *name, const char **parent_names, u8 num_parents,
4662 u8 id, const struct clk_range *range)
4664 struct clk_generated *gck;
4668 gck->hw.init = &init;
4670 + gck->regmap = regmap;
4672 gck->range = *range;
4674 clk = clk_register(NULL, &gck->hw);
4679 -void __init of_sama5d2_clk_generated_setup(struct device_node *np,
4680 - struct at91_pmc *pmc)
4681 +void __init of_sama5d2_clk_generated_setup(struct device_node *np)
4686 const char *parent_names[GENERATED_SOURCE_MAX];
4687 struct device_node *gcknp;
4688 struct clk_range range = CLK_RANGE(0, 0);
4689 + struct regmap *regmap;
4691 num_parents = of_clk_get_parent_count(np);
4692 if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX)
4693 @@ -283,6 +289,10 @@
4694 if (!num || num > PERIPHERAL_MAX)
4697 + regmap = syscon_node_to_regmap(of_get_parent(np));
4698 + if (IS_ERR(regmap))
4701 for_each_child_of_node(np, gcknp) {
4702 if (of_property_read_u32(gcknp, "reg", &id))
4704 @@ -296,11 +306,14 @@
4705 of_at91_get_clk_range(gcknp, "atmel,clk-output-range",
4708 - clk = at91_clk_register_generated(pmc, name, parent_names,
4709 - num_parents, id, &range);
4710 + clk = at91_clk_register_generated(regmap, &pmc_pcr_lock, name,
4711 + parent_names, num_parents,
4716 of_clk_add_provider(gcknp, of_clk_src_simple_get, clk);
4719 +CLK_OF_DECLARE(of_sama5d2_clk_generated_setup, "atmel,sama5d2-clk-generated",
4720 + of_sama5d2_clk_generated_setup);
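
The conversion above replaces direct pmc_read()/pmc_write() accesses with a syscon regmap and serializes the indexed AT91_PMC_PCR sequence (select the peripheral id, then update the command bits) under a spinlock; pmc_pcr_lock itself is defined in the clk-peripheral.c hunk later in this patch. The pattern, condensed into a sketch with shortened names:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>

static void example_pcr_enable(struct regmap *regmap, spinlock_t *lock, u8 id)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	/* step 1: select which peripheral the PCR register addresses */
	regmap_write(regmap, AT91_PMC_PCR, id & AT91_PMC_PCR_PID_MASK);
	/* step 2: read-modify-write the command/enable bits atomically */
	regmap_update_bits(regmap, AT91_PMC_PCR,
			   AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
			   AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN);
	spin_unlock_irqrestore(lock, flags);
}
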
4721 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-h32mx.c linux-4.4.46/drivers/clk/at91/clk-h32mx.c
4722 --- linux-4.4.46.orig/drivers/clk/at91/clk-h32mx.c 2017-02-01 08:31:11.000000000 +0100
4723 +++ linux-4.4.46/drivers/clk/at91/clk-h32mx.c 2017-02-03 17:18:05.671416021 +0100
4725 #include <linux/clk-provider.h>
4726 #include <linux/clkdev.h>
4727 #include <linux/clk/at91_pmc.h>
4728 -#include <linux/delay.h>
4729 #include <linux/of.h>
4730 -#include <linux/of_address.h>
4731 -#include <linux/of_irq.h>
4732 -#include <linux/io.h>
4733 -#include <linux/interrupt.h>
4734 -#include <linux/irq.h>
4735 -#include <linux/sched.h>
4736 -#include <linux/wait.h>
4737 +#include <linux/regmap.h>
4738 +#include <linux/mfd/syscon.h>
4744 struct clk_sama5d4_h32mx {
4746 - struct at91_pmc *pmc;
4747 + struct regmap *regmap;
4750 #define to_clk_sama5d4_h32mx(hw) container_of(hw, struct clk_sama5d4_h32mx, hw)
4752 unsigned long parent_rate)
4754 struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
4755 + unsigned int mckr;
4757 - if (pmc_read(h32mxclk->pmc, AT91_PMC_MCKR) & AT91_PMC_H32MXDIV)
4758 + regmap_read(h32mxclk->regmap, AT91_PMC_MCKR, &mckr);
4759 + if (mckr & AT91_PMC_H32MXDIV)
4760 return parent_rate / 2;
4762 if (parent_rate > H32MX_MAX_FREQ)
4764 unsigned long parent_rate)
4766 struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
4767 - struct at91_pmc *pmc = h32mxclk->pmc;
4771 if (parent_rate != rate && (parent_rate / 2) != rate)
4775 - tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_H32MXDIV;
4776 if ((parent_rate / 2) == rate)
4777 - tmp |= AT91_PMC_H32MXDIV;
4778 - pmc_write(pmc, AT91_PMC_MCKR, tmp);
4780 + mckr = AT91_PMC_H32MXDIV;
4782 + regmap_update_bits(h32mxclk->regmap, AT91_PMC_MCKR,
4783 + AT91_PMC_H32MXDIV, mckr);
4788 .set_rate = clk_sama5d4_h32mx_set_rate,
4791 -void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
4792 - struct at91_pmc *pmc)
4793 +static void __init of_sama5d4_clk_h32mx_setup(struct device_node *np)
4795 struct clk_sama5d4_h32mx *h32mxclk;
4796 struct clk_init_data init;
4797 const char *parent_name;
4798 + struct regmap *regmap;
4801 + regmap = syscon_node_to_regmap(of_get_parent(np));
4802 + if (IS_ERR(regmap))
4805 h32mxclk = kzalloc(sizeof(*h32mxclk), GFP_KERNEL);
4809 init.flags = CLK_SET_RATE_GATE;
4811 h32mxclk->hw.init = &init;
4812 - h32mxclk->pmc = pmc;
4813 + h32mxclk->regmap = regmap;
4815 clk = clk_register(NULL, &h32mxclk->hw);
4819 of_clk_add_provider(np, of_clk_src_simple_get, clk);
4821 +CLK_OF_DECLARE(of_sama5d4_clk_h32mx_setup, "atmel,sama5d4-clk-h32mx",
4822 + of_sama5d4_clk_h32mx_setup);
4823 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-main.c linux-4.4.46/drivers/clk/at91/clk-main.c
4824 --- linux-4.4.46.orig/drivers/clk/at91/clk-main.c 2017-02-01 08:31:11.000000000 +0100
4825 +++ linux-4.4.46/drivers/clk/at91/clk-main.c 2017-02-03 17:18:05.671416021 +0100
4827 #include <linux/clk/at91_pmc.h>
4828 #include <linux/delay.h>
4829 #include <linux/of.h>
4830 -#include <linux/of_address.h>
4831 -#include <linux/of_irq.h>
4832 -#include <linux/io.h>
4833 -#include <linux/interrupt.h>
4834 -#include <linux/irq.h>
4835 -#include <linux/sched.h>
4836 -#include <linux/wait.h>
4837 +#include <linux/mfd/syscon.h>
4838 +#include <linux/regmap.h>
4844 struct clk_main_osc {
4846 - struct at91_pmc *pmc;
4848 - wait_queue_head_t wait;
4849 + struct regmap *regmap;
4852 #define to_clk_main_osc(hw) container_of(hw, struct clk_main_osc, hw)
4854 struct clk_main_rc_osc {
4856 - struct at91_pmc *pmc;
4858 - wait_queue_head_t wait;
4859 + struct regmap *regmap;
4860 unsigned long frequency;
4861 unsigned long accuracy;
4865 struct clk_rm9200_main {
4867 - struct at91_pmc *pmc;
4868 + struct regmap *regmap;
4871 #define to_clk_rm9200_main(hw) container_of(hw, struct clk_rm9200_main, hw)
4873 struct clk_sam9x5_main {
4875 - struct at91_pmc *pmc;
4877 - wait_queue_head_t wait;
4878 + struct regmap *regmap;
4882 #define to_clk_sam9x5_main(hw) container_of(hw, struct clk_sam9x5_main, hw)
4884 -static irqreturn_t clk_main_osc_irq_handler(int irq, void *dev_id)
4885 +static inline bool clk_main_osc_ready(struct regmap *regmap)
4887 - struct clk_main_osc *osc = dev_id;
4888 + unsigned int status;
4890 - wake_up(&osc->wait);
4891 - disable_irq_nosync(osc->irq);
4892 + regmap_read(regmap, AT91_PMC_SR, &status);
4894 - return IRQ_HANDLED;
4895 + return status & AT91_PMC_MOSCS;
4898 static int clk_main_osc_prepare(struct clk_hw *hw)
4900 struct clk_main_osc *osc = to_clk_main_osc(hw);
4901 - struct at91_pmc *pmc = osc->pmc;
4902 + struct regmap *regmap = osc->regmap;
4905 - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
4906 + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
4907 + tmp &= ~MOR_KEY_MASK;
4909 if (tmp & AT91_PMC_OSCBYPASS)
4912 if (!(tmp & AT91_PMC_MOSCEN)) {
4913 tmp |= AT91_PMC_MOSCEN | AT91_PMC_KEY;
4914 - pmc_write(pmc, AT91_CKGR_MOR, tmp);
4915 + regmap_write(regmap, AT91_CKGR_MOR, tmp);
4918 - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS)) {
4919 - enable_irq(osc->irq);
4920 - wait_event(osc->wait,
4921 - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS);
4923 + while (!clk_main_osc_ready(regmap))
4929 static void clk_main_osc_unprepare(struct clk_hw *hw)
4931 struct clk_main_osc *osc = to_clk_main_osc(hw);
4932 - struct at91_pmc *pmc = osc->pmc;
4933 - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
4934 + struct regmap *regmap = osc->regmap;
4937 + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
4938 if (tmp & AT91_PMC_OSCBYPASS)
4941 @@ -116,20 +104,22 @@
4944 tmp &= ~(AT91_PMC_KEY | AT91_PMC_MOSCEN);
4945 - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
4946 + regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
4949 static int clk_main_osc_is_prepared(struct clk_hw *hw)
4951 struct clk_main_osc *osc = to_clk_main_osc(hw);
4952 - struct at91_pmc *pmc = osc->pmc;
4953 - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
4954 + struct regmap *regmap = osc->regmap;
4957 + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
4958 if (tmp & AT91_PMC_OSCBYPASS)
4961 - return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS) &&
4962 - (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN));
4963 + regmap_read(regmap, AT91_PMC_SR, &status);
4965 + return (status & AT91_PMC_MOSCS) && (tmp & AT91_PMC_MOSCEN);
4968 static const struct clk_ops main_osc_ops = {
4969 @@ -139,18 +129,16 @@
4972 static struct clk * __init
4973 -at91_clk_register_main_osc(struct at91_pmc *pmc,
4975 +at91_clk_register_main_osc(struct regmap *regmap,
4977 const char *parent_name,
4981 struct clk_main_osc *osc;
4982 struct clk *clk = NULL;
4983 struct clk_init_data init;
4985 - if (!pmc || !irq || !name || !parent_name)
4986 + if (!name || !parent_name)
4987 return ERR_PTR(-EINVAL);
4989 osc = kzalloc(sizeof(*osc), GFP_KERNEL);
4990 @@ -164,85 +152,70 @@
4991 init.flags = CLK_IGNORE_UNUSED;
4993 osc->hw.init = &init;
4997 - init_waitqueue_head(&osc->wait);
4998 - irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
4999 - ret = request_irq(osc->irq, clk_main_osc_irq_handler,
5000 - IRQF_TRIGGER_HIGH, name, osc);
5003 - return ERR_PTR(ret);
5005 + osc->regmap = regmap;
5008 - pmc_write(pmc, AT91_CKGR_MOR,
5009 - (pmc_read(pmc, AT91_CKGR_MOR) &
5010 - ~(MOR_KEY_MASK | AT91_PMC_MOSCEN)) |
5011 - AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
5012 + regmap_update_bits(regmap,
5013 + AT91_CKGR_MOR, MOR_KEY_MASK |
5015 + AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
5017 clk = clk_register(NULL, &osc->hw);
5018 - if (IS_ERR(clk)) {
5019 - free_irq(irq, osc);
5027 -void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np,
5028 - struct at91_pmc *pmc)
5029 +static void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np)
5033 const char *name = np->name;
5034 const char *parent_name;
5035 + struct regmap *regmap;
5038 of_property_read_string(np, "clock-output-names", &name);
5039 bypass = of_property_read_bool(np, "atmel,osc-bypass");
5040 parent_name = of_clk_get_parent_name(np, 0);
5042 - irq = irq_of_parse_and_map(np, 0);
5044 + regmap = syscon_node_to_regmap(of_get_parent(np));
5045 + if (IS_ERR(regmap))
5048 - clk = at91_clk_register_main_osc(pmc, irq, name, parent_name, bypass);
5049 + clk = at91_clk_register_main_osc(regmap, name, parent_name, bypass);
5053 of_clk_add_provider(np, of_clk_src_simple_get, clk);
5055 +CLK_OF_DECLARE(at91rm9200_clk_main_osc, "atmel,at91rm9200-clk-main-osc",
5056 + of_at91rm9200_clk_main_osc_setup);
5058 -static irqreturn_t clk_main_rc_osc_irq_handler(int irq, void *dev_id)
5059 +static bool clk_main_rc_osc_ready(struct regmap *regmap)
5061 - struct clk_main_rc_osc *osc = dev_id;
5062 + unsigned int status;
5064 - wake_up(&osc->wait);
5065 - disable_irq_nosync(osc->irq);
5066 + regmap_read(regmap, AT91_PMC_SR, &status);
5068 - return IRQ_HANDLED;
5069 + return status & AT91_PMC_MOSCRCS;
5072 static int clk_main_rc_osc_prepare(struct clk_hw *hw)
5074 struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
5075 - struct at91_pmc *pmc = osc->pmc;
5077 + struct regmap *regmap = osc->regmap;
5080 - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
5081 + regmap_read(regmap, AT91_CKGR_MOR, &mor);
5083 - if (!(tmp & AT91_PMC_MOSCRCEN)) {
5084 - tmp |= AT91_PMC_MOSCRCEN | AT91_PMC_KEY;
5085 - pmc_write(pmc, AT91_CKGR_MOR, tmp);
5087 + if (!(mor & AT91_PMC_MOSCRCEN))
5088 + regmap_update_bits(regmap, AT91_CKGR_MOR,
5089 + MOR_KEY_MASK | AT91_PMC_MOSCRCEN,
5090 + AT91_PMC_MOSCRCEN | AT91_PMC_KEY);
5092 - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS)) {
5093 - enable_irq(osc->irq);
5094 - wait_event(osc->wait,
5095 - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS);
5097 + while (!clk_main_rc_osc_ready(regmap))
5102 @@ -250,23 +223,28 @@
5103 static void clk_main_rc_osc_unprepare(struct clk_hw *hw)
5105 struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
5106 - struct at91_pmc *pmc = osc->pmc;
5107 - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
5108 + struct regmap *regmap = osc->regmap;
5111 - if (!(tmp & AT91_PMC_MOSCRCEN))
5112 + regmap_read(regmap, AT91_CKGR_MOR, &mor);
5114 + if (!(mor & AT91_PMC_MOSCRCEN))
5117 - tmp &= ~(MOR_KEY_MASK | AT91_PMC_MOSCRCEN);
5118 - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
5119 + regmap_update_bits(regmap, AT91_CKGR_MOR,
5120 + MOR_KEY_MASK | AT91_PMC_MOSCRCEN, AT91_PMC_KEY);
5123 static int clk_main_rc_osc_is_prepared(struct clk_hw *hw)
5125 struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
5126 - struct at91_pmc *pmc = osc->pmc;
5127 + struct regmap *regmap = osc->regmap;
5128 + unsigned int mor, status;
5130 + regmap_read(regmap, AT91_CKGR_MOR, &mor);
5131 + regmap_read(regmap, AT91_PMC_SR, &status);
5133 - return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS) &&
5134 - (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCRCEN));
5135 + return (mor & AT91_PMC_MOSCRCEN) && (status & AT91_PMC_MOSCRCS);
5138 static unsigned long clk_main_rc_osc_recalc_rate(struct clk_hw *hw,
5139 @@ -294,17 +272,15 @@
5142 static struct clk * __init
5143 -at91_clk_register_main_rc_osc(struct at91_pmc *pmc,
5145 +at91_clk_register_main_rc_osc(struct regmap *regmap,
5147 u32 frequency, u32 accuracy)
5150 struct clk_main_rc_osc *osc;
5151 struct clk *clk = NULL;
5152 struct clk_init_data init;
5154 - if (!pmc || !irq || !name || !frequency)
5155 + if (!name || !frequency)
5156 return ERR_PTR(-EINVAL);
5158 osc = kzalloc(sizeof(*osc), GFP_KERNEL);
5159 @@ -318,63 +294,53 @@
5160 init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
5162 osc->hw.init = &init;
5165 + osc->regmap = regmap;
5166 osc->frequency = frequency;
5167 osc->accuracy = accuracy;
5169 - init_waitqueue_head(&osc->wait);
5170 - irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
5171 - ret = request_irq(osc->irq, clk_main_rc_osc_irq_handler,
5172 - IRQF_TRIGGER_HIGH, name, osc);
5174 - return ERR_PTR(ret);
5176 clk = clk_register(NULL, &osc->hw);
5177 - if (IS_ERR(clk)) {
5178 - free_irq(irq, osc);
5186 -void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
5187 - struct at91_pmc *pmc)
5188 +static void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np)
5194 const char *name = np->name;
5195 + struct regmap *regmap;
5197 of_property_read_string(np, "clock-output-names", &name);
5198 of_property_read_u32(np, "clock-frequency", &frequency);
5199 of_property_read_u32(np, "clock-accuracy", &accuracy);
5201 - irq = irq_of_parse_and_map(np, 0);
5203 + regmap = syscon_node_to_regmap(of_get_parent(np));
5204 + if (IS_ERR(regmap))
5207 - clk = at91_clk_register_main_rc_osc(pmc, irq, name, frequency,
5209 + clk = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy);
5213 of_clk_add_provider(np, of_clk_src_simple_get, clk);
5215 +CLK_OF_DECLARE(at91sam9x5_clk_main_rc_osc, "atmel,at91sam9x5-clk-main-rc-osc",
5216 + of_at91sam9x5_clk_main_rc_osc_setup);
5219 -static int clk_main_probe_frequency(struct at91_pmc *pmc)
5220 +static int clk_main_probe_frequency(struct regmap *regmap)
5222 unsigned long prep_time, timeout;
5224 + unsigned int mcfr;
5226 timeout = jiffies + usecs_to_jiffies(MAINFRDY_TIMEOUT);
5228 prep_time = jiffies;
5229 - tmp = pmc_read(pmc, AT91_CKGR_MCFR);
5230 - if (tmp & AT91_PMC_MAINRDY)
5231 + regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
5232 + if (mcfr & AT91_PMC_MAINRDY)
5234 usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
5235 } while (time_before(prep_time, timeout));
5236 @@ -382,34 +348,37 @@
5240 -static unsigned long clk_main_recalc_rate(struct at91_pmc *pmc,
5241 +static unsigned long clk_main_recalc_rate(struct regmap *regmap,
5242 unsigned long parent_rate)
5245 + unsigned int mcfr;
5250 pr_warn("Main crystal frequency not set, using approximate value\n");
5251 - tmp = pmc_read(pmc, AT91_CKGR_MCFR);
5252 - if (!(tmp & AT91_PMC_MAINRDY))
5253 + regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
5254 + if (!(mcfr & AT91_PMC_MAINRDY))
5257 - return ((tmp & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
5258 + return ((mcfr & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
5261 static int clk_rm9200_main_prepare(struct clk_hw *hw)
5263 struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
5265 - return clk_main_probe_frequency(clkmain->pmc);
5266 + return clk_main_probe_frequency(clkmain->regmap);
5269 static int clk_rm9200_main_is_prepared(struct clk_hw *hw)
5271 struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
5272 + unsigned int status;
5274 + regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status);
5276 - return !!(pmc_read(clkmain->pmc, AT91_CKGR_MCFR) & AT91_PMC_MAINRDY);
5277 + return status & AT91_PMC_MAINRDY ? 1 : 0;
5280 static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw,
5283 struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
5285 - return clk_main_recalc_rate(clkmain->pmc, parent_rate);
5286 + return clk_main_recalc_rate(clkmain->regmap, parent_rate);
5289 static const struct clk_ops rm9200_main_ops = {
5293 static struct clk * __init
5294 -at91_clk_register_rm9200_main(struct at91_pmc *pmc,
5295 +at91_clk_register_rm9200_main(struct regmap *regmap,
5297 const char *parent_name)
5300 struct clk *clk = NULL;
5301 struct clk_init_data init;
5303 - if (!pmc || !name)
5305 return ERR_PTR(-EINVAL);
5311 clkmain->hw.init = &init;
5312 - clkmain->pmc = pmc;
5313 + clkmain->regmap = regmap;
5315 clk = clk_register(NULL, &clkmain->hw);
5317 @@ -461,52 +430,54 @@
5321 -void __init of_at91rm9200_clk_main_setup(struct device_node *np,
5322 - struct at91_pmc *pmc)
5323 +static void __init of_at91rm9200_clk_main_setup(struct device_node *np)
5326 const char *parent_name;
5327 const char *name = np->name;
5328 + struct regmap *regmap;
5330 parent_name = of_clk_get_parent_name(np, 0);
5331 of_property_read_string(np, "clock-output-names", &name);
5333 - clk = at91_clk_register_rm9200_main(pmc, name, parent_name);
5334 + regmap = syscon_node_to_regmap(of_get_parent(np));
5335 + if (IS_ERR(regmap))
5338 + clk = at91_clk_register_rm9200_main(regmap, name, parent_name);
5342 of_clk_add_provider(np, of_clk_src_simple_get, clk);
5344 +CLK_OF_DECLARE(at91rm9200_clk_main, "atmel,at91rm9200-clk-main",
5345 + of_at91rm9200_clk_main_setup);
5347 -static irqreturn_t clk_sam9x5_main_irq_handler(int irq, void *dev_id)
5348 +static inline bool clk_sam9x5_main_ready(struct regmap *regmap)
5350 - struct clk_sam9x5_main *clkmain = dev_id;
5351 + unsigned int status;
5353 - wake_up(&clkmain->wait);
5354 - disable_irq_nosync(clkmain->irq);
5355 + regmap_read(regmap, AT91_PMC_SR, &status);
5357 - return IRQ_HANDLED;
5358 + return status & AT91_PMC_MOSCSELS ? 1 : 0;
5361 static int clk_sam9x5_main_prepare(struct clk_hw *hw)
5363 struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5364 - struct at91_pmc *pmc = clkmain->pmc;
5365 + struct regmap *regmap = clkmain->regmap;
5367 - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
5368 - enable_irq(clkmain->irq);
5369 - wait_event(clkmain->wait,
5370 - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
5372 + while (!clk_sam9x5_main_ready(regmap))
5375 - return clk_main_probe_frequency(pmc);
5376 + return clk_main_probe_frequency(regmap);
5379 static int clk_sam9x5_main_is_prepared(struct clk_hw *hw)
5381 struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5383 - return !!(pmc_read(clkmain->pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
5384 + return clk_sam9x5_main_ready(clkmain->regmap);
5387 static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw,
5388 @@ -514,30 +485,28 @@
5390 struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5392 - return clk_main_recalc_rate(clkmain->pmc, parent_rate);
5393 + return clk_main_recalc_rate(clkmain->regmap, parent_rate);
5396 static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index)
5398 struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5399 - struct at91_pmc *pmc = clkmain->pmc;
5401 + struct regmap *regmap = clkmain->regmap;
5407 - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
5408 + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
5409 + tmp &= ~MOR_KEY_MASK;
5411 if (index && !(tmp & AT91_PMC_MOSCSEL))
5412 - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
5413 + regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
5414 else if (!index && (tmp & AT91_PMC_MOSCSEL))
5415 - pmc_write(pmc, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
5416 + regmap_write(regmap, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
5418 - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
5419 - enable_irq(clkmain->irq);
5420 - wait_event(clkmain->wait,
5421 - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
5423 + while (!clk_sam9x5_main_ready(regmap))
5428 @@ -545,8 +514,11 @@
5429 static u8 clk_sam9x5_main_get_parent(struct clk_hw *hw)
5431 struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5432 + unsigned int status;
5434 + regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
5436 - return !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN);
5437 + return status & AT91_PMC_MOSCEN ? 1 : 0;
5440 static const struct clk_ops sam9x5_main_ops = {
5441 @@ -558,18 +530,17 @@
5444 static struct clk * __init
5445 -at91_clk_register_sam9x5_main(struct at91_pmc *pmc,
5447 +at91_clk_register_sam9x5_main(struct regmap *regmap,
5449 const char **parent_names,
5453 struct clk_sam9x5_main *clkmain;
5454 struct clk *clk = NULL;
5455 struct clk_init_data init;
5456 + unsigned int status;
5458 - if (!pmc || !irq || !name)
5460 return ERR_PTR(-EINVAL);
5462 if (!parent_names || !num_parents)
5463 @@ -586,51 +557,42 @@
5464 init.flags = CLK_SET_PARENT_GATE;
5466 clkmain->hw.init = &init;
5467 - clkmain->pmc = pmc;
5468 - clkmain->irq = irq;
5469 - clkmain->parent = !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) &
5471 - init_waitqueue_head(&clkmain->wait);
5472 - irq_set_status_flags(clkmain->irq, IRQ_NOAUTOEN);
5473 - ret = request_irq(clkmain->irq, clk_sam9x5_main_irq_handler,
5474 - IRQF_TRIGGER_HIGH, name, clkmain);
5476 - return ERR_PTR(ret);
5477 + clkmain->regmap = regmap;
5478 + regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
5479 + clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0;
5481 clk = clk_register(NULL, &clkmain->hw);
5482 - if (IS_ERR(clk)) {
5483 - free_irq(clkmain->irq, clkmain);
5491 -void __init of_at91sam9x5_clk_main_setup(struct device_node *np,
5492 - struct at91_pmc *pmc)
5493 +static void __init of_at91sam9x5_clk_main_setup(struct device_node *np)
5496 const char *parent_names[2];
5499 const char *name = np->name;
5500 + struct regmap *regmap;
5502 num_parents = of_clk_get_parent_count(np);
5503 if (num_parents <= 0 || num_parents > 2)
5506 of_clk_parent_fill(np, parent_names, num_parents);
5507 + regmap = syscon_node_to_regmap(of_get_parent(np));
5508 + if (IS_ERR(regmap))
5511 of_property_read_string(np, "clock-output-names", &name);
5513 - irq = irq_of_parse_and_map(np, 0);
5517 - clk = at91_clk_register_sam9x5_main(pmc, irq, name, parent_names,
5518 + clk = at91_clk_register_sam9x5_main(regmap, name, parent_names,
5523 of_clk_add_provider(np, of_clk_src_simple_get, clk);
5525 +CLK_OF_DECLARE(at91sam9x5_clk_main, "atmel,at91sam9x5-clk-main",
5526 + of_at91sam9x5_clk_main_setup);
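
Throughout clk-main.c the old prepare paths armed a PMC interrupt and slept on a waitqueue until a status bit (MOSCS, MOSCRCS, MOSCSELS) was set; the patch drops the interrupt handling entirely and polls AT91_PMC_SR instead, which also lets the setup code run early via CLK_OF_DECLARE(). The recurring pattern, reduced to its core with made-up names:

static bool example_pmc_ready(struct regmap *regmap, unsigned int mask)
{
	unsigned int status;

	regmap_read(regmap, AT91_PMC_SR, &status);
	return status & mask;
}

static void example_wait_ready(struct regmap *regmap, unsigned int mask)
{
	while (!example_pmc_ready(regmap, mask))
		cpu_relax();	/* the hunks above spin per status bit */
}
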
5527 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-master.c linux-4.4.46/drivers/clk/at91/clk-master.c
5528 --- linux-4.4.46.orig/drivers/clk/at91/clk-master.c 2017-02-01 08:31:11.000000000 +0100
5529 +++ linux-4.4.46/drivers/clk/at91/clk-master.c 2017-02-03 17:18:05.671416021 +0100
5531 #include <linux/clkdev.h>
5532 #include <linux/clk/at91_pmc.h>
5533 #include <linux/of.h>
5534 -#include <linux/of_address.h>
5535 -#include <linux/of_irq.h>
5536 -#include <linux/io.h>
5537 -#include <linux/wait.h>
5538 -#include <linux/sched.h>
5539 -#include <linux/interrupt.h>
5540 -#include <linux/irq.h>
5541 +#include <linux/mfd/syscon.h>
5542 +#include <linux/regmap.h>
5550 - struct at91_pmc *pmc;
5552 - wait_queue_head_t wait;
5553 + struct regmap *regmap;
5554 const struct clk_master_layout *layout;
5555 const struct clk_master_characteristics *characteristics;
5558 -static irqreturn_t clk_master_irq_handler(int irq, void *dev_id)
5559 +static inline bool clk_master_ready(struct regmap *regmap)
5561 - struct clk_master *master = (struct clk_master *)dev_id;
5562 + unsigned int status;
5564 - wake_up(&master->wait);
5565 - disable_irq_nosync(master->irq);
5566 + regmap_read(regmap, AT91_PMC_SR, &status);
5568 - return IRQ_HANDLED;
5569 + return status & AT91_PMC_MCKRDY ? 1 : 0;
5572 static int clk_master_prepare(struct clk_hw *hw)
5574 struct clk_master *master = to_clk_master(hw);
5575 - struct at91_pmc *pmc = master->pmc;
5577 - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY)) {
5578 - enable_irq(master->irq);
5579 - wait_event(master->wait,
5580 - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
5582 + while (!clk_master_ready(master->regmap))
5589 struct clk_master *master = to_clk_master(hw);
5591 - return !!(pmc_read(master->pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
5592 + return clk_master_ready(master->regmap);
5595 static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
5598 unsigned long rate = parent_rate;
5599 struct clk_master *master = to_clk_master(hw);
5600 - struct at91_pmc *pmc = master->pmc;
5601 const struct clk_master_layout *layout = master->layout;
5602 const struct clk_master_characteristics *characteristics =
5603 master->characteristics;
5605 + unsigned int mckr;
5608 - tmp = pmc_read(pmc, AT91_PMC_MCKR) & layout->mask;
5610 + regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
5611 + mckr &= layout->mask;
5613 - pres = (tmp >> layout->pres_shift) & MASTER_PRES_MASK;
5614 - div = (tmp >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
5615 + pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK;
5616 + div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
5618 if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX)
5620 @@ -119,9 +106,11 @@
5621 static u8 clk_master_get_parent(struct clk_hw *hw)
5623 struct clk_master *master = to_clk_master(hw);
5624 - struct at91_pmc *pmc = master->pmc;
5625 + unsigned int mckr;
5627 - return pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_CSS;
5628 + regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
5630 + return mckr & AT91_PMC_CSS;
5633 static const struct clk_ops master_ops = {
5634 @@ -132,18 +121,17 @@
5637 static struct clk * __init
5638 -at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq,
5639 +at91_clk_register_master(struct regmap *regmap,
5640 const char *name, int num_parents,
5641 const char **parent_names,
5642 const struct clk_master_layout *layout,
5643 const struct clk_master_characteristics *characteristics)
5646 struct clk_master *master;
5647 struct clk *clk = NULL;
5648 struct clk_init_data init;
5650 - if (!pmc || !irq || !name || !num_parents || !parent_names)
5651 + if (!name || !num_parents || !parent_names)
5652 return ERR_PTR(-EINVAL);
5654 master = kzalloc(sizeof(*master), GFP_KERNEL);
5655 @@ -159,20 +147,10 @@
5656 master->hw.init = &init;
5657 master->layout = layout;
5658 master->characteristics = characteristics;
5659 - master->pmc = pmc;
5660 - master->irq = irq;
5661 - init_waitqueue_head(&master->wait);
5662 - irq_set_status_flags(master->irq, IRQ_NOAUTOEN);
5663 - ret = request_irq(master->irq, clk_master_irq_handler,
5664 - IRQF_TRIGGER_HIGH, "clk-master", master);
5667 - return ERR_PTR(ret);
5669 + master->regmap = regmap;
5671 clk = clk_register(NULL, &master->hw);
5673 - free_irq(master->irq, master);
5677 @@ -217,15 +195,15 @@
5681 -of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc,
5682 +of_at91_clk_master_setup(struct device_node *np,
5683 const struct clk_master_layout *layout)
5688 const char *parent_names[MASTER_SOURCE_MAX];
5689 const char *name = np->name;
5690 struct clk_master_characteristics *characteristics;
5691 + struct regmap *regmap;
5693 num_parents = of_clk_get_parent_count(np);
5694 if (num_parents <= 0 || num_parents > MASTER_SOURCE_MAX)
5695 @@ -239,11 +217,11 @@
5696 if (!characteristics)
5699 - irq = irq_of_parse_and_map(np, 0);
5701 - goto out_free_characteristics;
5702 + regmap = syscon_node_to_regmap(of_get_parent(np));
5703 + if (IS_ERR(regmap))
5706 - clk = at91_clk_register_master(pmc, irq, name, num_parents,
5707 + clk = at91_clk_register_master(regmap, name, num_parents,
5708 parent_names, layout,
5711 @@ -256,14 +234,16 @@
5712 kfree(characteristics);
5715 -void __init of_at91rm9200_clk_master_setup(struct device_node *np,
5716 - struct at91_pmc *pmc)
5717 +static void __init of_at91rm9200_clk_master_setup(struct device_node *np)
5719 - of_at91_clk_master_setup(np, pmc, &at91rm9200_master_layout);
5720 + of_at91_clk_master_setup(np, &at91rm9200_master_layout);
5722 +CLK_OF_DECLARE(at91rm9200_clk_master, "atmel,at91rm9200-clk-master",
5723 + of_at91rm9200_clk_master_setup);
5725 -void __init of_at91sam9x5_clk_master_setup(struct device_node *np,
5726 - struct at91_pmc *pmc)
5727 +static void __init of_at91sam9x5_clk_master_setup(struct device_node *np)
5729 - of_at91_clk_master_setup(np, pmc, &at91sam9x5_master_layout);
5730 + of_at91_clk_master_setup(np, &at91sam9x5_master_layout);
5732 +CLK_OF_DECLARE(at91sam9x5_clk_master, "atmel,at91sam9x5-clk-master",
5733 + of_at91sam9x5_clk_master_setup);
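
Each converted clock now registers itself with CLK_OF_DECLARE() instead of being called by a central PMC driver with an at91_pmc pointer: the macro associates a device-tree compatible string with an init callback that the clock core runs during of_clk_init(). A minimal sketch with made-up names:

#include <linux/clk-provider.h>
#include <linux/mfd/syscon.h>

static void __init example_clk_setup(struct device_node *np)
{
	struct regmap *regmap = syscon_node_to_regmap(of_get_parent(np));

	if (IS_ERR(regmap))
		return;
	/* register the clk and call of_clk_add_provider() here */
}
CLK_OF_DECLARE(example_clk, "vendor,example-clk", example_clk_setup);
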
5734 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-peripheral.c linux-4.4.46/drivers/clk/at91/clk-peripheral.c
5735 --- linux-4.4.46.orig/drivers/clk/at91/clk-peripheral.c 2017-02-01 08:31:11.000000000 +0100
5736 +++ linux-4.4.46/drivers/clk/at91/clk-peripheral.c 2017-02-03 17:18:05.671416021 +0100
5738 #include <linux/clkdev.h>
5739 #include <linux/clk/at91_pmc.h>
5740 #include <linux/of.h>
5741 -#include <linux/of_address.h>
5742 -#include <linux/io.h>
5743 +#include <linux/mfd/syscon.h>
5744 +#include <linux/regmap.h>
5748 +DEFINE_SPINLOCK(pmc_pcr_lock);
5750 #define PERIPHERAL_MAX 64
5752 #define PERIPHERAL_AT91RM9200 0
5755 struct clk_peripheral {
5757 - struct at91_pmc *pmc;
5758 + struct regmap *regmap;
5764 struct clk_sam9x5_peripheral {
5766 - struct at91_pmc *pmc;
5767 + struct regmap *regmap;
5768 struct clk_range range;
5774 static int clk_peripheral_enable(struct clk_hw *hw)
5776 struct clk_peripheral *periph = to_clk_peripheral(hw);
5777 - struct at91_pmc *pmc = periph->pmc;
5778 int offset = AT91_PMC_PCER;
5779 u32 id = periph->id;
5783 if (id > PERIPHERAL_ID_MAX)
5784 offset = AT91_PMC_PCER1;
5785 - pmc_write(pmc, offset, PERIPHERAL_MASK(id));
5786 + regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
5791 static void clk_peripheral_disable(struct clk_hw *hw)
5793 struct clk_peripheral *periph = to_clk_peripheral(hw);
5794 - struct at91_pmc *pmc = periph->pmc;
5795 int offset = AT91_PMC_PCDR;
5796 u32 id = periph->id;
5800 if (id > PERIPHERAL_ID_MAX)
5801 offset = AT91_PMC_PCDR1;
5802 - pmc_write(pmc, offset, PERIPHERAL_MASK(id));
5803 + regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
5806 static int clk_peripheral_is_enabled(struct clk_hw *hw)
5808 struct clk_peripheral *periph = to_clk_peripheral(hw);
5809 - struct at91_pmc *pmc = periph->pmc;
5810 int offset = AT91_PMC_PCSR;
5811 + unsigned int status;
5812 u32 id = periph->id;
5814 if (id < PERIPHERAL_ID_MIN)
5816 if (id > PERIPHERAL_ID_MAX)
5817 offset = AT91_PMC_PCSR1;
5818 - return !!(pmc_read(pmc, offset) & PERIPHERAL_MASK(id));
5819 + regmap_read(periph->regmap, offset, &status);
5821 + return status & PERIPHERAL_MASK(id) ? 1 : 0;
5824 static const struct clk_ops peripheral_ops = {
5825 @@ -101,14 +105,14 @@
5828 static struct clk * __init
5829 -at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name,
5830 +at91_clk_register_peripheral(struct regmap *regmap, const char *name,
5831 const char *parent_name, u32 id)
5833 struct clk_peripheral *periph;
5834 struct clk *clk = NULL;
5835 struct clk_init_data init;
5837 - if (!pmc || !name || !parent_name || id > PERIPHERAL_ID_MAX)
5838 + if (!name || !parent_name || id > PERIPHERAL_ID_MAX)
5839 return ERR_PTR(-EINVAL);
5841 periph = kzalloc(sizeof(*periph), GFP_KERNEL);
5845 periph->hw.init = &init;
5846 - periph->pmc = pmc;
5847 + periph->regmap = regmap;
5849 clk = clk_register(NULL, &periph->hw);
5851 @@ -160,53 +164,58 @@
5852 static int clk_sam9x5_peripheral_enable(struct clk_hw *hw)
5854 struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5855 - struct at91_pmc *pmc = periph->pmc;
5857 + unsigned long flags;
5859 if (periph->id < PERIPHERAL_ID_MIN)
5863 - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5864 - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_DIV_MASK;
5865 - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_DIV(periph->div)
5866 - | AT91_PMC_PCR_CMD
5867 - | AT91_PMC_PCR_EN);
5869 + spin_lock_irqsave(periph->lock, flags);
5870 + regmap_write(periph->regmap, AT91_PMC_PCR,
5871 + (periph->id & AT91_PMC_PCR_PID_MASK));
5872 + regmap_update_bits(periph->regmap, AT91_PMC_PCR,
5873 + AT91_PMC_PCR_DIV_MASK | AT91_PMC_PCR_CMD |
5875 + AT91_PMC_PCR_DIV(periph->div) |
5876 + AT91_PMC_PCR_CMD |
5878 + spin_unlock_irqrestore(periph->lock, flags);
5883 static void clk_sam9x5_peripheral_disable(struct clk_hw *hw)
5885 struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5886 - struct at91_pmc *pmc = periph->pmc;
5888 + unsigned long flags;
5890 if (periph->id < PERIPHERAL_ID_MIN)
5894 - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5895 - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_EN;
5896 - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
5898 + spin_lock_irqsave(periph->lock, flags);
5899 + regmap_write(periph->regmap, AT91_PMC_PCR,
5900 + (periph->id & AT91_PMC_PCR_PID_MASK));
5901 + regmap_update_bits(periph->regmap, AT91_PMC_PCR,
5902 + AT91_PMC_PCR_EN | AT91_PMC_PCR_CMD,
5903 + AT91_PMC_PCR_CMD);
5904 + spin_unlock_irqrestore(periph->lock, flags);
5907 static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw)
5909 struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5910 - struct at91_pmc *pmc = periph->pmc;
5912 + unsigned long flags;
5913 + unsigned int status;
5915 if (periph->id < PERIPHERAL_ID_MIN)
5919 - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5920 - ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_EN);
5922 + spin_lock_irqsave(periph->lock, flags);
5923 + regmap_write(periph->regmap, AT91_PMC_PCR,
5924 + (periph->id & AT91_PMC_PCR_PID_MASK));
5925 + regmap_read(periph->regmap, AT91_PMC_PCR, &status);
5926 + spin_unlock_irqrestore(periph->lock, flags);
5929 + return status & AT91_PMC_PCR_EN ? 1 : 0;
5932 static unsigned long
5933 @@ -214,19 +223,20 @@
5934 unsigned long parent_rate)
5936 struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5937 - struct at91_pmc *pmc = periph->pmc;
5939 + unsigned long flags;
5940 + unsigned int status;
5942 if (periph->id < PERIPHERAL_ID_MIN)
5946 - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5947 - tmp = pmc_read(pmc, AT91_PMC_PCR);
5949 + spin_lock_irqsave(periph->lock, flags);
5950 + regmap_write(periph->regmap, AT91_PMC_PCR,
5951 + (periph->id & AT91_PMC_PCR_PID_MASK));
5952 + regmap_read(periph->regmap, AT91_PMC_PCR, &status);
5953 + spin_unlock_irqrestore(periph->lock, flags);
5955 - if (tmp & AT91_PMC_PCR_EN) {
5956 - periph->div = PERIPHERAL_RSHIFT(tmp);
5957 + if (status & AT91_PMC_PCR_EN) {
5958 + periph->div = PERIPHERAL_RSHIFT(status);
5959 periph->auto_div = false;
5961 clk_sam9x5_peripheral_autodiv(periph);
5962 @@ -318,15 +328,15 @@
5965 static struct clk * __init
5966 -at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name,
5967 - const char *parent_name, u32 id,
5968 - const struct clk_range *range)
5969 +at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock,
5970 + const char *name, const char *parent_name,
5971 + u32 id, const struct clk_range *range)
5973 struct clk_sam9x5_peripheral *periph;
5974 struct clk *clk = NULL;
5975 struct clk_init_data init;
5977 - if (!pmc || !name || !parent_name)
5978 + if (!name || !parent_name)
5979 return ERR_PTR(-EINVAL);
5981 periph = kzalloc(sizeof(*periph), GFP_KERNEL);
5984 periph->hw.init = &init;
5986 - periph->pmc = pmc;
5987 + periph->regmap = regmap;
5988 + periph->lock = lock;
5989 periph->auto_div = true;
5990 periph->range = *range;
5996 -of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
5997 +of_at91_clk_periph_setup(struct device_node *np, u8 type)
6002 const char *parent_name;
6004 struct device_node *periphclknp;
6005 + struct regmap *regmap;
6007 parent_name = of_clk_get_parent_name(np, 0);
6009 @@ -373,6 +385,10 @@
6010 if (!num || num > PERIPHERAL_MAX)
6013 + regmap = syscon_node_to_regmap(of_get_parent(np));
6014 + if (IS_ERR(regmap))
6017 for_each_child_of_node(np, periphclknp) {
6018 if (of_property_read_u32(periphclknp, "reg", &id))
6021 name = periphclknp->name;
6023 if (type == PERIPHERAL_AT91RM9200) {
6024 - clk = at91_clk_register_peripheral(pmc, name,
6025 + clk = at91_clk_register_peripheral(regmap, name,
6028 struct clk_range range = CLK_RANGE(0, 0);
6030 "atmel,clk-output-range",
6033 - clk = at91_clk_register_sam9x5_peripheral(pmc, name,
6034 + clk = at91_clk_register_sam9x5_peripheral(regmap,
6040 @@ -405,14 +423,16 @@
6044 -void __init of_at91rm9200_clk_periph_setup(struct device_node *np,
6045 - struct at91_pmc *pmc)
6046 +static void __init of_at91rm9200_clk_periph_setup(struct device_node *np)
6048 - of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91RM9200);
6049 + of_at91_clk_periph_setup(np, PERIPHERAL_AT91RM9200);
6051 +CLK_OF_DECLARE(at91rm9200_clk_periph, "atmel,at91rm9200-clk-peripheral",
6052 + of_at91rm9200_clk_periph_setup);
6054 -void __init of_at91sam9x5_clk_periph_setup(struct device_node *np,
6055 - struct at91_pmc *pmc)
6056 +static void __init of_at91sam9x5_clk_periph_setup(struct device_node *np)
6058 - of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91SAM9X5);
6059 + of_at91_clk_periph_setup(np, PERIPHERAL_AT91SAM9X5);
6061 +CLK_OF_DECLARE(at91sam9x5_clk_periph, "atmel,at91sam9x5-clk-peripheral",
6062 + of_at91sam9x5_clk_periph_setup);
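
The sam9x5 peripheral clock goes through the indirectly addressed PCR register: the peripheral ID is written to AT91_PMC_PCR first, and the following PCR access operates on that peripheral's slot. Two regmap calls are not atomic by themselves, so the converted code brackets the sequence with the spinlock handed in at registration time (pmc_pcr_lock, declared in pmc.h further down). A minimal sketch of the locked enable sequence; the helper name is illustrative and not part of the patch:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>
#include <linux/spinlock.h>

static void pcr_enable_locked(struct regmap *regmap, spinlock_t *lock,
			      u32 id, u32 div)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	/* select which peripheral's PCR slot the next access targets */
	regmap_write(regmap, AT91_PMC_PCR, id & AT91_PMC_PCR_PID_MASK);
	/* read-modify-write: program the divisor, issue the write command, enable */
	regmap_update_bits(regmap, AT91_PMC_PCR,
			   AT91_PMC_PCR_DIV_MASK | AT91_PMC_PCR_CMD |
			   AT91_PMC_PCR_EN,
			   AT91_PMC_PCR_DIV(div) | AT91_PMC_PCR_CMD |
			   AT91_PMC_PCR_EN);
	spin_unlock_irqrestore(lock, flags);
}

The disable and is_enabled paths follow the same select-then-access pattern under the same lock.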
6063 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-pll.c linux-4.4.46/drivers/clk/at91/clk-pll.c
6064 --- linux-4.4.46.orig/drivers/clk/at91/clk-pll.c 2017-02-01 08:31:11.000000000 +0100
6065 +++ linux-4.4.46/drivers/clk/at91/clk-pll.c 2017-02-03 17:18:05.671416021 +0100
6067 #include <linux/clkdev.h>
6068 #include <linux/clk/at91_pmc.h>
6069 #include <linux/of.h>
6070 -#include <linux/of_address.h>
6071 -#include <linux/of_irq.h>
6072 -#include <linux/io.h>
6073 -#include <linux/kernel.h>
6074 -#include <linux/wait.h>
6075 -#include <linux/sched.h>
6076 -#include <linux/interrupt.h>
6077 -#include <linux/irq.h>
6078 +#include <linux/mfd/syscon.h>
6079 +#include <linux/regmap.h>
6087 - struct at91_pmc *pmc;
6089 - wait_queue_head_t wait;
6090 + struct regmap *regmap;
6095 const struct clk_pll_characteristics *characteristics;
6098 -static irqreturn_t clk_pll_irq_handler(int irq, void *dev_id)
6099 +static inline bool clk_pll_ready(struct regmap *regmap, int id)
6101 - struct clk_pll *pll = (struct clk_pll *)dev_id;
6102 + unsigned int status;
6104 - wake_up(&pll->wait);
6105 - disable_irq_nosync(pll->irq);
6106 + regmap_read(regmap, AT91_PMC_SR, &status);
6108 - return IRQ_HANDLED;
6109 + return status & PLL_STATUS_MASK(id) ? 1 : 0;
6112 static int clk_pll_prepare(struct clk_hw *hw)
6114 struct clk_pll *pll = to_clk_pll(hw);
6115 - struct at91_pmc *pmc = pll->pmc;
6116 + struct regmap *regmap = pll->regmap;
6117 const struct clk_pll_layout *layout = pll->layout;
6118 const struct clk_pll_characteristics *characteristics =
6119 pll->characteristics;
6121 u32 mask = PLL_STATUS_MASK(id);
6122 int offset = PLL_REG(id);
6125 + unsigned int pllr;
6126 + unsigned int status;
6130 - pllr = pmc_read(pmc, offset);
6131 + regmap_read(regmap, offset, &pllr);
6132 div = PLL_DIV(pllr);
6133 mul = PLL_MUL(pllr, layout);
6135 - if ((pmc_read(pmc, AT91_PMC_SR) & mask) &&
6136 + regmap_read(regmap, AT91_PMC_SR, &status);
6137 + if ((status & mask) &&
6138 (div == pll->div && mul == pll->mul))
6141 if (characteristics->out)
6142 out = characteristics->out[pll->range];
6143 - if (characteristics->icpll) {
6144 - icpr = pmc_read(pmc, AT91_PMC_PLLICPR) & ~PLL_ICPR_MASK(id);
6145 - icpr |= (characteristics->icpll[pll->range] <<
6146 - PLL_ICPR_SHIFT(id));
6147 - pmc_write(pmc, AT91_PMC_PLLICPR, icpr);
6150 - pllr &= ~layout->pllr_mask;
6151 - pllr |= layout->pllr_mask &
6152 - (pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
6153 - (out << PLL_OUT_SHIFT) |
6154 - ((pll->mul & layout->mul_mask) << layout->mul_shift));
6155 - pmc_write(pmc, offset, pllr);
6157 - while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
6158 - enable_irq(pll->irq);
6159 - wait_event(pll->wait,
6160 - pmc_read(pmc, AT91_PMC_SR) & mask);
6162 + if (characteristics->icpll)
6163 + regmap_update_bits(regmap, AT91_PMC_PLLICPR, PLL_ICPR_MASK(id),
6164 + characteristics->icpll[pll->range] << PLL_ICPR_SHIFT(id));
6166 + regmap_update_bits(regmap, offset, layout->pllr_mask,
6167 + pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
6168 + (out << PLL_OUT_SHIFT) |
6169 + ((pll->mul & layout->mul_mask) << layout->mul_shift));
6171 + while (!clk_pll_ready(regmap, pll->id))
6176 @@ -130,32 +116,35 @@
6177 static int clk_pll_is_prepared(struct clk_hw *hw)
6179 struct clk_pll *pll = to_clk_pll(hw);
6180 - struct at91_pmc *pmc = pll->pmc;
6182 - return !!(pmc_read(pmc, AT91_PMC_SR) &
6183 - PLL_STATUS_MASK(pll->id));
6184 + return clk_pll_ready(pll->regmap, pll->id);
6187 static void clk_pll_unprepare(struct clk_hw *hw)
6189 struct clk_pll *pll = to_clk_pll(hw);
6190 - struct at91_pmc *pmc = pll->pmc;
6191 - const struct clk_pll_layout *layout = pll->layout;
6192 - int offset = PLL_REG(pll->id);
6193 - u32 tmp = pmc_read(pmc, offset) & ~(layout->pllr_mask);
6194 + unsigned int mask = pll->layout->pllr_mask;
6196 - pmc_write(pmc, offset, tmp);
6197 + regmap_update_bits(pll->regmap, PLL_REG(pll->id), mask, ~mask);
6200 static unsigned long clk_pll_recalc_rate(struct clk_hw *hw,
6201 unsigned long parent_rate)
6203 struct clk_pll *pll = to_clk_pll(hw);
6204 + unsigned int pllr;
6208 + regmap_read(pll->regmap, PLL_REG(pll->id), &pllr);
6210 + div = PLL_DIV(pllr);
6211 + mul = PLL_MUL(pllr, pll->layout);
6213 - if (!pll->div || !pll->mul)
6217 - return (parent_rate / pll->div) * (pll->mul + 1);
6218 + return (parent_rate / div) * (mul + 1);
6221 static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
6225 static struct clk * __init
6226 -at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name,
6227 +at91_clk_register_pll(struct regmap *regmap, const char *name,
6228 const char *parent_name, u8 id,
6229 const struct clk_pll_layout *layout,
6230 const struct clk_pll_characteristics *characteristics)
6232 struct clk_pll *pll;
6233 struct clk *clk = NULL;
6234 struct clk_init_data init;
6236 int offset = PLL_REG(id);
6238 + unsigned int pllr;
6240 if (id > PLL_MAX_ID)
6241 return ERR_PTR(-EINVAL);
6242 @@ -337,23 +325,13 @@
6243 pll->hw.init = &init;
6244 pll->layout = layout;
6245 pll->characteristics = characteristics;
6248 - tmp = pmc_read(pmc, offset) & layout->pllr_mask;
6249 - pll->div = PLL_DIV(tmp);
6250 - pll->mul = PLL_MUL(tmp, layout);
6251 - init_waitqueue_head(&pll->wait);
6252 - irq_set_status_flags(pll->irq, IRQ_NOAUTOEN);
6253 - ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH,
6254 - id ? "clk-pllb" : "clk-plla", pll);
6257 - return ERR_PTR(ret);
6259 + pll->regmap = regmap;
6260 + regmap_read(regmap, offset, &pllr);
6261 + pll->div = PLL_DIV(pllr);
6262 + pll->mul = PLL_MUL(pllr, layout);
6264 clk = clk_register(NULL, &pll->hw);
6266 - free_irq(pll->irq, pll);
6270 @@ -483,12 +461,12 @@
6274 -of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc,
6275 +of_at91_clk_pll_setup(struct device_node *np,
6276 const struct clk_pll_layout *layout)
6281 + struct regmap *regmap;
6282 const char *parent_name;
6283 const char *name = np->name;
6284 struct clk_pll_characteristics *characteristics;
6285 @@ -500,15 +478,15 @@
6287 of_property_read_string(np, "clock-output-names", &name);
6289 - characteristics = of_at91_clk_pll_get_characteristics(np);
6290 - if (!characteristics)
6291 + regmap = syscon_node_to_regmap(of_get_parent(np));
6292 + if (IS_ERR(regmap))
6295 - irq = irq_of_parse_and_map(np, 0);
6297 + characteristics = of_at91_clk_pll_get_characteristics(np);
6298 + if (!characteristics)
6301 - clk = at91_clk_register_pll(pmc, irq, name, parent_name, id, layout,
6302 + clk = at91_clk_register_pll(regmap, name, parent_name, id, layout,
6305 goto out_free_characteristics;
6306 @@ -520,26 +498,30 @@
6307 kfree(characteristics);
6310 -void __init of_at91rm9200_clk_pll_setup(struct device_node *np,
6311 - struct at91_pmc *pmc)
6312 +static void __init of_at91rm9200_clk_pll_setup(struct device_node *np)
6314 - of_at91_clk_pll_setup(np, pmc, &at91rm9200_pll_layout);
6315 + of_at91_clk_pll_setup(np, &at91rm9200_pll_layout);
6317 +CLK_OF_DECLARE(at91rm9200_clk_pll, "atmel,at91rm9200-clk-pll",
6318 + of_at91rm9200_clk_pll_setup);
6320 -void __init of_at91sam9g45_clk_pll_setup(struct device_node *np,
6321 - struct at91_pmc *pmc)
6322 +static void __init of_at91sam9g45_clk_pll_setup(struct device_node *np)
6324 - of_at91_clk_pll_setup(np, pmc, &at91sam9g45_pll_layout);
6325 + of_at91_clk_pll_setup(np, &at91sam9g45_pll_layout);
6327 +CLK_OF_DECLARE(at91sam9g45_clk_pll, "atmel,at91sam9g45-clk-pll",
6328 + of_at91sam9g45_clk_pll_setup);
6330 -void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np,
6331 - struct at91_pmc *pmc)
6332 +static void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np)
6334 - of_at91_clk_pll_setup(np, pmc, &at91sam9g20_pllb_layout);
6335 + of_at91_clk_pll_setup(np, &at91sam9g20_pllb_layout);
6337 +CLK_OF_DECLARE(at91sam9g20_clk_pllb, "atmel,at91sam9g20-clk-pllb",
6338 + of_at91sam9g20_clk_pllb_setup);
6340 -void __init of_sama5d3_clk_pll_setup(struct device_node *np,
6341 - struct at91_pmc *pmc)
6342 +static void __init of_sama5d3_clk_pll_setup(struct device_node *np)
6344 - of_at91_clk_pll_setup(np, pmc, &sama5d3_pll_layout);
6345 + of_at91_clk_pll_setup(np, &sama5d3_pll_layout);
6347 +CLK_OF_DECLARE(sama5d3_clk_pll, "atmel,sama5d3-clk-pll",
6348 + of_sama5d3_clk_pll_setup);
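
All of the wait-for-lock paths in clk-pll.c drop the PMC interrupt handler and wait queue; PLL lock is now polled through the regmap. A condensed sketch of that polling, reusing the driver-local PLL_STATUS_MASK() macro seen above; the loop body is elided in the hunk and cpu_relax() is assumed here:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>

/* PLL_STATUS_MASK() is the driver-local lock-bit macro used above */
static bool pll_locked(struct regmap *regmap, int id)
{
	unsigned int status;

	regmap_read(regmap, AT91_PMC_SR, &status);
	return status & PLL_STATUS_MASK(id);
}

static void pll_wait_for_lock(struct regmap *regmap, int id)
{
	/* busy-wait: the PMC interrupt previously used for this is gone */
	while (!pll_locked(regmap, id))
		cpu_relax();
}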
6349 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-plldiv.c linux-4.4.46/drivers/clk/at91/clk-plldiv.c
6350 --- linux-4.4.46.orig/drivers/clk/at91/clk-plldiv.c 2017-02-01 08:31:11.000000000 +0100
6351 +++ linux-4.4.46/drivers/clk/at91/clk-plldiv.c 2017-02-03 17:18:05.671416021 +0100
6353 #include <linux/clkdev.h>
6354 #include <linux/clk/at91_pmc.h>
6355 #include <linux/of.h>
6356 -#include <linux/of_address.h>
6357 -#include <linux/io.h>
6358 +#include <linux/mfd/syscon.h>
6359 +#include <linux/regmap.h>
6367 - struct at91_pmc *pmc;
6368 + struct regmap *regmap;
6371 static unsigned long clk_plldiv_recalc_rate(struct clk_hw *hw,
6372 unsigned long parent_rate)
6374 struct clk_plldiv *plldiv = to_clk_plldiv(hw);
6375 - struct at91_pmc *pmc = plldiv->pmc;
6376 + unsigned int mckr;
6378 - if (pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_PLLADIV2)
6379 + regmap_read(plldiv->regmap, AT91_PMC_MCKR, &mckr);
6381 + if (mckr & AT91_PMC_PLLADIV2)
6382 return parent_rate / 2;
6386 unsigned long parent_rate)
6388 struct clk_plldiv *plldiv = to_clk_plldiv(hw);
6389 - struct at91_pmc *pmc = plldiv->pmc;
6392 - if (parent_rate != rate && (parent_rate / 2) != rate)
6393 + if ((parent_rate != rate) && (parent_rate / 2 != rate))
6397 - tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_PLLADIV2;
6398 - if ((parent_rate / 2) == rate)
6399 - tmp |= AT91_PMC_PLLADIV2;
6400 - pmc_write(pmc, AT91_PMC_MCKR, tmp);
6402 + regmap_update_bits(plldiv->regmap, AT91_PMC_MCKR, AT91_PMC_PLLADIV2,
6403 + parent_rate != rate ? AT91_PMC_PLLADIV2 : 0);
6410 static struct clk * __init
6411 -at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name,
6412 +at91_clk_register_plldiv(struct regmap *regmap, const char *name,
6413 const char *parent_name)
6415 struct clk_plldiv *plldiv;
6417 init.flags = CLK_SET_RATE_GATE;
6419 plldiv->hw.init = &init;
6420 - plldiv->pmc = pmc;
6421 + plldiv->regmap = regmap;
6423 clk = clk_register(NULL, &plldiv->hw);
6425 @@ -109,27 +105,27 @@
6429 -of_at91_clk_plldiv_setup(struct device_node *np, struct at91_pmc *pmc)
6430 +of_at91sam9x5_clk_plldiv_setup(struct device_node *np)
6433 const char *parent_name;
6434 const char *name = np->name;
6435 + struct regmap *regmap;
6437 parent_name = of_clk_get_parent_name(np, 0);
6439 of_property_read_string(np, "clock-output-names", &name);
6441 - clk = at91_clk_register_plldiv(pmc, name, parent_name);
6442 + regmap = syscon_node_to_regmap(of_get_parent(np));
6443 + if (IS_ERR(regmap))
6446 + clk = at91_clk_register_plldiv(regmap, name, parent_name);
6450 of_clk_add_provider(np, of_clk_src_simple_get, clk);
6454 -void __init of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
6455 - struct at91_pmc *pmc)
6457 - of_at91_clk_plldiv_setup(np, pmc);
6459 +CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv",
6460 + of_at91sam9x5_clk_plldiv_setup);
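
clk-plldiv no longer open-codes a read-modify-write of MCKR: regmap_update_bits() reads the register, applies the mask and writes back only the covered bits. Under the AT91_PMC_PLLADIV2 mask, the bit is set when the requested rate is half the parent rate and cleared otherwise. A minimal sketch with an illustrative helper name:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>

static int plldiv_select_div2(struct regmap *regmap, bool div_by_two)
{
	/* only the PLLADIV2 bit is touched; all other MCKR bits are preserved */
	return regmap_update_bits(regmap, AT91_PMC_MCKR, AT91_PMC_PLLADIV2,
				  div_by_two ? AT91_PMC_PLLADIV2 : 0);
}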
6461 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-programmable.c linux-4.4.46/drivers/clk/at91/clk-programmable.c
6462 --- linux-4.4.46.orig/drivers/clk/at91/clk-programmable.c 2017-02-01 08:31:11.000000000 +0100
6463 +++ linux-4.4.46/drivers/clk/at91/clk-programmable.c 2017-02-03 17:18:05.675416176 +0100
6465 #include <linux/clkdev.h>
6466 #include <linux/clk/at91_pmc.h>
6467 #include <linux/of.h>
6468 -#include <linux/of_address.h>
6469 -#include <linux/io.h>
6470 -#include <linux/wait.h>
6471 -#include <linux/sched.h>
6472 +#include <linux/mfd/syscon.h>
6473 +#include <linux/regmap.h>
6479 #define PROG_STATUS_MASK(id) (1 << ((id) + 8))
6480 #define PROG_PRES_MASK 0x7
6481 +#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & PROG_PRES_MASK)
6482 #define PROG_MAX_RM9200_CSS 3
6484 struct clk_programmable_layout {
6487 struct clk_programmable {
6489 - struct at91_pmc *pmc;
6490 + struct regmap *regmap;
6492 const struct clk_programmable_layout *layout;
6495 static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw,
6496 unsigned long parent_rate)
6499 struct clk_programmable *prog = to_clk_programmable(hw);
6500 - struct at91_pmc *pmc = prog->pmc;
6501 - const struct clk_programmable_layout *layout = prog->layout;
6502 + unsigned int pckr;
6504 + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
6506 - pres = (pmc_read(pmc, AT91_PMC_PCKR(prog->id)) >> layout->pres_shift) &
6508 - return parent_rate >> pres;
6509 + return parent_rate >> PROG_PRES(prog->layout, pckr);
6512 static int clk_programmable_determine_rate(struct clk_hw *hw,
6513 @@ -101,36 +98,36 @@
6515 struct clk_programmable *prog = to_clk_programmable(hw);
6516 const struct clk_programmable_layout *layout = prog->layout;
6517 - struct at91_pmc *pmc = prog->pmc;
6518 - u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) & ~layout->css_mask;
6519 + unsigned int mask = layout->css_mask;
6520 + unsigned int pckr = 0;
6522 if (layout->have_slck_mck)
6523 - tmp &= AT91_PMC_CSSMCK_MCK;
6524 + mask |= AT91_PMC_CSSMCK_MCK;
6526 if (index > layout->css_mask) {
6527 - if (index > PROG_MAX_RM9200_CSS && layout->have_slck_mck) {
6528 - tmp |= AT91_PMC_CSSMCK_MCK;
6531 + if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck)
6535 + pckr |= AT91_PMC_CSSMCK_MCK;
6538 - pmc_write(pmc, AT91_PMC_PCKR(prog->id), tmp | index);
6539 + regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), mask, pckr);
6544 static u8 clk_programmable_get_parent(struct clk_hw *hw)
6548 struct clk_programmable *prog = to_clk_programmable(hw);
6549 - struct at91_pmc *pmc = prog->pmc;
6550 const struct clk_programmable_layout *layout = prog->layout;
6551 + unsigned int pckr;
6554 + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
6556 + ret = pckr & layout->css_mask;
6558 - tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id));
6559 - ret = tmp & layout->css_mask;
6560 - if (layout->have_slck_mck && (tmp & AT91_PMC_CSSMCK_MCK) && !ret)
6561 + if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret)
6562 ret = PROG_MAX_RM9200_CSS + 1;
6565 @@ -140,26 +137,27 @@
6566 unsigned long parent_rate)
6568 struct clk_programmable *prog = to_clk_programmable(hw);
6569 - struct at91_pmc *pmc = prog->pmc;
6570 const struct clk_programmable_layout *layout = prog->layout;
6571 unsigned long div = parent_rate / rate;
6572 + unsigned int pckr;
6574 - u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) &
6575 - ~(PROG_PRES_MASK << layout->pres_shift);
6577 + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
6582 shift = fls(div) - 1;
6584 - if (div != (1<<shift))
6585 + if (div != (1 << shift))
6588 if (shift >= PROG_PRES_MASK)
6591 - pmc_write(pmc, AT91_PMC_PCKR(prog->id),
6592 - tmp | (shift << layout->pres_shift));
6593 + regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id),
6594 + PROG_PRES_MASK << layout->pres_shift,
6595 + shift << layout->pres_shift);
6602 static struct clk * __init
6603 -at91_clk_register_programmable(struct at91_pmc *pmc,
6604 +at91_clk_register_programmable(struct regmap *regmap,
6605 const char *name, const char **parent_names,
6606 u8 num_parents, u8 id,
6607 const struct clk_programmable_layout *layout)
6610 prog->layout = layout;
6611 prog->hw.init = &init;
6613 + prog->regmap = regmap;
6615 clk = clk_register(NULL, &prog->hw);
6621 -of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
6622 +of_at91_clk_prog_setup(struct device_node *np,
6623 const struct clk_programmable_layout *layout)
6627 const char *parent_names[PROG_SOURCE_MAX];
6629 struct device_node *progclknp;
6630 + struct regmap *regmap;
6632 num_parents = of_clk_get_parent_count(np);
6633 if (num_parents <= 0 || num_parents > PROG_SOURCE_MAX)
6634 @@ -247,6 +246,10 @@
6635 if (!num || num > (PROG_ID_MAX + 1))
6638 + regmap = syscon_node_to_regmap(of_get_parent(np));
6639 + if (IS_ERR(regmap))
6642 for_each_child_of_node(np, progclknp) {
6643 if (of_property_read_u32(progclknp, "reg", &id))
6646 if (of_property_read_string(np, "clock-output-names", &name))
6647 name = progclknp->name;
6649 - clk = at91_clk_register_programmable(pmc, name,
6650 + clk = at91_clk_register_programmable(regmap, name,
6651 parent_names, num_parents,
6654 @@ -265,20 +268,23 @@
6658 -void __init of_at91rm9200_clk_prog_setup(struct device_node *np,
6659 - struct at91_pmc *pmc)
6660 +static void __init of_at91rm9200_clk_prog_setup(struct device_node *np)
6662 - of_at91_clk_prog_setup(np, pmc, &at91rm9200_programmable_layout);
6663 + of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout);
6665 +CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable",
6666 + of_at91rm9200_clk_prog_setup);
6668 -void __init of_at91sam9g45_clk_prog_setup(struct device_node *np,
6669 - struct at91_pmc *pmc)
6670 +static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np)
6672 - of_at91_clk_prog_setup(np, pmc, &at91sam9g45_programmable_layout);
6673 + of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout);
6675 +CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable",
6676 + of_at91sam9g45_clk_prog_setup);
6678 -void __init of_at91sam9x5_clk_prog_setup(struct device_node *np,
6679 - struct at91_pmc *pmc)
6680 +static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np)
6682 - of_at91_clk_prog_setup(np, pmc, &at91sam9x5_programmable_layout);
6683 + of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout);
6685 +CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable",
6686 + of_at91sam9x5_clk_prog_setup);
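
The programmable-clock prescaler is a power-of-two shift stored in the PRES field, so set_rate accepts only dividers of the form 1 << shift with shift below PROG_PRES_MASK, and now rewrites just that field via regmap_update_bits(). A sketch of the divider-to-PRES computation; the helper name is illustrative:

#include <linux/bitops.h>
#include <linux/errno.h>

/* PROG_PRES_MASK is the driver-local 3-bit field mask used above */
static int prog_rate_to_pres(unsigned long parent_rate, unsigned long rate)
{
	unsigned long div = parent_rate / rate;
	int shift;

	if (!div)
		return -EINVAL;

	shift = fls(div) - 1;		/* log2(div) for powers of two */
	if (div != (1UL << shift))	/* reject non power-of-two dividers */
		return -EINVAL;
	if (shift >= PROG_PRES_MASK)
		return -EINVAL;

	return shift;			/* value for the PRES field */
}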
6687 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-slow.c linux-4.4.46/drivers/clk/at91/clk-slow.c
6688 --- linux-4.4.46.orig/drivers/clk/at91/clk-slow.c 2017-02-01 08:31:11.000000000 +0100
6689 +++ linux-4.4.46/drivers/clk/at91/clk-slow.c 2017-02-03 17:18:05.675416176 +0100
6691 #include <linux/clk.h>
6692 #include <linux/clk-provider.h>
6693 #include <linux/clkdev.h>
6694 -#include <linux/slab.h>
6695 #include <linux/clk/at91_pmc.h>
6696 #include <linux/delay.h>
6697 #include <linux/of.h>
6698 -#include <linux/of_address.h>
6699 -#include <linux/of_irq.h>
6700 -#include <linux/io.h>
6701 -#include <linux/interrupt.h>
6702 -#include <linux/irq.h>
6703 -#include <linux/sched.h>
6704 -#include <linux/wait.h>
6705 +#include <linux/mfd/syscon.h>
6706 +#include <linux/regmap.h>
6712 struct clk_sam9260_slow {
6714 - struct at91_pmc *pmc;
6715 + struct regmap *regmap;
6718 #define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw)
6719 @@ -393,8 +387,11 @@
6720 static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw)
6722 struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw);
6723 + unsigned int status;
6725 - return !!(pmc_read(slowck->pmc, AT91_PMC_SR) & AT91_PMC_OSCSEL);
6726 + regmap_read(slowck->regmap, AT91_PMC_SR, &status);
6728 + return status & AT91_PMC_OSCSEL ? 1 : 0;
6731 static const struct clk_ops sam9260_slow_ops = {
6735 static struct clk * __init
6736 -at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
6737 +at91_clk_register_sam9260_slow(struct regmap *regmap,
6739 const char **parent_names,
6742 struct clk *clk = NULL;
6743 struct clk_init_data init;
6745 - if (!pmc || !name)
6747 return ERR_PTR(-EINVAL);
6749 if (!parent_names || !num_parents)
6753 slowck->hw.init = &init;
6754 - slowck->pmc = pmc;
6755 + slowck->regmap = regmap;
6757 clk = clk_register(NULL, &slowck->hw);
6759 @@ -439,29 +436,34 @@
6763 -void __init of_at91sam9260_clk_slow_setup(struct device_node *np,
6764 - struct at91_pmc *pmc)
6765 +static void __init of_at91sam9260_clk_slow_setup(struct device_node *np)
6768 const char *parent_names[2];
6770 const char *name = np->name;
6771 + struct regmap *regmap;
6773 num_parents = of_clk_get_parent_count(np);
6774 if (num_parents != 2)
6777 of_clk_parent_fill(np, parent_names, num_parents);
6778 + regmap = syscon_node_to_regmap(of_get_parent(np));
6779 + if (IS_ERR(regmap))
6782 of_property_read_string(np, "clock-output-names", &name);
6784 - clk = at91_clk_register_sam9260_slow(pmc, name, parent_names,
6785 + clk = at91_clk_register_sam9260_slow(regmap, name, parent_names,
6790 of_clk_add_provider(np, of_clk_src_simple_get, clk);
6792 +CLK_OF_DECLARE(at91sam9260_clk_slow, "atmel,at91sam9260-clk-slow",
6793 + of_at91sam9260_clk_slow_setup);
6796 * FIXME: All slow clk users are not properly claiming it (get + prepare +
6797 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-smd.c linux-4.4.46/drivers/clk/at91/clk-smd.c
6798 --- linux-4.4.46.orig/drivers/clk/at91/clk-smd.c 2017-02-01 08:31:11.000000000 +0100
6799 +++ linux-4.4.46/drivers/clk/at91/clk-smd.c 2017-02-03 17:18:05.675416176 +0100
6801 #include <linux/clkdev.h>
6802 #include <linux/clk/at91_pmc.h>
6803 #include <linux/of.h>
6804 -#include <linux/of_address.h>
6805 -#include <linux/io.h>
6806 +#include <linux/mfd/syscon.h>
6807 +#include <linux/regmap.h>
6813 struct at91sam9x5_clk_smd {
6815 - struct at91_pmc *pmc;
6816 + struct regmap *regmap;
6819 #define to_at91sam9x5_clk_smd(hw) \
6821 static unsigned long at91sam9x5_clk_smd_recalc_rate(struct clk_hw *hw,
6822 unsigned long parent_rate)
6826 struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6827 - struct at91_pmc *pmc = smd->pmc;
6828 + unsigned int smdr;
6831 + regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
6832 + smddiv = (smdr & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
6834 - tmp = pmc_read(pmc, AT91_PMC_SMD);
6835 - smddiv = (tmp & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
6836 return parent_rate / (smddiv + 1);
6841 static int at91sam9x5_clk_smd_set_parent(struct clk_hw *hw, u8 index)
6844 struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6845 - struct at91_pmc *pmc = smd->pmc;
6849 - tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMDS;
6851 - tmp |= AT91_PMC_SMDS;
6852 - pmc_write(pmc, AT91_PMC_SMD, tmp);
6854 + regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMDS,
6855 + index ? AT91_PMC_SMDS : 0);
6860 static u8 at91sam9x5_clk_smd_get_parent(struct clk_hw *hw)
6862 struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6863 - struct at91_pmc *pmc = smd->pmc;
6864 + unsigned int smdr;
6866 - return pmc_read(pmc, AT91_PMC_SMD) & AT91_PMC_SMDS;
6867 + regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
6869 + return smdr & AT91_PMC_SMDS;
6872 static int at91sam9x5_clk_smd_set_rate(struct clk_hw *hw, unsigned long rate,
6873 unsigned long parent_rate)
6876 struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6877 - struct at91_pmc *pmc = smd->pmc;
6878 unsigned long div = parent_rate / rate;
6880 if (parent_rate % rate || div < 1 || div > (SMD_MAX_DIV + 1))
6882 - tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMD_DIV;
6883 - tmp |= (div - 1) << SMD_DIV_SHIFT;
6884 - pmc_write(pmc, AT91_PMC_SMD, tmp);
6886 + regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMD_DIV,
6887 + (div - 1) << SMD_DIV_SHIFT);
6894 static struct clk * __init
6895 -at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name,
6896 +at91sam9x5_clk_register_smd(struct regmap *regmap, const char *name,
6897 const char **parent_names, u8 num_parents)
6899 struct at91sam9x5_clk_smd *smd;
6901 init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
6903 smd->hw.init = &init;
6905 + smd->regmap = regmap;
6907 clk = clk_register(NULL, &smd->hw);
6909 @@ -141,13 +139,13 @@
6913 -void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
6914 - struct at91_pmc *pmc)
6915 +static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np)
6919 const char *parent_names[SMD_SOURCE_MAX];
6920 const char *name = np->name;
6921 + struct regmap *regmap;
6923 num_parents = of_clk_get_parent_count(np);
6924 if (num_parents <= 0 || num_parents > SMD_SOURCE_MAX)
6925 @@ -157,10 +155,16 @@
6927 of_property_read_string(np, "clock-output-names", &name);
6929 - clk = at91sam9x5_clk_register_smd(pmc, name, parent_names,
6930 + regmap = syscon_node_to_regmap(of_get_parent(np));
6931 + if (IS_ERR(regmap))
6934 + clk = at91sam9x5_clk_register_smd(regmap, name, parent_names,
6939 of_clk_add_provider(np, of_clk_src_simple_get, clk);
6941 +CLK_OF_DECLARE(at91sam9x5_clk_smd, "atmel,at91sam9x5-clk-smd",
6942 + of_at91sam9x5_clk_smd_setup);
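
The SMD source clock keeps its exact-divider rule (the parent rate must be an integer multiple of the requested rate, with the divider between 1 and SMD_MAX_DIV + 1); only the register access changes, to a masked update of the DIV field. A short sketch; the helper name is illustrative and SMD_MAX_DIV / SMD_DIV_SHIFT are the driver-local constants used above:

#include <linux/clk/at91_pmc.h>
#include <linux/errno.h>
#include <linux/regmap.h>

static int smd_set_div(struct regmap *regmap, unsigned long parent_rate,
		       unsigned long rate)
{
	unsigned long div = parent_rate / rate;

	if (parent_rate % rate || div < 1 || div > (SMD_MAX_DIV + 1))
		return -EINVAL;

	/* rewrite only the AT91_PMC_SMD_DIV field, preserving AT91_PMC_SMDS */
	return regmap_update_bits(regmap, AT91_PMC_SMD, AT91_PMC_SMD_DIV,
				  (div - 1) << SMD_DIV_SHIFT);
}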
6943 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-system.c linux-4.4.46/drivers/clk/at91/clk-system.c
6944 --- linux-4.4.46.orig/drivers/clk/at91/clk-system.c 2017-02-01 08:31:11.000000000 +0100
6945 +++ linux-4.4.46/drivers/clk/at91/clk-system.c 2017-02-03 17:18:05.675416176 +0100
6947 #include <linux/clkdev.h>
6948 #include <linux/clk/at91_pmc.h>
6949 #include <linux/of.h>
6950 -#include <linux/of_address.h>
6951 -#include <linux/io.h>
6952 -#include <linux/irq.h>
6953 -#include <linux/of_irq.h>
6954 -#include <linux/interrupt.h>
6955 -#include <linux/wait.h>
6956 -#include <linux/sched.h>
6957 +#include <linux/mfd/syscon.h>
6958 +#include <linux/regmap.h>
6963 #define to_clk_system(hw) container_of(hw, struct clk_system, hw)
6966 - struct at91_pmc *pmc;
6968 - wait_queue_head_t wait;
6969 + struct regmap *regmap;
6975 return (id >= 8) && (id <= 15);
6977 -static irqreturn_t clk_system_irq_handler(int irq, void *dev_id)
6979 +static inline bool clk_system_ready(struct regmap *regmap, int id)
6981 - struct clk_system *sys = (struct clk_system *)dev_id;
6982 + unsigned int status;
6984 - wake_up(&sys->wait);
6985 - disable_irq_nosync(sys->irq);
6986 + regmap_read(regmap, AT91_PMC_SR, &status);
6988 - return IRQ_HANDLED;
6989 + return status & (1 << id) ? 1 : 0;
6992 static int clk_system_prepare(struct clk_hw *hw)
6994 struct clk_system *sys = to_clk_system(hw);
6995 - struct at91_pmc *pmc = sys->pmc;
6996 - u32 mask = 1 << sys->id;
6998 - pmc_write(pmc, AT91_PMC_SCER, mask);
6999 + regmap_write(sys->regmap, AT91_PMC_SCER, 1 << sys->id);
7001 if (!is_pck(sys->id))
7004 - while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
7006 - enable_irq(sys->irq);
7007 - wait_event(sys->wait,
7008 - pmc_read(pmc, AT91_PMC_SR) & mask);
7012 + while (!clk_system_ready(sys->regmap, sys->id))
7018 static void clk_system_unprepare(struct clk_hw *hw)
7020 struct clk_system *sys = to_clk_system(hw);
7021 - struct at91_pmc *pmc = sys->pmc;
7023 - pmc_write(pmc, AT91_PMC_SCDR, 1 << sys->id);
7024 + regmap_write(sys->regmap, AT91_PMC_SCDR, 1 << sys->id);
7027 static int clk_system_is_prepared(struct clk_hw *hw)
7029 struct clk_system *sys = to_clk_system(hw);
7030 - struct at91_pmc *pmc = sys->pmc;
7031 + unsigned int status;
7033 + regmap_read(sys->regmap, AT91_PMC_SCSR, &status);
7035 - if (!(pmc_read(pmc, AT91_PMC_SCSR) & (1 << sys->id)))
7036 + if (!(status & (1 << sys->id)))
7039 if (!is_pck(sys->id))
7042 - return !!(pmc_read(pmc, AT91_PMC_SR) & (1 << sys->id));
7043 + regmap_read(sys->regmap, AT91_PMC_SR, &status);
7045 + return status & (1 << sys->id) ? 1 : 0;
7048 static const struct clk_ops system_ops = {
7049 @@ -100,13 +89,12 @@
7052 static struct clk * __init
7053 -at91_clk_register_system(struct at91_pmc *pmc, const char *name,
7054 - const char *parent_name, u8 id, int irq)
7055 +at91_clk_register_system(struct regmap *regmap, const char *name,
7056 + const char *parent_name, u8 id)
7058 struct clk_system *sys;
7059 struct clk *clk = NULL;
7060 struct clk_init_data init;
7063 if (!parent_name || id > SYSTEM_MAX_ID)
7064 return ERR_PTR(-EINVAL);
7065 @@ -123,44 +111,33 @@
7068 sys->hw.init = &init;
7072 - init_waitqueue_head(&sys->wait);
7073 - irq_set_status_flags(sys->irq, IRQ_NOAUTOEN);
7074 - ret = request_irq(sys->irq, clk_system_irq_handler,
7075 - IRQF_TRIGGER_HIGH, name, sys);
7078 - return ERR_PTR(ret);
7081 + sys->regmap = regmap;
7083 clk = clk_register(NULL, &sys->hw);
7084 - if (IS_ERR(clk)) {
7086 - free_irq(sys->irq, sys);
7095 -of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc)
7096 +static void __init of_at91rm9200_clk_sys_setup(struct device_node *np)
7103 struct device_node *sysclknp;
7104 const char *parent_name;
7105 + struct regmap *regmap;
7107 num = of_get_child_count(np);
7108 if (num > (SYSTEM_MAX_ID + 1))
7111 + regmap = syscon_node_to_regmap(of_get_parent(np));
7112 + if (IS_ERR(regmap))
7115 for_each_child_of_node(np, sysclknp) {
7116 if (of_property_read_u32(sysclknp, "reg", &id))
7118 @@ -168,21 +145,14 @@
7119 if (of_property_read_string(np, "clock-output-names", &name))
7120 name = sysclknp->name;
7123 - irq = irq_of_parse_and_map(sysclknp, 0);
7125 parent_name = of_clk_get_parent_name(sysclknp, 0);
7127 - clk = at91_clk_register_system(pmc, name, parent_name, id, irq);
7128 + clk = at91_clk_register_system(regmap, name, parent_name, id);
7132 of_clk_add_provider(sysclknp, of_clk_src_simple_get, clk);
7136 -void __init of_at91rm9200_clk_sys_setup(struct device_node *np,
7137 - struct at91_pmc *pmc)
7139 - of_at91_clk_sys_setup(np, pmc);
7141 +CLK_OF_DECLARE(at91rm9200_clk_sys, "atmel,at91rm9200-clk-system",
7142 + of_at91rm9200_clk_sys_setup);
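
For system clocks, SCER and SCDR set and clear the gate while SCSR reports its state; only the programmable clock outputs (ids 8..15, the is_pck() check above) also expose a ready flag in AT91_PMC_SR, which prepare now polls instead of waiting on the removed PMC interrupt. A sketch of that flow with illustrative helper names:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>

static bool sys_clk_ready(struct regmap *regmap, int id)
{
	unsigned int status;

	regmap_read(regmap, AT91_PMC_SR, &status);
	return status & (1 << id);
}

static void sys_clk_enable(struct regmap *regmap, int id, bool is_pck_output)
{
	regmap_write(regmap, AT91_PMC_SCER, 1 << id);

	if (!is_pck_output)
		return;

	/* PCK outputs report readiness in SR; busy-wait for the bit */
	while (!sys_clk_ready(regmap, id))
		cpu_relax();
}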
7143 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-usb.c linux-4.4.46/drivers/clk/at91/clk-usb.c
7144 --- linux-4.4.46.orig/drivers/clk/at91/clk-usb.c 2017-02-01 08:31:11.000000000 +0100
7145 +++ linux-4.4.46/drivers/clk/at91/clk-usb.c 2017-02-03 17:18:05.675416176 +0100
7147 #include <linux/clkdev.h>
7148 #include <linux/clk/at91_pmc.h>
7149 #include <linux/of.h>
7150 -#include <linux/of_address.h>
7151 -#include <linux/io.h>
7152 +#include <linux/mfd/syscon.h>
7153 +#include <linux/regmap.h>
7159 struct at91sam9x5_clk_usb {
7161 - struct at91_pmc *pmc;
7162 + struct regmap *regmap;
7165 #define to_at91sam9x5_clk_usb(hw) \
7168 struct at91rm9200_clk_usb {
7170 - struct at91_pmc *pmc;
7171 + struct regmap *regmap;
7176 static unsigned long at91sam9x5_clk_usb_recalc_rate(struct clk_hw *hw,
7177 unsigned long parent_rate)
7181 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7182 - struct at91_pmc *pmc = usb->pmc;
7183 + unsigned int usbr;
7186 - tmp = pmc_read(pmc, AT91_PMC_USB);
7187 - usbdiv = (tmp & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
7188 + regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
7189 + usbdiv = (usbr & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
7191 return DIV_ROUND_CLOSEST(parent_rate, (usbdiv + 1));
7193 @@ -109,33 +108,31 @@
7195 static int at91sam9x5_clk_usb_set_parent(struct clk_hw *hw, u8 index)
7198 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7199 - struct at91_pmc *pmc = usb->pmc;
7203 - tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS;
7205 - tmp |= AT91_PMC_USBS;
7206 - pmc_write(pmc, AT91_PMC_USB, tmp);
7208 + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
7209 + index ? AT91_PMC_USBS : 0);
7214 static u8 at91sam9x5_clk_usb_get_parent(struct clk_hw *hw)
7216 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7217 - struct at91_pmc *pmc = usb->pmc;
7218 + unsigned int usbr;
7220 - return pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS;
7221 + regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
7223 + return usbr & AT91_PMC_USBS;
7226 static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
7227 unsigned long parent_rate)
7230 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7231 - struct at91_pmc *pmc = usb->pmc;
7236 if (div > SAM9X5_USB_MAX_DIV + 1 || !div)
7239 - tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_OHCIUSBDIV;
7240 - tmp |= (div - 1) << SAM9X5_USB_DIV_SHIFT;
7241 - pmc_write(pmc, AT91_PMC_USB, tmp);
7242 + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_OHCIUSBDIV,
7243 + (div - 1) << SAM9X5_USB_DIV_SHIFT);
7247 @@ -163,28 +159,28 @@
7248 static int at91sam9n12_clk_usb_enable(struct clk_hw *hw)
7250 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7251 - struct at91_pmc *pmc = usb->pmc;
7253 - pmc_write(pmc, AT91_PMC_USB,
7254 - pmc_read(pmc, AT91_PMC_USB) | AT91_PMC_USBS);
7255 + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
7261 static void at91sam9n12_clk_usb_disable(struct clk_hw *hw)
7263 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7264 - struct at91_pmc *pmc = usb->pmc;
7266 - pmc_write(pmc, AT91_PMC_USB,
7267 - pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS);
7268 + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, 0);
7271 static int at91sam9n12_clk_usb_is_enabled(struct clk_hw *hw)
7273 struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7274 - struct at91_pmc *pmc = usb->pmc;
7275 + unsigned int usbr;
7277 - return !!(pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS);
7278 + regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
7280 + return usbr & AT91_PMC_USBS;
7283 static const struct clk_ops at91sam9n12_usb_ops = {
7287 static struct clk * __init
7288 -at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name,
7289 +at91sam9x5_clk_register_usb(struct regmap *regmap, const char *name,
7290 const char **parent_names, u8 num_parents)
7292 struct at91sam9x5_clk_usb *usb;
7294 CLK_SET_RATE_PARENT;
7296 usb->hw.init = &init;
7298 + usb->regmap = regmap;
7300 clk = clk_register(NULL, &usb->hw);
7305 static struct clk * __init
7306 -at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name,
7307 +at91sam9n12_clk_register_usb(struct regmap *regmap, const char *name,
7308 const char *parent_name)
7310 struct at91sam9x5_clk_usb *usb;
7312 init.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT;
7314 usb->hw.init = &init;
7316 + usb->regmap = regmap;
7318 clk = clk_register(NULL, &usb->hw);
7320 @@ -257,12 +253,12 @@
7321 unsigned long parent_rate)
7323 struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
7324 - struct at91_pmc *pmc = usb->pmc;
7326 + unsigned int pllbr;
7329 - tmp = pmc_read(pmc, AT91_CKGR_PLLBR);
7330 - usbdiv = (tmp & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
7331 + regmap_read(usb->regmap, AT91_CKGR_PLLBR, &pllbr);
7333 + usbdiv = (pllbr & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
7334 if (usb->divisors[usbdiv])
7335 return parent_rate / usb->divisors[usbdiv];
7337 @@ -310,10 +306,8 @@
7338 static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
7339 unsigned long parent_rate)
7343 struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
7344 - struct at91_pmc *pmc = usb->pmc;
7348 @@ -323,10 +317,10 @@
7350 for (i = 0; i < RM9200_USB_DIV_TAB_SIZE; i++) {
7351 if (usb->divisors[i] == div) {
7352 - tmp = pmc_read(pmc, AT91_CKGR_PLLBR) &
7354 - tmp |= i << RM9200_USB_DIV_SHIFT;
7355 - pmc_write(pmc, AT91_CKGR_PLLBR, tmp);
7356 + regmap_update_bits(usb->regmap, AT91_CKGR_PLLBR,
7358 + i << RM9200_USB_DIV_SHIFT);
7366 static struct clk * __init
7367 -at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name,
7368 +at91rm9200_clk_register_usb(struct regmap *regmap, const char *name,
7369 const char *parent_name, const u32 *divisors)
7371 struct at91rm9200_clk_usb *usb;
7373 init.flags = CLK_SET_RATE_PARENT;
7375 usb->hw.init = &init;
7377 + usb->regmap = regmap;
7378 memcpy(usb->divisors, divisors, sizeof(usb->divisors));
7380 clk = clk_register(NULL, &usb->hw);
7381 @@ -369,13 +363,13 @@
7385 -void __init of_at91sam9x5_clk_usb_setup(struct device_node *np,
7386 - struct at91_pmc *pmc)
7387 +static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np)
7391 const char *parent_names[USB_SOURCE_MAX];
7392 const char *name = np->name;
7393 + struct regmap *regmap;
7395 num_parents = of_clk_get_parent_count(np);
7396 if (num_parents <= 0 || num_parents > USB_SOURCE_MAX)
7397 @@ -385,19 +379,26 @@
7399 of_property_read_string(np, "clock-output-names", &name);
7401 - clk = at91sam9x5_clk_register_usb(pmc, name, parent_names, num_parents);
7402 + regmap = syscon_node_to_regmap(of_get_parent(np));
7403 + if (IS_ERR(regmap))
7406 + clk = at91sam9x5_clk_register_usb(regmap, name, parent_names,
7411 of_clk_add_provider(np, of_clk_src_simple_get, clk);
7413 +CLK_OF_DECLARE(at91sam9x5_clk_usb, "atmel,at91sam9x5-clk-usb",
7414 + of_at91sam9x5_clk_usb_setup);
7416 -void __init of_at91sam9n12_clk_usb_setup(struct device_node *np,
7417 - struct at91_pmc *pmc)
7418 +static void __init of_at91sam9n12_clk_usb_setup(struct device_node *np)
7421 const char *parent_name;
7422 const char *name = np->name;
7423 + struct regmap *regmap;
7425 parent_name = of_clk_get_parent_name(np, 0);
7427 @@ -405,20 +406,26 @@
7429 of_property_read_string(np, "clock-output-names", &name);
7431 - clk = at91sam9n12_clk_register_usb(pmc, name, parent_name);
7432 + regmap = syscon_node_to_regmap(of_get_parent(np));
7433 + if (IS_ERR(regmap))
7436 + clk = at91sam9n12_clk_register_usb(regmap, name, parent_name);
7440 of_clk_add_provider(np, of_clk_src_simple_get, clk);
7442 +CLK_OF_DECLARE(at91sam9n12_clk_usb, "atmel,at91sam9n12-clk-usb",
7443 + of_at91sam9n12_clk_usb_setup);
7445 -void __init of_at91rm9200_clk_usb_setup(struct device_node *np,
7446 - struct at91_pmc *pmc)
7447 +static void __init of_at91rm9200_clk_usb_setup(struct device_node *np)
7450 const char *parent_name;
7451 const char *name = np->name;
7452 u32 divisors[4] = {0, 0, 0, 0};
7453 + struct regmap *regmap;
7455 parent_name = of_clk_get_parent_name(np, 0);
7457 @@ -430,9 +437,15 @@
7459 of_property_read_string(np, "clock-output-names", &name);
7461 - clk = at91rm9200_clk_register_usb(pmc, name, parent_name, divisors);
7462 + regmap = syscon_node_to_regmap(of_get_parent(np));
7463 + if (IS_ERR(regmap))
7466 + clk = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors);
7470 of_clk_add_provider(np, of_clk_src_simple_get, clk);
7472 +CLK_OF_DECLARE(at91rm9200_clk_usb, "atmel,at91rm9200-clk-usb",
7473 + of_at91rm9200_clk_usb_setup);
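
On at91rm9200 the USB divider is an index into a small board-specific table (read from DT into usb->divisors); recalc_rate extracts that index from the AT91_PMC_USBDIV field of AT91_CKGR_PLLBR and divides by the table entry. A sketch of the lookup; the helper name is illustrative and RM9200_USB_DIV_SHIFT is the driver-local shift used above:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>
#include <linux/types.h>

static unsigned long rm9200_usb_rate(struct regmap *regmap,
				     const u32 *divisors,
				     unsigned long parent_rate)
{
	unsigned int pllbr;
	u8 usbdiv;

	regmap_read(regmap, AT91_CKGR_PLLBR, &pllbr);
	usbdiv = (pllbr & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;

	/* a zero table entry marks an unusable divisor */
	if (divisors[usbdiv])
		return parent_rate / divisors[usbdiv];

	return 0;
}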
7474 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-utmi.c linux-4.4.46/drivers/clk/at91/clk-utmi.c
7475 --- linux-4.4.46.orig/drivers/clk/at91/clk-utmi.c 2017-02-01 08:31:11.000000000 +0100
7476 +++ linux-4.4.46/drivers/clk/at91/clk-utmi.c 2017-02-03 17:18:05.675416176 +0100
7478 #include <linux/clk-provider.h>
7479 #include <linux/clkdev.h>
7480 #include <linux/clk/at91_pmc.h>
7481 -#include <linux/interrupt.h>
7482 -#include <linux/irq.h>
7483 #include <linux/of.h>
7484 -#include <linux/of_address.h>
7485 -#include <linux/of_irq.h>
7486 -#include <linux/io.h>
7487 -#include <linux/sched.h>
7488 -#include <linux/wait.h>
7489 +#include <linux/mfd/syscon.h>
7490 +#include <linux/regmap.h>
7498 - struct at91_pmc *pmc;
7500 - wait_queue_head_t wait;
7501 + struct regmap *regmap;
7504 #define to_clk_utmi(hw) container_of(hw, struct clk_utmi, hw)
7506 -static irqreturn_t clk_utmi_irq_handler(int irq, void *dev_id)
7507 +static inline bool clk_utmi_ready(struct regmap *regmap)
7509 - struct clk_utmi *utmi = (struct clk_utmi *)dev_id;
7510 + unsigned int status;
7512 - wake_up(&utmi->wait);
7513 - disable_irq_nosync(utmi->irq);
7514 + regmap_read(regmap, AT91_PMC_SR, &status);
7516 - return IRQ_HANDLED;
7517 + return status & AT91_PMC_LOCKU;
7520 static int clk_utmi_prepare(struct clk_hw *hw)
7522 struct clk_utmi *utmi = to_clk_utmi(hw);
7523 - struct at91_pmc *pmc = utmi->pmc;
7524 - u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) | AT91_PMC_UPLLEN |
7525 - AT91_PMC_UPLLCOUNT | AT91_PMC_BIASEN;
7527 - pmc_write(pmc, AT91_CKGR_UCKR, tmp);
7529 - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU)) {
7530 - enable_irq(utmi->irq);
7531 - wait_event(utmi->wait,
7532 - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
7534 + unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT |
7537 + regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, uckr, uckr);
7539 + while (!clk_utmi_ready(utmi->regmap))
7545 static int clk_utmi_is_prepared(struct clk_hw *hw)
7547 struct clk_utmi *utmi = to_clk_utmi(hw);
7548 - struct at91_pmc *pmc = utmi->pmc;
7550 - return !!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
7551 + return clk_utmi_ready(utmi->regmap);
7554 static void clk_utmi_unprepare(struct clk_hw *hw)
7556 struct clk_utmi *utmi = to_clk_utmi(hw);
7557 - struct at91_pmc *pmc = utmi->pmc;
7558 - u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) & ~AT91_PMC_UPLLEN;
7560 - pmc_write(pmc, AT91_CKGR_UCKR, tmp);
7561 + regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, AT91_PMC_UPLLEN, 0);
7564 static unsigned long clk_utmi_recalc_rate(struct clk_hw *hw,
7568 static struct clk * __init
7569 -at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq,
7570 +at91_clk_register_utmi(struct regmap *regmap,
7571 const char *name, const char *parent_name)
7574 struct clk_utmi *utmi;
7575 struct clk *clk = NULL;
7576 struct clk_init_data init;
7577 @@ -112,52 +96,36 @@
7578 init.flags = CLK_SET_RATE_GATE;
7580 utmi->hw.init = &init;
7583 - init_waitqueue_head(&utmi->wait);
7584 - irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN);
7585 - ret = request_irq(utmi->irq, clk_utmi_irq_handler,
7586 - IRQF_TRIGGER_HIGH, "clk-utmi", utmi);
7589 - return ERR_PTR(ret);
7591 + utmi->regmap = regmap;
7593 clk = clk_register(NULL, &utmi->hw);
7594 - if (IS_ERR(clk)) {
7595 - free_irq(utmi->irq, utmi);
7604 -of_at91_clk_utmi_setup(struct device_node *np, struct at91_pmc *pmc)
7605 +static void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np)
7609 const char *parent_name;
7610 const char *name = np->name;
7611 + struct regmap *regmap;
7613 parent_name = of_clk_get_parent_name(np, 0);
7615 of_property_read_string(np, "clock-output-names", &name);
7617 - irq = irq_of_parse_and_map(np, 0);
7619 + regmap = syscon_node_to_regmap(of_get_parent(np));
7620 + if (IS_ERR(regmap))
7623 - clk = at91_clk_register_utmi(pmc, irq, name, parent_name);
7624 + clk = at91_clk_register_utmi(regmap, name, parent_name);
7628 of_clk_add_provider(np, of_clk_src_simple_get, clk);
7632 -void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np,
7633 - struct at91_pmc *pmc)
7635 - of_at91_clk_utmi_setup(np, pmc);
7637 +CLK_OF_DECLARE(at91sam9x5_clk_utmi, "atmel,at91sam9x5-clk-utmi",
7638 + of_at91sam9x5_clk_utmi_setup);
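
clk_utmi_prepare() relies on the regmap_update_bits() idiom of passing the same value as the mask, which sets UPLLEN, UPLLCOUNT and BIASEN in one masked write, and then polls AT91_PMC_SR for LOCKU rather than sleeping on the removed interrupt. A condensed sketch with an illustrative helper name:

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>

static void utmi_enable_and_wait(struct regmap *regmap)
{
	unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT |
			    AT91_PMC_BIASEN;
	unsigned int status;

	/* mask == value: set the three control bits, leave the rest untouched */
	regmap_update_bits(regmap, AT91_CKGR_UCKR, uckr, uckr);

	do {
		cpu_relax();
		regmap_read(regmap, AT91_PMC_SR, &status);
	} while (!(status & AT91_PMC_LOCKU));
}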
7639 diff -Nur linux-4.4.46.orig/drivers/clk/at91/pmc.c linux-4.4.46/drivers/clk/at91/pmc.c
7640 --- linux-4.4.46.orig/drivers/clk/at91/pmc.c 2017-02-01 08:31:11.000000000 +0100
7641 +++ linux-4.4.46/drivers/clk/at91/pmc.c 2017-02-03 17:18:05.675416176 +0100
7643 #include <linux/clkdev.h>
7644 #include <linux/clk/at91_pmc.h>
7645 #include <linux/of.h>
7646 -#include <linux/of_address.h>
7647 -#include <linux/io.h>
7648 -#include <linux/interrupt.h>
7649 -#include <linux/irq.h>
7650 -#include <linux/irqchip/chained_irq.h>
7651 -#include <linux/irqdomain.h>
7652 -#include <linux/of_irq.h>
7653 +#include <linux/mfd/syscon.h>
7654 +#include <linux/regmap.h>
7656 #include <asm/proc-fns.h>
7660 -void __iomem *at91_pmc_base;
7661 -EXPORT_SYMBOL_GPL(at91_pmc_base);
7663 -void at91rm9200_idle(void)
7666 - * Disable the processor clock. The processor will be automatically
7667 - * re-enabled by an interrupt or by a reset.
7669 - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
7672 -void at91sam9_idle(void)
7674 - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
7678 int of_at91_get_clk_range(struct device_node *np, const char *propname,
7679 struct clk_range *range)
7684 EXPORT_SYMBOL_GPL(of_at91_get_clk_range);
7686 -static void pmc_irq_mask(struct irq_data *d)
7688 - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7690 - pmc_write(pmc, AT91_PMC_IDR, 1 << d->hwirq);
7693 -static void pmc_irq_unmask(struct irq_data *d)
7695 - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7697 - pmc_write(pmc, AT91_PMC_IER, 1 << d->hwirq);
7700 -static int pmc_irq_set_type(struct irq_data *d, unsigned type)
7702 - if (type != IRQ_TYPE_LEVEL_HIGH) {
7703 - pr_warn("PMC: type not supported (support only IRQ_TYPE_LEVEL_HIGH type)\n");
7710 -static void pmc_irq_suspend(struct irq_data *d)
7712 - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7714 - pmc->imr = pmc_read(pmc, AT91_PMC_IMR);
7715 - pmc_write(pmc, AT91_PMC_IDR, pmc->imr);
7718 -static void pmc_irq_resume(struct irq_data *d)
7720 - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7722 - pmc_write(pmc, AT91_PMC_IER, pmc->imr);
7725 -static struct irq_chip pmc_irq = {
7727 - .irq_disable = pmc_irq_mask,
7728 - .irq_mask = pmc_irq_mask,
7729 - .irq_unmask = pmc_irq_unmask,
7730 - .irq_set_type = pmc_irq_set_type,
7731 - .irq_suspend = pmc_irq_suspend,
7732 - .irq_resume = pmc_irq_resume,
7735 -static struct lock_class_key pmc_lock_class;
7737 -static int pmc_irq_map(struct irq_domain *h, unsigned int virq,
7738 - irq_hw_number_t hw)
7740 - struct at91_pmc *pmc = h->host_data;
7742 - irq_set_lockdep_class(virq, &pmc_lock_class);
7744 - irq_set_chip_and_handler(virq, &pmc_irq,
7745 - handle_level_irq);
7746 - irq_set_chip_data(virq, pmc);
7751 -static int pmc_irq_domain_xlate(struct irq_domain *d,
7752 - struct device_node *ctrlr,
7753 - const u32 *intspec, unsigned int intsize,
7754 - irq_hw_number_t *out_hwirq,
7755 - unsigned int *out_type)
7757 - struct at91_pmc *pmc = d->host_data;
7758 - const struct at91_pmc_caps *caps = pmc->caps;
7760 - if (WARN_ON(intsize < 1))
7763 - *out_hwirq = intspec[0];
7765 - if (!(caps->available_irqs & (1 << *out_hwirq)))
7768 - *out_type = IRQ_TYPE_LEVEL_HIGH;
7773 -static const struct irq_domain_ops pmc_irq_ops = {
7774 - .map = pmc_irq_map,
7775 - .xlate = pmc_irq_domain_xlate,
7778 -static irqreturn_t pmc_irq_handler(int irq, void *data)
7780 - struct at91_pmc *pmc = (struct at91_pmc *)data;
7784 - sr = pmc_read(pmc, AT91_PMC_SR) & pmc_read(pmc, AT91_PMC_IMR);
7788 - for_each_set_bit(n, &sr, BITS_PER_LONG)
7789 - generic_handle_irq(irq_find_mapping(pmc->irqdomain, n));
7791 - return IRQ_HANDLED;
7794 -static const struct at91_pmc_caps at91rm9200_caps = {
7795 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
7796 - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
7797 - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
7801 -static const struct at91_pmc_caps at91sam9260_caps = {
7802 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
7803 - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
7807 -static const struct at91_pmc_caps at91sam9g45_caps = {
7808 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7809 - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7813 -static const struct at91_pmc_caps at91sam9n12_caps = {
7814 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
7815 - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
7816 - AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
7817 - AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
7820 -static const struct at91_pmc_caps at91sam9x5_caps = {
7821 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7822 - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7823 - AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
7824 - AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
7827 -static const struct at91_pmc_caps sama5d2_caps = {
7828 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7829 - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7830 - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
7831 - AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
7832 - AT91_PMC_CFDEV | AT91_PMC_GCKRDY,
7835 -static const struct at91_pmc_caps sama5d3_caps = {
7836 - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7837 - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7838 - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
7839 - AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
7843 -static struct at91_pmc *__init at91_pmc_init(struct device_node *np,
7844 - void __iomem *regbase, int virq,
7845 - const struct at91_pmc_caps *caps)
7847 - struct at91_pmc *pmc;
7849 - if (!regbase || !virq || !caps)
7852 - at91_pmc_base = regbase;
7854 - pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
7858 - spin_lock_init(&pmc->lock);
7859 - pmc->regbase = regbase;
7863 - pmc->irqdomain = irq_domain_add_linear(np, 32, &pmc_irq_ops, pmc);
7865 - if (!pmc->irqdomain)
7866 - goto out_free_pmc;
7868 - pmc_write(pmc, AT91_PMC_IDR, 0xffffffff);
7869 - if (request_irq(pmc->virq, pmc_irq_handler,
7870 - IRQF_SHARED | IRQF_COND_SUSPEND, "pmc", pmc))
7871 - goto out_remove_irqdomain;
7875 -out_remove_irqdomain:
7876 - irq_domain_remove(pmc->irqdomain);
7883 -static const struct of_device_id pmc_clk_ids[] __initconst = {
7884 - /* Slow oscillator */
7886 - .compatible = "atmel,at91sam9260-clk-slow",
7887 - .data = of_at91sam9260_clk_slow_setup,
7891 - .compatible = "atmel,at91rm9200-clk-main-osc",
7892 - .data = of_at91rm9200_clk_main_osc_setup,
7895 - .compatible = "atmel,at91sam9x5-clk-main-rc-osc",
7896 - .data = of_at91sam9x5_clk_main_rc_osc_setup,
7899 - .compatible = "atmel,at91rm9200-clk-main",
7900 - .data = of_at91rm9200_clk_main_setup,
7903 - .compatible = "atmel,at91sam9x5-clk-main",
7904 - .data = of_at91sam9x5_clk_main_setup,
7908 - .compatible = "atmel,at91rm9200-clk-pll",
7909 - .data = of_at91rm9200_clk_pll_setup,
7912 - .compatible = "atmel,at91sam9g45-clk-pll",
7913 - .data = of_at91sam9g45_clk_pll_setup,
7916 - .compatible = "atmel,at91sam9g20-clk-pllb",
7917 - .data = of_at91sam9g20_clk_pllb_setup,
7920 - .compatible = "atmel,sama5d3-clk-pll",
7921 - .data = of_sama5d3_clk_pll_setup,
7924 - .compatible = "atmel,at91sam9x5-clk-plldiv",
7925 - .data = of_at91sam9x5_clk_plldiv_setup,
7927 - /* Master clock */
7929 - .compatible = "atmel,at91rm9200-clk-master",
7930 - .data = of_at91rm9200_clk_master_setup,
7933 - .compatible = "atmel,at91sam9x5-clk-master",
7934 - .data = of_at91sam9x5_clk_master_setup,
7936 - /* System clocks */
7938 - .compatible = "atmel,at91rm9200-clk-system",
7939 - .data = of_at91rm9200_clk_sys_setup,
7941 - /* Peripheral clocks */
7943 - .compatible = "atmel,at91rm9200-clk-peripheral",
7944 - .data = of_at91rm9200_clk_periph_setup,
7947 - .compatible = "atmel,at91sam9x5-clk-peripheral",
7948 - .data = of_at91sam9x5_clk_periph_setup,
7950 - /* Programmable clocks */
7952 - .compatible = "atmel,at91rm9200-clk-programmable",
7953 - .data = of_at91rm9200_clk_prog_setup,
7956 - .compatible = "atmel,at91sam9g45-clk-programmable",
7957 - .data = of_at91sam9g45_clk_prog_setup,
7960 - .compatible = "atmel,at91sam9x5-clk-programmable",
7961 - .data = of_at91sam9x5_clk_prog_setup,
7964 -#if defined(CONFIG_HAVE_AT91_UTMI)
7966 - .compatible = "atmel,at91sam9x5-clk-utmi",
7967 - .data = of_at91sam9x5_clk_utmi_setup,
7971 -#if defined(CONFIG_HAVE_AT91_USB_CLK)
7973 - .compatible = "atmel,at91rm9200-clk-usb",
7974 - .data = of_at91rm9200_clk_usb_setup,
7977 - .compatible = "atmel,at91sam9x5-clk-usb",
7978 - .data = of_at91sam9x5_clk_usb_setup,
7981 - .compatible = "atmel,at91sam9n12-clk-usb",
7982 - .data = of_at91sam9n12_clk_usb_setup,
7986 -#if defined(CONFIG_HAVE_AT91_SMD)
7988 - .compatible = "atmel,at91sam9x5-clk-smd",
7989 - .data = of_at91sam9x5_clk_smd_setup,
7992 -#if defined(CONFIG_HAVE_AT91_H32MX)
7994 - .compatible = "atmel,sama5d4-clk-h32mx",
7995 - .data = of_sama5d4_clk_h32mx_setup,
7998 -#if defined(CONFIG_HAVE_AT91_GENERATED_CLK)
8000 - .compatible = "atmel,sama5d2-clk-generated",
8001 - .data = of_sama5d2_clk_generated_setup,
8007 -static void __init of_at91_pmc_setup(struct device_node *np,
8008 - const struct at91_pmc_caps *caps)
8010 - struct at91_pmc *pmc;
8011 - struct device_node *childnp;
8012 - void (*clk_setup)(struct device_node *, struct at91_pmc *);
8013 - const struct of_device_id *clk_id;
8014 - void __iomem *regbase = of_iomap(np, 0);
8020 - virq = irq_of_parse_and_map(np, 0);
8024 - pmc = at91_pmc_init(np, regbase, virq, caps);
8027 - for_each_child_of_node(np, childnp) {
8028 - clk_id = of_match_node(pmc_clk_ids, childnp);
8031 - clk_setup = clk_id->data;
8032 - clk_setup(childnp, pmc);
8036 -static void __init of_at91rm9200_pmc_setup(struct device_node *np)
8038 - of_at91_pmc_setup(np, &at91rm9200_caps);
8040 -CLK_OF_DECLARE(at91rm9200_clk_pmc, "atmel,at91rm9200-pmc",
8041 - of_at91rm9200_pmc_setup);
8043 -static void __init of_at91sam9260_pmc_setup(struct device_node *np)
8045 - of_at91_pmc_setup(np, &at91sam9260_caps);
8047 -CLK_OF_DECLARE(at91sam9260_clk_pmc, "atmel,at91sam9260-pmc",
8048 - of_at91sam9260_pmc_setup);
8050 -static void __init of_at91sam9g45_pmc_setup(struct device_node *np)
8052 - of_at91_pmc_setup(np, &at91sam9g45_caps);
8054 -CLK_OF_DECLARE(at91sam9g45_clk_pmc, "atmel,at91sam9g45-pmc",
8055 - of_at91sam9g45_pmc_setup);
8057 -static void __init of_at91sam9n12_pmc_setup(struct device_node *np)
8059 - of_at91_pmc_setup(np, &at91sam9n12_caps);
8061 -CLK_OF_DECLARE(at91sam9n12_clk_pmc, "atmel,at91sam9n12-pmc",
8062 - of_at91sam9n12_pmc_setup);
8064 -static void __init of_at91sam9x5_pmc_setup(struct device_node *np)
8066 - of_at91_pmc_setup(np, &at91sam9x5_caps);
8068 -CLK_OF_DECLARE(at91sam9x5_clk_pmc, "atmel,at91sam9x5-pmc",
8069 - of_at91sam9x5_pmc_setup);
8071 -static void __init of_sama5d2_pmc_setup(struct device_node *np)
8073 - of_at91_pmc_setup(np, &sama5d2_caps);
8075 -CLK_OF_DECLARE(sama5d2_clk_pmc, "atmel,sama5d2-pmc",
8076 - of_sama5d2_pmc_setup);
8078 -static void __init of_sama5d3_pmc_setup(struct device_node *np)
8080 - of_at91_pmc_setup(np, &sama5d3_caps);
8082 -CLK_OF_DECLARE(sama5d3_clk_pmc, "atmel,sama5d3-pmc",
8083 - of_sama5d3_pmc_setup);
8084 diff -Nur linux-4.4.46.orig/drivers/clk/at91/pmc.h linux-4.4.46/drivers/clk/at91/pmc.h
8085 --- linux-4.4.46.orig/drivers/clk/at91/pmc.h 2017-02-01 08:31:11.000000000 +0100
8086 +++ linux-4.4.46/drivers/clk/at91/pmc.h 2017-02-03 17:18:05.675416176 +0100
8089 #include <linux/io.h>
8090 #include <linux/irqdomain.h>
8091 +#include <linux/regmap.h>
8092 #include <linux/spinlock.h>
8094 +extern spinlock_t pmc_pcr_lock;
8101 #define CLK_RANGE(MIN, MAX) {.min = MIN, .max = MAX,}
8103 -struct at91_pmc_caps {
8104 - u32 available_irqs;
8108 - void __iomem *regbase;
8111 - const struct at91_pmc_caps *caps;
8112 - struct irq_domain *irqdomain;
8116 -static inline void pmc_lock(struct at91_pmc *pmc)
8118 - spin_lock(&pmc->lock);
8121 -static inline void pmc_unlock(struct at91_pmc *pmc)
8123 - spin_unlock(&pmc->lock);
8126 -static inline u32 pmc_read(struct at91_pmc *pmc, int offset)
8128 - return readl(pmc->regbase + offset);
8131 -static inline void pmc_write(struct at91_pmc *pmc, int offset, u32 value)
8133 - writel(value, pmc->regbase + offset);
8136 int of_at91_get_clk_range(struct device_node *np, const char *propname,
8137 struct clk_range *range);
8139 -void of_at91sam9260_clk_slow_setup(struct device_node *np,
8140 - struct at91_pmc *pmc);
8142 -void of_at91rm9200_clk_main_osc_setup(struct device_node *np,
8143 - struct at91_pmc *pmc);
8144 -void of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
8145 - struct at91_pmc *pmc);
8146 -void of_at91rm9200_clk_main_setup(struct device_node *np,
8147 - struct at91_pmc *pmc);
8148 -void of_at91sam9x5_clk_main_setup(struct device_node *np,
8149 - struct at91_pmc *pmc);
8151 -void of_at91rm9200_clk_pll_setup(struct device_node *np,
8152 - struct at91_pmc *pmc);
8153 -void of_at91sam9g45_clk_pll_setup(struct device_node *np,
8154 - struct at91_pmc *pmc);
8155 -void of_at91sam9g20_clk_pllb_setup(struct device_node *np,
8156 - struct at91_pmc *pmc);
8157 -void of_sama5d3_clk_pll_setup(struct device_node *np,
8158 - struct at91_pmc *pmc);
8159 -void of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
8160 - struct at91_pmc *pmc);
8162 -void of_at91rm9200_clk_master_setup(struct device_node *np,
8163 - struct at91_pmc *pmc);
8164 -void of_at91sam9x5_clk_master_setup(struct device_node *np,
8165 - struct at91_pmc *pmc);
8167 -void of_at91rm9200_clk_sys_setup(struct device_node *np,
8168 - struct at91_pmc *pmc);
8170 -void of_at91rm9200_clk_periph_setup(struct device_node *np,
8171 - struct at91_pmc *pmc);
8172 -void of_at91sam9x5_clk_periph_setup(struct device_node *np,
8173 - struct at91_pmc *pmc);
8175 -void of_at91rm9200_clk_prog_setup(struct device_node *np,
8176 - struct at91_pmc *pmc);
8177 -void of_at91sam9g45_clk_prog_setup(struct device_node *np,
8178 - struct at91_pmc *pmc);
8179 -void of_at91sam9x5_clk_prog_setup(struct device_node *np,
8180 - struct at91_pmc *pmc);
8182 -void of_at91sam9x5_clk_utmi_setup(struct device_node *np,
8183 - struct at91_pmc *pmc);
8185 -void of_at91rm9200_clk_usb_setup(struct device_node *np,
8186 - struct at91_pmc *pmc);
8187 -void of_at91sam9x5_clk_usb_setup(struct device_node *np,
8188 - struct at91_pmc *pmc);
8189 -void of_at91sam9n12_clk_usb_setup(struct device_node *np,
8190 - struct at91_pmc *pmc);
8192 -void of_at91sam9x5_clk_smd_setup(struct device_node *np,
8193 - struct at91_pmc *pmc);
8195 -void of_sama5d4_clk_h32mx_setup(struct device_node *np,
8196 - struct at91_pmc *pmc);
8198 -void of_sama5d2_clk_generated_setup(struct device_node *np,
8199 - struct at91_pmc *pmc);
8201 #endif /* __PMC_H_ */
8202 diff -Nur linux-4.4.46.orig/drivers/clocksource/tcb_clksrc.c linux-4.4.46/drivers/clocksource/tcb_clksrc.c
8203 --- linux-4.4.46.orig/drivers/clocksource/tcb_clksrc.c 2017-02-01 08:31:11.000000000 +0100
8204 +++ linux-4.4.46/drivers/clocksource/tcb_clksrc.c 2017-02-03 17:18:05.675416176 +0100
8206 * this 32 bit free-running counter. the second channel is not used.
8208 * - The third channel may be used to provide a 16-bit clockevent
8209 - * source, used in either periodic or oneshot mode. This runs
8210 - * at 32 KiHZ, and can handle delays of up to two seconds.
8211 + * source, used in either periodic or oneshot mode.
8213 * A boot clocksource and clockevent source are also currently needed,
8214 * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
8216 struct tc_clkevt_device {
8217 struct clock_event_device clkevt;
8225 return container_of(clkevt, struct tc_clkevt_device, clkevt);
8228 -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
8229 - * because using one of the divided clocks would usually mean the
8230 - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
8232 - * A divided clock could be good for high resolution timers, since
8233 - * 30.5 usec resolution can seem "low".
8235 static u32 timer_clock;
8237 +static void tc_clk_disable(struct clock_event_device *d)
8239 + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
8241 + clk_disable(tcd->clk);
8242 + tcd->clk_enabled = false;
8245 +static void tc_clk_enable(struct clock_event_device *d)
8247 + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
8249 + if (tcd->clk_enabled)
8251 + clk_enable(tcd->clk);
8252 + tcd->clk_enabled = true;
8255 static int tc_shutdown(struct clock_event_device *d)
8257 struct tc_clkevt_device *tcd = to_tc_clkevt(d);
8260 __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
8261 __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
8265 +static int tc_shutdown_clk_off(struct clock_event_device *d)
8268 if (!clockevent_state_detached(d))
8269 - clk_disable(tcd->clk);
8270 + tc_clk_disable(d);
8275 if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
8278 - clk_enable(tcd->clk);
8281 - /* slow clock, count up to RC, then irq and stop */
8282 + /* count up to RC, then irq and stop */
8283 __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
8284 ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
8285 __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
8286 @@ -134,12 +152,12 @@
8287 /* By not making the gentime core emulate periodic mode on top
8288 * of oneshot, we get lower overhead and improved accuracy.
8290 - clk_enable(tcd->clk);
8293 - /* slow clock, count up to RC, then irq and restart */
8294 + /* count up to RC, then irq and restart */
8295 __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
8296 regs + ATMEL_TC_REG(2, CMR));
8297 - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
8298 + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
8300 /* Enable clock and interrupts on RC compare */
8301 __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
8302 @@ -166,9 +184,13 @@
8303 .features = CLOCK_EVT_FEAT_PERIODIC |
8304 CLOCK_EVT_FEAT_ONESHOT,
8305 /* Should be lower than at91rm9200's system timer */
8306 +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
8311 .set_next_event = tc_next_event,
8312 - .set_state_shutdown = tc_shutdown,
8313 + .set_state_shutdown = tc_shutdown_clk_off,
8314 .set_state_periodic = tc_set_periodic,
8315 .set_state_oneshot = tc_set_oneshot,
8321 -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
8322 +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
8324 + unsigned divisor = atmel_tc_divisors[divisor_idx];
8326 struct clk *t2_clk = tc->clk[2];
8327 int irq = tc->irq[2];
8328 @@ -210,7 +233,11 @@
8329 clkevt.regs = tc->regs;
8330 clkevt.clk = t2_clk;
8332 - timer_clock = clk32k_divisor_idx;
8333 + timer_clock = divisor_idx;
8335 + clkevt.freq = 32768;
8337 + clkevt.freq = clk_get_rate(t2_clk) / divisor;
8339 clkevt.clkevt.cpumask = cpumask_of(0);
8345 - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
8346 + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
8350 @@ -358,7 +385,11 @@
8351 goto err_disable_t1;
8353 /* channel 2: periodic and oneshot timer support */
8354 +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
8355 ret = setup_clkevents(tc, clk32k_divisor_idx);
8357 + ret = setup_clkevents(tc, best_divisor_idx);
8360 goto err_unregister_clksrc;
8362 diff -Nur linux-4.4.46.orig/drivers/clocksource/timer-atmel-pit.c linux-4.4.46/drivers/clocksource/timer-atmel-pit.c
8363 --- linux-4.4.46.orig/drivers/clocksource/timer-atmel-pit.c 2017-02-01 08:31:11.000000000 +0100
8364 +++ linux-4.4.46/drivers/clocksource/timer-atmel-pit.c 2017-02-03 17:18:05.675416176 +0100
8369 + bool irq_requested;
8375 /* disable irq, leaving the clocksource active */
8376 pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
8377 + if (data->irq_requested) {
8378 + free_irq(data->irq, data);
8379 + data->irq_requested = false;
8384 +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
8386 * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
8388 static int pit_clkevt_set_periodic(struct clock_event_device *dev)
8390 struct pit_data *data = clkevt_to_pit_data(dev);
8393 + ret = request_irq(data->irq, at91sam926x_pit_interrupt,
8394 + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8395 + "at91_tick", data);
8397 + panic(pr_fmt("Unable to setup IRQ\n"));
8399 + data->irq_requested = true;
8401 /* update clocksource counter */
8402 data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
8405 unsigned long pit_rate;
8410 * Use our actual MCK to figure out how many MCK/16 ticks per
8411 @@ -206,13 +220,6 @@
8412 data->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
8413 clocksource_register_hz(&data->clksrc, pit_rate);
8415 - /* Set up irq handler */
8416 - ret = request_irq(data->irq, at91sam926x_pit_interrupt,
8417 - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8418 - "at91_tick", data);
8420 - panic(pr_fmt("Unable to setup IRQ\n"));
8422 /* Set up and register clockevents */
8423 data->clkevt.name = "pit";
8424 data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC;
8425 diff -Nur linux-4.4.46.orig/drivers/clocksource/timer-atmel-st.c linux-4.4.46/drivers/clocksource/timer-atmel-st.c
8426 --- linux-4.4.46.orig/drivers/clocksource/timer-atmel-st.c 2017-02-01 08:31:11.000000000 +0100
8427 +++ linux-4.4.46/drivers/clocksource/timer-atmel-st.c 2017-02-03 17:18:05.675416176 +0100
8428 @@ -115,18 +115,29 @@
8429 last_crtr = read_CRTR();
8432 +static int atmel_st_irq;
8434 static int clkevt32k_shutdown(struct clock_event_device *evt)
8436 clkdev32k_disable_and_flush_irq();
8438 regmap_write(regmap_st, AT91_ST_IER, irqmask);
8439 + free_irq(atmel_st_irq, regmap_st);
8443 static int clkevt32k_set_oneshot(struct clock_event_device *dev)
8447 clkdev32k_disable_and_flush_irq();
8449 + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
8450 + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8451 + "at91_tick", regmap_st);
8453 + panic(pr_fmt("Unable to setup IRQ\n"));
8456 * ALM for oneshot irqs, set by next_event()
8457 * before 32 seconds have passed.
8458 @@ -139,8 +150,16 @@
8460 static int clkevt32k_set_periodic(struct clock_event_device *dev)
8464 clkdev32k_disable_and_flush_irq();
8466 + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
8467 + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8468 + "at91_tick", regmap_st);
8470 + panic(pr_fmt("Unable to setup IRQ\n"));
8472 /* PIT for periodic irqs; fixed rate of 1/HZ */
8473 irqmask = AT91_ST_PITS;
8474 regmap_write(regmap_st, AT91_ST_PIMR, timer_latch);
8478 unsigned int sclk_rate, val;
8482 regmap_st = syscon_node_to_regmap(node);
8483 if (IS_ERR(regmap_st))
8484 @@ -210,17 +229,10 @@
8485 regmap_read(regmap_st, AT91_ST_SR, &val);
8487 /* Get the interrupts property */
8488 - irq = irq_of_parse_and_map(node, 0);
8490 + atmel_st_irq = irq_of_parse_and_map(node, 0);
8491 + if (!atmel_st_irq)
8492 panic(pr_fmt("Unable to get IRQ from DT\n"));
8494 - /* Make IRQs happen for the system timer */
8495 - ret = request_irq(irq, at91rm9200_timer_interrupt,
8496 - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8497 - "at91_tick", regmap_st);
8499 - panic(pr_fmt("Unable to setup IRQ\n"));
8501 sclk = of_clk_get(node, 0);
8503 panic(pr_fmt("Unable to get slow clock\n"));
8504 diff -Nur linux-4.4.46.orig/drivers/cpufreq/Kconfig.x86 linux-4.4.46/drivers/cpufreq/Kconfig.x86
8505 --- linux-4.4.46.orig/drivers/cpufreq/Kconfig.x86 2017-02-01 08:31:11.000000000 +0100
8506 +++ linux-4.4.46/drivers/cpufreq/Kconfig.x86 2017-02-03 17:18:05.675416176 +0100
8509 config X86_POWERNOW_K8
8510 tristate "AMD Opteron/Athlon64 PowerNow!"
8511 - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
8512 + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
8514 This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
8515 Support for K10 and newer processors is now in acpi-cpufreq.
8516 diff -Nur linux-4.4.46.orig/drivers/cpuidle/coupled.c linux-4.4.46/drivers/cpuidle/coupled.c
8517 --- linux-4.4.46.orig/drivers/cpuidle/coupled.c 2017-02-01 08:31:11.000000000 +0100
8518 +++ linux-4.4.46/drivers/cpuidle/coupled.c 2017-02-03 17:18:05.675416176 +0100
8521 #define CPUIDLE_COUPLED_NOT_IDLE (-1)
8523 -static DEFINE_MUTEX(cpuidle_coupled_lock);
8524 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
8527 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c linux-4.4.46/drivers/gpu/drm/i915/i915_gem_execbuffer.c
8528 --- linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-02-01 08:31:11.000000000 +0100
8529 +++ linux-4.4.46/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-02-03 17:18:05.675416176 +0100
8530 @@ -1264,7 +1264,9 @@
8534 +#ifndef CONFIG_PREEMPT_RT_BASE
8535 trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
8538 i915_gem_execbuffer_move_to_active(vmas, params->request);
8539 i915_gem_execbuffer_retire_commands(params);
8540 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c linux-4.4.46/drivers/gpu/drm/i915/i915_gem_shrinker.c
8541 --- linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-02-01 08:31:11.000000000 +0100
8542 +++ linux-4.4.46/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-02-03 17:18:05.675416176 +0100
8544 if (!mutex_is_locked(mutex))
8547 -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
8548 +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
8549 return mutex->owner == task;
8551 /* Since UP may be pre-empted, we cannot assume that we own the lock */
8552 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/i915_irq.c linux-4.4.46/drivers/gpu/drm/i915/i915_irq.c
8553 --- linux-4.4.46.orig/drivers/gpu/drm/i915/i915_irq.c 2017-02-01 08:31:11.000000000 +0100
8554 +++ linux-4.4.46/drivers/gpu/drm/i915/i915_irq.c 2017-02-03 17:18:05.675416176 +0100
8556 spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
8558 /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
8559 + preempt_disable_rt();
8561 /* Get optional system timestamp before query. */
8564 *etime = ktime_get();
8566 /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
8567 + preempt_enable_rt();
8569 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
8571 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/intel_display.c linux-4.4.46/drivers/gpu/drm/i915/intel_display.c
8572 --- linux-4.4.46.orig/drivers/gpu/drm/i915/intel_display.c 2017-02-01 08:31:11.000000000 +0100
8573 +++ linux-4.4.46/drivers/gpu/drm/i915/intel_display.c 2017-02-03 17:18:05.679416330 +0100
8574 @@ -11400,7 +11400,7 @@
8575 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
8576 struct intel_unpin_work *work;
8578 - WARN_ON(!in_interrupt());
8579 + WARN_ON_NONRT(!in_interrupt());
8583 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/intel_sprite.c linux-4.4.46/drivers/gpu/drm/i915/intel_sprite.c
8584 --- linux-4.4.46.orig/drivers/gpu/drm/i915/intel_sprite.c 2017-02-01 08:31:11.000000000 +0100
8585 +++ linux-4.4.46/drivers/gpu/drm/i915/intel_sprite.c 2017-02-03 17:18:05.679416330 +0100
8587 #include "intel_drv.h"
8588 #include <drm/i915_drm.h>
8589 #include "i915_drv.h"
8590 +#include <linux/locallock.h>
8593 format_is_yuv(uint32_t format)
8595 1000 * adjusted_mode->crtc_htotal);
8598 +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
8601 * intel_pipe_update_start() - start update of a set of display registers
8602 * @crtc: the crtc of which the registers are going to be updated
8604 min = vblank_start - usecs_to_scanlines(adjusted_mode, 100);
8605 max = vblank_start - 1;
8607 - local_irq_disable();
8608 + local_lock_irq(pipe_update_lock);
8610 if (min <= 0 || max <= 0)
8612 @@ -126,11 +129,11 @@
8616 - local_irq_enable();
8617 + local_unlock_irq(pipe_update_lock);
8619 timeout = schedule_timeout(timeout);
8621 - local_irq_disable();
8622 + local_lock_irq(pipe_update_lock);
8625 finish_wait(wq, &wait);
8628 trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
8630 - local_irq_enable();
8631 + local_unlock_irq(pipe_update_lock);
8633 if (crtc->debug.start_vbl_count &&
8634 crtc->debug.start_vbl_count != end_vbl_count) {
8635 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/radeon/radeon_display.c linux-4.4.46/drivers/gpu/drm/radeon/radeon_display.c
8636 --- linux-4.4.46.orig/drivers/gpu/drm/radeon/radeon_display.c 2017-02-01 08:31:11.000000000 +0100
8637 +++ linux-4.4.46/drivers/gpu/drm/radeon/radeon_display.c 2017-02-03 17:18:05.679416330 +0100
8638 @@ -1862,6 +1862,7 @@
8639 struct radeon_device *rdev = dev->dev_private;
8641 /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
8642 + preempt_disable_rt();
8644 /* Get optional system timestamp before query. */
8646 @@ -1954,6 +1955,7 @@
8647 *etime = ktime_get();
8649 /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
8650 + preempt_enable_rt();
8652 /* Decode into vertical and horizontal scanout position. */
8653 *vpos = position & 0x1fff;
8654 diff -Nur linux-4.4.46.orig/drivers/hv/vmbus_drv.c linux-4.4.46/drivers/hv/vmbus_drv.c
8655 --- linux-4.4.46.orig/drivers/hv/vmbus_drv.c 2017-02-01 08:31:11.000000000 +0100
8656 +++ linux-4.4.46/drivers/hv/vmbus_drv.c 2017-02-03 17:18:05.679416330 +0100
8658 tasklet_schedule(&msg_dpc);
8661 - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
8662 + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, 0);
8666 diff -Nur linux-4.4.46.orig/drivers/i2c/busses/i2c-omap.c linux-4.4.46/drivers/i2c/busses/i2c-omap.c
8667 --- linux-4.4.46.orig/drivers/i2c/busses/i2c-omap.c 2017-02-01 08:31:11.000000000 +0100
8668 +++ linux-4.4.46/drivers/i2c/busses/i2c-omap.c 2017-02-03 17:18:05.679416330 +0100
8669 @@ -995,15 +995,12 @@
8673 - spin_lock(&omap->lock);
8674 - mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
8675 stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
8676 + mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
8679 ret = IRQ_WAKE_THREAD;
8681 - spin_unlock(&omap->lock);
8686 diff -Nur linux-4.4.46.orig/drivers/ide/alim15x3.c linux-4.4.46/drivers/ide/alim15x3.c
8687 --- linux-4.4.46.orig/drivers/ide/alim15x3.c 2017-02-01 08:31:11.000000000 +0100
8688 +++ linux-4.4.46/drivers/ide/alim15x3.c 2017-02-03 17:18:05.679416330 +0100
8691 isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
8693 - local_irq_save(flags);
8694 + local_irq_save_nort(flags);
8696 if (m5229_revision < 0xC2) {
8701 pci_dev_put(isa_dev);
8702 - local_irq_restore(flags);
8703 + local_irq_restore_nort(flags);
8707 diff -Nur linux-4.4.46.orig/drivers/ide/hpt366.c linux-4.4.46/drivers/ide/hpt366.c
8708 --- linux-4.4.46.orig/drivers/ide/hpt366.c 2017-02-01 08:31:11.000000000 +0100
8709 +++ linux-4.4.46/drivers/ide/hpt366.c 2017-02-03 17:18:05.679416330 +0100
8710 @@ -1241,7 +1241,7 @@
8712 dma_old = inb(base + 2);
8714 - local_irq_save(flags);
8715 + local_irq_save_nort(flags);
8718 pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
8719 @@ -1252,7 +1252,7 @@
8720 if (dma_new != dma_old)
8721 outb(dma_new, base + 2);
8723 - local_irq_restore(flags);
8724 + local_irq_restore_nort(flags);
8726 printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
8727 hwif->name, base, base + 7);
8728 diff -Nur linux-4.4.46.orig/drivers/ide/ide-io.c linux-4.4.46/drivers/ide/ide-io.c
8729 --- linux-4.4.46.orig/drivers/ide/ide-io.c 2017-02-01 08:31:11.000000000 +0100
8730 +++ linux-4.4.46/drivers/ide/ide-io.c 2017-02-03 17:18:05.679416330 +0100
8732 /* disable_irq_nosync ?? */
8733 disable_irq(hwif->irq);
8734 /* local CPU only, as if we were handling an interrupt */
8735 - local_irq_disable();
8736 + local_irq_disable_nort();
8737 if (hwif->polling) {
8738 startstop = handler(drive);
8739 } else if (drive_is_ready(drive)) {
8740 diff -Nur linux-4.4.46.orig/drivers/ide/ide-iops.c linux-4.4.46/drivers/ide/ide-iops.c
8741 --- linux-4.4.46.orig/drivers/ide/ide-iops.c 2017-02-01 08:31:11.000000000 +0100
8742 +++ linux-4.4.46/drivers/ide/ide-iops.c 2017-02-03 17:18:05.679416330 +0100
8743 @@ -129,12 +129,12 @@
8744 if ((stat & ATA_BUSY) == 0)
8747 - local_irq_restore(flags);
8748 + local_irq_restore_nort(flags);
8753 - local_irq_restore(flags);
8754 + local_irq_restore_nort(flags);
8757 * Allow status to settle, then read it again.
8758 diff -Nur linux-4.4.46.orig/drivers/ide/ide-io-std.c linux-4.4.46/drivers/ide/ide-io-std.c
8759 --- linux-4.4.46.orig/drivers/ide/ide-io-std.c 2017-02-01 08:31:11.000000000 +0100
8760 +++ linux-4.4.46/drivers/ide/ide-io-std.c 2017-02-03 17:18:05.679416330 +0100
8762 unsigned long uninitialized_var(flags);
8764 if ((io_32bit & 2) && !mmio) {
8765 - local_irq_save(flags);
8766 + local_irq_save_nort(flags);
8767 ata_vlb_sync(io_ports->nsect_addr);
8771 insl(data_addr, buf, words);
8773 if ((io_32bit & 2) && !mmio)
8774 - local_irq_restore(flags);
8775 + local_irq_restore_nort(flags);
8777 if (((len + 1) & 3) < 2)
8780 unsigned long uninitialized_var(flags);
8782 if ((io_32bit & 2) && !mmio) {
8783 - local_irq_save(flags);
8784 + local_irq_save_nort(flags);
8785 ata_vlb_sync(io_ports->nsect_addr);
8789 outsl(data_addr, buf, words);
8791 if ((io_32bit & 2) && !mmio)
8792 - local_irq_restore(flags);
8793 + local_irq_restore_nort(flags);
8795 if (((len + 1) & 3) < 2)
8797 diff -Nur linux-4.4.46.orig/drivers/ide/ide-probe.c linux-4.4.46/drivers/ide/ide-probe.c
8798 --- linux-4.4.46.orig/drivers/ide/ide-probe.c 2017-02-01 08:31:11.000000000 +0100
8799 +++ linux-4.4.46/drivers/ide/ide-probe.c 2017-02-03 17:18:05.679416330 +0100
8800 @@ -196,10 +196,10 @@
8803 /* local CPU only; some systems need this */
8804 - local_irq_save(flags);
8805 + local_irq_save_nort(flags);
8806 /* read 512 bytes of id info */
8807 hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
8808 - local_irq_restore(flags);
8809 + local_irq_restore_nort(flags);
8811 drive->dev_flags |= IDE_DFLAG_ID_READ;
8813 diff -Nur linux-4.4.46.orig/drivers/ide/ide-taskfile.c linux-4.4.46/drivers/ide/ide-taskfile.c
8814 --- linux-4.4.46.orig/drivers/ide/ide-taskfile.c 2017-02-01 08:31:11.000000000 +0100
8815 +++ linux-4.4.46/drivers/ide/ide-taskfile.c 2017-02-03 17:18:05.683416484 +0100
8818 page_is_high = PageHighMem(page);
8820 - local_irq_save(flags);
8821 + local_irq_save_nort(flags);
8823 buf = kmap_atomic(page) + offset;
8829 - local_irq_restore(flags);
8830 + local_irq_restore_nort(flags);
8837 if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
8838 - local_irq_disable();
8839 + local_irq_disable_nort();
8841 ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
8843 diff -Nur linux-4.4.46.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-4.4.46/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
8844 --- linux-4.4.46.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-02-01 08:31:11.000000000 +0100
8845 +++ linux-4.4.46/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-02-03 17:18:05.683416484 +0100
8848 ipoib_dbg_mcast(priv, "restarting multicast task\n");
8850 - local_irq_save(flags);
8851 + local_irq_save_nort(flags);
8852 netif_addr_lock(dev);
8853 spin_lock(&priv->lock);
8857 spin_unlock(&priv->lock);
8858 netif_addr_unlock(dev);
8859 - local_irq_restore(flags);
8860 + local_irq_restore_nort(flags);
8863 * make sure the in-flight joins have finished before we attempt
8864 diff -Nur linux-4.4.46.orig/drivers/input/gameport/gameport.c linux-4.4.46/drivers/input/gameport/gameport.c
8865 --- linux-4.4.46.orig/drivers/input/gameport/gameport.c 2017-02-01 08:31:11.000000000 +0100
8866 +++ linux-4.4.46/drivers/input/gameport/gameport.c 2017-02-03 17:18:05.683416484 +0100
8870 for (i = 0; i < 50; i++) {
8871 - local_irq_save(flags);
8872 + local_irq_save_nort(flags);
8873 t1 = ktime_get_ns();
8874 for (t = 0; t < 50; t++)
8875 gameport_read(gameport);
8876 t2 = ktime_get_ns();
8877 t3 = ktime_get_ns();
8878 - local_irq_restore(flags);
8879 + local_irq_restore_nort(flags);
8881 t = (t2 - t1) - (t3 - t2);
8883 @@ -124,12 +124,12 @@
8886 for(i = 0; i < 50; i++) {
8887 - local_irq_save(flags);
8888 + local_irq_save_nort(flags);
8890 for (t = 0; t < 50; t++) gameport_read(gameport);
8893 - local_irq_restore(flags);
8894 + local_irq_restore_nort(flags);
8896 if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
8898 @@ -148,11 +148,11 @@
8901 for(i = 0; i < 50; i++) {
8902 - local_irq_save(flags);
8903 + local_irq_save_nort(flags);
8905 for (t = 0; t < 50; t++) gameport_read(gameport);
8907 - local_irq_restore(flags);
8908 + local_irq_restore_nort(flags);
8910 if (t2 - t1 < tx) tx = t2 - t1;
8912 diff -Nur linux-4.4.46.orig/drivers/iommu/amd_iommu.c linux-4.4.46/drivers/iommu/amd_iommu.c
8913 --- linux-4.4.46.orig/drivers/iommu/amd_iommu.c 2017-02-01 08:31:11.000000000 +0100
8914 +++ linux-4.4.46/drivers/iommu/amd_iommu.c 2017-02-03 17:18:05.683416484 +0100
8915 @@ -2022,10 +2022,10 @@
8919 - * Must be called with IRQs disabled. Warn here to detect early
8921 + * Must be called with IRQs disabled on a non-RT kernel. Warn here to
8922 + * detect early when it's not.
8924 - WARN_ON(!irqs_disabled());
8925 + WARN_ON_NONRT(!irqs_disabled());
8928 spin_lock(&domain->lock);
8929 @@ -2188,10 +2188,10 @@
8930 struct protection_domain *domain;
8933 - * Must be called with IRQs disabled. Warn here to detect early
8935 + * Must be called with IRQs disabled on a non-RT kernel. Warn here to
8936 + * detect early when it's not.
8938 - WARN_ON(!irqs_disabled());
8939 + WARN_ON_NONRT(!irqs_disabled());
8941 if (WARN_ON(!dev_data->domain))
8943 diff -Nur linux-4.4.46.orig/drivers/leds/trigger/Kconfig linux-4.4.46/drivers/leds/trigger/Kconfig
8944 --- linux-4.4.46.orig/drivers/leds/trigger/Kconfig 2017-02-01 08:31:11.000000000 +0100
8945 +++ linux-4.4.46/drivers/leds/trigger/Kconfig 2017-02-03 17:18:05.683416484 +0100
8948 config LEDS_TRIGGER_CPU
8949 bool "LED CPU Trigger"
8950 - depends on LEDS_TRIGGERS
8951 + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
8953 This allows LEDs to be controlled by active CPUs. This shows
8954 the active CPUs across an array of LEDs so you can see which
8955 diff -Nur linux-4.4.46.orig/drivers/md/bcache/Kconfig linux-4.4.46/drivers/md/bcache/Kconfig
8956 --- linux-4.4.46.orig/drivers/md/bcache/Kconfig 2017-02-01 08:31:11.000000000 +0100
8957 +++ linux-4.4.46/drivers/md/bcache/Kconfig 2017-02-03 17:18:05.683416484 +0100
8961 tristate "Block device as cache"
8962 + depends on !PREEMPT_RT_FULL
8964 Allows a block device to be used as cache for other devices; uses
8965 a btree for indexing and the layout is optimized for SSDs.
8966 diff -Nur linux-4.4.46.orig/drivers/md/dm.c linux-4.4.46/drivers/md/dm.c
8967 --- linux-4.4.46.orig/drivers/md/dm.c 2017-02-01 08:31:11.000000000 +0100
8968 +++ linux-4.4.46/drivers/md/dm.c 2017-02-03 17:18:05.683416484 +0100
8969 @@ -2127,7 +2127,7 @@
8970 /* Establish tio->ti before queuing work (map_tio_request) */
8972 queue_kthread_work(&md->kworker, &tio->work);
8973 - BUG_ON(!irqs_disabled());
8974 + BUG_ON_NONRT(!irqs_disabled());
8978 diff -Nur linux-4.4.46.orig/drivers/md/raid5.c linux-4.4.46/drivers/md/raid5.c
8979 --- linux-4.4.46.orig/drivers/md/raid5.c 2017-02-01 08:31:11.000000000 +0100
8980 +++ linux-4.4.46/drivers/md/raid5.c 2017-02-03 17:18:05.683416484 +0100
8981 @@ -1920,8 +1920,9 @@
8982 struct raid5_percpu *percpu;
8986 + cpu = get_cpu_light();
8987 percpu = per_cpu_ptr(conf->percpu, cpu);
8988 + spin_lock(&percpu->lock);
8989 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
8990 ops_run_biofill(sh);
8992 @@ -1977,7 +1978,8 @@
8993 if (test_and_clear_bit(R5_Overlap, &dev->flags))
8994 wake_up(&sh->raid_conf->wait_for_overlap);
8997 + spin_unlock(&percpu->lock);
9001 static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
9002 @@ -6414,6 +6416,7 @@
9006 + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
9010 diff -Nur linux-4.4.46.orig/drivers/md/raid5.h linux-4.4.46/drivers/md/raid5.h
9011 --- linux-4.4.46.orig/drivers/md/raid5.h 2017-02-01 08:31:11.000000000 +0100
9012 +++ linux-4.4.46/drivers/md/raid5.h 2017-02-03 17:18:05.683416484 +0100
9014 int recovery_disabled;
9015 /* per cpu variables */
9016 struct raid5_percpu {
9017 + spinlock_t lock; /* Protection for -RT */
9018 struct page *spare_page; /* Used when checking P/Q in raid6 */
9019 struct flex_array *scribble; /* space for constructing buffer
9020 * lists and performing address
9021 diff -Nur linux-4.4.46.orig/drivers/media/platform/vsp1/vsp1_video.c linux-4.4.46/drivers/media/platform/vsp1/vsp1_video.c
9022 --- linux-4.4.46.orig/drivers/media/platform/vsp1/vsp1_video.c 2017-02-01 08:31:11.000000000 +0100
9023 +++ linux-4.4.46/drivers/media/platform/vsp1/vsp1_video.c 2017-02-03 17:18:05.683416484 +0100
9027 spin_lock_irqsave(&pipe->irqlock, flags);
9028 - stopped = pipe->state == VSP1_PIPELINE_STOPPED,
9029 + stopped = pipe->state == VSP1_PIPELINE_STOPPED;
9030 spin_unlock_irqrestore(&pipe->irqlock, flags);
9033 diff -Nur linux-4.4.46.orig/drivers/misc/hwlat_detector.c linux-4.4.46/drivers/misc/hwlat_detector.c
9034 --- linux-4.4.46.orig/drivers/misc/hwlat_detector.c 1970-01-01 01:00:00.000000000 +0100
9035 +++ linux-4.4.46/drivers/misc/hwlat_detector.c 2017-02-03 17:18:05.687416638 +0100
9038 + * hwlat_detector.c - A simple Hardware Latency detector.
9040 + * Use this module to detect large system latencies induced by the behavior of
9041 + * certain underlying system hardware or firmware, independent of Linux itself.
9042 + * The code was developed originally to detect the presence of SMIs on Intel
9043 + * and AMD systems, although there is no dependency upon x86 herein.
9045 + * The classical example usage of this module is in detecting the presence of
9046 + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
9047 + * somewhat special form of hardware interrupt spawned from earlier CPU debug
9048 + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
9049 + * LPC (or other device) to generate a special interrupt under certain
9050 + * circumstances, for example, upon expiration of a special SMI timer device,
9051 + * due to certain external thermal readings, on certain I/O address accesses,
9052 + * and other situations. An SMI hits a special CPU pin, triggers a special
9053 + * SMI mode (complete with special memory map), and the OS is unaware.
9055 + * Although certain hardware-induced latencies are necessary (for example,
9056 + * a modern system often requires an SMI handler for correct thermal control
9057 + * and remote management) they can wreak havoc upon any OS-level performance
9058 + * guarantees toward low-latency, especially when the OS is not even made
9059 + * aware of the presence of these interrupts. For this reason, we need a
9060 + * somewhat brute force mechanism to detect these interrupts. In this case,
9061 + * we do it by hogging all of the CPU(s) for configurable timer intervals,
9062 + * sampling the built-in CPU timer, looking for discontiguous readings.
9064 + * WARNING: This implementation necessarily introduces latencies. Therefore,
9065 + * you should NEVER use this module in a production environment
9066 + * requiring any kind of low-latency performance guarantee(s).
9068 + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
9070 + * Includes useful feedback from Clark Williams <clark@redhat.com>
9072 + * This file is licensed under the terms of the GNU General Public
9073 + * License version 2. This program is licensed "as is" without any
9074 + * warranty of any kind, whether express or implied.
9077 +#include <linux/module.h>
9078 +#include <linux/init.h>
9079 +#include <linux/ring_buffer.h>
9080 +#include <linux/time.h>
9081 +#include <linux/hrtimer.h>
9082 +#include <linux/kthread.h>
9083 +#include <linux/debugfs.h>
9084 +#include <linux/seq_file.h>
9085 +#include <linux/uaccess.h>
9086 +#include <linux/version.h>
9087 +#include <linux/delay.h>
9088 +#include <linux/slab.h>
9089 +#include <linux/trace_clock.h>
9091 +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
9092 +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
9093 +#define U64STR_SIZE 22 /* 20 digits max */
9095 +#define VERSION "1.0.0"
9096 +#define BANNER "hwlat_detector: "
9097 +#define DRVNAME "hwlat_detector"
9098 +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
9099 +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
9100 +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
9102 +/* Module metadata */
9104 +MODULE_LICENSE("GPL");
9105 +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
9106 +MODULE_DESCRIPTION("A simple hardware latency detector");
9107 +MODULE_VERSION(VERSION);
9109 +/* Module parameters */
9112 +static int enabled;
9113 +static int threshold;
9115 +module_param(debug, int, 0); /* enable debug */
9116 +module_param(enabled, int, 0); /* enable detector */
9117 +module_param(threshold, int, 0); /* latency threshold */
9119 +/* Buffering and sampling */
9121 +static struct ring_buffer *ring_buffer; /* sample buffer */
9122 +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
9123 +static unsigned long buf_size = BUF_SIZE_DEFAULT;
9124 +static struct task_struct *kthread; /* sampling thread */
9126 +/* DebugFS filesystem entries */
9128 +static struct dentry *debug_dir; /* debugfs directory */
9129 +static struct dentry *debug_max; /* maximum TSC delta */
9130 +static struct dentry *debug_count; /* total detect count */
9131 +static struct dentry *debug_sample_width; /* sample width us */
9132 +static struct dentry *debug_sample_window; /* sample window us */
9133 +static struct dentry *debug_sample; /* raw samples us */
9134 +static struct dentry *debug_threshold; /* threshold us */
9135 +static struct dentry *debug_enable; /* enable/disable */
9137 +/* Individual samples and global state */
9139 +struct sample; /* latency sample */
9140 +struct data; /* Global state */
9142 +/* Sampling functions */
9143 +static int __buffer_add_sample(struct sample *sample);
9144 +static struct sample *buffer_get_sample(struct sample *sample);
9146 +/* Threading and state */
9147 +static int kthread_fn(void *unused);
9148 +static int start_kthread(void);
9149 +static int stop_kthread(void);
9150 +static void __reset_stats(void);
9151 +static int init_stats(void);
9153 +/* Debugfs interface */
9154 +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
9155 + size_t cnt, loff_t *ppos, const u64 *entry);
9156 +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
9157 + size_t cnt, loff_t *ppos, u64 *entry);
9158 +static int debug_sample_fopen(struct inode *inode, struct file *filp);
9159 +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
9160 + size_t cnt, loff_t *ppos);
9161 +static int debug_sample_release(struct inode *inode, struct file *filp);
9162 +static int debug_enable_fopen(struct inode *inode, struct file *filp);
9163 +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
9164 + size_t cnt, loff_t *ppos);
9165 +static ssize_t debug_enable_fwrite(struct file *file,
9166 + const char __user *user_buffer,
9167 + size_t user_size, loff_t *offset);
9169 +/* Initialization functions */
9170 +static int init_debugfs(void);
9171 +static void free_debugfs(void);
9172 +static int detector_init(void);
9173 +static void detector_exit(void);
9175 +/* Individual latency samples are stored here when detected and packed into
9176 + * the ring_buffer circular buffer, where they are overwritten when
9177 + * more than buf_size/sizeof(sample) samples are received. */
9179 + u64 seqnum; /* unique sequence */
9180 + u64 duration; /* ktime delta */
9181 + u64 outer_duration; /* ktime delta (outer loop) */
9182 + struct timespec timestamp; /* wall time */
9183 + unsigned long lost;
9186 +/* keep the global state somewhere. */
9187 +static struct data {
9189 + struct mutex lock; /* protect changes */
9191 + u64 count; /* total since reset */
9192 + u64 max_sample; /* max hardware latency */
9193 + u64 threshold; /* sample threshold level */
9195 + u64 sample_window; /* total sampling window (on+off) */
9196 + u64 sample_width; /* active sampling portion of window */
9198 + atomic_t sample_open; /* whether the sample file is open */
9200 + wait_queue_head_t wq; /* waitqueue for new sample values */
9205 + * __buffer_add_sample - add a new latency sample recording to the ring buffer
9206 + * @sample: The new latency sample value
9208 + * This receives a new latency sample and records it in a global ring buffer.
9209 + * No additional locking is used in this case.
9211 +static int __buffer_add_sample(struct sample *sample)
9213 + return ring_buffer_write(ring_buffer,
9214 + sizeof(struct sample), sample);
9218 + * buffer_get_sample - remove a hardware latency sample from the ring buffer
9219 + * @sample: Pre-allocated storage for the sample
9221 + * This retrieves a hardware latency sample from the global circular buffer
9223 +static struct sample *buffer_get_sample(struct sample *sample)
9225 + struct ring_buffer_event *e = NULL;
9226 + struct sample *s = NULL;
9227 + unsigned int cpu = 0;
9232 + mutex_lock(&ring_buffer_mutex);
9233 + for_each_online_cpu(cpu) {
9234 + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
9240 + s = ring_buffer_event_data(e);
9241 + memcpy(sample, s, sizeof(struct sample));
9244 + mutex_unlock(&ring_buffer_mutex);
9249 +#ifndef CONFIG_TRACING
9250 +#define time_type ktime_t
9251 +#define time_get() ktime_get()
9252 +#define time_to_us(x) ktime_to_us(x)
9253 +#define time_sub(a, b) ktime_sub(a, b)
9254 +#define init_time(a, b) (a).tv64 = b
9255 +#define time_u64(a) ((a).tv64)
9257 +#define time_type u64
9258 +#define time_get() trace_clock_local()
9259 +#define time_to_us(x) div_u64(x, 1000)
9260 +#define time_sub(a, b) ((a) - (b))
9261 +#define init_time(a, b) (a = b)
9262 +#define time_u64(a) a
9265 + * get_sample - sample the CPU TSC and look for likely hardware latencies
9267 + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
9268 + * hardware-induced latency. Called with interrupts disabled and with
9271 +static int get_sample(void)
9273 + time_type start, t1, t2, last_t2;
9274 + s64 diff, total = 0;
9276 + u64 outer_sample = 0;
9279 + init_time(last_t2, 0);
9280 + start = time_get(); /* start timestamp */
9284 + t1 = time_get(); /* we'll look for a discontinuity */
9287 + if (time_u64(last_t2)) {
9288 + /* Check the delta from outer loop (t2 to next t1) */
9289 + diff = time_to_us(time_sub(t1, last_t2));
9290 + /* This shouldn't happen */
9292 + pr_err(BANNER "time running backwards\n");
9295 + if (diff > outer_sample)
9296 + outer_sample = diff;
9300 + total = time_to_us(time_sub(t2, start)); /* sample width */
9302 + /* This checks the inner loop (t1 to t2) */
9303 + diff = time_to_us(time_sub(t2, t1)); /* current diff */
9305 + /* This shouldn't happen */
9307 + pr_err(BANNER "time running backwards\n");
9311 + if (diff > sample)
9312 + sample = diff; /* only want highest value */
9314 + } while (total <= data.sample_width);
9318 + /* If we exceed the threshold value, we have found a hardware latency */
9319 + if (sample > data.threshold || outer_sample > data.threshold) {
9325 + s.seqnum = data.count;
9326 + s.duration = sample;
9327 + s.outer_duration = outer_sample;
9328 + s.timestamp = CURRENT_TIME;
9329 + __buffer_add_sample(&s);
9331 + /* Keep a running maximum ever recorded hardware latency */
9332 + if (sample > data.max_sample)
9333 + data.max_sample = sample;
9341 + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
9342 + * @unused: A required part of the kthread API.
9344 + * Used to periodically sample the CPU TSC via a call to get_sample. We
9345 + * disable interrupts, which does (intentionally) introduce latency since we
9346 + * need to ensure nothing else might be running (and thus pre-empting).
9347 + * Obviously this should never be used in production environments.
9349 + * Currently this runs on which ever CPU it was scheduled on, but most
9350 + * real-world hardware latency situations occur across several CPUs,
9351 + * but we might later generalize this if we find there are any actual
9352 + * systems with alternate SMI delivery or other hardware latencies.
9354 +static int kthread_fn(void *unused)
9359 + while (!kthread_should_stop()) {
9361 + mutex_lock(&data.lock);
9363 + local_irq_disable();
9364 + ret = get_sample();
9365 + local_irq_enable();
9368 + wake_up(&data.wq); /* wake up reader(s) */
9370 + interval = data.sample_window - data.sample_width;
9371 + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
9373 + mutex_unlock(&data.lock);
9375 + if (msleep_interruptible(interval))
9383 + * start_kthread - Kick off the hardware latency sampling/detector kthread
9385 + * This starts a kernel thread that will sit and sample the CPU timestamp
9386 + * counter (TSC or similar) and look for potential hardware latencies.
9388 +static int start_kthread(void)
9390 + kthread = kthread_run(kthread_fn, NULL,
9392 + if (IS_ERR(kthread)) {
9393 + pr_err(BANNER "could not start sampling thread\n");
9402 + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
9404 + * This kicks the running hardware latency sampling/detector kernel thread and
9405 + * tells it to stop sampling now. Use this on unload and at system shutdown.
9407 +static int stop_kthread(void)
9411 + ret = kthread_stop(kthread);
9417 + * __reset_stats - Reset statistics for the hardware latency detector
9419 + * We use data to store various statistics and global state. We call this
9420 + * function in order to reset those when "enable" is toggled on or off, and
9421 + * also at initialization. Should be called with data.lock held.
9423 +static void __reset_stats(void)
9426 + data.max_sample = 0;
9427 + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
9431 + * init_stats - Setup global state statistics for the hardware latency detector
9433 + * We use data to store various statistics and global state. We also use
9434 + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
9435 + * induced system latencies. This function initializes these structures and
9436 + * allocates the global ring buffer also.
9438 +static int init_stats(void)
9440 + int ret = -ENOMEM;
9442 + mutex_init(&data.lock);
9443 + init_waitqueue_head(&data.wq);
9444 + atomic_set(&data.sample_open, 0);
9446 + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
9448 + if (WARN(!ring_buffer, KERN_ERR BANNER
9449 + "failed to allocate ring buffer!\n"))
9453 + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
9454 + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
9455 + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
9465 + * simple_data_read - Wrapper read function for global state debugfs entries
9466 + * @filp: The active open file structure for the debugfs "file"
9467 + * @ubuf: The userspace provided buffer to read value into
9468 + * @cnt: The maximum number of bytes to read
9469 + * @ppos: The current "file" position
9470 + * @entry: The entry to read from
9472 + * This function provides a generic read implementation for the global state
9473 + * "data" structure debugfs filesystem entries. It would be nice to use
9474 + * simple_attr_read directly, but we need to make sure that the data.lock
9475 + * is held during the actual read.
9477 +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
9478 + size_t cnt, loff_t *ppos, const u64 *entry)
9480 + char buf[U64STR_SIZE];
9484 + memset(buf, 0, sizeof(buf));
9489 + mutex_lock(&data.lock);
9491 + mutex_unlock(&data.lock);
9493 + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
9495 + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
9500 + * simple_data_write - Wrapper write function for global state debugfs entries
9501 + * @filp: The active open file structure for the debugfs "file"
9502 + * @ubuf: The userspace provided buffer to write value from
9503 + * @cnt: The maximum number of bytes to write
9504 + * @ppos: The current "file" position
9505 + * @entry: The entry to write to
9507 + * This function provides a generic write implementation for the global state
9508 + * "data" structure debugfs filesystem entries. It would be nice to use
9509 + * simple_attr_write directly, but we need to make sure that the data.lock
9510 + * is held during the actual write.
9512 +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
9513 + size_t cnt, loff_t *ppos, u64 *entry)
9515 + char buf[U64STR_SIZE];
9516 + int csize = min(cnt, sizeof(buf));
9520 + memset(buf, '\0', sizeof(buf));
9521 + if (copy_from_user(buf, ubuf, csize))
9524 + buf[U64STR_SIZE-1] = '\0'; /* just in case */
9525 + err = kstrtoull(buf, 10, &val);
9529 + mutex_lock(&data.lock);
9531 + mutex_unlock(&data.lock);
9537 + * debug_count_fopen - Open function for "count" debugfs entry
9538 + * @inode: The in-kernel inode representation of the debugfs "file"
9539 + * @filp: The active open file structure for the debugfs "file"
9541 + * This function provides an open implementation for the "count" debugfs
9542 + * interface to the hardware latency detector.
9544 +static int debug_count_fopen(struct inode *inode, struct file *filp)
9550 + * debug_count_fread - Read function for "count" debugfs entry
9551 + * @filp: The active open file structure for the debugfs "file"
9552 + * @ubuf: The userspace provided buffer to read value into
9553 + * @cnt: The maximum number of bytes to read
9554 + * @ppos: The current "file" position
9556 + * This function provides a read implementation for the "count" debugfs
9557 + * interface to the hardware latency detector. Can be used to read the
9558 + * number of latency readings exceeding the configured threshold since
9559 + * the detector was last reset (e.g. by writing a zero into "count").
9561 +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
9562 + size_t cnt, loff_t *ppos)
9564 + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
9568 + * debug_count_fwrite - Write function for "count" debugfs entry
9569 + * @filp: The active open file structure for the debugfs "file"
9570 + * @ubuf: The user buffer that contains the value to write
9571 + * @cnt: The maximum number of bytes to write to "file"
9572 + * @ppos: The current position in the debugfs "file"
9574 + * This function provides a write implementation for the "count" debugfs
9575 + * interface to the hardware latency detector. Can be used to write a
9576 + * desired value, especially to zero the total count.
9578 +static ssize_t debug_count_fwrite(struct file *filp,
9579 + const char __user *ubuf,
9583 + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
9587 + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
9588 + * @inode: The in-kernel inode representation of the debugfs "file"
9589 + * @filp: The active open file structure for the debugfs "file"
9591 + * This function provides an open implementation for the "enable" debugfs
9592 + * interface to the hardware latency detector.
9594 +static int debug_enable_fopen(struct inode *inode, struct file *filp)
9600 + * debug_enable_fread - Read function for "enable" debugfs interface
9601 + * @filp: The active open file structure for the debugfs "file"
9602 + * @ubuf: The userspace provided buffer to read value into
9603 + * @cnt: The maximum number of bytes to read
9604 + * @ppos: The current "file" position
9606 + * This function provides a read implementation for the "enable" debugfs
9607 + * interface to the hardware latency detector. Can be used to determine
9608 + * whether the detector is currently enabled ("0\n" or "1\n" returned).
9610 +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
9611 + size_t cnt, loff_t *ppos)
9615 + if ((cnt < sizeof(buf)) || (*ppos))
9618 + buf[0] = enabled ? '1' : '0';
9621 + if (copy_to_user(ubuf, buf, strlen(buf)))
9623 + return *ppos = strlen(buf);
9627 + * debug_enable_fwrite - Write function for "enable" debugfs interface
9628 + * @filp: The active open file structure for the debugfs "file"
9629 + * @ubuf: The user buffer that contains the value to write
9630 + * @cnt: The maximum number of bytes to write to "file"
9631 + * @ppos: The current position in the debugfs "file"
9633 + * This function provides a write implementation for the "enable" debugfs
9634 + * interface to the hardware latency detector. Can be used to enable or
9635 + * disable the detector, which will have the side-effect of possibly
9636 + * also resetting the global stats and kicking off the measuring
9637 + * kthread (on an enable) or the converse (upon a disable).
9639 +static ssize_t debug_enable_fwrite(struct file *filp,
9640 + const char __user *ubuf,
9645 + int csize = min(cnt, sizeof(buf));
9649 + memset(buf, '\0', sizeof(buf));
9650 + if (copy_from_user(buf, ubuf, csize))
9653 + buf[sizeof(buf)-1] = '\0'; /* just in case */
9654 + err = kstrtoul(buf, 10, &val);
9663 + if (start_kthread())
9669 + err = stop_kthread();
9671 + pr_err(BANNER "cannot stop kthread\n");
9674 + wake_up(&data.wq); /* reader(s) should return */
9681 + * debug_max_fopen - Open function for "max" debugfs entry
9682 + * @inode: The in-kernel inode representation of the debugfs "file"
9683 + * @filp: The active open file structure for the debugfs "file"
9685 + * This function provides an open implementation for the "max" debugfs
9686 + * interface to the hardware latency detector.
9688 +static int debug_max_fopen(struct inode *inode, struct file *filp)
9694 + * debug_max_fread - Read function for "max" debugfs entry
9695 + * @filp: The active open file structure for the debugfs "file"
9696 + * @ubuf: The userspace provided buffer to read value into
9697 + * @cnt: The maximum number of bytes to read
9698 + * @ppos: The current "file" position
9700 + * This function provides a read implementation for the "max" debugfs
9701 + * interface to the hardware latency detector. Can be used to determine
9702 + * the maximum latency value observed since it was last reset.
9704 +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
9705 + size_t cnt, loff_t *ppos)
9707 + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
9711 + * debug_max_fwrite - Write function for "max" debugfs entry
9712 + * @filp: The active open file structure for the debugfs "file"
9713 + * @ubuf: The user buffer that contains the value to write
9714 + * @cnt: The maximum number of bytes to write to "file"
9715 + * @ppos: The current position in the debugfs "file"
9717 + * This function provides a write implementation for the "max" debugfs
9718 + * interface to the hardware latency detector. Can be used to reset the
9719 + * maximum or set it to some other desired value - if, then, subsequent
9720 + * measurements exceed this value, the maximum will be updated.
9722 +static ssize_t debug_max_fwrite(struct file *filp,
9723 + const char __user *ubuf,
9727 + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
9732 + * debug_sample_fopen - An open function for "sample" debugfs interface
9733 + * @inode: The in-kernel inode representation of this debugfs "file"
9734 + * @filp: The active open file structure for the debugfs "file"
9736 + * This function handles opening the "sample" file within the hardware
9737 + * latency detector debugfs directory interface. This file is used to read
9738 + * raw samples from the global ring_buffer and allows the user to see a
9739 + * running latency history. Can be opened blocking or non-blocking,
9740 + * affecting whether it behaves as a buffer read pipe, or does not.
9741 + * Implements simple locking to prevent multiple simultaneous use.
9743 +static int debug_sample_fopen(struct inode *inode, struct file *filp)
9745 + if (!atomic_add_unless(&data.sample_open, 1, 1))
9752 + * debug_sample_fread - A read function for "sample" debugfs interface
9753 + * @filp: The active open file structure for the debugfs "file"
9754 + * @ubuf: The user buffer that will contain the samples read
9755 + * @cnt: The maximum bytes to read from the debugfs "file"
9756 + * @ppos: The current position in the debugfs "file"
9758 + * This function handles reading from the "sample" file within the hardware
9759 + * latency detector debugfs directory interface. This file is used to read
9760 + * raw samples from the global ring_buffer and allows the user to see a
9761 + * running latency history. By default this will block pending a new
9762 + * value written into the sample buffer, unless there are already a
9763 + * number of value(s) waiting in the buffer, or the sample file was
9764 + * previously opened in a non-blocking mode of operation.
9766 +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
9767 + size_t cnt, loff_t *ppos)
9771 + struct sample *sample = NULL;
9776 + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
9780 + while (!buffer_get_sample(sample)) {
9782 + DEFINE_WAIT(wait);
9784 + if (filp->f_flags & O_NONBLOCK) {
9789 + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
9791 + finish_wait(&data.wq, &wait);
9793 + if (signal_pending(current)) {
9798 + if (!enabled) { /* enable was toggled */
9804 + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
9805 + sample->timestamp.tv_sec,
9806 + sample->timestamp.tv_nsec,
9808 + sample->outer_duration);
9811 + /* handling partial reads is more trouble than it's worth */
9815 + if (copy_to_user(ubuf, buf, len))
9824 + * debug_sample_release - Release function for "sample" debugfs interface
9825 + * @inode: The in-kernel inode representation of the debugfs "file"
9826 + * @filp: The active open file structure for the debugfs "file"
9828 + * This function completes the close of the debugfs interface "sample" file.
9829 + * Frees the sample_open "lock" so that other users may open the interface.
9831 +static int debug_sample_release(struct inode *inode, struct file *filp)
9833 + atomic_dec(&data.sample_open);
9839 + * debug_threshold_fopen - Open function for "threshold" debugfs entry
9840 + * @inode: The in-kernel inode representation of the debugfs "file"
9841 + * @filp: The active open file structure for the debugfs "file"
9843 + * This function provides an open implementation for the "threshold" debugfs
9844 + * interface to the hardware latency detector.
9846 +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
9852 + * debug_threshold_fread - Read function for "threshold" debugfs entry
9853 + * @filp: The active open file structure for the debugfs "file"
9854 + * @ubuf: The userspace provided buffer to read value into
9855 + * @cnt: The maximum number of bytes to read
9856 + * @ppos: The current "file" position
9858 + * This function provides a read implementation for the "threshold" debugfs
9859 + * interface to the hardware latency detector. It can be used to determine
9860 + * the current threshold level at which a latency will be recorded in the
9861 + * global ring buffer, typically on the order of 10us.
9863 +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
9864 + size_t cnt, loff_t *ppos)
9866 + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
9870 + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
9871 + * @filp: The active open file structure for the debugfs "file"
9872 + * @ubuf: The user buffer that contains the value to write
9873 + * @cnt: The maximum number of bytes to write to "file"
9874 + * @ppos: The current position in the debugfs "file"
9876 + * This function provides a write implementation for the "threshold" debugfs
9877 + * interface to the hardware latency detector. It can be used to configure
9878 + * the threshold level at which any subsequently detected latencies will
9879 + * be recorded into the global ring buffer.
9881 +static ssize_t debug_threshold_fwrite(struct file *filp,
9882 + const char __user *ubuf,
9888 + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
9891 + wake_up_process(kthread);
9897 + * debug_width_fopen - Open function for "width" debugfs entry
9898 + * @inode: The in-kernel inode representation of the debugfs "file"
9899 + * @filp: The active open file structure for the debugfs "file"
9901 + * This function provides an open implementation for the "width" debugfs
9902 + * interface to the hardware latency detector.
9904 +static int debug_width_fopen(struct inode *inode, struct file *filp)
9910 + * debug_width_fread - Read function for "width" debugfs entry
9911 + * @filp: The active open file structure for the debugfs "file"
9912 + * @ubuf: The userspace provided buffer to read value into
9913 + * @cnt: The maximum number of bytes to read
9914 + * @ppos: The current "file" position
9916 + * This function provides a read implementation for the "width" debugfs
9917 + * interface to the hardware latency detector. It can be used to determine
9918 + * for how many microseconds of the total window we will actively sample for any
9919 + * hardware-induced latency periods. Obviously, it is not possible to
9920 + * sample constantly and have the system respond to a sample reader, or,
9921 + * worse, without having the system appear to have gone out to lunch.
9923 +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
9924 + size_t cnt, loff_t *ppos)
9926 + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
9930 + * debug_width_fwrite - Write function for "width" debugfs entry
9931 + * @filp: The active open file structure for the debugfs "file"
9932 + * @ubuf: The user buffer that contains the value to write
9933 + * @cnt: The maximum number of bytes to write to "file"
9934 + * @ppos: The current position in the debugfs "file"
9936 + * This function provides a write implementation for the "width" debugfs
9937 + * interface to the hardware latency detector. It can be used to configure
9938 + * for how many microseconds of the total window we will actively sample for any
9939 + * hardware-induced latency periods. Obviously, it is not possible to
9940 + * sample constantly and have the system respond to a sample reader, or,
9941 + * worse, without having the system appear to have gone out to lunch. It
9942 + * is enforced that width is less than the total window size.
9944 +static ssize_t debug_width_fwrite(struct file *filp,
9945 + const char __user *ubuf,
9949 + char buf[U64STR_SIZE];
9950 + int csize = min(cnt, sizeof(buf));
9954 + memset(buf, '\0', sizeof(buf));
9955 + if (copy_from_user(buf, ubuf, csize))
9958 + buf[U64STR_SIZE-1] = '\0'; /* just in case */
9959 + err = kstrtoull(buf, 10, &val);
9963 + mutex_lock(&data.lock);
9964 + if (val < data.sample_window)
9965 + data.sample_width = val;
9967 + mutex_unlock(&data.lock);
9970 + mutex_unlock(&data.lock);
9973 + wake_up_process(kthread);
9979 + * debug_window_fopen - Open function for "window" debugfs entry
9980 + * @inode: The in-kernel inode representation of the debugfs "file"
9981 + * @filp: The active open file structure for the debugfs "file"
9983 + * This function provides an open implementation for the "window" debugfs
9984 + * interface to the hardware latency detector. The window is the total time
9985 + * in us that will be considered one sample period. Conceptually, windows
9986 + * occur back-to-back and contain a sample width period during which
9987 + * actual sampling occurs.
9989 +static int debug_window_fopen(struct inode *inode, struct file *filp)
9995 + * debug_window_fread - Read function for "window" debugfs entry
9996 + * @filp: The active open file structure for the debugfs "file"
9997 + * @ubuf: The userspace provided buffer to read value into
9998 + * @cnt: The maximum number of bytes to read
9999 + * @ppos: The current "file" position
10001 + * This function provides a read implementation for the "window" debugfs
10002 + * interface to the hardware latency detector. The window is the total time
10003 + * in us that will be considered one sample period. Conceptually, windows
10004 + * occur back-to-back and contain a sample width period during which
10005 + * actual sampling occurs. Can be used to read the total window size.
10007 +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
10008 + size_t cnt, loff_t *ppos)
10010 + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
10014 + * debug_window_fwrite - Write function for "window" debugfs entry
10015 + * @filp: The active open file structure for the debugfs "file"
10016 + * @ubuf: The user buffer that contains the value to write
10017 + * @cnt: The maximum number of bytes to write to "file"
10018 + * @ppos: The current position in the debugfs "file"
10020 + * This function provides a write implementation for the "window" debugfs
10021 + * interface to the hardware latency detector. The window is the total time
10022 + * in us that will be considered one sample period. Conceptually, windows
10023 + * occur back-to-back and contain a sample width period during which
10024 + * actual sampling occurs. Can be used to write a new total window size. It
10025 + * is enforced that any value written must be greater than the sample width
10026 + * size, or an error results.
10028 +static ssize_t debug_window_fwrite(struct file *filp,
10029 + const char __user *ubuf,
10033 + char buf[U64STR_SIZE];
10034 + int csize = min(cnt, sizeof(buf));
10038 + memset(buf, '\0', sizeof(buf));
10039 + if (copy_from_user(buf, ubuf, csize))
10042 + buf[U64STR_SIZE-1] = '\0'; /* just in case */
10043 + err = kstrtoull(buf, 10, &val);
10047 + mutex_lock(&data.lock);
10048 + if (data.sample_width < val)
10049 + data.sample_window = val;
10051 + mutex_unlock(&data.lock);
10054 + mutex_unlock(&data.lock);
10060 + * Function pointers for the "count" debugfs file operations
10062 +static const struct file_operations count_fops = {
10063 + .open = debug_count_fopen,
10064 + .read = debug_count_fread,
10065 + .write = debug_count_fwrite,
10066 + .owner = THIS_MODULE,
10070 + * Function pointers for the "enable" debugfs file operations
10072 +static const struct file_operations enable_fops = {
10073 + .open = debug_enable_fopen,
10074 + .read = debug_enable_fread,
10075 + .write = debug_enable_fwrite,
10076 + .owner = THIS_MODULE,
10080 + * Function pointers for the "max" debugfs file operations
10082 +static const struct file_operations max_fops = {
10083 + .open = debug_max_fopen,
10084 + .read = debug_max_fread,
10085 + .write = debug_max_fwrite,
10086 + .owner = THIS_MODULE,
10090 + * Function pointers for the "sample" debugfs file operations
10092 +static const struct file_operations sample_fops = {
10093 + .open = debug_sample_fopen,
10094 + .read = debug_sample_fread,
10095 + .release = debug_sample_release,
10096 + .owner = THIS_MODULE,
10100 + * Function pointers for the "threshold" debugfs file operations
10102 +static const struct file_operations threshold_fops = {
10103 + .open = debug_threshold_fopen,
10104 + .read = debug_threshold_fread,
10105 + .write = debug_threshold_fwrite,
10106 + .owner = THIS_MODULE,
10110 + * Function pointers for the "width" debugfs file operations
10112 +static const struct file_operations width_fops = {
10113 + .open = debug_width_fopen,
10114 + .read = debug_width_fread,
10115 + .write = debug_width_fwrite,
10116 + .owner = THIS_MODULE,
10120 + * Function pointers for the "window" debugfs file operations
10122 +static const struct file_operations window_fops = {
10123 + .open = debug_window_fopen,
10124 + .read = debug_window_fread,
10125 + .write = debug_window_fwrite,
10126 + .owner = THIS_MODULE,
10130 + * init_debugfs - A function to initialize the debugfs interface files
10132 + * This function creates entries in debugfs for "hwlat_detector", including
10133 + * files to read values from the detector, current samples, and the
10134 + * maximum sample that has been captured since the hardware latency
10135 + * detector was started.
10137 +static int init_debugfs(void)
10139 + int ret = -ENOMEM;
10141 + debug_dir = debugfs_create_dir(DRVNAME, NULL);
10143 + goto err_debug_dir;
10145 + debug_sample = debugfs_create_file("sample", 0444,
10148 + if (!debug_sample)
10151 + debug_count = debugfs_create_file("count", 0444,
10154 + if (!debug_count)
10157 + debug_max = debugfs_create_file("max", 0444,
10163 + debug_sample_window = debugfs_create_file("window", 0644,
10166 + if (!debug_sample_window)
10169 + debug_sample_width = debugfs_create_file("width", 0644,
10172 + if (!debug_sample_width)
10175 + debug_threshold = debugfs_create_file("threshold", 0644,
10177 + &threshold_fops);
10178 + if (!debug_threshold)
10179 + goto err_threshold;
10181 + debug_enable = debugfs_create_file("enable", 0644,
10182 + debug_dir, &enabled,
10184 + if (!debug_enable)
10193 + debugfs_remove(debug_threshold);
10195 + debugfs_remove(debug_sample_width);
10197 + debugfs_remove(debug_sample_window);
10199 + debugfs_remove(debug_max);
10201 + debugfs_remove(debug_count);
10203 + debugfs_remove(debug_sample);
10205 + debugfs_remove(debug_dir);
10212 + * free_debugfs - A function to cleanup the debugfs file interface
10214 +static void free_debugfs(void)
10216 + /* could also use a debugfs_remove_recursive */
10217 + debugfs_remove(debug_enable);
10218 + debugfs_remove(debug_threshold);
10219 + debugfs_remove(debug_sample_width);
10220 + debugfs_remove(debug_sample_window);
10221 + debugfs_remove(debug_max);
10222 + debugfs_remove(debug_count);
10223 + debugfs_remove(debug_sample);
10224 + debugfs_remove(debug_dir);
10228 + * detector_init - Standard module initialization code
10230 +static int detector_init(void)
10232 + int ret = -ENOMEM;
10234 + pr_info(BANNER "version %s\n", VERSION);
10236 + ret = init_stats();
10240 + ret = init_debugfs();
10245 + ret = start_kthread();
10250 + ring_buffer_free(ring_buffer);
10257 + * detector_exit - Standard module cleanup code
10259 +static void detector_exit(void)
10265 + err = stop_kthread();
10267 + pr_err(BANNER "cannot stop kthread\n");
10271 + ring_buffer_free(ring_buffer); /* free up the ring buffer */
10275 +module_init(detector_init);
10276 +module_exit(detector_exit);
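The "threshold", "width" and "window" files above all route through simple_data_read()/simple_data_write(), helpers defined earlier in hwlat_detector.c and not reproduced in this excerpt. For orientation only, here is a hedged sketch of what such a u64 debugfs read/write pair typically looks like; the example_* names are placeholders, not the patch's code.

/* Hedged sketch of a debugfs-style u64 read/write pair, in the spirit of
 * the simple_data_read()/simple_data_write() helpers used above. */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>

static DEFINE_MUTEX(example_lock);
static u64 example_value;

static ssize_t example_read(struct file *filp, char __user *ubuf,
                            size_t cnt, loff_t *ppos)
{
    char buf[32];
    int len;

    mutex_lock(&example_lock);
    len = scnprintf(buf, sizeof(buf), "%llu\n", example_value);
    mutex_unlock(&example_lock);

    return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}

static ssize_t example_write(struct file *filp, const char __user *ubuf,
                             size_t cnt, loff_t *ppos)
{
    char buf[32];
    u64 val;
    int err;

    if (cnt >= sizeof(buf))
        cnt = sizeof(buf) - 1;
    if (copy_from_user(buf, ubuf, cnt))
        return -EFAULT;
    buf[cnt] = '\0';

    err = kstrtoull(buf, 10, &val);
    if (err)
        return err;

    mutex_lock(&example_lock);
    example_value = val;
    mutex_unlock(&example_lock);

    return cnt;
}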
10277 diff -Nur linux-4.4.46.orig/drivers/misc/Kconfig linux-4.4.46/drivers/misc/Kconfig
10278 --- linux-4.4.46.orig/drivers/misc/Kconfig 2017-02-01 08:31:11.000000000 +0100
10279 +++ linux-4.4.46/drivers/misc/Kconfig 2017-02-03 17:18:05.683416484 +0100
10282 bool "Atmel AT32/AT91 Timer/Counter Library"
10283 depends on (AVR32 || ARCH_AT91)
10284 + default y if PREEMPT_RT_FULL
10286 Select this if you want a library to allocate the Timer/Counter
10287 blocks found on many Atmel processors. This facilitates using
10289 are combined to make a single 32-bit timer.
10291 When GENERIC_CLOCKEVENTS is defined, the third timer channel
10292 - may be used as a clock event device supporting oneshot mode
10293 - (delays of up to two seconds) based on the 32 KiHz clock.
10294 + may be used as a clock event device supporting oneshot mode.
10296 config ATMEL_TCB_CLKSRC_BLOCK
10299 TC can be used for other purposes, such as PWM generation and
10302 +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
10303 + bool "TC Block use 32 KiHz clock"
10304 + depends on ATMEL_TCB_CLKSRC
10305 + default y if !PREEMPT_RT_FULL
10307 + Select this to use 32 KiHz base clock rate as TC block clock
10308 + source for clock events.
10312 tristate "Dummy IRQ handler"
10314 @@ -113,6 +122,35 @@
10315 for information on the specific driver level and support statement
10316 for your IBM server.
10318 +config HWLAT_DETECTOR
10319 + tristate "Testing module to detect hardware-induced latencies"
10320 + depends on DEBUG_FS
10321 + depends on RING_BUFFER
10324 + A simple hardware latency detector. Use this module to detect
10325 + large latencies introduced by the behavior of the underlying
10326 + system firmware external to Linux. We do this with periodic
10327 + use of stop_machine to grab all available CPUs and measure
10328 + for unexplainable gaps in the CPU timestamp counter(s). By
10329 + default, the module is not enabled until the "enable" file
10330 + within the "hwlat_detector" debugfs directory is toggled.
10332 + This module is often used to detect SMI (System Management
10333 + Interrupts) on x86 systems, though it is not x86 specific. To
10334 + this end, we default to using a sample window of 1 second,
10335 + during which we will sample for 0.5 seconds. If an SMI or
10336 + similar event occurs during that time, it is recorded
10337 + into an 8K-sample global ring buffer until retrieved.
10339 + WARNING: This software should never be enabled (it can be built
10340 + but should not be turned on after it is loaded) in a production
10341 + environment where high latencies are a concern since the
10342 + sampling mechanism actually introduces latencies for
10343 + regular tasks while the CPU(s) are being held.
10348 tristate "Sensable PHANToM (PCI)"
10350 diff -Nur linux-4.4.46.orig/drivers/misc/Makefile linux-4.4.46/drivers/misc/Makefile
10351 --- linux-4.4.46.orig/drivers/misc/Makefile 2017-02-01 08:31:11.000000000 +0100
10352 +++ linux-4.4.46/drivers/misc/Makefile 2017-02-03 17:18:05.683416484 +0100
10354 obj-$(CONFIG_HMC6352) += hmc6352.o
10357 +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
10358 obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
10359 obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
10360 obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
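With CONFIG_HWLAT_DETECTOR built and the module loaded, the interface described in the Kconfig help above is driven entirely through debugfs. A hedged userspace sketch follows; it assumes debugfs is mounted at /sys/kernel/debug and uses the file names from the help text and the code above.

/* Hedged userspace sketch: enable hwlat_detector and stream its samples.
 * Assumes debugfs is mounted at /sys/kernel/debug. */
#include <stdio.h>

#define HWLAT_DIR "/sys/kernel/debug/hwlat_detector"

int main(void)
{
    char line[128];
    FILE *f;

    f = fopen(HWLAT_DIR "/enable", "w");
    if (!f) {
        perror("enable");
        return 1;
    }
    fputs("1\n", f);
    fclose(f);

    f = fopen(HWLAT_DIR "/sample", "r");
    if (!f) {
        perror("sample");
        return 1;
    }
    /* Each line: seconds.nanoseconds <TAB> duration <TAB> outer duration */
    while (fgets(line, sizeof(line), f))
        fputs(line, stdout);
    fclose(f);
    return 0;
}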
10361 diff -Nur linux-4.4.46.orig/drivers/mmc/host/mmci.c linux-4.4.46/drivers/mmc/host/mmci.c
10362 --- linux-4.4.46.orig/drivers/mmc/host/mmci.c 2017-02-01 08:31:11.000000000 +0100
10363 +++ linux-4.4.46/drivers/mmc/host/mmci.c 2017-02-03 17:18:05.687416638 +0100
10364 @@ -1155,15 +1155,12 @@
10365 struct sg_mapping_iter *sg_miter = &host->sg_miter;
10366 struct variant_data *variant = host->variant;
10367 void __iomem *base = host->base;
10368 - unsigned long flags;
10371 status = readl(base + MMCISTATUS);
10373 dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
10375 - local_irq_save(flags);
10378 unsigned int remain, len;
10380 @@ -1203,8 +1200,6 @@
10382 sg_miter_stop(sg_miter);
10384 - local_irq_restore(flags);
10387 * If we have less than the fifo 'half-full' threshold to transfer,
10388 * trigger a PIO interrupt as soon as any data is available.
10389 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/3com/3c59x.c linux-4.4.46/drivers/net/ethernet/3com/3c59x.c
10390 --- linux-4.4.46.orig/drivers/net/ethernet/3com/3c59x.c 2017-02-01 08:31:11.000000000 +0100
10391 +++ linux-4.4.46/drivers/net/ethernet/3com/3c59x.c 2017-02-03 17:18:05.687416638 +0100
10392 @@ -842,9 +842,9 @@
10394 struct vortex_private *vp = netdev_priv(dev);
10395 unsigned long flags;
10396 - local_irq_save(flags);
10397 + local_irq_save_nort(flags);
10398 (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
10399 - local_irq_restore(flags);
10400 + local_irq_restore_nort(flags);
10404 @@ -1916,12 +1916,12 @@
10405 * Block interrupts because vortex_interrupt does a bare spin_lock()
10407 unsigned long flags;
10408 - local_irq_save(flags);
10409 + local_irq_save_nort(flags);
10410 if (vp->full_bus_master_tx)
10411 boomerang_interrupt(dev->irq, dev);
10413 vortex_interrupt(dev->irq, dev);
10414 - local_irq_restore(flags);
10415 + local_irq_restore_nort(flags);
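local_irq_save_nort()/local_irq_restore_nort() are helpers introduced elsewhere in this patch set: on non-RT kernels they are the plain local_irq_save()/local_irq_restore(), while on PREEMPT_RT_FULL they avoid disabling hardware interrupts, since the section they guard is serialized by sleeping locks instead. The following is only a sketch of that intent, not the patch's literal definition.

/* Hedged sketch of the _nort idea; the real macros live in another part of
 * this series and may differ in detail. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)    do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags) do { (void)(flags); } while (0)
#else
# define local_irq_save_nort(flags)    local_irq_save(flags)
# define local_irq_restore_nort(flags) local_irq_restore(flags)
#endif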
10419 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c linux-4.4.46/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
10420 --- linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2017-02-01 08:31:11.000000000 +0100
10421 +++ linux-4.4.46/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2017-02-03 17:18:05.687416638 +0100
10422 @@ -2221,11 +2221,7 @@
10425 tpd_req = atl1c_cal_tpd_req(skb);
10426 - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
10427 - if (netif_msg_pktdata(adapter))
10428 - dev_info(&adapter->pdev->dev, "tx locked\n");
10429 - return NETDEV_TX_LOCKED;
10431 + spin_lock_irqsave(&adapter->tx_lock, flags);
10433 if (atl1c_tpd_avail(adapter, type) < tpd_req) {
10434 /* no enough descriptor, just stop queue */
10435 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c linux-4.4.46/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
10436 --- linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2017-02-01 08:31:11.000000000 +0100
10437 +++ linux-4.4.46/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2017-02-03 17:18:05.687416638 +0100
10438 @@ -1880,8 +1880,7 @@
10439 return NETDEV_TX_OK;
10441 tpd_req = atl1e_cal_tdp_req(skb);
10442 - if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
10443 - return NETDEV_TX_LOCKED;
10444 + spin_lock_irqsave(&adapter->tx_lock, flags);
10446 if (atl1e_tpd_avail(adapter) < tpd_req) {
10447 /* no enough descriptor, just stop queue */
10448 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/chelsio/cxgb/sge.c linux-4.4.46/drivers/net/ethernet/chelsio/cxgb/sge.c
10449 --- linux-4.4.46.orig/drivers/net/ethernet/chelsio/cxgb/sge.c 2017-02-01 08:31:11.000000000 +0100
10450 +++ linux-4.4.46/drivers/net/ethernet/chelsio/cxgb/sge.c 2017-02-03 17:18:05.691416794 +0100
10451 @@ -1664,8 +1664,7 @@
10452 struct cmdQ *q = &sge->cmdQ[qid];
10453 unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
10455 - if (!spin_trylock(&q->lock))
10456 - return NETDEV_TX_LOCKED;
10457 + spin_lock(&q->lock);
10459 reclaim_completed_tx(sge, q);
10461 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/neterion/s2io.c linux-4.4.46/drivers/net/ethernet/neterion/s2io.c
10462 --- linux-4.4.46.orig/drivers/net/ethernet/neterion/s2io.c 2017-02-01 08:31:11.000000000 +0100
10463 +++ linux-4.4.46/drivers/net/ethernet/neterion/s2io.c 2017-02-03 17:18:05.691416794 +0100
10464 @@ -4084,12 +4084,7 @@
10465 [skb->priority & (MAX_TX_FIFOS - 1)];
10466 fifo = &mac_control->fifos[queue];
10468 - if (do_spin_lock)
10469 - spin_lock_irqsave(&fifo->tx_lock, flags);
10471 - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
10472 - return NETDEV_TX_LOCKED;
10474 + spin_lock_irqsave(&fifo->tx_lock, flags);
10476 if (sp->config.multiq) {
10477 if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
10478 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c linux-4.4.46/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
10479 --- linux-4.4.46.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2017-02-01 08:31:11.000000000 +0100
10480 +++ linux-4.4.46/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2017-02-03 17:18:05.691416794 +0100
10481 @@ -2137,10 +2137,8 @@
10482 struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
10483 unsigned long flags;
10485 - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
10486 - /* Collision - tell upper layer to requeue */
10487 - return NETDEV_TX_LOCKED;
10489 + spin_lock_irqsave(&tx_ring->tx_lock, flags);
10491 if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
10492 netif_stop_queue(netdev);
10493 spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
10494 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/realtek/8139too.c linux-4.4.46/drivers/net/ethernet/realtek/8139too.c
10495 --- linux-4.4.46.orig/drivers/net/ethernet/realtek/8139too.c 2017-02-01 08:31:11.000000000 +0100
10496 +++ linux-4.4.46/drivers/net/ethernet/realtek/8139too.c 2017-02-03 17:18:05.691416794 +0100
10497 @@ -2229,7 +2229,7 @@
10498 struct rtl8139_private *tp = netdev_priv(dev);
10499 const int irq = tp->pci_dev->irq;
10501 - disable_irq(irq);
10502 + disable_irq_nosync(irq);
10503 rtl8139_interrupt(irq, dev);
10506 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/tehuti/tehuti.c linux-4.4.46/drivers/net/ethernet/tehuti/tehuti.c
10507 --- linux-4.4.46.orig/drivers/net/ethernet/tehuti/tehuti.c 2017-02-01 08:31:11.000000000 +0100
10508 +++ linux-4.4.46/drivers/net/ethernet/tehuti/tehuti.c 2017-02-03 17:18:05.691416794 +0100
10509 @@ -1629,13 +1629,8 @@
10510 unsigned long flags;
10513 - local_irq_save(flags);
10514 - if (!spin_trylock(&priv->tx_lock)) {
10515 - local_irq_restore(flags);
10516 - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
10517 - BDX_DRV_NAME, ndev->name);
10518 - return NETDEV_TX_LOCKED;
10521 + spin_lock_irqsave(&priv->tx_lock, flags);
10523 /* build tx descriptor */
10524 BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
10525 diff -Nur linux-4.4.46.orig/drivers/net/rionet.c linux-4.4.46/drivers/net/rionet.c
10526 --- linux-4.4.46.orig/drivers/net/rionet.c 2017-02-01 08:31:11.000000000 +0100
10527 +++ linux-4.4.46/drivers/net/rionet.c 2017-02-03 17:18:05.691416794 +0100
10528 @@ -174,11 +174,7 @@
10529 unsigned long flags;
10532 - local_irq_save(flags);
10533 - if (!spin_trylock(&rnet->tx_lock)) {
10534 - local_irq_restore(flags);
10535 - return NETDEV_TX_LOCKED;
10537 + spin_lock_irqsave(&rnet->tx_lock, flags);
10539 if (is_multicast_ether_addr(eth->h_dest))
10540 add_num = nets[rnet->mport->id].nact;
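The atl1c, atl1e, cxgb, s2io, pch_gbe, tehuti and rionet hunks above all remove the same idiom: try the TX lock and return NETDEV_TX_LOCKED on contention. On RT, spinlocks are sleeping locks and the holder may be preempted, so the trylock would fail often and for long stretches; taking the lock unconditionally is the robust conversion. A hedged sketch of the resulting ndo_start_xmit shape follows; example_priv and its fields are placeholders, not any of the drivers above.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>

struct example_priv {
    spinlock_t tx_lock;
};

static netdev_tx_t example_start_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
{
    struct example_priv *priv = netdev_priv(dev);
    unsigned long flags;

    /* Previously: spin_trylock_irqsave() and return NETDEV_TX_LOCKED on
     * failure. Now: acquire the lock; on RT this may block briefly, which
     * is acceptable in this context. */
    spin_lock_irqsave(&priv->tx_lock, flags);

    /* ... post the skb to the hardware TX ring ... */

    spin_unlock_irqrestore(&priv->tx_lock, flags);
    return NETDEV_TX_OK;
}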
10541 diff -Nur linux-4.4.46.orig/drivers/net/wireless/orinoco/orinoco_usb.c linux-4.4.46/drivers/net/wireless/orinoco/orinoco_usb.c
10542 --- linux-4.4.46.orig/drivers/net/wireless/orinoco/orinoco_usb.c 2017-02-01 08:31:11.000000000 +0100
10543 +++ linux-4.4.46/drivers/net/wireless/orinoco/orinoco_usb.c 2017-02-03 17:18:05.695416949 +0100
10544 @@ -697,7 +697,7 @@
10545 while (!ctx->done.done && msecs--)
10548 - wait_event_interruptible(ctx->done.wait,
10549 + swait_event_interruptible(ctx->done.wait,
10553 diff -Nur linux-4.4.46.orig/drivers/pci/access.c linux-4.4.46/drivers/pci/access.c
10554 --- linux-4.4.46.orig/drivers/pci/access.c 2017-02-01 08:31:11.000000000 +0100
10555 +++ linux-4.4.46/drivers/pci/access.c 2017-02-03 17:18:05.695416949 +0100
10556 @@ -561,7 +561,7 @@
10557 WARN_ON(!dev->block_cfg_access);
10559 dev->block_cfg_access = 0;
10560 - wake_up_all(&pci_cfg_wait);
10561 + wake_up_all_locked(&pci_cfg_wait);
10562 raw_spin_unlock_irqrestore(&pci_lock, flags);
10564 EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
10565 diff -Nur linux-4.4.46.orig/drivers/scsi/fcoe/fcoe.c linux-4.4.46/drivers/scsi/fcoe/fcoe.c
10566 --- linux-4.4.46.orig/drivers/scsi/fcoe/fcoe.c 2017-02-01 08:31:11.000000000 +0100
10567 +++ linux-4.4.46/drivers/scsi/fcoe/fcoe.c 2017-02-03 17:18:05.695416949 +0100
10568 @@ -1286,7 +1286,7 @@
10569 struct sk_buff *skb;
10571 struct fcoe_percpu_s *p0;
10572 - unsigned targ_cpu = get_cpu();
10573 + unsigned targ_cpu = get_cpu_light();
10574 #endif /* CONFIG_SMP */
10576 FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
10577 @@ -1342,7 +1342,7 @@
10579 spin_unlock_bh(&p->fcoe_rx_list.lock);
10585 * This a non-SMP scenario where the singular Rx thread is
10586 @@ -1566,11 +1566,11 @@
10587 static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
10589 struct fcoe_percpu_s *fps;
10591 + int rc, cpu = get_cpu_light();
10593 - fps = &get_cpu_var(fcoe_percpu);
10594 + fps = &per_cpu(fcoe_percpu, cpu);
10595 rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
10596 - put_cpu_var(fcoe_percpu);
10601 @@ -1766,11 +1766,11 @@
10605 - stats = per_cpu_ptr(lport->stats, get_cpu());
10606 + stats = per_cpu_ptr(lport->stats, get_cpu_light());
10607 stats->InvalidCRCCount++;
10608 if (stats->InvalidCRCCount < 5)
10609 printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
10615 @@ -1814,7 +1814,7 @@
10617 hp = (struct fcoe_hdr *) skb_network_header(skb);
10619 - stats = per_cpu_ptr(lport->stats, get_cpu());
10620 + stats = per_cpu_ptr(lport->stats, get_cpu_light());
10621 if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
10622 if (stats->ErrorFrames < 5)
10623 printk(KERN_WARNING "fcoe: FCoE version "
10624 @@ -1846,13 +1846,13 @@
10627 if (!fcoe_filter_frames(lport, fp)) {
10630 fc_exch_recv(lport, fp);
10634 stats->ErrorFrames++;
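get_cpu_light()/put_cpu_light() are another helper pair from this series: on non-RT they are plain get_cpu()/put_cpu(), on RT they only disable migration, so the per-CPU section stays on one CPU but remains preemptible. A hedged sketch of that shape; the real definitions live elsewhere in the patch.

/* Hedged sketch of the get_cpu_light() idea used in the fcoe/libfc hunks. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()   ({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()   migrate_enable()
#else
# define get_cpu_light()   get_cpu()
# define put_cpu_light()   put_cpu()
#endif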
10640 diff -Nur linux-4.4.46.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-4.4.46/drivers/scsi/fcoe/fcoe_ctlr.c
10641 --- linux-4.4.46.orig/drivers/scsi/fcoe/fcoe_ctlr.c 2017-02-01 08:31:11.000000000 +0100
10642 +++ linux-4.4.46/drivers/scsi/fcoe/fcoe_ctlr.c 2017-02-03 17:18:05.695416949 +0100
10643 @@ -831,7 +831,7 @@
10645 INIT_LIST_HEAD(&del_list);
10647 - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
10648 + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
10650 list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
10651 deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
10652 @@ -867,7 +867,7 @@
10653 sel_time = fcf->time;
10659 list_for_each_entry_safe(fcf, next, &del_list, list) {
10660 /* Removes fcf from current list */
10661 diff -Nur linux-4.4.46.orig/drivers/scsi/libfc/fc_exch.c linux-4.4.46/drivers/scsi/libfc/fc_exch.c
10662 --- linux-4.4.46.orig/drivers/scsi/libfc/fc_exch.c 2017-02-01 08:31:11.000000000 +0100
10663 +++ linux-4.4.46/drivers/scsi/libfc/fc_exch.c 2017-02-03 17:18:05.695416949 +0100
10664 @@ -814,10 +814,10 @@
10666 memset(ep, 0, sizeof(*ep));
10669 + cpu = get_cpu_light();
10670 pool = per_cpu_ptr(mp->pool, cpu);
10671 spin_lock_bh(&pool->lock);
10675 /* peek cache of free slot */
10676 if (pool->left != FC_XID_UNKNOWN) {
10677 diff -Nur linux-4.4.46.orig/drivers/scsi/libsas/sas_ata.c linux-4.4.46/drivers/scsi/libsas/sas_ata.c
10678 --- linux-4.4.46.orig/drivers/scsi/libsas/sas_ata.c 2017-02-01 08:31:11.000000000 +0100
10679 +++ linux-4.4.46/drivers/scsi/libsas/sas_ata.c 2017-02-03 17:18:05.695416949 +0100
10680 @@ -190,7 +190,7 @@
10681 /* TODO: audit callers to ensure they are ready for qc_issue to
10682 * unconditionally re-enable interrupts
10684 - local_irq_save(flags);
10685 + local_irq_save_nort(flags);
10686 spin_unlock(ap->lock);
10688 /* If the device fell off, no sense in issuing commands */
10689 @@ -255,7 +255,7 @@
10692 spin_lock(ap->lock);
10693 - local_irq_restore(flags);
10694 + local_irq_restore_nort(flags);
10698 diff -Nur linux-4.4.46.orig/drivers/scsi/qla2xxx/qla_inline.h linux-4.4.46/drivers/scsi/qla2xxx/qla_inline.h
10699 --- linux-4.4.46.orig/drivers/scsi/qla2xxx/qla_inline.h 2017-02-01 08:31:11.000000000 +0100
10700 +++ linux-4.4.46/drivers/scsi/qla2xxx/qla_inline.h 2017-02-03 17:18:05.695416949 +0100
10701 @@ -59,12 +59,12 @@
10703 unsigned long flags;
10704 struct qla_hw_data *ha = rsp->hw;
10705 - local_irq_save(flags);
10706 + local_irq_save_nort(flags);
10707 if (IS_P3P_TYPE(ha))
10708 qla82xx_poll(0, rsp);
10710 ha->isp_ops->intr_handler(0, rsp);
10711 - local_irq_restore(flags);
10712 + local_irq_restore_nort(flags);
10715 static inline uint8_t *
10716 diff -Nur linux-4.4.46.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-4.4.46/drivers/thermal/x86_pkg_temp_thermal.c
10717 --- linux-4.4.46.orig/drivers/thermal/x86_pkg_temp_thermal.c 2017-02-01 08:31:11.000000000 +0100
10718 +++ linux-4.4.46/drivers/thermal/x86_pkg_temp_thermal.c 2017-02-03 17:18:05.695416949 +0100
10720 #include <linux/pm.h>
10721 #include <linux/thermal.h>
10722 #include <linux/debugfs.h>
10723 +#include <linux/swork.h>
10724 #include <asm/cpu_device_id.h>
10725 #include <asm/mce.h>
10727 @@ -352,7 +353,7 @@
10731 -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
10732 +static void platform_thermal_notify_work(struct swork_event *event)
10734 unsigned long flags;
10735 int cpu = smp_processor_id();
10736 @@ -369,7 +370,7 @@
10737 pkg_work_scheduled[phy_id]) {
10738 disable_pkg_thres_interrupt();
10739 spin_unlock_irqrestore(&pkg_work_lock, flags);
10743 pkg_work_scheduled[phy_id] = 1;
10744 spin_unlock_irqrestore(&pkg_work_lock, flags);
10745 @@ -378,9 +379,48 @@
10746 schedule_delayed_work_on(cpu,
10747 &per_cpu(pkg_temp_thermal_threshold_work, cpu),
10748 msecs_to_jiffies(notify_delay_ms));
10751 +#ifdef CONFIG_PREEMPT_RT_FULL
10752 +static struct swork_event notify_work;
10754 +static int thermal_notify_work_init(void)
10758 + err = swork_get();
10762 + INIT_SWORK(&notify_work, platform_thermal_notify_work);
10766 +static void thermal_notify_work_cleanup(void)
10771 +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
10773 + swork_queue(&notify_work);
10777 +#else /* !CONFIG_PREEMPT_RT_FULL */
10779 +static int thermal_notify_work_init(void) { return 0; }
10781 +static void thermal_notify_work_cleanup(void) { }
10783 +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
10785 + platform_thermal_notify_work(NULL);
10789 +#endif /* CONFIG_PREEMPT_RT_FULL */
10791 static int find_siblings_cpu(int cpu)
10794 @@ -584,6 +624,9 @@
10795 if (!x86_match_cpu(pkg_temp_thermal_ids))
10798 + if (!thermal_notify_work_init())
10801 spin_lock_init(&pkg_work_lock);
10802 platform_thermal_package_notify =
10803 pkg_temp_thermal_platform_thermal_notify;
10804 @@ -608,7 +651,7 @@
10805 kfree(pkg_work_scheduled);
10806 platform_thermal_package_notify = NULL;
10807 platform_thermal_package_rate_control = NULL;
10809 + thermal_notify_work_cleanup();
10813 @@ -633,6 +676,7 @@
10814 mutex_unlock(&phy_dev_list_mutex);
10815 platform_thermal_package_notify = NULL;
10816 platform_thermal_package_rate_control = NULL;
10817 + thermal_notify_work_cleanup();
10818 for_each_online_cpu(i)
10819 cancel_delayed_work_sync(
10820 &per_cpu(pkg_temp_thermal_threshold_work, i));
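The thermal notify callback runs from interrupt context, so on RT the work is bounced through the "simple work" (swork) infrastructure added elsewhere in this series: swork_get() brings up the worker thread, INIT_SWORK() binds an event to a handler, and swork_queue() defers execution to that thread; the cleanup line is elided in this excerpt, and the counterpart swork_put() used below is an assumption. A hedged, self-contained sketch of the same pattern, with example_* names as placeholders:

#include <linux/swork.h>

static struct swork_event example_event;

static void example_handler(struct swork_event *event)
{
    /* runs in the swork kthread, where sleeping locks are fine */
}

static int example_init(void)
{
    int err = swork_get();          /* take a reference on the worker */

    if (err)
        return err;
    INIT_SWORK(&example_event, example_handler);
    return 0;
}

static void example_notify(void)    /* callable from hard IRQ context */
{
    swork_queue(&example_event);
}

static void example_cleanup(void)
{
    swork_put();                    /* assumed counterpart of swork_get() */
}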
10821 diff -Nur linux-4.4.46.orig/drivers/tty/serial/8250/8250_core.c linux-4.4.46/drivers/tty/serial/8250/8250_core.c
10822 --- linux-4.4.46.orig/drivers/tty/serial/8250/8250_core.c 2017-02-01 08:31:11.000000000 +0100
10823 +++ linux-4.4.46/drivers/tty/serial/8250/8250_core.c 2017-02-03 17:18:05.695416949 +0100
10826 static unsigned int skip_txen_test; /* force skip of txen test at init time */
10828 -#define PASS_LIMIT 512
10830 + * On -rt we can have more delays, and legitimately
10831 + * so - so don't drop work spuriously and spam the
10834 +#ifdef CONFIG_PREEMPT_RT_FULL
10835 +# define PASS_LIMIT 1000000
10837 +# define PASS_LIMIT 512
10840 #include <asm/serial.h>
10842 diff -Nur linux-4.4.46.orig/drivers/tty/serial/8250/8250_port.c linux-4.4.46/drivers/tty/serial/8250/8250_port.c
10843 --- linux-4.4.46.orig/drivers/tty/serial/8250/8250_port.c 2017-02-01 08:31:11.000000000 +0100
10844 +++ linux-4.4.46/drivers/tty/serial/8250/8250_port.c 2017-02-03 17:18:05.695416949 +0100
10846 #include <linux/nmi.h>
10847 #include <linux/mutex.h>
10848 #include <linux/slab.h>
10849 +#include <linux/kdb.h>
10850 #include <linux/uaccess.h>
10851 #include <linux/pm_runtime.h>
10853 @@ -2843,9 +2844,9 @@
10855 serial8250_rpm_get(up);
10858 + if (port->sysrq || oops_in_progress)
10860 - else if (oops_in_progress)
10861 + else if (in_kdb_printk())
10862 locked = spin_trylock_irqsave(&port->lock, flags);
10864 spin_lock_irqsave(&port->lock, flags);
10865 diff -Nur linux-4.4.46.orig/drivers/tty/serial/amba-pl011.c linux-4.4.46/drivers/tty/serial/amba-pl011.c
10866 --- linux-4.4.46.orig/drivers/tty/serial/amba-pl011.c 2017-02-01 08:31:11.000000000 +0100
10867 +++ linux-4.4.46/drivers/tty/serial/amba-pl011.c 2017-02-03 17:18:05.695416949 +0100
10868 @@ -2067,13 +2067,19 @@
10870 clk_enable(uap->clk);
10872 - local_irq_save(flags);
10874 + * local_irq_save(flags);
10876 + * This local_irq_save() is nonsense. If we come in via sysrq
10877 + * handling then interrupts are already disabled. Aside from
10878 + * that the port.sysrq check is racy on SMP regardless.
10880 if (uap->port.sysrq)
10882 else if (oops_in_progress)
10883 - locked = spin_trylock(&uap->port.lock);
10884 + locked = spin_trylock_irqsave(&uap->port.lock, flags);
10886 - spin_lock(&uap->port.lock);
10887 + spin_lock_irqsave(&uap->port.lock, flags);
10890 * First save the CR then disable the interrupts
10891 @@ -2098,8 +2104,7 @@
10892 writew(old_cr, uap->port.membase + UART011_CR);
10895 - spin_unlock(&uap->port.lock);
10896 - local_irq_restore(flags);
10897 + spin_unlock_irqrestore(&uap->port.lock, flags);
10899 clk_disable(uap->clk);
10901 diff -Nur linux-4.4.46.orig/drivers/tty/serial/omap-serial.c linux-4.4.46/drivers/tty/serial/omap-serial.c
10902 --- linux-4.4.46.orig/drivers/tty/serial/omap-serial.c 2017-02-01 08:31:11.000000000 +0100
10903 +++ linux-4.4.46/drivers/tty/serial/omap-serial.c 2017-02-03 17:18:05.695416949 +0100
10904 @@ -1257,13 +1257,10 @@
10906 pm_runtime_get_sync(up->dev);
10908 - local_irq_save(flags);
10909 - if (up->port.sysrq)
10911 - else if (oops_in_progress)
10912 - locked = spin_trylock(&up->port.lock);
10913 + if (up->port.sysrq || oops_in_progress)
10914 + locked = spin_trylock_irqsave(&up->port.lock, flags);
10916 - spin_lock(&up->port.lock);
10917 + spin_lock_irqsave(&up->port.lock, flags);
10920 * First save the IER then disable the interrupts
10921 @@ -1292,8 +1289,7 @@
10922 pm_runtime_mark_last_busy(up->dev);
10923 pm_runtime_put_autosuspend(up->dev);
10925 - spin_unlock(&up->port.lock);
10926 - local_irq_restore(flags);
10927 + spin_unlock_irqrestore(&up->port.lock, flags);
10931 diff -Nur linux-4.4.46.orig/drivers/usb/core/hcd.c linux-4.4.46/drivers/usb/core/hcd.c
10932 --- linux-4.4.46.orig/drivers/usb/core/hcd.c 2017-02-01 08:31:11.000000000 +0100
10933 +++ linux-4.4.46/drivers/usb/core/hcd.c 2017-02-03 17:18:10.895617822 +0100
10934 @@ -1735,9 +1735,9 @@
10935 * and no one may trigger the above deadlock situation when
10936 * running complete() in tasklet.
10938 - local_irq_save(flags);
10939 + local_irq_save_nort(flags);
10940 urb->complete(urb);
10941 - local_irq_restore(flags);
10942 + local_irq_restore_nort(flags);
10944 usb_anchor_resume_wakeups(anchor);
10945 atomic_dec(&urb->use_count);
10946 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/function/f_fs.c linux-4.4.46/drivers/usb/gadget/function/f_fs.c
10947 --- linux-4.4.46.orig/drivers/usb/gadget/function/f_fs.c 2017-02-01 08:31:11.000000000 +0100
10948 +++ linux-4.4.46/drivers/usb/gadget/function/f_fs.c 2017-02-03 17:18:10.895617822 +0100
10949 @@ -1404,7 +1404,7 @@
10950 pr_info("%s(): freeing\n", __func__);
10951 ffs_data_clear(ffs);
10952 BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
10953 - waitqueue_active(&ffs->ep0req_completion.wait));
10954 + swait_active(&ffs->ep0req_completion.wait));
10955 kfree(ffs->dev_name);
10958 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/legacy/inode.c linux-4.4.46/drivers/usb/gadget/legacy/inode.c
10959 --- linux-4.4.46.orig/drivers/usb/gadget/legacy/inode.c 2017-02-01 08:31:11.000000000 +0100
10960 +++ linux-4.4.46/drivers/usb/gadget/legacy/inode.c 2017-02-03 17:18:10.895617822 +0100
10961 @@ -345,7 +345,7 @@
10962 spin_unlock_irq (&epdata->dev->lock);
10964 if (likely (value == 0)) {
10965 - value = wait_event_interruptible (done.wait, done.done);
10966 + value = swait_event_interruptible (done.wait, done.done);
10968 spin_lock_irq (&epdata->dev->lock);
10969 if (likely (epdata->ep != NULL)) {
10970 @@ -354,7 +354,7 @@
10971 usb_ep_dequeue (epdata->ep, epdata->req);
10972 spin_unlock_irq (&epdata->dev->lock);
10974 - wait_event (done.wait, done.done);
10975 + swait_event (done.wait, done.done);
10976 if (epdata->status == -ECONNRESET)
10977 epdata->status = -EINTR;
10979 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.c linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.c
10980 --- linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.c 2017-02-01 08:31:11.000000000 +0100
10981 +++ linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.c 2017-02-03 17:18:10.895617822 +0100
10983 #include <linux/device.h>
10984 #include <linux/dma-mapping.h>
10985 #include <linux/list.h>
10986 +#include <linux/mfd/syscon.h>
10987 #include <linux/platform_device.h>
10988 +#include <linux/regmap.h>
10989 #include <linux/usb/ch9.h>
10990 #include <linux/usb/gadget.h>
10991 #include <linux/usb/atmel_usba_udc.h>
10992 @@ -1888,20 +1890,15 @@
10994 static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on)
10996 - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
10999 - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
11001 - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
11002 + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
11003 + is_on ? AT91_PMC_BIASEN : 0);
11006 static void at91sam9g45_pulse_bias(struct usba_udc *udc)
11008 - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
11010 - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
11011 - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
11012 + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0);
11013 + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
11014 + AT91_PMC_BIASEN);
11017 static const struct usba_udc_errata at91sam9rl_errata = {
11018 @@ -1938,6 +1935,9 @@
11019 return ERR_PTR(-EINVAL);
11021 udc->errata = match->data;
11022 + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
11023 + if (udc->errata && IS_ERR(udc->pmc))
11024 + return ERR_CAST(udc->pmc);
11028 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.h linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.h
11029 --- linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.h 2017-02-01 08:31:11.000000000 +0100
11030 +++ linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.h 2017-02-03 17:18:10.895617822 +0100
11031 @@ -354,6 +354,8 @@
11032 struct dentry *debugfs_root;
11033 struct dentry *debugfs_regs;
11036 + struct regmap *pmc;
11039 static inline struct usba_ep *to_usba_ep(struct usb_ep *ep)
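The at91_pmc_read()/at91_pmc_write() read-modify-write sequences are replaced with regmap_update_bits() on the syscon regmap looked up during probe. Functionally, regmap_update_bits(map, reg, mask, val) is a locked read-modify-write; the following is a rough sketch of its semantics, not the in-tree implementation (drivers/base/regmap also handles locking, caching and skipping no-op writes).

#include <linux/regmap.h>

/* Rough semantics of regmap_update_bits(); example_update_bits is a
 * placeholder name. */
static int example_update_bits(struct regmap *map, unsigned int reg,
                               unsigned int mask, unsigned int val)
{
    unsigned int tmp;
    int ret;

    ret = regmap_read(map, reg, &tmp);
    if (ret)
        return ret;

    tmp = (tmp & ~mask) | (val & mask);
    return regmap_write(map, reg, tmp);
}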
11040 diff -Nur linux-4.4.46.orig/fs/aio.c linux-4.4.46/fs/aio.c
11041 --- linux-4.4.46.orig/fs/aio.c 2017-02-01 08:31:11.000000000 +0100
11042 +++ linux-4.4.46/fs/aio.c 2017-02-03 17:18:10.899617976 +0100
11044 #include <linux/ramfs.h>
11045 #include <linux/percpu-refcount.h>
11046 #include <linux/mount.h>
11047 +#include <linux/swork.h>
11049 #include <asm/kmap_types.h>
11050 #include <asm/uaccess.h>
11051 @@ -115,7 +116,7 @@
11052 struct page **ring_pages;
11055 - struct work_struct free_work;
11056 + struct swork_event free_work;
11059 * signals when all in-flight requests are done
11060 @@ -258,6 +259,7 @@
11061 .mount = aio_mount,
11062 .kill_sb = kill_anon_super,
11064 + BUG_ON(swork_get());
11065 aio_mnt = kern_mount(&aio_fs);
11066 if (IS_ERR(aio_mnt))
11067 panic("Failed to create aio fs mount.");
11068 @@ -573,9 +575,9 @@
11069 return cancel(&kiocb->common);
11072 -static void free_ioctx(struct work_struct *work)
11073 +static void free_ioctx(struct swork_event *sev)
11075 - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
11076 + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
11078 pr_debug("freeing %p\n", ctx);
11080 @@ -594,8 +596,8 @@
11081 if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
11082 complete(&ctx->rq_wait->comp);
11084 - INIT_WORK(&ctx->free_work, free_ioctx);
11085 - schedule_work(&ctx->free_work);
11086 + INIT_SWORK(&ctx->free_work, free_ioctx);
11087 + swork_queue(&ctx->free_work);
11091 @@ -603,9 +605,9 @@
11092 * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
11093 * now it's safe to cancel any that need to be.
11095 -static void free_ioctx_users(struct percpu_ref *ref)
11096 +static void free_ioctx_users_work(struct swork_event *sev)
11098 - struct kioctx *ctx = container_of(ref, struct kioctx, users);
11099 + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
11100 struct aio_kiocb *req;
11102 spin_lock_irq(&ctx->ctx_lock);
11103 @@ -624,6 +626,14 @@
11104 percpu_ref_put(&ctx->reqs);
11107 +static void free_ioctx_users(struct percpu_ref *ref)
11109 + struct kioctx *ctx = container_of(ref, struct kioctx, users);
11111 + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
11112 + swork_queue(&ctx->free_work);
11115 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
11117 unsigned i, new_nr;
11118 diff -Nur linux-4.4.46.orig/fs/autofs4/autofs_i.h linux-4.4.46/fs/autofs4/autofs_i.h
11119 --- linux-4.4.46.orig/fs/autofs4/autofs_i.h 2017-02-01 08:31:11.000000000 +0100
11120 +++ linux-4.4.46/fs/autofs4/autofs_i.h 2017-02-03 17:18:10.899617976 +0100
11122 #include <linux/sched.h>
11123 #include <linux/mount.h>
11124 #include <linux/namei.h>
11125 +#include <linux/delay.h>
11126 #include <asm/current.h>
11127 #include <asm/uaccess.h>
11129 diff -Nur linux-4.4.46.orig/fs/autofs4/expire.c linux-4.4.46/fs/autofs4/expire.c
11130 --- linux-4.4.46.orig/fs/autofs4/expire.c 2017-02-01 08:31:11.000000000 +0100
11131 +++ linux-4.4.46/fs/autofs4/expire.c 2017-02-03 17:18:10.899617976 +0100
11132 @@ -150,7 +150,7 @@
11133 parent = p->d_parent;
11134 if (!spin_trylock(&parent->d_lock)) {
11135 spin_unlock(&p->d_lock);
11140 spin_unlock(&p->d_lock);
11141 diff -Nur linux-4.4.46.orig/fs/buffer.c linux-4.4.46/fs/buffer.c
11142 --- linux-4.4.46.orig/fs/buffer.c 2017-02-01 08:31:11.000000000 +0100
11143 +++ linux-4.4.46/fs/buffer.c 2017-02-03 17:18:10.899617976 +0100
11144 @@ -305,8 +305,7 @@
11145 * decide that the page is now completely done.
11147 first = page_buffers(page);
11148 - local_irq_save(flags);
11149 - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
11150 + flags = bh_uptodate_lock_irqsave(first);
11151 clear_buffer_async_read(bh);
11154 @@ -319,8 +318,7 @@
11156 tmp = tmp->b_this_page;
11157 } while (tmp != bh);
11158 - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11159 - local_irq_restore(flags);
11160 + bh_uptodate_unlock_irqrestore(first, flags);
11163 * If none of the buffers had errors and they are all
11164 @@ -332,9 +330,7 @@
11168 - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11169 - local_irq_restore(flags);
11171 + bh_uptodate_unlock_irqrestore(first, flags);
11175 @@ -362,8 +358,7 @@
11178 first = page_buffers(page);
11179 - local_irq_save(flags);
11180 - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
11181 + flags = bh_uptodate_lock_irqsave(first);
11183 clear_buffer_async_write(bh);
11185 @@ -375,15 +370,12 @@
11187 tmp = tmp->b_this_page;
11189 - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11190 - local_irq_restore(flags);
11191 + bh_uptodate_unlock_irqrestore(first, flags);
11192 end_page_writeback(page);
11196 - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11197 - local_irq_restore(flags);
11199 + bh_uptodate_unlock_irqrestore(first, flags);
11201 EXPORT_SYMBOL(end_buffer_async_write);
11203 @@ -3325,6 +3317,7 @@
11204 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
11206 INIT_LIST_HEAD(&ret->b_assoc_buffers);
11207 + buffer_head_init_locks(ret);
11209 __this_cpu_inc(bh_accounting.nr);
11211 diff -Nur linux-4.4.46.orig/fs/dcache.c linux-4.4.46/fs/dcache.c
11212 --- linux-4.4.46.orig/fs/dcache.c 2017-02-01 08:31:11.000000000 +0100
11213 +++ linux-4.4.46/fs/dcache.c 2017-02-03 17:18:10.899617976 +0100
11215 #include <linux/mm.h>
11216 #include <linux/fs.h>
11217 #include <linux/fsnotify.h>
11218 +#include <linux/delay.h>
11219 #include <linux/slab.h>
11220 #include <linux/init.h>
11221 #include <linux/hash.h>
11222 @@ -747,6 +748,8 @@
11224 void dput(struct dentry *dentry)
11226 + struct dentry *parent;
11228 if (unlikely(!dentry))
11231 @@ -783,9 +786,18 @@
11235 - dentry = dentry_kill(dentry);
11238 + parent = dentry_kill(dentry);
11242 + if (parent == dentry) {
11243 + /* the task with the highest priority won't schedule */
11244 + r = cond_resched();
11253 @@ -2397,7 +2409,7 @@
11254 if (dentry->d_lockref.count == 1) {
11255 if (!spin_trylock(&inode->i_lock)) {
11256 spin_unlock(&dentry->d_lock);
11261 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
11262 diff -Nur linux-4.4.46.orig/fs/eventpoll.c linux-4.4.46/fs/eventpoll.c
11263 --- linux-4.4.46.orig/fs/eventpoll.c 2017-02-01 08:31:11.000000000 +0100
11264 +++ linux-4.4.46/fs/eventpoll.c 2017-02-03 17:18:10.899617976 +0100
11265 @@ -505,12 +505,12 @@
11267 static void ep_poll_safewake(wait_queue_head_t *wq)
11269 - int this_cpu = get_cpu();
11270 + int this_cpu = get_cpu_light();
11272 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
11273 ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
11279 static void ep_remove_wait_queue(struct eppoll_entry *pwq)
11280 diff -Nur linux-4.4.46.orig/fs/exec.c linux-4.4.46/fs/exec.c
11281 --- linux-4.4.46.orig/fs/exec.c 2017-02-01 08:31:11.000000000 +0100
11282 +++ linux-4.4.46/fs/exec.c 2017-02-03 17:18:10.899617976 +0100
11283 @@ -866,12 +866,14 @@
11287 + preempt_disable_rt();
11288 active_mm = tsk->active_mm;
11290 tsk->active_mm = mm;
11291 activate_mm(active_mm, mm);
11292 tsk->mm->vmacache_seqnum = 0;
11293 vmacache_flush(tsk);
11294 + preempt_enable_rt();
11297 up_read(&old_mm->mmap_sem);
11298 diff -Nur linux-4.4.46.orig/fs/f2fs/f2fs.h linux-4.4.46/fs/f2fs/f2fs.h
11299 --- linux-4.4.46.orig/fs/f2fs/f2fs.h 2017-02-01 08:31:11.000000000 +0100
11300 +++ linux-4.4.46/fs/f2fs/f2fs.h 2017-02-03 17:18:10.899617976 +0100
11303 #ifdef CONFIG_F2FS_CHECK_FS
11304 #define f2fs_bug_on(sbi, condition) BUG_ON(condition)
11305 -#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
11307 #define f2fs_bug_on(sbi, condition) \
11310 set_sbi_flag(sbi, SBI_NEED_FSCK); \
11313 -#define f2fs_down_write(x, y) down_write(x)
11317 @@ -959,7 +957,7 @@
11319 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
11321 - f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
11322 + down_write(&sbi->cp_rwsem);
11325 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
11326 diff -Nur linux-4.4.46.orig/fs/jbd2/checkpoint.c linux-4.4.46/fs/jbd2/checkpoint.c
11327 --- linux-4.4.46.orig/fs/jbd2/checkpoint.c 2017-02-01 08:31:11.000000000 +0100
11328 +++ linux-4.4.46/fs/jbd2/checkpoint.c 2017-02-03 17:18:10.899617976 +0100
11329 @@ -116,6 +116,8 @@
11330 nblocks = jbd2_space_needed(journal);
11331 while (jbd2_log_space_left(journal) < nblocks) {
11332 write_unlock(&journal->j_state_lock);
11333 + if (current->plug)
11335 mutex_lock(&journal->j_checkpoint_mutex);
11338 diff -Nur linux-4.4.46.orig/fs/namespace.c linux-4.4.46/fs/namespace.c
11339 --- linux-4.4.46.orig/fs/namespace.c 2017-02-01 08:31:11.000000000 +0100
11340 +++ linux-4.4.46/fs/namespace.c 2017-02-03 17:18:10.899617976 +0100
11342 #include <linux/mnt_namespace.h>
11343 #include <linux/user_namespace.h>
11344 #include <linux/namei.h>
11345 +#include <linux/delay.h>
11346 #include <linux/security.h>
11347 #include <linux/idr.h>
11348 #include <linux/init.h> /* init_rootfs */
11349 @@ -353,8 +354,11 @@
11350 * incremented count after it has set MNT_WRITE_HOLD.
11353 - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
11355 + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
11356 + preempt_enable();
11358 + preempt_disable();
11361 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
11362 * be set to match its requirements. So we must not load that until
11363 diff -Nur linux-4.4.46.orig/fs/ntfs/aops.c linux-4.4.46/fs/ntfs/aops.c
11364 --- linux-4.4.46.orig/fs/ntfs/aops.c 2017-02-01 08:31:11.000000000 +0100
11365 +++ linux-4.4.46/fs/ntfs/aops.c 2017-02-03 17:18:10.899617976 +0100
11366 @@ -107,8 +107,7 @@
11367 "0x%llx.", (unsigned long long)bh->b_blocknr);
11369 first = page_buffers(page);
11370 - local_irq_save(flags);
11371 - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
11372 + flags = bh_uptodate_lock_irqsave(first);
11373 clear_buffer_async_read(bh);
11376 @@ -123,8 +122,7 @@
11378 tmp = tmp->b_this_page;
11379 } while (tmp != bh);
11380 - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11381 - local_irq_restore(flags);
11382 + bh_uptodate_unlock_irqrestore(first, flags);
11384 * If none of the buffers had errors then we can set the page uptodate,
11385 * but we first have to perform the post read mst fixups, if the
11386 @@ -145,13 +143,13 @@
11387 recs = PAGE_CACHE_SIZE / rec_size;
11388 /* Should have been verified before we got here... */
11390 - local_irq_save(flags);
11391 + local_irq_save_nort(flags);
11392 kaddr = kmap_atomic(page);
11393 for (i = 0; i < recs; i++)
11394 post_read_mst_fixup((NTFS_RECORD*)(kaddr +
11395 i * rec_size), rec_size);
11396 kunmap_atomic(kaddr);
11397 - local_irq_restore(flags);
11398 + local_irq_restore_nort(flags);
11399 flush_dcache_page(page);
11400 if (likely(page_uptodate && !PageError(page)))
11401 SetPageUptodate(page);
11402 @@ -159,9 +157,7 @@
11406 - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11407 - local_irq_restore(flags);
11409 + bh_uptodate_unlock_irqrestore(first, flags);
11413 diff -Nur linux-4.4.46.orig/fs/timerfd.c linux-4.4.46/fs/timerfd.c
11414 --- linux-4.4.46.orig/fs/timerfd.c 2017-02-01 08:31:11.000000000 +0100
11415 +++ linux-4.4.46/fs/timerfd.c 2017-02-03 17:18:10.899617976 +0100
11416 @@ -450,7 +450,10 @@
11419 spin_unlock_irq(&ctx->wqh.lock);
11421 + if (isalarm(ctx))
11422 + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
11424 + hrtimer_wait_for_timer(&ctx->t.tmr);
11428 diff -Nur linux-4.4.46.orig/include/acpi/platform/aclinux.h linux-4.4.46/include/acpi/platform/aclinux.h
11429 --- linux-4.4.46.orig/include/acpi/platform/aclinux.h 2017-02-01 08:31:11.000000000 +0100
11430 +++ linux-4.4.46/include/acpi/platform/aclinux.h 2017-02-03 17:18:10.899617976 +0100
11431 @@ -127,6 +127,7 @@
11433 #define acpi_cache_t struct kmem_cache
11434 #define acpi_spinlock spinlock_t *
11435 +#define acpi_raw_spinlock raw_spinlock_t *
11436 #define acpi_cpu_flags unsigned long
11438 /* Use native linux version of acpi_os_allocate_zeroed */
11439 @@ -145,6 +146,20 @@
11440 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
11441 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
11443 +#define acpi_os_create_raw_lock(__handle) \
11445 + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
11448 + *(__handle) = lock; \
11449 + raw_spin_lock_init(*(__handle)); \
11451 + lock ? AE_OK : AE_NO_MEMORY; \
11454 +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
11458 * OSL interfaces used by debugger/disassembler
11460 diff -Nur linux-4.4.46.orig/include/asm-generic/bug.h linux-4.4.46/include/asm-generic/bug.h
11461 --- linux-4.4.46.orig/include/asm-generic/bug.h 2017-02-01 08:31:11.000000000 +0100
11462 +++ linux-4.4.46/include/asm-generic/bug.h 2017-02-03 17:18:10.903618130 +0100
11463 @@ -206,6 +206,20 @@
11464 # define WARN_ON_SMP(x) ({0;})
11467 +#ifdef CONFIG_PREEMPT_RT_BASE
11468 +# define BUG_ON_RT(c) BUG_ON(c)
11469 +# define BUG_ON_NONRT(c) do { } while (0)
11470 +# define WARN_ON_RT(condition) WARN_ON(condition)
11471 +# define WARN_ON_NONRT(condition) do { } while (0)
11472 +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
11474 +# define BUG_ON_RT(c) do { } while (0)
11475 +# define BUG_ON_NONRT(c) BUG_ON(c)
11476 +# define WARN_ON_RT(condition) do { } while (0)
11477 +# define WARN_ON_NONRT(condition) WARN_ON(condition)
11478 +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
11481 #endif /* __ASSEMBLY__ */
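The new macros let a call site assert a condition on only one class of kernels: the *_RT variants fire only with CONFIG_PREEMPT_RT_BASE set, the *_NONRT variants only without it. A small hypothetical usage sketch:

#include <linux/bug.h>
#include <linux/irqflags.h>

static void example_update_state(void)
{
    /* Non-RT still runs this path with interrupts disabled; RT protects
     * the same data with a sleeping lock instead, so do not warn there. */
    WARN_ON_NONRT(!irqs_disabled());

    /* ... touch the protected state ... */
}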
11484 diff -Nur linux-4.4.46.orig/include/asm-generic/preempt.h linux-4.4.46/include/asm-generic/preempt.h
11485 --- linux-4.4.46.orig/include/asm-generic/preempt.h 2017-02-01 08:31:11.000000000 +0100
11486 +++ linux-4.4.46/include/asm-generic/preempt.h 2017-02-03 17:18:10.903618130 +0100
11489 static __always_inline int preempt_count(void)
11491 - return current_thread_info()->preempt_count;
11492 + return READ_ONCE(current_thread_info()->preempt_count);
11495 -static __always_inline int *preempt_count_ptr(void)
11496 +static __always_inline volatile int *preempt_count_ptr(void)
11498 return &current_thread_info()->preempt_count;
11500 diff -Nur linux-4.4.46.orig/include/linux/blkdev.h linux-4.4.46/include/linux/blkdev.h
11501 --- linux-4.4.46.orig/include/linux/blkdev.h 2017-02-01 08:31:11.000000000 +0100
11502 +++ linux-4.4.46/include/linux/blkdev.h 2017-02-03 17:18:10.903618130 +0100
11504 struct list_head queuelist;
11506 struct call_single_data csd;
11507 + struct work_struct work;
11508 unsigned long fifo_time;
11511 @@ -455,7 +456,7 @@
11512 struct throtl_data *td;
11514 struct rcu_head rcu_head;
11515 - wait_queue_head_t mq_freeze_wq;
11516 + struct swait_queue_head mq_freeze_wq;
11517 struct percpu_ref q_usage_counter;
11518 struct list_head all_q_node;
11520 diff -Nur linux-4.4.46.orig/include/linux/blk-mq.h linux-4.4.46/include/linux/blk-mq.h
11521 --- linux-4.4.46.orig/include/linux/blk-mq.h 2017-02-01 08:31:11.000000000 +0100
11522 +++ linux-4.4.46/include/linux/blk-mq.h 2017-02-03 17:18:10.903618130 +0100
11523 @@ -212,6 +212,7 @@
11525 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
11526 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
11527 +void __blk_mq_complete_request_remote_work(struct work_struct *work);
11529 int blk_mq_request_started(struct request *rq);
11530 void blk_mq_start_request(struct request *rq);
11531 diff -Nur linux-4.4.46.orig/include/linux/bottom_half.h linux-4.4.46/include/linux/bottom_half.h
11532 --- linux-4.4.46.orig/include/linux/bottom_half.h 2017-02-01 08:31:11.000000000 +0100
11533 +++ linux-4.4.46/include/linux/bottom_half.h 2017-02-03 17:18:10.903618130 +0100
11536 #include <linux/preempt.h>
11538 +#ifdef CONFIG_PREEMPT_RT_FULL
11540 +extern void __local_bh_disable(void);
11541 +extern void _local_bh_enable(void);
11542 +extern void __local_bh_enable(void);
11544 +static inline void local_bh_disable(void)
11546 + __local_bh_disable();
11549 +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
11551 + __local_bh_disable();
11554 +static inline void local_bh_enable(void)
11556 + __local_bh_enable();
11559 +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
11561 + __local_bh_enable();
11564 +static inline void local_bh_enable_ip(unsigned long ip)
11566 + __local_bh_enable();
11571 #ifdef CONFIG_TRACE_IRQFLAGS
11572 extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
11576 __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
11580 #endif /* _LINUX_BH_H */
11581 diff -Nur linux-4.4.46.orig/include/linux/buffer_head.h linux-4.4.46/include/linux/buffer_head.h
11582 --- linux-4.4.46.orig/include/linux/buffer_head.h 2017-02-01 08:31:11.000000000 +0100
11583 +++ linux-4.4.46/include/linux/buffer_head.h 2017-02-03 17:18:10.903618130 +0100
11585 struct address_space *b_assoc_map; /* mapping this buffer is
11587 atomic_t b_count; /* users using this buffer_head */
11588 +#ifdef CONFIG_PREEMPT_RT_BASE
11589 + spinlock_t b_uptodate_lock;
11590 +#if IS_ENABLED(CONFIG_JBD2)
11591 + spinlock_t b_state_lock;
11592 + spinlock_t b_journal_head_lock;
11597 +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
11599 + unsigned long flags;
11601 +#ifndef CONFIG_PREEMPT_RT_BASE
11602 + local_irq_save(flags);
11603 + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
11605 + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
11610 +static inline void
11611 +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
11613 +#ifndef CONFIG_PREEMPT_RT_BASE
11614 + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
11615 + local_irq_restore(flags);
11617 + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
11621 +static inline void buffer_head_init_locks(struct buffer_head *bh)
11623 +#ifdef CONFIG_PREEMPT_RT_BASE
11624 + spin_lock_init(&bh->b_uptodate_lock);
11625 +#if IS_ENABLED(CONFIG_JBD2)
11626 + spin_lock_init(&bh->b_state_lock);
11627 + spin_lock_init(&bh->b_journal_head_lock);
11633 * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
11634 * and buffer_foo() functions.
11635 diff -Nur linux-4.4.46.orig/include/linux/cgroup-defs.h linux-4.4.46/include/linux/cgroup-defs.h
11636 --- linux-4.4.46.orig/include/linux/cgroup-defs.h 2017-02-01 08:31:11.000000000 +0100
11637 +++ linux-4.4.46/include/linux/cgroup-defs.h 2017-02-03 17:18:10.903618130 +0100
11639 #include <linux/percpu-refcount.h>
11640 #include <linux/percpu-rwsem.h>
11641 #include <linux/workqueue.h>
11642 +#include <linux/swork.h>
11644 #ifdef CONFIG_CGROUPS
11646 @@ -142,6 +143,7 @@
11647 /* percpu_ref killing and RCU release */
11648 struct rcu_head rcu_head;
11649 struct work_struct destroy_work;
11650 + struct swork_event destroy_swork;
11654 diff -Nur linux-4.4.46.orig/include/linux/clk/at91_pmc.h linux-4.4.46/include/linux/clk/at91_pmc.h
11655 --- linux-4.4.46.orig/include/linux/clk/at91_pmc.h 2017-02-01 08:31:11.000000000 +0100
11656 +++ linux-4.4.46/include/linux/clk/at91_pmc.h 2017-02-03 17:18:10.903618130 +0100
11661 -#ifndef __ASSEMBLY__
11662 -extern void __iomem *at91_pmc_base;
11664 -#define at91_pmc_read(field) \
11665 - readl_relaxed(at91_pmc_base + field)
11667 -#define at91_pmc_write(field, value) \
11668 - writel_relaxed(value, at91_pmc_base + field)
11670 -.extern at91_pmc_base
11673 #define AT91_PMC_SCER 0x00 /* System Clock Enable Register */
11674 #define AT91_PMC_SCDR 0x04 /* System Clock Disable Register */
11676 diff -Nur linux-4.4.46.orig/include/linux/completion.h linux-4.4.46/include/linux/completion.h
11677 --- linux-4.4.46.orig/include/linux/completion.h 2017-02-01 08:31:11.000000000 +0100
11678 +++ linux-4.4.46/include/linux/completion.h 2017-02-03 17:18:10.903618130 +0100
11680 * Atomic wait-for-completion handler data structures.
11681 * See kernel/sched/completion.c for details.
11684 -#include <linux/wait.h>
11685 +#include <linux/swait.h>
11688 * struct completion - structure used to maintain state for a "completion"
11689 @@ -24,11 +23,11 @@
11691 struct completion {
11693 - wait_queue_head_t wait;
11694 + struct swait_queue_head wait;
11697 #define COMPLETION_INITIALIZER(work) \
11698 - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
11699 + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
11701 #define COMPLETION_INITIALIZER_ONSTACK(work) \
11702 ({ init_completion(&work); work; })
11704 static inline void init_completion(struct completion *x)
11707 - init_waitqueue_head(&x->wait);
11708 + init_swait_queue_head(&x->wait);
11712 diff -Nur linux-4.4.46.orig/include/linux/cpu.h linux-4.4.46/include/linux/cpu.h
11713 --- linux-4.4.46.orig/include/linux/cpu.h 2017-02-01 08:31:11.000000000 +0100
11714 +++ linux-4.4.46/include/linux/cpu.h 2017-02-03 17:18:10.903618130 +0100
11715 @@ -224,6 +224,8 @@
11716 extern void put_online_cpus(void);
11717 extern void cpu_hotplug_disable(void);
11718 extern void cpu_hotplug_enable(void);
11719 +extern void pin_current_cpu(void);
11720 +extern void unpin_current_cpu(void);
11721 #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
11722 #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
11723 #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
11724 @@ -241,6 +243,8 @@
11725 #define put_online_cpus() do { } while (0)
11726 #define cpu_hotplug_disable() do { } while (0)
11727 #define cpu_hotplug_enable() do { } while (0)
11728 +static inline void pin_current_cpu(void) { }
11729 +static inline void unpin_current_cpu(void) { }
11730 #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
11731 #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
11732 /* These aren't inline functions due to a GCC bug. */
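Illustrative sketch of the hotplug pinning hooks declared above; the surrounding function is hypothetical, and the real callers are the RT migrate_disable()/migrate_enable() and hotplug paths. On !RT both calls compile to no-ops:

static void example_hotplug_sensitive(void)
{
        pin_current_cpu();      /* current CPU cannot be unplugged while pinned */
        /* ... work that must finish on a CPU that stays online ... */
        unpin_current_cpu();
}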
11733 diff -Nur linux-4.4.46.orig/include/linux/delay.h linux-4.4.46/include/linux/delay.h
11734 --- linux-4.4.46.orig/include/linux/delay.h 2017-02-01 08:31:11.000000000 +0100
11735 +++ linux-4.4.46/include/linux/delay.h 2017-02-03 17:18:10.903618130 +0100
11737 msleep(seconds * 1000);
11740 +#ifdef CONFIG_PREEMPT_RT_FULL
11741 +extern void cpu_chill(void);
11743 +# define cpu_chill() cpu_relax()
11746 #endif /* defined(_LINUX_DELAY_H) */
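Illustrative sketch of a cpu_chill() retry loop (the flag word and bit are hypothetical); on RT the caller sleeps briefly instead of busy-waiting, so the owner of the flag or lock can actually run:

static void example_wait_until_clear(unsigned long *word, int bit)
{
        while (test_bit(bit, word))
                cpu_chill();    /* cpu_relax() on !RT, short sleep on RT */
}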
11747 diff -Nur linux-4.4.46.orig/include/linux/ftrace.h linux-4.4.46/include/linux/ftrace.h
11748 --- linux-4.4.46.orig/include/linux/ftrace.h 2017-02-01 08:31:11.000000000 +0100
11749 +++ linux-4.4.46/include/linux/ftrace.h 2017-02-03 17:18:10.903618130 +0100
11750 @@ -694,6 +694,18 @@
11751 #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
11752 #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
11754 +static inline unsigned long get_lock_parent_ip(void)
11756 + unsigned long addr = CALLER_ADDR0;
11758 + if (!in_lock_functions(addr))
11760 + addr = CALLER_ADDR1;
11761 + if (!in_lock_functions(addr))
11763 + return CALLER_ADDR2;
11766 #ifdef CONFIG_IRQSOFF_TRACER
11767 extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
11768 extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
11769 diff -Nur linux-4.4.46.orig/include/linux/highmem.h linux-4.4.46/include/linux/highmem.h
11770 --- linux-4.4.46.orig/include/linux/highmem.h 2017-02-01 08:31:11.000000000 +0100
11771 +++ linux-4.4.46/include/linux/highmem.h 2017-02-03 17:18:10.903618130 +0100
11773 #include <linux/mm.h>
11774 #include <linux/uaccess.h>
11775 #include <linux/hardirq.h>
11776 +#include <linux/sched.h>
11778 #include <asm/cacheflush.h>
11782 static inline void *kmap_atomic(struct page *page)
11784 - preempt_disable();
11785 + preempt_disable_nort();
11786 pagefault_disable();
11787 return page_address(page);
11790 static inline void __kunmap_atomic(void *addr)
11792 pagefault_enable();
11793 - preempt_enable();
11794 + preempt_enable_nort();
11797 #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
11798 @@ -86,32 +87,51 @@
11800 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
11802 +#ifndef CONFIG_PREEMPT_RT_FULL
11803 DECLARE_PER_CPU(int, __kmap_atomic_idx);
11806 static inline int kmap_atomic_idx_push(void)
11808 +#ifndef CONFIG_PREEMPT_RT_FULL
11809 int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
11811 -#ifdef CONFIG_DEBUG_HIGHMEM
11812 +# ifdef CONFIG_DEBUG_HIGHMEM
11813 WARN_ON_ONCE(in_irq() && !irqs_disabled());
11814 BUG_ON(idx >= KM_TYPE_NR);
11819 + current->kmap_idx++;
11820 + BUG_ON(current->kmap_idx > KM_TYPE_NR);
11821 + return current->kmap_idx - 1;
11825 static inline int kmap_atomic_idx(void)
11827 +#ifndef CONFIG_PREEMPT_RT_FULL
11828 return __this_cpu_read(__kmap_atomic_idx) - 1;
11830 + return current->kmap_idx - 1;
11834 static inline void kmap_atomic_idx_pop(void)
11836 -#ifdef CONFIG_DEBUG_HIGHMEM
11837 +#ifndef CONFIG_PREEMPT_RT_FULL
11838 +# ifdef CONFIG_DEBUG_HIGHMEM
11839 int idx = __this_cpu_dec_return(__kmap_atomic_idx);
11844 __this_cpu_dec(__kmap_atomic_idx);
11847 + current->kmap_idx--;
11848 +# ifdef CONFIG_DEBUG_HIGHMEM
11849 + BUG_ON(current->kmap_idx < 0);
11854 diff -Nur linux-4.4.46.orig/include/linux/hrtimer.h linux-4.4.46/include/linux/hrtimer.h
11855 --- linux-4.4.46.orig/include/linux/hrtimer.h 2017-02-01 08:31:11.000000000 +0100
11856 +++ linux-4.4.46/include/linux/hrtimer.h 2017-02-03 17:18:10.903618130 +0100
11858 * @function: timer expiry callback function
11859 * @base: pointer to the timer base (per cpu and per clock)
11860 * @state: state information (See bit values above)
11861 + * @cb_entry: list entry to defer timers from hardirq context
11862 + * @irqsafe: timer can run in hardirq context
11863 + * @praecox: timer expiry time if expired at the time of programming
11864 * @is_rel: Set if the timer was armed relative
11865 * @start_pid: timer statistics field to store the pid of the task which
11866 * started the timer
11867 @@ -103,6 +106,11 @@
11868 enum hrtimer_restart (*function)(struct hrtimer *);
11869 struct hrtimer_clock_base *base;
11871 + struct list_head cb_entry;
11873 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
11877 #ifdef CONFIG_TIMER_STATS
11879 @@ -123,11 +131,7 @@
11880 struct task_struct *task;
11883 -#ifdef CONFIG_64BIT
11884 # define HRTIMER_CLOCK_BASE_ALIGN 64
11886 -# define HRTIMER_CLOCK_BASE_ALIGN 32
11890 * struct hrtimer_clock_base - the timer base for a specific clock
11891 @@ -136,6 +140,7 @@
11892 * timer to a base on another cpu.
11893 * @clockid: clock id for per_cpu support
11894 * @active: red black tree root node for the active timers
11895 + * @expired: list head for deferred timers.
11896 * @get_time: function to retrieve the current time of the clock
11897 * @offset: offset of this clock to the monotonic base
11899 @@ -144,6 +149,7 @@
11902 struct timerqueue_head active;
11903 + struct list_head expired;
11904 ktime_t (*get_time)(void);
11906 } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
11907 @@ -187,6 +193,7 @@
11908 raw_spinlock_t lock;
11910 struct hrtimer *running;
11911 + struct hrtimer *running_soft;
11913 unsigned int active_bases;
11914 unsigned int clock_was_set_seq;
11915 @@ -203,6 +210,9 @@
11916 unsigned int nr_hangs;
11917 unsigned int max_hang_time;
11919 +#ifdef CONFIG_PREEMPT_RT_BASE
11920 + wait_queue_head_t wait;
11922 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
11923 } ____cacheline_aligned;
11925 @@ -412,6 +422,13 @@
11926 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
11929 +/* Softirq preemption could deadlock timer removal */
11930 +#ifdef CONFIG_PREEMPT_RT_BASE
11931 + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
11933 +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
11936 /* Query timers: */
11937 extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
11939 @@ -436,7 +453,7 @@
11940 * Helper function to check, whether the timer is running the callback
11943 -static inline int hrtimer_callback_running(struct hrtimer *timer)
11944 +static inline int hrtimer_callback_running(const struct hrtimer *timer)
11946 return timer->base->cpu_base->running == timer;
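Illustrative sketch of hrtimer_wait_for_timer() in a cancel path (the helper is hypothetical); on RT the wait queue added to struct hrtimer_cpu_base above lets the caller sleep until the softirq callback has finished instead of deadlocking on it:

static void example_hrtimer_cancel_sync(struct hrtimer *timer)
{
        /* hrtimer_try_to_cancel() returns -1 while the callback runs */
        while (hrtimer_try_to_cancel(timer) < 0)
                hrtimer_wait_for_timer(timer);
}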
11948 diff -Nur linux-4.4.46.orig/include/linux/idr.h linux-4.4.46/include/linux/idr.h
11949 --- linux-4.4.46.orig/include/linux/idr.h 2017-02-01 08:31:11.000000000 +0100
11950 +++ linux-4.4.46/include/linux/idr.h 2017-02-03 17:18:10.903618130 +0100
11951 @@ -95,10 +95,14 @@
11952 * Each idr_preload() should be matched with an invocation of this
11953 * function. See idr_preload() for details.
11955 +#ifdef CONFIG_PREEMPT_RT_FULL
11956 +void idr_preload_end(void);
11958 static inline void idr_preload_end(void)
11965 * idr_find - return pointer for given id
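Illustrative sketch of the (unchanged) preload calling convention; on RT idr_preload_end() becomes an out-of-line function so it can drop a local lock rather than just re-enable preemption. The example_ names are hypothetical:

static DEFINE_SPINLOCK(example_lock);
static DEFINE_IDR(example_idr);

static int example_alloc_id(void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock(&example_lock);
        id = idr_alloc(&example_idr, ptr, 0, 0, GFP_NOWAIT);
        spin_unlock(&example_lock);
        idr_preload_end();

        return id;
}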
11966 diff -Nur linux-4.4.46.orig/include/linux/init_task.h linux-4.4.46/include/linux/init_task.h
11967 --- linux-4.4.46.orig/include/linux/init_task.h 2017-02-01 08:31:11.000000000 +0100
11968 +++ linux-4.4.46/include/linux/init_task.h 2017-02-03 17:18:10.903618130 +0100
11969 @@ -148,9 +148,15 @@
11970 # define INIT_PERF_EVENTS(tsk)
11973 +#ifdef CONFIG_PREEMPT_RT_BASE
11974 +# define INIT_TIMER_LIST .posix_timer_list = NULL,
11976 +# define INIT_TIMER_LIST
11979 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
11980 # define INIT_VTIME(tsk) \
11981 - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
11982 + .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
11984 .vtime_snap_whence = VTIME_SYS,
11986 @@ -239,6 +245,7 @@
11987 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
11988 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
11989 .timer_slack_ns = 50000, /* 50 usec default slack */ \
11990 + INIT_TIMER_LIST \
11992 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
11993 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
11994 diff -Nur linux-4.4.46.orig/include/linux/interrupt.h linux-4.4.46/include/linux/interrupt.h
11995 --- linux-4.4.46.orig/include/linux/interrupt.h 2017-02-01 08:31:11.000000000 +0100
11996 +++ linux-4.4.46/include/linux/interrupt.h 2017-02-03 17:18:10.903618130 +0100
11998 * interrupt handler after suspending interrupts. For system
11999 * wakeup devices users need to implement wakeup detection in
12000 * their interrupt handlers.
12001 + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
12003 #define IRQF_SHARED 0x00000080
12004 #define IRQF_PROBE_SHARED 0x00000100
12006 #define IRQF_NO_THREAD 0x00010000
12007 #define IRQF_EARLY_RESUME 0x00020000
12008 #define IRQF_COND_SUSPEND 0x00040000
12009 +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
12011 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
12013 @@ -186,7 +188,7 @@
12014 #ifdef CONFIG_LOCKDEP
12015 # define local_irq_enable_in_hardirq() do { } while (0)
12017 -# define local_irq_enable_in_hardirq() local_irq_enable()
12018 +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
12021 extern void disable_irq_nosync(unsigned int irq);
12022 @@ -206,6 +208,7 @@
12023 * @irq: Interrupt to which notification applies
12024 * @kref: Reference count, for internal use
12025 * @work: Work item, for internal use
12026 + * @list: List item for deferred callbacks
12027 * @notify: Function to be called on change. This will be
12028 * called in process context.
12029 * @release: Function to be called on release. This will be
12030 @@ -217,6 +220,7 @@
12033 struct work_struct work;
12034 + struct list_head list;
12035 void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
12036 void (*release)(struct kref *ref);
12038 @@ -379,9 +383,13 @@
12041 #ifdef CONFIG_IRQ_FORCED_THREADING
12042 +# ifndef CONFIG_PREEMPT_RT_BASE
12043 extern bool force_irqthreads;
12045 +# define force_irqthreads (true)
12048 -#define force_irqthreads (0)
12049 +#define force_irqthreads (false)
12052 #ifndef __ARCH_SET_SOFTIRQ_PENDING
12053 @@ -438,9 +446,10 @@
12054 void (*action)(struct softirq_action *);
12057 +#ifndef CONFIG_PREEMPT_RT_FULL
12058 asmlinkage void do_softirq(void);
12059 asmlinkage void __do_softirq(void);
12061 +static inline void thread_do_softirq(void) { do_softirq(); }
12062 #ifdef __ARCH_HAS_DO_SOFTIRQ
12063 void do_softirq_own_stack(void);
12065 @@ -449,13 +458,25 @@
12070 +extern void thread_do_softirq(void);
12073 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
12074 extern void softirq_init(void);
12075 extern void __raise_softirq_irqoff(unsigned int nr);
12076 +#ifdef CONFIG_PREEMPT_RT_FULL
12077 +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
12079 +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
12081 + __raise_softirq_irqoff(nr);
12085 extern void raise_softirq_irqoff(unsigned int nr);
12086 extern void raise_softirq(unsigned int nr);
12087 +extern void softirq_check_pending_idle(void);
12089 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
12091 @@ -477,8 +498,9 @@
12092 to be executed on some cpu at least once after this.
12093 * If the tasklet is already scheduled, but its execution is still not
12094 started, it will be executed only once.
12095 - * If this tasklet is already running on another CPU (or schedule is called
12096 - from tasklet itself), it is rescheduled for later.
12097 + * If this tasklet is already running on another CPU, it is rescheduled
12099 + * Schedule must not be called from the tasklet itself (a lockup occurs)
12100 * Tasklet is strictly serialized wrt itself, but not
12101 wrt another tasklets. If client needs some intertask synchronization,
12102 he makes it with spinlocks.
12103 @@ -503,27 +525,36 @@
12106 TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
12107 - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
12108 + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
12109 + TASKLET_STATE_PENDING /* Tasklet is pending */
12113 +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
12114 +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
12115 +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
12117 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
12118 static inline int tasklet_trylock(struct tasklet_struct *t)
12120 return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
12123 +static inline int tasklet_tryunlock(struct tasklet_struct *t)
12125 + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
12128 static inline void tasklet_unlock(struct tasklet_struct *t)
12130 smp_mb__before_atomic();
12131 clear_bit(TASKLET_STATE_RUN, &(t)->state);
12134 -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
12136 - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
12138 +extern void tasklet_unlock_wait(struct tasklet_struct *t);
12141 #define tasklet_trylock(t) 1
12142 +#define tasklet_tryunlock(t) 1
12143 #define tasklet_unlock_wait(t) do { } while (0)
12144 #define tasklet_unlock(t) do { } while (0)
12146 @@ -572,12 +603,7 @@
12150 -static inline void tasklet_enable(struct tasklet_struct *t)
12152 - smp_mb__before_atomic();
12153 - atomic_dec(&t->count);
12156 +extern void tasklet_enable(struct tasklet_struct *t);
12157 extern void tasklet_kill(struct tasklet_struct *t);
12158 extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
12159 extern void tasklet_init(struct tasklet_struct *t,
12160 @@ -608,6 +634,12 @@
12161 tasklet_kill(&ttimer->tasklet);
12164 +#ifdef CONFIG_PREEMPT_RT_FULL
12165 +extern void softirq_early_init(void);
12167 +static inline void softirq_early_init(void) { }
12171 * Autoprobing for irqs:
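Illustrative sketch of the new IRQF_NO_SOFTIRQ_CALL flag at request time (device name, handler and data pointer are hypothetical); the flag keeps the threaded handler from processing pending softirqs in its own context on RT:

static irqreturn_t example_irq_handler(int irq, void *data)
{
        /* ... acknowledge the device ... */
        return IRQ_HANDLED;
}

static int example_setup_irq(unsigned int irq, void *data)
{
        return request_irq(irq, example_irq_handler,
                           IRQF_NO_SOFTIRQ_CALL, "example-dev", data);
}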
12173 diff -Nur linux-4.4.46.orig/include/linux/irqdesc.h linux-4.4.46/include/linux/irqdesc.h
12174 --- linux-4.4.46.orig/include/linux/irqdesc.h 2017-02-01 08:31:11.000000000 +0100
12175 +++ linux-4.4.46/include/linux/irqdesc.h 2017-02-03 17:18:10.903618130 +0100
12177 unsigned int irqs_unhandled;
12178 atomic_t threads_handled;
12179 int threads_handled_last;
12181 raw_spinlock_t lock;
12182 struct cpumask *percpu_enabled;
12184 diff -Nur linux-4.4.46.orig/include/linux/irqflags.h linux-4.4.46/include/linux/irqflags.h
12185 --- linux-4.4.46.orig/include/linux/irqflags.h 2017-02-01 08:31:11.000000000 +0100
12186 +++ linux-4.4.46/include/linux/irqflags.h 2017-02-03 17:18:10.903618130 +0100
12188 # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
12189 # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
12190 # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
12191 -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
12192 -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
12193 # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
12195 # define trace_hardirqs_on() do { } while (0)
12197 # define trace_softirqs_enabled(p) 0
12198 # define trace_hardirq_enter() do { } while (0)
12199 # define trace_hardirq_exit() do { } while (0)
12200 +# define INIT_TRACE_IRQFLAGS
12203 +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
12204 +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
12205 +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
12207 # define lockdep_softirq_enter() do { } while (0)
12208 # define lockdep_softirq_exit() do { } while (0)
12209 -# define INIT_TRACE_IRQFLAGS
12212 #if defined(CONFIG_IRQSOFF_TRACER) || \
12213 @@ -148,4 +152,23 @@
12215 #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
12218 + * local_irq* variants depending on RT/!RT
12220 +#ifdef CONFIG_PREEMPT_RT_FULL
12221 +# define local_irq_disable_nort() do { } while (0)
12222 +# define local_irq_enable_nort() do { } while (0)
12223 +# define local_irq_save_nort(flags) local_save_flags(flags)
12224 +# define local_irq_restore_nort(flags) (void)(flags)
12225 +# define local_irq_disable_rt() local_irq_disable()
12226 +# define local_irq_enable_rt() local_irq_enable()
12228 +# define local_irq_disable_nort() local_irq_disable()
12229 +# define local_irq_enable_nort() local_irq_enable()
12230 +# define local_irq_save_nort(flags) local_irq_save(flags)
12231 +# define local_irq_restore_nort(flags) local_irq_restore(flags)
12232 +# define local_irq_disable_rt() do { } while (0)
12233 +# define local_irq_enable_rt() do { } while (0)
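Illustrative sketch of the _nort variants (struct and field are hypothetical); on !RT this is a plain local_irq_save() section, while on RT interrupts stay enabled and the data relies on the sleeping lock its real users already hold:

struct example_stats {
        unsigned long packets;
};

static void example_update_stats(struct example_stats *stats)
{
        unsigned long flags;

        local_irq_save_nort(flags);     /* irqs off on !RT, flags only on RT */
        stats->packets++;
        local_irq_restore_nort(flags);
}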
12237 diff -Nur linux-4.4.46.orig/include/linux/irq.h linux-4.4.46/include/linux/irq.h
12238 --- linux-4.4.46.orig/include/linux/irq.h 2017-02-01 08:31:11.000000000 +0100
12239 +++ linux-4.4.46/include/linux/irq.h 2017-02-03 17:18:10.903618130 +0100
12241 * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
12242 * it from the spurious interrupt detection
12243 * mechanism and from core side polling.
12244 + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
12245 * IRQ_DISABLE_UNLAZY - Disable lazy irq disable
12248 @@ -99,13 +100,14 @@
12249 IRQ_PER_CPU_DEVID = (1 << 17),
12250 IRQ_IS_POLLED = (1 << 18),
12251 IRQ_DISABLE_UNLAZY = (1 << 19),
12252 + IRQ_NO_SOFTIRQ_CALL = (1 << 20),
12255 #define IRQF_MODIFY_MASK \
12256 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
12257 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
12258 IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
12259 - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY)
12260 + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL)
12262 #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
12264 diff -Nur linux-4.4.46.orig/include/linux/irq_work.h linux-4.4.46/include/linux/irq_work.h
12265 --- linux-4.4.46.orig/include/linux/irq_work.h 2017-02-01 08:31:11.000000000 +0100
12266 +++ linux-4.4.46/include/linux/irq_work.h 2017-02-03 17:18:10.903618130 +0100
12268 #define IRQ_WORK_BUSY 2UL
12269 #define IRQ_WORK_FLAGS 3UL
12270 #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
12271 +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
12274 unsigned long flags;
12276 static inline void irq_work_run(void) { }
12279 +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
12280 +void irq_work_tick_soft(void);
12282 +static inline void irq_work_tick_soft(void) { }
12285 #endif /* _LINUX_IRQ_WORK_H */
12286 diff -Nur linux-4.4.46.orig/include/linux/jbd2.h linux-4.4.46/include/linux/jbd2.h
12287 --- linux-4.4.46.orig/include/linux/jbd2.h 2017-02-01 08:31:11.000000000 +0100
12288 +++ linux-4.4.46/include/linux/jbd2.h 2017-02-03 17:18:10.907618284 +0100
12289 @@ -352,32 +352,56 @@
12291 static inline void jbd_lock_bh_state(struct buffer_head *bh)
12293 +#ifndef CONFIG_PREEMPT_RT_BASE
12294 bit_spin_lock(BH_State, &bh->b_state);
12296 + spin_lock(&bh->b_state_lock);
12300 static inline int jbd_trylock_bh_state(struct buffer_head *bh)
12302 +#ifndef CONFIG_PREEMPT_RT_BASE
12303 return bit_spin_trylock(BH_State, &bh->b_state);
12305 + return spin_trylock(&bh->b_state_lock);
12309 static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
12311 +#ifndef CONFIG_PREEMPT_RT_BASE
12312 return bit_spin_is_locked(BH_State, &bh->b_state);
12314 + return spin_is_locked(&bh->b_state_lock);
12318 static inline void jbd_unlock_bh_state(struct buffer_head *bh)
12320 +#ifndef CONFIG_PREEMPT_RT_BASE
12321 bit_spin_unlock(BH_State, &bh->b_state);
12323 + spin_unlock(&bh->b_state_lock);
12327 static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
12329 +#ifndef CONFIG_PREEMPT_RT_BASE
12330 bit_spin_lock(BH_JournalHead, &bh->b_state);
12332 + spin_lock(&bh->b_journal_head_lock);
12336 static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
12338 +#ifndef CONFIG_PREEMPT_RT_BASE
12339 bit_spin_unlock(BH_JournalHead, &bh->b_state);
12341 + spin_unlock(&bh->b_journal_head_lock);
12345 #define J_ASSERT(assert) BUG_ON(!(assert))
12346 diff -Nur linux-4.4.46.orig/include/linux/kdb.h linux-4.4.46/include/linux/kdb.h
12347 --- linux-4.4.46.orig/include/linux/kdb.h 2017-02-01 08:31:11.000000000 +0100
12348 +++ linux-4.4.46/include/linux/kdb.h 2017-02-03 17:18:10.907618284 +0100
12349 @@ -167,6 +167,7 @@
12350 extern __printf(1, 2) int kdb_printf(const char *, ...);
12351 typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
12353 +#define in_kdb_printk() (kdb_trap_printk)
12354 extern void kdb_init(int level);
12356 /* Access to kdb specific polling devices */
12357 @@ -201,6 +202,7 @@
12358 extern int kdb_unregister(char *);
12359 #else /* ! CONFIG_KGDB_KDB */
12360 static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
12361 +#define in_kdb_printk() (0)
12362 static inline void kdb_init(int level) {}
12363 static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
12364 char *help, short minlen) { return 0; }
12365 diff -Nur linux-4.4.46.orig/include/linux/kernel.h linux-4.4.46/include/linux/kernel.h
12366 --- linux-4.4.46.orig/include/linux/kernel.h 2017-02-01 08:31:11.000000000 +0100
12367 +++ linux-4.4.46/include/linux/kernel.h 2017-02-03 17:18:10.907618284 +0100
12368 @@ -188,6 +188,9 @@
12370 # define might_sleep() \
12371 do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
12373 +# define might_sleep_no_state_check() \
12374 + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
12375 # define sched_annotate_sleep() (current->task_state_change = 0)
12377 static inline void ___might_sleep(const char *file, int line,
12378 @@ -195,6 +198,7 @@
12379 static inline void __might_sleep(const char *file, int line,
12380 int preempt_offset) { }
12381 # define might_sleep() do { might_resched(); } while (0)
12382 +# define might_sleep_no_state_check() do { might_resched(); } while (0)
12383 # define sched_annotate_sleep() do { } while (0)
12386 @@ -255,6 +259,7 @@
12388 void panic(const char *fmt, ...)
12390 +void nmi_panic(struct pt_regs *regs, const char *msg);
12391 extern void oops_enter(void);
12392 extern void oops_exit(void);
12393 void print_oops_end_marker(void);
12394 @@ -448,6 +453,14 @@
12395 extern bool crash_kexec_post_notifiers;
12398 + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
12399 + * holds a CPU number which is executing panic() currently. A value of
12400 + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
12402 +extern atomic_t panic_cpu;
12403 +#define PANIC_CPU_INVALID -1
12406 * Only to be used by arch init code. If the user over-wrote the default
12407 * CONFIG_PANIC_TIMEOUT, honor it.
12409 @@ -475,6 +488,7 @@
12416 #define TAINT_PROPRIETARY_MODULE 0
12417 diff -Nur linux-4.4.46.orig/include/linux/kvm_host.h linux-4.4.46/include/linux/kvm_host.h
12418 --- linux-4.4.46.orig/include/linux/kvm_host.h 2017-02-01 08:31:11.000000000 +0100
12419 +++ linux-4.4.46/include/linux/kvm_host.h 2017-02-03 17:18:10.907618284 +0100
12421 #include <linux/irqflags.h>
12422 #include <linux/context_tracking.h>
12423 #include <linux/irqbypass.h>
12424 +#include <linux/swait.h>
12425 #include <asm/signal.h>
12427 #include <linux/kvm.h>
12428 @@ -243,7 +244,7 @@
12430 int guest_fpu_loaded, guest_xcr0_loaded;
12431 unsigned char fpu_counter;
12432 - wait_queue_head_t wq;
12433 + struct swait_queue_head wq;
12437 @@ -794,7 +795,7 @@
12441 -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
12442 +static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
12444 #ifdef __KVM_HAVE_ARCH_WQP
12445 return vcpu->arch.wqp;
12446 diff -Nur linux-4.4.46.orig/include/linux/lglock.h linux-4.4.46/include/linux/lglock.h
12447 --- linux-4.4.46.orig/include/linux/lglock.h 2017-02-01 08:31:11.000000000 +0100
12448 +++ linux-4.4.46/include/linux/lglock.h 2017-02-03 17:18:10.907618284 +0100
12449 @@ -34,13 +34,30 @@
12453 +#ifdef CONFIG_PREEMPT_RT_FULL
12454 + struct rt_mutex __percpu *lock;
12456 arch_spinlock_t __percpu *lock;
12458 #ifdef CONFIG_DEBUG_LOCK_ALLOC
12459 struct lock_class_key lock_key;
12460 struct lockdep_map lock_dep_map;
12464 +#ifdef CONFIG_PREEMPT_RT_FULL
12465 +# define DEFINE_LGLOCK(name) \
12466 + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
12467 + = __RT_MUTEX_INITIALIZER( name ## _lock); \
12468 + struct lglock name = { .lock = &name ## _lock }
12470 +# define DEFINE_STATIC_LGLOCK(name) \
12471 + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
12472 + = __RT_MUTEX_INITIALIZER( name ## _lock); \
12473 + static struct lglock name = { .lock = &name ## _lock }
12477 #define DEFINE_LGLOCK(name) \
12478 static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
12479 = __ARCH_SPIN_LOCK_UNLOCKED; \
12481 static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
12482 = __ARCH_SPIN_LOCK_UNLOCKED; \
12483 static struct lglock name = { .lock = &name ## _lock }
12486 void lg_lock_init(struct lglock *lg, char *name);
12489 void lg_global_lock(struct lglock *lg);
12490 void lg_global_unlock(struct lglock *lg);
12492 +#ifndef CONFIG_PREEMPT_RT_FULL
12493 +#define lg_global_trylock_relax(name) lg_global_lock(name)
12495 +void lg_global_trylock_relax(struct lglock *lg);
12499 /* When !CONFIG_SMP, map lglock to spinlock */
12500 #define lglock spinlock
12501 diff -Nur linux-4.4.46.orig/include/linux/list_bl.h linux-4.4.46/include/linux/list_bl.h
12502 --- linux-4.4.46.orig/include/linux/list_bl.h 2017-02-01 08:31:11.000000000 +0100
12503 +++ linux-4.4.46/include/linux/list_bl.h 2017-02-03 17:18:10.907618284 +0100
12505 #define _LINUX_LIST_BL_H
12507 #include <linux/list.h>
12508 +#include <linux/spinlock.h>
12509 #include <linux/bit_spinlock.h>
12512 @@ -32,13 +33,24 @@
12514 struct hlist_bl_head {
12515 struct hlist_bl_node *first;
12516 +#ifdef CONFIG_PREEMPT_RT_BASE
12517 + raw_spinlock_t lock;
12521 struct hlist_bl_node {
12522 struct hlist_bl_node *next, **pprev;
12524 -#define INIT_HLIST_BL_HEAD(ptr) \
12525 - ((ptr)->first = NULL)
12527 +#ifdef CONFIG_PREEMPT_RT_BASE
12528 +#define INIT_HLIST_BL_HEAD(h) \
12530 + (h)->first = NULL; \
12531 + raw_spin_lock_init(&(h)->lock); \
12534 +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
12537 static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
12539 @@ -118,12 +130,26 @@
12541 static inline void hlist_bl_lock(struct hlist_bl_head *b)
12543 +#ifndef CONFIG_PREEMPT_RT_BASE
12544 bit_spin_lock(0, (unsigned long *)b);
12546 + raw_spin_lock(&b->lock);
12547 +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
12548 + __set_bit(0, (unsigned long *)b);
12553 static inline void hlist_bl_unlock(struct hlist_bl_head *b)
12555 +#ifndef CONFIG_PREEMPT_RT_BASE
12556 __bit_spin_unlock(0, (unsigned long *)b);
12558 +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
12559 + __clear_bit(0, (unsigned long *)b);
12561 + raw_spin_unlock(&b->lock);
12565 static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
12566 diff -Nur linux-4.4.46.orig/include/linux/locallock.h linux-4.4.46/include/linux/locallock.h
12567 --- linux-4.4.46.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100
12568 +++ linux-4.4.46/include/linux/locallock.h 2017-02-03 17:18:10.907618284 +0100
12570 +#ifndef _LINUX_LOCALLOCK_H
12571 +#define _LINUX_LOCALLOCK_H
12573 +#include <linux/percpu.h>
12574 +#include <linux/spinlock.h>
12576 +#ifdef CONFIG_PREEMPT_RT_BASE
12578 +#ifdef CONFIG_DEBUG_SPINLOCK
12579 +# define LL_WARN(cond) WARN_ON(cond)
12581 +# define LL_WARN(cond) do { } while (0)
12585 + * per cpu lock based substitute for local_irq_*()
12587 +struct local_irq_lock {
12589 + struct task_struct *owner;
12591 + unsigned long flags;
12594 +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
12595 + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
12596 + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
12598 +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
12599 + DECLARE_PER_CPU(struct local_irq_lock, lvar)
12601 +#define local_irq_lock_init(lvar) \
12604 + for_each_possible_cpu(__cpu) \
12605 + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
12609 + * spin_lock|trylock|unlock_local flavour that does not migrate disable
12610 + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
12611 + * already takes care of the migrate_disable/enable
12612 + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
12614 +#ifdef CONFIG_PREEMPT_RT_FULL
12615 +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
12616 +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
12617 +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
12619 +# define spin_lock_local(lock) spin_lock(lock)
12620 +# define spin_trylock_local(lock) spin_trylock(lock)
12621 +# define spin_unlock_local(lock) spin_unlock(lock)
12624 +static inline void __local_lock(struct local_irq_lock *lv)
12626 + if (lv->owner != current) {
12627 + spin_lock_local(&lv->lock);
12628 + LL_WARN(lv->owner);
12629 + LL_WARN(lv->nestcnt);
12630 + lv->owner = current;
12635 +#define local_lock(lvar) \
12636 + do { __local_lock(&get_local_var(lvar)); } while (0)
12638 +#define local_lock_on(lvar, cpu) \
12639 + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
12641 +static inline int __local_trylock(struct local_irq_lock *lv)
12643 + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
12644 + LL_WARN(lv->owner);
12645 + LL_WARN(lv->nestcnt);
12646 + lv->owner = current;
12653 +#define local_trylock(lvar) \
12656 + __locked = __local_trylock(&get_local_var(lvar)); \
12658 + put_local_var(lvar); \
12662 +static inline void __local_unlock(struct local_irq_lock *lv)
12664 + LL_WARN(lv->nestcnt == 0);
12665 + LL_WARN(lv->owner != current);
12666 + if (--lv->nestcnt)
12669 + lv->owner = NULL;
12670 + spin_unlock_local(&lv->lock);
12673 +#define local_unlock(lvar) \
12675 + __local_unlock(this_cpu_ptr(&lvar)); \
12676 + put_local_var(lvar); \
12679 +#define local_unlock_on(lvar, cpu) \
12680 + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
12682 +static inline void __local_lock_irq(struct local_irq_lock *lv)
12684 + spin_lock_irqsave(&lv->lock, lv->flags);
12685 + LL_WARN(lv->owner);
12686 + LL_WARN(lv->nestcnt);
12687 + lv->owner = current;
12691 +#define local_lock_irq(lvar) \
12692 + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
12694 +#define local_lock_irq_on(lvar, cpu) \
12695 + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
12697 +static inline void __local_unlock_irq(struct local_irq_lock *lv)
12699 + LL_WARN(!lv->nestcnt);
12700 + LL_WARN(lv->owner != current);
12701 + lv->owner = NULL;
12703 + spin_unlock_irq(&lv->lock);
12706 +#define local_unlock_irq(lvar) \
12708 + __local_unlock_irq(this_cpu_ptr(&lvar)); \
12709 + put_local_var(lvar); \
12712 +#define local_unlock_irq_on(lvar, cpu) \
12714 + __local_unlock_irq(&per_cpu(lvar, cpu)); \
12717 +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
12719 + if (lv->owner != current) {
12720 + __local_lock_irq(lv);
12728 +#define local_lock_irqsave(lvar, _flags) \
12730 + if (__local_lock_irqsave(&get_local_var(lvar))) \
12731 + put_local_var(lvar); \
12732 + _flags = __this_cpu_read(lvar.flags); \
12735 +#define local_lock_irqsave_on(lvar, _flags, cpu) \
12737 + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
12738 + _flags = per_cpu(lvar, cpu).flags; \
12741 +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
12742 + unsigned long flags)
12744 + LL_WARN(!lv->nestcnt);
12745 + LL_WARN(lv->owner != current);
12746 + if (--lv->nestcnt)
12749 + lv->owner = NULL;
12750 + spin_unlock_irqrestore(&lv->lock, lv->flags);
12754 +#define local_unlock_irqrestore(lvar, flags) \
12756 + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
12757 + put_local_var(lvar); \
12760 +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
12762 + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
12765 +#define local_spin_trylock_irq(lvar, lock) \
12768 + local_lock_irq(lvar); \
12769 + __locked = spin_trylock(lock); \
12771 + local_unlock_irq(lvar); \
12775 +#define local_spin_lock_irq(lvar, lock) \
12777 + local_lock_irq(lvar); \
12778 + spin_lock(lock); \
12781 +#define local_spin_unlock_irq(lvar, lock) \
12783 + spin_unlock(lock); \
12784 + local_unlock_irq(lvar); \
12787 +#define local_spin_lock_irqsave(lvar, lock, flags) \
12789 + local_lock_irqsave(lvar, flags); \
12790 + spin_lock(lock); \
12793 +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
12795 + spin_unlock(lock); \
12796 + local_unlock_irqrestore(lvar, flags); \
12799 +#define get_locked_var(lvar, var) \
12801 + local_lock(lvar); \
12802 + this_cpu_ptr(&var); \
12805 +#define put_locked_var(lvar, var) local_unlock(lvar);
12807 +#define local_lock_cpu(lvar) \
12809 + local_lock(lvar); \
12810 + smp_processor_id(); \
12813 +#define local_unlock_cpu(lvar) local_unlock(lvar)
12815 +#else /* PREEMPT_RT_BASE */
12817 +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
12818 +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
12820 +static inline void local_irq_lock_init(int lvar) { }
12822 +#define local_lock(lvar) preempt_disable()
12823 +#define local_unlock(lvar) preempt_enable()
12824 +#define local_lock_irq(lvar) local_irq_disable()
12825 +#define local_unlock_irq(lvar) local_irq_enable()
12826 +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
12827 +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
12829 +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
12830 +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
12831 +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
12832 +#define local_spin_lock_irqsave(lvar, lock, flags) \
12833 + spin_lock_irqsave(lock, flags)
12834 +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
12835 + spin_unlock_irqrestore(lock, flags)
12837 +#define get_locked_var(lvar, var) get_cpu_var(var)
12838 +#define put_locked_var(lvar, var) put_cpu_var(var)
12840 +#define local_lock_cpu(lvar) get_cpu()
12841 +#define local_unlock_cpu(lvar) put_cpu()
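Illustrative sketch of the locallock API defined above (the example_ names are hypothetical): a per-CPU list that mainline would guard with local_irq_save() keeps the same call pattern, and on RT the macros map to a per-CPU sleeping spinlock plus migrate_disable():

static DEFINE_PER_CPU(struct list_head, example_list);  /* INIT_LIST_HEAD() at init, not shown */
static DEFINE_LOCAL_IRQ_LOCK(example_lock);

static void example_queue(struct list_head *item)
{
        unsigned long flags;

        /* !RT: local_irq_save(flags); RT: per-CPU spinlock, stays preemptible */
        local_lock_irqsave(example_lock, flags);
        list_add_tail(item, this_cpu_ptr(&example_list));
        local_unlock_irqrestore(example_lock, flags);
}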
12846 diff -Nur linux-4.4.46.orig/include/linux/mm_types.h linux-4.4.46/include/linux/mm_types.h
12847 --- linux-4.4.46.orig/include/linux/mm_types.h 2017-02-01 08:31:11.000000000 +0100
12848 +++ linux-4.4.46/include/linux/mm_types.h 2017-02-03 17:18:10.907618284 +0100
12850 #include <linux/completion.h>
12851 #include <linux/cpumask.h>
12852 #include <linux/uprobes.h>
12853 +#include <linux/rcupdate.h>
12854 #include <linux/page-flags-layout.h>
12855 #include <asm/page.h>
12856 #include <asm/mmu.h>
12857 @@ -505,6 +506,9 @@
12858 bool tlb_flush_pending;
12860 struct uprobes_state uprobes_state;
12861 +#ifdef CONFIG_PREEMPT_RT_BASE
12862 + struct rcu_head delayed_drop;
12864 #ifdef CONFIG_X86_INTEL_MPX
12865 /* address of the bounds directory */
12866 void __user *bd_addr;
12867 diff -Nur linux-4.4.46.orig/include/linux/mutex.h linux-4.4.46/include/linux/mutex.h
12868 --- linux-4.4.46.orig/include/linux/mutex.h 2017-02-01 08:31:11.000000000 +0100
12869 +++ linux-4.4.46/include/linux/mutex.h 2017-02-03 17:18:10.907618284 +0100
12871 #include <asm/processor.h>
12872 #include <linux/osq_lock.h>
12874 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
12875 +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
12876 + , .dep_map = { .name = #lockname }
12878 +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
12881 +#ifdef CONFIG_PREEMPT_RT_FULL
12882 +# include <linux/mutex_rt.h>
12886 * Simple, straightforward mutexes with strict semantics:
12888 @@ -99,13 +110,6 @@
12889 static inline void mutex_destroy(struct mutex *lock) {}
12892 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
12893 -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
12894 - , .dep_map = { .name = #lockname }
12896 -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
12899 #define __MUTEX_INITIALIZER(lockname) \
12900 { .count = ATOMIC_INIT(1) \
12901 , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
12902 @@ -173,6 +177,8 @@
12903 extern int mutex_trylock(struct mutex *lock);
12904 extern void mutex_unlock(struct mutex *lock);
12906 +#endif /* !PREEMPT_RT_FULL */
12908 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
12910 #endif /* __LINUX_MUTEX_H */
12911 diff -Nur linux-4.4.46.orig/include/linux/mutex_rt.h linux-4.4.46/include/linux/mutex_rt.h
12912 --- linux-4.4.46.orig/include/linux/mutex_rt.h 1970-01-01 01:00:00.000000000 +0100
12913 +++ linux-4.4.46/include/linux/mutex_rt.h 2017-02-03 17:18:10.907618284 +0100
12915 +#ifndef __LINUX_MUTEX_RT_H
12916 +#define __LINUX_MUTEX_RT_H
12918 +#ifndef __LINUX_MUTEX_H
12919 +#error "Please include mutex.h"
12922 +#include <linux/rtmutex.h>
12924 +/* FIXME: Just for __lockfunc */
12925 +#include <linux/spinlock.h>
12928 + struct rt_mutex lock;
12929 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
12930 + struct lockdep_map dep_map;
12934 +#define __MUTEX_INITIALIZER(mutexname) \
12936 + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
12937 + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
12940 +#define DEFINE_MUTEX(mutexname) \
12941 + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
12943 +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
12944 +extern void __lockfunc _mutex_lock(struct mutex *lock);
12945 +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
12946 +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
12947 +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
12948 +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
12949 +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
12950 +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
12951 +extern int __lockfunc _mutex_trylock(struct mutex *lock);
12952 +extern void __lockfunc _mutex_unlock(struct mutex *lock);
12954 +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
12955 +#define mutex_lock(l) _mutex_lock(l)
12956 +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
12957 +#define mutex_lock_killable(l) _mutex_lock_killable(l)
12958 +#define mutex_trylock(l) _mutex_trylock(l)
12959 +#define mutex_unlock(l) _mutex_unlock(l)
12960 +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
12962 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
12963 +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
12964 +# define mutex_lock_interruptible_nested(l, s) \
12965 + _mutex_lock_interruptible_nested(l, s)
12966 +# define mutex_lock_killable_nested(l, s) \
12967 + _mutex_lock_killable_nested(l, s)
12969 +# define mutex_lock_nest_lock(lock, nest_lock) \
12971 + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
12972 + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
12976 +# define mutex_lock_nested(l, s) _mutex_lock(l)
12977 +# define mutex_lock_interruptible_nested(l, s) \
12978 + _mutex_lock_interruptible(l)
12979 +# define mutex_lock_killable_nested(l, s) \
12980 + _mutex_lock_killable(l)
12981 +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
12984 +# define mutex_init(mutex) \
12986 + static struct lock_class_key __key; \
12988 + rt_mutex_init(&(mutex)->lock); \
12989 + __mutex_do_init((mutex), #mutex, &__key); \
12992 +# define __mutex_init(mutex, name, key) \
12994 + rt_mutex_init(&(mutex)->lock); \
12995 + __mutex_do_init((mutex), name, key); \
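Call sites keep the ordinary mutex API; a minimal sketch (hypothetical names) of what stays unchanged, now backed on RT by an rtmutex with priority inheritance:

static DEFINE_MUTEX(example_mutex);

static void example_critical_section(void)
{
        mutex_lock(&example_mutex);
        /* may sleep here on RT and !RT alike */
        mutex_unlock(&example_mutex);
}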
12999 diff -Nur linux-4.4.46.orig/include/linux/netdevice.h linux-4.4.46/include/linux/netdevice.h
13000 --- linux-4.4.46.orig/include/linux/netdevice.h 2017-02-01 08:31:11.000000000 +0100
13001 +++ linux-4.4.46/include/linux/netdevice.h 2017-02-03 17:18:10.907618284 +0100
13002 @@ -2286,11 +2286,20 @@
13003 void synchronize_net(void);
13004 int init_dummy_netdev(struct net_device *dev);
13006 +#ifdef CONFIG_PREEMPT_RT_FULL
13007 +static inline int dev_recursion_level(void)
13009 + return current->xmit_recursion;
13014 DECLARE_PER_CPU(int, xmit_recursion);
13015 static inline int dev_recursion_level(void)
13017 return this_cpu_read(xmit_recursion);
13021 struct net_device *dev_get_by_index(struct net *net, int ifindex);
13022 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
13023 @@ -2606,6 +2615,7 @@
13024 unsigned int dropped;
13025 struct sk_buff_head input_pkt_queue;
13026 struct napi_struct backlog;
13027 + struct sk_buff_head tofree_queue;
13031 diff -Nur linux-4.4.46.orig/include/linux/netfilter/x_tables.h linux-4.4.46/include/linux/netfilter/x_tables.h
13032 --- linux-4.4.46.orig/include/linux/netfilter/x_tables.h 2017-02-01 08:31:11.000000000 +0100
13033 +++ linux-4.4.46/include/linux/netfilter/x_tables.h 2017-02-03 17:18:10.907618284 +0100
13036 #include <linux/netdevice.h>
13037 #include <linux/static_key.h>
13038 +#include <linux/locallock.h>
13039 #include <uapi/linux/netfilter/x_tables.h>
13042 @@ -289,6 +290,8 @@
13044 DECLARE_PER_CPU(seqcount_t, xt_recseq);
13046 +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
13048 /* xt_tee_enabled - true if x_tables needs to handle reentrancy
13050 * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
13051 @@ -309,6 +312,9 @@
13053 unsigned int addend;
13055 + /* RT protection */
13056 + local_lock(xt_write_lock);
13059 * Low order bit of sequence is set if we already
13060 * called xt_write_recseq_begin().
13061 @@ -339,6 +345,7 @@
13062 /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
13064 __this_cpu_add(xt_recseq.sequence, addend);
13065 + local_unlock(xt_write_lock);
13069 diff -Nur linux-4.4.46.orig/include/linux/notifier.h linux-4.4.46/include/linux/notifier.h
13070 --- linux-4.4.46.orig/include/linux/notifier.h 2017-02-01 08:31:11.000000000 +0100
13071 +++ linux-4.4.46/include/linux/notifier.h 2017-02-03 17:18:10.907618284 +0100
13074 * Alan Cox <Alan.Cox@linux.org>
13078 #ifndef _LINUX_NOTIFIER_H
13079 #define _LINUX_NOTIFIER_H
13080 #include <linux/errno.h>
13082 * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
13083 * As compensation, srcu_notifier_chain_unregister() is rather expensive.
13084 * SRCU notifier chains should be used when the chain will be called very
13085 - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
13086 - * chains are slightly more difficult to use because they require special
13087 - * runtime initialization.
13088 + * often but notifier_blocks will seldom be removed.
13091 typedef int (*notifier_fn_t)(struct notifier_block *nb,
13093 (name)->head = NULL; \
13096 -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
13097 +/* srcu_notifier_heads must be cleaned up dynamically */
13098 extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
13099 #define srcu_cleanup_notifier_head(name) \
13100 cleanup_srcu_struct(&(name)->srcu);
13101 @@ -101,7 +99,13 @@
13103 #define RAW_NOTIFIER_INIT(name) { \
13105 -/* srcu_notifier_heads cannot be initialized statically */
13107 +#define SRCU_NOTIFIER_INIT(name, pcpu) \
13109 + .mutex = __MUTEX_INITIALIZER(name.mutex), \
13111 + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
13114 #define ATOMIC_NOTIFIER_HEAD(name) \
13115 struct atomic_notifier_head name = \
13116 @@ -113,6 +117,18 @@
13117 struct raw_notifier_head name = \
13118 RAW_NOTIFIER_INIT(name)
13120 +#define _SRCU_NOTIFIER_HEAD(name, mod) \
13121 + static DEFINE_PER_CPU(struct srcu_struct_array, \
13122 + name##_head_srcu_array); \
13123 + mod struct srcu_notifier_head name = \
13124 + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
13126 +#define SRCU_NOTIFIER_HEAD(name) \
13127 + _SRCU_NOTIFIER_HEAD(name, )
13129 +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
13130 + _SRCU_NOTIFIER_HEAD(name, static)
13134 extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
13135 @@ -182,12 +198,12 @@
13138 * Declared notifiers so far. I can imagine quite a few more chains
13139 - * over time (eg laptop power reset chains, reboot chain (to clean
13140 + * over time (eg laptop power reset chains, reboot chain (to clean
13141 * device units up), device [un]mount chain, module load/unload chain,
13142 - * low memory chain, screenblank chain (for plug in modular screenblankers)
13143 + * low memory chain, screenblank chain (for plug in modular screenblankers)
13144 * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
13148 /* CPU notifiers are defined in include/linux/cpu.h. */
13150 /* netdevice notifiers are defined in include/linux/netdevice.h */
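Illustrative sketch of the static SRCU notifier head definition enabled above (the chain name is hypothetical); no srcu_init_notifier_head() call is needed at runtime any more:

SRCU_NOTIFIER_HEAD_STATIC(example_chain);

static int example_notify(unsigned long event, void *data)
{
        return srcu_notifier_call_chain(&example_chain, event, data);
}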
13151 diff -Nur linux-4.4.46.orig/include/linux/percpu.h linux-4.4.46/include/linux/percpu.h
13152 --- linux-4.4.46.orig/include/linux/percpu.h 2017-02-01 08:31:11.000000000 +0100
13153 +++ linux-4.4.46/include/linux/percpu.h 2017-02-03 17:18:10.907618284 +0100
13155 PERCPU_MODULE_RESERVE)
13158 +#ifdef CONFIG_PREEMPT_RT_FULL
13160 +#define get_local_var(var) (*({ \
13161 + migrate_disable(); \
13162 + this_cpu_ptr(&var); }))
13164 +#define put_local_var(var) do { \
13166 + migrate_enable(); \
13169 +# define get_local_ptr(var) ({ \
13170 + migrate_disable(); \
13171 + this_cpu_ptr(var); })
13173 +# define put_local_ptr(var) do { \
13175 + migrate_enable(); \
13180 +#define get_local_var(var) get_cpu_var(var)
13181 +#define put_local_var(var) put_cpu_var(var)
13182 +#define get_local_ptr(var) get_cpu_ptr(var)
13183 +#define put_local_ptr(var) put_cpu_ptr(var)
13187 /* minimum unit size, also is the maximum supported allocation size */
13188 #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
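Illustrative sketch of get_local_var()/put_local_var() with a hypothetical per-CPU counter; on RT only migration is disabled, so the section stays preemptible and real users additionally serialise against other tasks (for instance with a local lock):

static DEFINE_PER_CPU(unsigned long, example_event_count);

static void example_account_event(void)
{
        /* migrate_disable() on RT, get_cpu_var() semantics on !RT */
        unsigned long *cnt = &get_local_var(example_event_count);

        *cnt += 1;      /* serialised by the caller's local lock in real users */
        put_local_var(example_event_count);
}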
13190 diff -Nur linux-4.4.46.orig/include/linux/pid.h linux-4.4.46/include/linux/pid.h
13191 --- linux-4.4.46.orig/include/linux/pid.h 2017-02-01 08:31:11.000000000 +0100
13192 +++ linux-4.4.46/include/linux/pid.h 2017-02-03 17:18:10.907618284 +0100
13194 #define _LINUX_PID_H
13196 #include <linux/rcupdate.h>
13197 +#include <linux/atomic.h>
13201 diff -Nur linux-4.4.46.orig/include/linux/preempt.h linux-4.4.46/include/linux/preempt.h
13202 --- linux-4.4.46.orig/include/linux/preempt.h 2017-02-01 08:31:11.000000000 +0100
13203 +++ linux-4.4.46/include/linux/preempt.h 2017-02-03 17:18:10.907618284 +0100
13205 #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
13206 #define NMI_OFFSET (1UL << NMI_SHIFT)
13208 -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
13209 +#ifndef CONFIG_PREEMPT_RT_FULL
13210 +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
13212 +# define SOFTIRQ_DISABLE_OFFSET (0)
13215 /* We use the MSB mostly because its available */
13216 #define PREEMPT_NEED_RESCHED 0x80000000
13218 #include <asm/preempt.h>
13220 #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
13221 -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
13222 #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
13224 +#ifndef CONFIG_PREEMPT_RT_FULL
13225 +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
13226 +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
13228 +# define softirq_count() (0UL)
13229 +extern int in_serving_softirq(void);
13233 * Are we doing bottom half or hardware interrupt processing?
13235 #define in_irq() (hardirq_count())
13236 #define in_softirq() (softirq_count())
13237 #define in_interrupt() (irq_count())
13238 -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
13241 * Are we in NMI context?
13242 @@ -91,7 +100,11 @@
13244 * The preempt_count offset after spin_lock()
13246 +#if !defined(CONFIG_PREEMPT_RT_FULL)
13247 #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
13249 +#define PREEMPT_LOCK_OFFSET 0
13253 * The preempt_count offset needed for things like:
13254 @@ -140,6 +153,20 @@
13255 #define preempt_count_inc() preempt_count_add(1)
13256 #define preempt_count_dec() preempt_count_sub(1)
13258 +#ifdef CONFIG_PREEMPT_LAZY
13259 +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
13260 +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
13261 +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
13262 +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
13263 +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
13265 +#define add_preempt_lazy_count(val) do { } while (0)
13266 +#define sub_preempt_lazy_count(val) do { } while (0)
13267 +#define inc_preempt_lazy_count() do { } while (0)
13268 +#define dec_preempt_lazy_count() do { } while (0)
13269 +#define preempt_lazy_count() (0)
13272 #ifdef CONFIG_PREEMPT_COUNT
13274 #define preempt_disable() \
13275 @@ -148,13 +175,25 @@
13279 +#define preempt_lazy_disable() \
13281 + inc_preempt_lazy_count(); \
13285 #define sched_preempt_enable_no_resched() \
13288 preempt_count_dec(); \
13291 -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
13292 +#ifdef CONFIG_PREEMPT_RT_BASE
13293 +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
13294 +# define preempt_check_resched_rt() preempt_check_resched()
13296 +# define preempt_enable_no_resched() preempt_enable()
13297 +# define preempt_check_resched_rt() barrier();
13300 #define preemptible() (preempt_count() == 0 && !irqs_disabled())
13302 @@ -179,6 +218,13 @@
13303 __preempt_schedule(); \
13306 +#define preempt_lazy_enable() \
13308 + dec_preempt_lazy_count(); \
13310 + preempt_check_resched(); \
13313 #else /* !CONFIG_PREEMPT */
13314 #define preempt_enable() \
13316 @@ -224,6 +270,7 @@
13317 #define preempt_disable_notrace() barrier()
13318 #define preempt_enable_no_resched_notrace() barrier()
13319 #define preempt_enable_notrace() barrier()
13320 +#define preempt_check_resched_rt() barrier()
13321 #define preemptible() 0
13323 #endif /* CONFIG_PREEMPT_COUNT */
13324 @@ -244,10 +291,31 @@
13326 #define preempt_fold_need_resched() \
13328 - if (tif_need_resched()) \
13329 + if (tif_need_resched_now()) \
13330 set_preempt_need_resched(); \
13333 +#ifdef CONFIG_PREEMPT_RT_FULL
13334 +# define preempt_disable_rt() preempt_disable()
13335 +# define preempt_enable_rt() preempt_enable()
13336 +# define preempt_disable_nort() barrier()
13337 +# define preempt_enable_nort() barrier()
13338 +# ifdef CONFIG_SMP
13339 + extern void migrate_disable(void);
13340 + extern void migrate_enable(void);
13341 +# else /* CONFIG_SMP */
13342 +# define migrate_disable() barrier()
13343 +# define migrate_enable() barrier()
13344 +# endif /* CONFIG_SMP */
13346 +# define preempt_disable_rt() barrier()
13347 +# define preempt_enable_rt() barrier()
13348 +# define preempt_disable_nort() preempt_disable()
13349 +# define preempt_enable_nort() preempt_enable()
13350 +# define migrate_disable() preempt_disable()
13351 +# define migrate_enable() preempt_enable()
13354 #ifdef CONFIG_PREEMPT_NOTIFIERS
13356 struct preempt_notifier;
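Illustrative sketch of migrate_disable()/migrate_enable() (the callback is hypothetical): the task is kept on its current CPU but remains preemptible on RT; on !RT the pair falls back to preempt_disable()/preempt_enable():

static void example_run_on_this_cpu(void (*fn)(int cpu))
{
        migrate_disable();
        /* the CPU number stays valid here; sleeping is still allowed on RT */
        fn(smp_processor_id());
        migrate_enable();
}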
13357 diff -Nur linux-4.4.46.orig/include/linux/printk.h linux-4.4.46/include/linux/printk.h
13358 --- linux-4.4.46.orig/include/linux/printk.h 2017-02-01 08:31:11.000000000 +0100
13359 +++ linux-4.4.46/include/linux/printk.h 2017-02-03 17:18:10.907618284 +0100
13360 @@ -117,9 +117,11 @@
13361 #ifdef CONFIG_EARLY_PRINTK
13362 extern asmlinkage __printf(1, 2)
13363 void early_printk(const char *fmt, ...);
13364 +extern void printk_kill(void);
13366 static inline __printf(1, 2) __cold
13367 void early_printk(const char *s, ...) { }
13368 +static inline void printk_kill(void) { }
13371 typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
13372 diff -Nur linux-4.4.46.orig/include/linux/radix-tree.h linux-4.4.46/include/linux/radix-tree.h
13373 --- linux-4.4.46.orig/include/linux/radix-tree.h 2017-02-01 08:31:11.000000000 +0100
13374 +++ linux-4.4.46/include/linux/radix-tree.h 2017-02-03 17:18:10.907618284 +0100
13375 @@ -277,8 +277,13 @@
13376 unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
13377 void ***results, unsigned long *indices,
13378 unsigned long first_index, unsigned int max_items);
13379 +#ifndef CONFIG_PREEMPT_RT_FULL
13380 int radix_tree_preload(gfp_t gfp_mask);
13381 int radix_tree_maybe_preload(gfp_t gfp_mask);
13383 +static inline int radix_tree_preload(gfp_t gm) { return 0; }
13384 +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; }
13386 void radix_tree_init(void);
13387 void *radix_tree_tag_set(struct radix_tree_root *root,
13388 unsigned long index, unsigned int tag);
13389 @@ -303,7 +308,7 @@
13391 static inline void radix_tree_preload_end(void)
13393 - preempt_enable();
13394 + preempt_enable_nort();
13398 diff -Nur linux-4.4.46.orig/include/linux/random.h linux-4.4.46/include/linux/random.h
13399 --- linux-4.4.46.orig/include/linux/random.h 2017-02-01 08:31:11.000000000 +0100
13400 +++ linux-4.4.46/include/linux/random.h 2017-02-03 17:18:10.907618284 +0100
13402 extern void add_device_randomness(const void *, unsigned int);
13403 extern void add_input_randomness(unsigned int type, unsigned int code,
13404 unsigned int value);
13405 -extern void add_interrupt_randomness(int irq, int irq_flags);
13406 +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
13408 extern void get_random_bytes(void *buf, int nbytes);
13409 extern int add_random_ready_callback(struct random_ready_callback *rdy);
13410 diff -Nur linux-4.4.46.orig/include/linux/rbtree.h linux-4.4.46/include/linux/rbtree.h
13411 --- linux-4.4.46.orig/include/linux/rbtree.h 2017-02-01 08:31:11.000000000 +0100
13412 +++ linux-4.4.46/include/linux/rbtree.h 2017-02-03 17:18:10.907618284 +0100
13415 #include <linux/kernel.h>
13416 #include <linux/stddef.h>
13417 -#include <linux/rcupdate.h>
13420 unsigned long __rb_parent_color;
13425 -static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
13426 - struct rb_node **rb_link)
13428 - node->__rb_parent_color = (unsigned long)parent;
13429 - node->rb_left = node->rb_right = NULL;
13431 - rcu_assign_pointer(*rb_link, node);
13433 +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
13434 + struct rb_node **rb_link);
13436 #define rb_entry_safe(ptr, type, member) \
13437 ({ typeof(ptr) ____ptr = (ptr); \
13438 diff -Nur linux-4.4.46.orig/include/linux/rcupdate.h linux-4.4.46/include/linux/rcupdate.h
13439 --- linux-4.4.46.orig/include/linux/rcupdate.h 2017-02-01 08:31:11.000000000 +0100
13440 +++ linux-4.4.46/include/linux/rcupdate.h 2017-02-03 17:18:10.907618284 +0100
13441 @@ -169,6 +169,9 @@
13443 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
13445 +#ifdef CONFIG_PREEMPT_RT_FULL
13446 +#define call_rcu_bh call_rcu
13449 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
13450 * @head: structure to be used for queueing the RCU updates.
13451 @@ -192,6 +195,7 @@
13453 void call_rcu_bh(struct rcu_head *head,
13454 rcu_callback_t func);
13458 * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
13459 @@ -292,6 +296,11 @@
13460 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
13462 #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
13463 +#ifndef CONFIG_PREEMPT_RT_FULL
13464 +#define sched_rcu_preempt_depth() rcu_preempt_depth()
13465 +#else
13466 +static inline int sched_rcu_preempt_depth(void) { return 0; }
13467 +#endif
13469 #else /* #ifdef CONFIG_PREEMPT_RCU */
13471 @@ -317,6 +326,8 @@
13475 +#define sched_rcu_preempt_depth() rcu_preempt_depth()
13477 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
13479 /* Internal to kernel */
13480 @@ -489,7 +500,14 @@
13481 int debug_lockdep_rcu_enabled(void);
13483 int rcu_read_lock_held(void);
13484 +#ifdef CONFIG_PREEMPT_RT_FULL
13485 +static inline int rcu_read_lock_bh_held(void)
13486 +{
13487 + return rcu_read_lock_held();
13488 +}
13489 +#else
13490 int rcu_read_lock_bh_held(void);
13494 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
13495 @@ -937,10 +955,14 @@
13496 static inline void rcu_read_lock_bh(void)
13498 local_bh_disable();
13499 +#ifdef CONFIG_PREEMPT_RT_FULL
13503 rcu_lock_acquire(&rcu_bh_lock_map);
13504 RCU_LOCKDEP_WARN(!rcu_is_watching(),
13505 "rcu_read_lock_bh() used illegally while idle");
13510 @@ -950,10 +972,14 @@
13512 static inline void rcu_read_unlock_bh(void)
13514 +#ifdef CONFIG_PREEMPT_RT_FULL
13515 + rcu_read_unlock();
13517 RCU_LOCKDEP_WARN(!rcu_is_watching(),
13518 "rcu_read_unlock_bh() used illegally while idle");
13519 rcu_lock_release(&rcu_bh_lock_map);
13525 diff -Nur linux-4.4.46.orig/include/linux/rcutree.h linux-4.4.46/include/linux/rcutree.h
13526 --- linux-4.4.46.orig/include/linux/rcutree.h 2017-02-01 08:31:11.000000000 +0100
13527 +++ linux-4.4.46/include/linux/rcutree.h 2017-02-03 17:18:10.907618284 +0100
13529 rcu_note_context_switch();
13532 +#ifdef CONFIG_PREEMPT_RT_FULL
13533 +# define synchronize_rcu_bh synchronize_rcu
13534 +#else
13535 void synchronize_rcu_bh(void);
13536 +#endif
13537 void synchronize_sched_expedited(void);
13538 void synchronize_rcu_expedited(void);
13543 void rcu_barrier(void);
13544 +#ifdef CONFIG_PREEMPT_RT_FULL
13545 +# define rcu_barrier_bh rcu_barrier
13546 +#else
13547 void rcu_barrier_bh(void);
13548 +#endif
13549 void rcu_barrier_sched(void);
13550 unsigned long get_state_synchronize_rcu(void);
13551 void cond_synchronize_rcu(unsigned long oldstate);
13552 @@ -85,12 +93,10 @@
13553 unsigned long rcu_batches_started_bh(void);
13554 unsigned long rcu_batches_started_sched(void);
13555 unsigned long rcu_batches_completed(void);
13556 -unsigned long rcu_batches_completed_bh(void);
13557 unsigned long rcu_batches_completed_sched(void);
13558 void show_rcu_gp_kthreads(void);
13560 void rcu_force_quiescent_state(void);
13561 -void rcu_bh_force_quiescent_state(void);
13562 void rcu_sched_force_quiescent_state(void);
13564 void rcu_idle_enter(void);
13565 @@ -105,6 +111,14 @@
13567 bool rcu_is_watching(void);
13569 +#ifndef CONFIG_PREEMPT_RT_FULL
13570 +void rcu_bh_force_quiescent_state(void);
13571 +unsigned long rcu_batches_completed_bh(void);
13572 +#else
13573 +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
13574 +# define rcu_batches_completed_bh rcu_batches_completed
13575 +#endif
13577 void rcu_all_qs(void);
13579 #endif /* __LINUX_RCUTREE_H */
13580 diff -Nur linux-4.4.46.orig/include/linux/rtmutex.h linux-4.4.46/include/linux/rtmutex.h
13581 --- linux-4.4.46.orig/include/linux/rtmutex.h 2017-02-01 08:31:11.000000000 +0100
13582 +++ linux-4.4.46/include/linux/rtmutex.h 2017-02-03 17:18:10.911618440 +0100
13583 @@ -13,11 +13,15 @@
13584 #define __LINUX_RT_MUTEX_H
13586 #include <linux/linkage.h>
13587 +#include <linux/spinlock_types_raw.h>
13588 #include <linux/rbtree.h>
13589 -#include <linux/spinlock_types.h>
13591 extern int max_lock_depth; /* for sysctl */
13593 +#ifdef CONFIG_DEBUG_MUTEXES
13594 +#include <linux/debug_locks.h>
13598 * The rt_mutex structure
13601 struct rb_root waiters;
13602 struct rb_node *waiters_leftmost;
13603 struct task_struct *owner;
13604 -#ifdef CONFIG_DEBUG_RT_MUTEXES
13605 + int save_state;
13606 +#ifdef CONFIG_DEBUG_RT_MUTEXES
13607 const char *name, *file;
13610 @@ -55,22 +59,33 @@
13611 # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
13614 +# define rt_mutex_init(mutex) \
13616 + raw_spin_lock_init(&(mutex)->wait_lock); \
13617 + __rt_mutex_init(mutex, #mutex); \
13620 #ifdef CONFIG_DEBUG_RT_MUTEXES
13621 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
13622 , .name = #mutexname, .file = __FILE__, .line = __LINE__
13623 -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
13624 extern void rt_mutex_debug_task_free(struct task_struct *tsk);
13626 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
13627 -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
13628 # define rt_mutex_debug_task_free(t) do { } while (0)
13631 -#define __RT_MUTEX_INITIALIZER(mutexname) \
13632 - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
13633 +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
13634 + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
13635 , .waiters = RB_ROOT \
13637 - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
13638 + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
13640 +#define __RT_MUTEX_INITIALIZER(mutexname) \
13641 + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
13643 +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
13644 + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
13645 + , .save_state = 1 }
13647 #define DEFINE_RT_MUTEX(mutexname) \
13648 struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
13651 extern void rt_mutex_lock(struct rt_mutex *lock);
13652 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
13653 +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
13654 extern int rt_mutex_timed_lock(struct rt_mutex *lock,
13655 struct hrtimer_sleeper *timeout);
13657 diff -Nur linux-4.4.46.orig/include/linux/rwlock_rt.h linux-4.4.46/include/linux/rwlock_rt.h
13658 --- linux-4.4.46.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100
13659 +++ linux-4.4.46/include/linux/rwlock_rt.h 2017-02-03 17:18:10.911618440 +0100
13661 +#ifndef __LINUX_RWLOCK_RT_H
13662 +#define __LINUX_RWLOCK_RT_H
13664 +#ifndef __LINUX_SPINLOCK_H
13665 +#error Do not include directly. Use spinlock.h
13668 +#define rwlock_init(rwl) \
13670 + static struct lock_class_key __key; \
13672 + rt_mutex_init(&(rwl)->lock); \
13673 + __rt_rwlock_init(rwl, #rwl, &__key); \
13676 +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
13677 +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
13678 +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
13679 +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
13680 +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
13681 +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
13682 +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
13683 +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
13684 +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
13685 +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
13687 +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
13688 +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
13690 +#define write_trylock_irqsave(lock, flags) \
13691 + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
13693 +#define read_lock_irqsave(lock, flags) \
13695 + typecheck(unsigned long, flags); \
13696 + flags = rt_read_lock_irqsave(lock); \
13699 +#define write_lock_irqsave(lock, flags) \
13701 + typecheck(unsigned long, flags); \
13702 + flags = rt_write_lock_irqsave(lock); \
13705 +#define read_lock(lock) rt_read_lock(lock)
13707 +#define read_lock_bh(lock) \
13709 + local_bh_disable(); \
13710 + rt_read_lock(lock); \
13713 +#define read_lock_irq(lock) read_lock(lock)
13715 +#define write_lock(lock) rt_write_lock(lock)
13717 +#define write_lock_bh(lock) \
13719 + local_bh_disable(); \
13720 + rt_write_lock(lock); \
13723 +#define write_lock_irq(lock) write_lock(lock)
13725 +#define read_unlock(lock) rt_read_unlock(lock)
13727 +#define read_unlock_bh(lock) \
13729 + rt_read_unlock(lock); \
13730 + local_bh_enable(); \
13733 +#define read_unlock_irq(lock) read_unlock(lock)
13735 +#define write_unlock(lock) rt_write_unlock(lock)
13737 +#define write_unlock_bh(lock) \
13739 + rt_write_unlock(lock); \
13740 + local_bh_enable(); \
13743 +#define write_unlock_irq(lock) write_unlock(lock)
13745 +#define read_unlock_irqrestore(lock, flags) \
13747 + typecheck(unsigned long, flags); \
13749 + rt_read_unlock(lock); \
13752 +#define write_unlock_irqrestore(lock, flags) \
13754 + typecheck(unsigned long, flags); \
13756 + rt_write_unlock(lock); \
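As an illustration of how these mappings are consumed (not part of the patch; the struct and function names below are invented): existing rwlock_t users compile unchanged, but under PREEMPT_RT_FULL the lock is an rtmutex, so the _irqsave variants no longer disable hardware interrupts and the lock may sleep.

    #include <linux/spinlock.h>	/* brings in rwlock_rt.h on RT, rwlock.h otherwise */

    struct demo_counters {
    	rwlock_t	lock;	/* initialised with rwlock_init() at setup time */
    	unsigned long	hits;
    };

    static unsigned long demo_read_hits(struct demo_counters *c)
    {
    	unsigned long flags, v;

    	read_lock_irqsave(&c->lock, flags);	/* rt_read_lock(); IRQs stay enabled on RT */
    	v = c->hits;
    	read_unlock_irqrestore(&c->lock, flags);
    	return v;
    }

    static void demo_add_hit(struct demo_counters *c)
    {
    	write_lock(&c->lock);			/* rt_write_lock(): may sleep on RT */
    	c->hits++;
    	write_unlock(&c->lock);
    }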
13760 diff -Nur linux-4.4.46.orig/include/linux/rwlock_types.h linux-4.4.46/include/linux/rwlock_types.h
13761 --- linux-4.4.46.orig/include/linux/rwlock_types.h 2017-02-01 08:31:11.000000000 +0100
13762 +++ linux-4.4.46/include/linux/rwlock_types.h 2017-02-03 17:18:10.911618440 +0100
13764 #ifndef __LINUX_RWLOCK_TYPES_H
13765 #define __LINUX_RWLOCK_TYPES_H
13767 +#if !defined(__LINUX_SPINLOCK_TYPES_H)
13768 +# error "Do not include directly, include spinlock_types.h"
13772 * include/linux/rwlock_types.h - generic rwlock type definitions
13775 RW_DEP_MAP_INIT(lockname) }
13778 -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
13779 +#define DEFINE_RWLOCK(name) \
13780 + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
13782 #endif /* __LINUX_RWLOCK_TYPES_H */
13783 diff -Nur linux-4.4.46.orig/include/linux/rwlock_types_rt.h linux-4.4.46/include/linux/rwlock_types_rt.h
13784 --- linux-4.4.46.orig/include/linux/rwlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
13785 +++ linux-4.4.46/include/linux/rwlock_types_rt.h 2017-02-03 17:18:10.911618440 +0100
13787 +#ifndef __LINUX_RWLOCK_TYPES_RT_H
13788 +#define __LINUX_RWLOCK_TYPES_RT_H
13790 +#ifndef __LINUX_SPINLOCK_TYPES_H
13791 +#error "Do not include directly. Include spinlock_types.h instead"
13795 + * rwlocks - rtmutex which allows single reader recursion
13798 + struct rt_mutex lock;
13800 + unsigned int break_lock;
13801 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13802 + struct lockdep_map dep_map;
13806 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13807 +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
13809 +# define RW_DEP_MAP_INIT(lockname)
13812 +#define __RW_LOCK_UNLOCKED(name) \
13813 + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
13814 + RW_DEP_MAP_INIT(name) }
13816 +#define DEFINE_RWLOCK(name) \
13817 + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
13820 diff -Nur linux-4.4.46.orig/include/linux/rwsem.h linux-4.4.46/include/linux/rwsem.h
13821 --- linux-4.4.46.orig/include/linux/rwsem.h 2017-02-01 08:31:11.000000000 +0100
13822 +++ linux-4.4.46/include/linux/rwsem.h 2017-02-03 17:18:10.911618440 +0100
13824 #include <linux/osq_lock.h>
13827 +#ifdef CONFIG_PREEMPT_RT_FULL
13828 +#include <linux/rwsem_rt.h>
13829 +#else /* PREEMPT_RT_FULL */
13831 struct rw_semaphore;
13833 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
13834 @@ -177,4 +181,6 @@
13835 # define up_read_non_owner(sem) up_read(sem)
13838 +#endif /* !PREEMPT_RT_FULL */
13840 #endif /* _LINUX_RWSEM_H */
13841 diff -Nur linux-4.4.46.orig/include/linux/rwsem_rt.h linux-4.4.46/include/linux/rwsem_rt.h
13842 --- linux-4.4.46.orig/include/linux/rwsem_rt.h 1970-01-01 01:00:00.000000000 +0100
13843 +++ linux-4.4.46/include/linux/rwsem_rt.h 2017-02-03 17:18:10.911618440 +0100
13845 +#ifndef _LINUX_RWSEM_RT_H
13846 +#define _LINUX_RWSEM_RT_H
13848 +#ifndef _LINUX_RWSEM_H
13849 +#error "Include rwsem.h"
13853 + * RW-semaphores are a spinlock plus a reader-depth count.
13855 + * Note that the semantics are different from the usual
13856 + * Linux rw-sems, in PREEMPT_RT mode we do not allow
13857 + * multiple readers to hold the lock at once, we only allow
13858 + * a read-lock owner to read-lock recursively. This is
13859 + * better for latency, makes the implementation inherently
13860 + * fair and makes it simpler as well.
13863 +#include <linux/rtmutex.h>
13865 +struct rw_semaphore {
13866 + struct rt_mutex lock;
13868 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13869 + struct lockdep_map dep_map;
13873 +#define __RWSEM_INITIALIZER(name) \
13874 + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
13875 + RW_DEP_MAP_INIT(name) }
13877 +#define DECLARE_RWSEM(lockname) \
13878 + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
13880 +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
13881 + struct lock_class_key *key);
13883 +#define __rt_init_rwsem(sem, name, key) \
13885 + rt_mutex_init(&(sem)->lock); \
13886 + __rt_rwsem_init((sem), (name), (key));\
13889 +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
13891 +# define rt_init_rwsem(sem) \
13893 + static struct lock_class_key __key; \
13895 + __rt_init_rwsem((sem), #sem, &__key); \
13898 +extern void rt_down_write(struct rw_semaphore *rwsem);
13899 +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
13900 +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
13901 +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
13902 + struct lockdep_map *nest);
13903 +extern void rt__down_read(struct rw_semaphore *rwsem);
13904 +extern void rt_down_read(struct rw_semaphore *rwsem);
13905 +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
13906 +extern int rt__down_read_trylock(struct rw_semaphore *rwsem);
13907 +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
13908 +extern void __rt_up_read(struct rw_semaphore *rwsem);
13909 +extern void rt_up_read(struct rw_semaphore *rwsem);
13910 +extern void rt_up_write(struct rw_semaphore *rwsem);
13911 +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
13913 +#define init_rwsem(sem) rt_init_rwsem(sem)
13914 +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
13916 +static inline int rwsem_is_contended(struct rw_semaphore *sem)
13918 + /* rt_mutex_has_waiters() */
13919 + return !RB_EMPTY_ROOT(&sem->lock.waiters);
13922 +static inline void __down_read(struct rw_semaphore *sem)
13924 + rt__down_read(sem);
13927 +static inline void down_read(struct rw_semaphore *sem)
13929 + rt_down_read(sem);
13932 +static inline int __down_read_trylock(struct rw_semaphore *sem)
13934 + return rt__down_read_trylock(sem);
13937 +static inline int down_read_trylock(struct rw_semaphore *sem)
13939 + return rt_down_read_trylock(sem);
13942 +static inline void down_write(struct rw_semaphore *sem)
13944 + rt_down_write(sem);
13947 +static inline int down_write_trylock(struct rw_semaphore *sem)
13949 + return rt_down_write_trylock(sem);
13952 +static inline void __up_read(struct rw_semaphore *sem)
13954 + __rt_up_read(sem);
13957 +static inline void up_read(struct rw_semaphore *sem)
13958 +{
13959 + rt_up_read(sem);
13960 +}
13961 +
13962 +static inline void up_write(struct rw_semaphore *sem)
13964 + rt_up_write(sem);
13967 +static inline void downgrade_write(struct rw_semaphore *sem)
13969 + rt_downgrade_write(sem);
13972 +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
13974 + return rt_down_read_nested(sem, subclass);
13977 +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
13979 + rt_down_write_nested(sem, subclass);
13981 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13982 +static inline void down_write_nest_lock(struct rw_semaphore *sem,
13983 + struct rw_semaphore *nest_lock)
13985 + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
13990 +static inline void down_write_nest_lock(struct rw_semaphore *sem,
13991 + struct rw_semaphore *nest_lock)
13993 + rt_down_write_nested_lock(sem, NULL);
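A minimal sketch (not from the patch; names invented) of the semantic difference described in the comment above: only the owning reader may recurse, and concurrent readers serialize on PREEMPT_RT_FULL.

    #include <linux/rwsem.h>

    static DECLARE_RWSEM(demo_sem);

    static void demo_recursive_reader(void)
    {
    	down_read(&demo_sem);
    	down_read(&demo_sem);	/* read-lock recursion by the owner: allowed */
    	up_read(&demo_sem);
    	up_read(&demo_sem);
    }

    static void demo_other_reader(void)
    {
    	/* On RT this blocks while demo_recursive_reader() holds the lock;
    	 * on mainline both readers could hold it concurrently. */
    	down_read(&demo_sem);
    	up_read(&demo_sem);
    }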
13997 diff -Nur linux-4.4.46.orig/include/linux/sched.h linux-4.4.46/include/linux/sched.h
13998 --- linux-4.4.46.orig/include/linux/sched.h 2017-02-01 08:31:11.000000000 +0100
13999 +++ linux-4.4.46/include/linux/sched.h 2017-02-03 17:18:10.911618440 +0100
14001 #include <linux/nodemask.h>
14002 #include <linux/mm_types.h>
14003 #include <linux/preempt.h>
14004 +#include <asm/kmap_types.h>
14006 #include <asm/page.h>
14007 #include <asm/ptrace.h>
14008 @@ -182,8 +183,6 @@
14009 static inline void update_cpu_load_nohz(void) { }
14012 -extern unsigned long get_parent_ip(unsigned long addr);
14014 extern void dump_cpu_task(int cpu);
14017 @@ -242,10 +241,7 @@
14018 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
14019 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
14021 -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
14022 #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
14023 -#define task_is_stopped_or_traced(task) \
14024 - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
14025 #define task_contributes_to_load(task) \
14026 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
14027 (task->flags & PF_FROZEN) == 0 && \
14028 @@ -311,6 +307,11 @@
14032 +#define __set_current_state_no_track(state_value) \
14033 + do { current->state = (state_value); } while (0)
14034 +#define set_current_state_no_track(state_value) \
14035 + set_mb(current->state, (state_value))
14037 /* Task command name length */
14038 #define TASK_COMM_LEN 16
14040 @@ -970,8 +971,18 @@
14041 struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
14043 extern void wake_q_add(struct wake_q_head *head,
14044 - struct task_struct *task);
14045 -extern void wake_up_q(struct wake_q_head *head);
14046 + struct task_struct *task);
14047 +extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
14049 +static inline void wake_up_q(struct wake_q_head *head)
14051 + __wake_up_q(head, false);
14054 +static inline void wake_up_q_sleeper(struct wake_q_head *head)
14056 + __wake_up_q(head, true);
14060 * sched-domains (multiprocessor balancing) declarations:
14061 @@ -1379,6 +1390,7 @@
14063 struct task_struct {
14064 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
14065 + volatile long saved_state; /* saved state for "spinlock sleepers" */
14068 unsigned int flags; /* per process flags, defined below */
14069 @@ -1415,6 +1427,12 @@
14072 unsigned int policy;
14073 +#ifdef CONFIG_PREEMPT_RT_FULL
14074 + int migrate_disable;
14075 +# ifdef CONFIG_SCHED_DEBUG
14076 + int migrate_disable_atomic;
14079 int nr_cpus_allowed;
14080 cpumask_t cpus_allowed;
14082 @@ -1522,11 +1540,14 @@
14084 struct prev_cputime prev_cputime;
14085 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
14086 - seqlock_t vtime_seqlock;
14087 + seqcount_t vtime_seqcount;
14088 unsigned long long vtime_snap;
14090 - VTIME_SLEEPING = 0,
14091 + /* Task is sleeping or running in a CPU with VTIME inactive */
14092 + VTIME_INACTIVE = 0,
14093 + /* Task runs in userspace in a CPU with VTIME active */
14095 + /* Task runs in kernelspace in a CPU with VTIME active */
14097 } vtime_snap_whence;
14099 @@ -1538,6 +1559,9 @@
14101 struct task_cputime cputime_expires;
14102 struct list_head cpu_timers[3];
14103 +#ifdef CONFIG_PREEMPT_RT_BASE
14104 + struct task_struct *posix_timer_list;
14107 /* process credentials */
14108 const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
14109 @@ -1569,10 +1593,15 @@
14110 /* signal handlers */
14111 struct signal_struct *signal;
14112 struct sighand_struct *sighand;
14113 + struct sigqueue *sigqueue_cache;
14115 sigset_t blocked, real_blocked;
14116 sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
14117 struct sigpending pending;
14118 +#ifdef CONFIG_PREEMPT_RT_FULL
14119 + /* TODO: move me into ->restart_block ? */
14120 + struct siginfo forced_info;
14123 unsigned long sas_ss_sp;
14124 size_t sas_ss_size;
14125 @@ -1796,6 +1825,12 @@
14126 unsigned long trace;
14127 /* bitmask and counter of trace recursion */
14128 unsigned long trace_recursion;
14129 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
14130 + u64 preempt_timestamp_hist;
14131 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
14132 + long timer_offset;
14135 #endif /* CONFIG_TRACING */
14136 #ifdef CONFIG_MEMCG
14137 struct mem_cgroup *memcg_in_oom;
14138 @@ -1812,9 +1847,23 @@
14139 unsigned int sequential_io;
14140 unsigned int sequential_io_avg;
14142 +#ifdef CONFIG_PREEMPT_RT_BASE
14143 + struct rcu_head put_rcu;
14144 + int softirq_nestcnt;
14145 + unsigned int softirqs_raised;
14147 +#ifdef CONFIG_PREEMPT_RT_FULL
14148 +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
14150 + pte_t kmap_pte[KM_TYPE_NR];
14153 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
14154 unsigned long task_state_change;
14156 +#ifdef CONFIG_PREEMPT_RT_FULL
14157 + int xmit_recursion;
14159 int pagefault_disabled;
14160 /* CPU-specific state of this task */
14161 struct thread_struct thread;
14162 @@ -1832,9 +1881,6 @@
14163 # define arch_task_struct_size (sizeof(struct task_struct))
14166 -/* Future-safe accessor for struct task_struct's cpus_allowed. */
14167 -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
14169 #define TNF_MIGRATED 0x01
14170 #define TNF_NO_GROUP 0x02
14171 #define TNF_SHARED 0x04
14172 @@ -2024,6 +2070,15 @@
14173 extern void free_task(struct task_struct *tsk);
14174 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
14176 +#ifdef CONFIG_PREEMPT_RT_BASE
14177 +extern void __put_task_struct_cb(struct rcu_head *rhp);
14179 +static inline void put_task_struct(struct task_struct *t)
14181 + if (atomic_dec_and_test(&t->usage))
14182 + call_rcu(&t->put_rcu, __put_task_struct_cb);
14185 extern void __put_task_struct(struct task_struct *t);
14187 static inline void put_task_struct(struct task_struct *t)
14188 @@ -2031,6 +2086,7 @@
14189 if (atomic_dec_and_test(&t->usage))
14190 __put_task_struct(t);
14194 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
14195 extern void task_cputime(struct task_struct *t,
14196 @@ -2069,6 +2125,7 @@
14198 * Per process flags
14200 +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
14201 #define PF_EXITING 0x00000004 /* getting shut down */
14202 #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
14203 #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
14204 @@ -2233,6 +2290,10 @@
14206 extern int set_cpus_allowed_ptr(struct task_struct *p,
14207 const struct cpumask *new_mask);
14208 +int migrate_me(void);
14209 +void tell_sched_cpu_down_begin(int cpu);
14210 +void tell_sched_cpu_down_done(int cpu);
14213 static inline void do_set_cpus_allowed(struct task_struct *p,
14214 const struct cpumask *new_mask)
14215 @@ -2245,6 +2306,9 @@
14219 +static inline int migrate_me(void) { return 0; }
14220 +static inline void tell_sched_cpu_down_begin(int cpu) { }
14221 +static inline void tell_sched_cpu_down_done(int cpu) { }
14224 #ifdef CONFIG_NO_HZ_COMMON
14225 @@ -2454,6 +2518,7 @@
14227 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
14228 extern int wake_up_process(struct task_struct *tsk);
14229 +extern int wake_up_lock_sleeper(struct task_struct * tsk);
14230 extern void wake_up_new_task(struct task_struct *tsk);
14232 extern void kick_process(struct task_struct *tsk);
14233 @@ -2577,12 +2642,24 @@
14235 /* mmdrop drops the mm and the page tables */
14236 extern void __mmdrop(struct mm_struct *);
14238 static inline void mmdrop(struct mm_struct * mm)
14240 if (unlikely(atomic_dec_and_test(&mm->mm_count)))
14244 +#ifdef CONFIG_PREEMPT_RT_BASE
14245 +extern void __mmdrop_delayed(struct rcu_head *rhp);
14246 +static inline void mmdrop_delayed(struct mm_struct *mm)
14248 + if (atomic_dec_and_test(&mm->mm_count))
14249 + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
14252 +# define mmdrop_delayed(mm) mmdrop(mm)
14255 /* mmput gets rid of the mappings and all user-space */
14256 extern void mmput(struct mm_struct *);
14257 /* Grab a reference to a task's mm, if it is not already going away */
14258 @@ -2892,6 +2969,43 @@
14259 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
14262 +#ifdef CONFIG_PREEMPT_LAZY
14263 +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
14265 + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
14268 +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
14270 + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
14273 +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
14275 + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
14278 +static inline int need_resched_lazy(void)
14280 + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
14283 +static inline int need_resched_now(void)
14285 + return test_thread_flag(TIF_NEED_RESCHED);
14289 +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
14290 +static inline int need_resched_lazy(void) { return 0; }
14292 +static inline int need_resched_now(void)
14294 + return test_thread_flag(TIF_NEED_RESCHED);
14299 static inline int restart_syscall(void)
14301 set_tsk_thread_flag(current, TIF_SIGPENDING);
14302 @@ -2923,6 +3037,51 @@
14303 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
14306 +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
14308 + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
14310 +#ifdef CONFIG_PREEMPT_RT_FULL
14311 + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
14317 +static inline bool task_is_stopped_or_traced(struct task_struct *task)
14319 + bool traced_stopped;
14321 +#ifdef CONFIG_PREEMPT_RT_FULL
14322 + unsigned long flags;
14324 + raw_spin_lock_irqsave(&task->pi_lock, flags);
14325 + traced_stopped = __task_is_stopped_or_traced(task);
14326 + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
14328 + traced_stopped = __task_is_stopped_or_traced(task);
14330 + return traced_stopped;
14333 +static inline bool task_is_traced(struct task_struct *task)
14335 + bool traced = false;
14337 + if (task->state & __TASK_TRACED)
14339 +#ifdef CONFIG_PREEMPT_RT_FULL
14340 + /* in case the task is sleeping on tasklist_lock */
14341 + raw_spin_lock_irq(&task->pi_lock);
14342 + if (task->state & __TASK_TRACED)
14344 + else if (task->saved_state & __TASK_TRACED)
14346 + raw_spin_unlock_irq(&task->pi_lock);
14352 * cond_resched() and cond_resched_lock(): latency reduction via
14353 * explicit rescheduling in places that are safe. The return
14354 @@ -2944,12 +3103,16 @@
14355 __cond_resched_lock(lock); \
14358 +#ifndef CONFIG_PREEMPT_RT_FULL
14359 extern int __cond_resched_softirq(void);
14361 #define cond_resched_softirq() ({ \
14362 ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
14363 __cond_resched_softirq(); \
14366 +# define cond_resched_softirq() cond_resched()
14369 static inline void cond_resched_rcu(void)
14371 @@ -3111,6 +3274,31 @@
14373 #endif /* CONFIG_SMP */
14375 +static inline int __migrate_disabled(struct task_struct *p)
14377 +#ifdef CONFIG_PREEMPT_RT_FULL
14378 + return p->migrate_disable;
14384 +/* Future-safe accessor for struct task_struct's cpus_allowed. */
14385 +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
14387 + if (__migrate_disabled(p))
14388 + return cpumask_of(task_cpu(p));
14390 + return &p->cpus_allowed;
14393 +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
14395 + if (__migrate_disabled(p))
14397 + return p->nr_cpus_allowed;
14400 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
14401 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
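To illustrate why callers must now go through the accessor (a hypothetical helper, not part of the patch): tsk_cpus_allowed() folds an active migrate_disable() into the visible affinity mask, whereas a direct read of p->cpus_allowed would not.

    /* Hypothetical placement helper; honours an active migrate_disable(). */
    static int demo_pick_cpu(struct task_struct *p)
    {
    	/* With p->migrate_disable set this collapses to task_cpu(p). */
    	return cpumask_any(tsk_cpus_allowed(p));
    }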
14403 diff -Nur linux-4.4.46.orig/include/linux/seqlock.h linux-4.4.46/include/linux/seqlock.h
14404 --- linux-4.4.46.orig/include/linux/seqlock.h 2017-02-01 08:31:11.000000000 +0100
14405 +++ linux-4.4.46/include/linux/seqlock.h 2017-02-03 17:18:10.911618440 +0100
14406 @@ -220,20 +220,30 @@
14407 return __read_seqcount_retry(s, start);
14412 -static inline void raw_write_seqcount_begin(seqcount_t *s)
14413 +static inline void __raw_write_seqcount_begin(seqcount_t *s)
14419 -static inline void raw_write_seqcount_end(seqcount_t *s)
14420 +static inline void raw_write_seqcount_begin(seqcount_t *s)
14422 + preempt_disable_rt();
14423 + __raw_write_seqcount_begin(s);
14426 +static inline void __raw_write_seqcount_end(seqcount_t *s)
14432 +static inline void raw_write_seqcount_end(seqcount_t *s)
14434 + __raw_write_seqcount_end(s);
14435 + preempt_enable_rt();
14439 * raw_write_seqcount_barrier - do a seq write barrier
14440 * @s: pointer to seqcount_t
14441 @@ -425,10 +435,32 @@
14443 * Read side functions for starting and finalizing a read side section.
14445 +#ifndef CONFIG_PREEMPT_RT_FULL
14446 static inline unsigned read_seqbegin(const seqlock_t *sl)
14448 return read_seqcount_begin(&sl->seqcount);
14452 + * Starvation safe read side for RT
14454 +static inline unsigned read_seqbegin(seqlock_t *sl)
14459 + ret = ACCESS_ONCE(sl->seqcount.sequence);
14460 + if (unlikely(ret & 1)) {
14462 + * Take the lock and let the writer proceed (i.e. possibly
14463 + * boost it), otherwise we could loop here forever.
14465 + spin_unlock_wait(&sl->lock);
14472 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
14474 @@ -443,36 +475,36 @@
14475 static inline void write_seqlock(seqlock_t *sl)
14477 spin_lock(&sl->lock);
14478 - write_seqcount_begin(&sl->seqcount);
14479 + __raw_write_seqcount_begin(&sl->seqcount);
14482 static inline void write_sequnlock(seqlock_t *sl)
14484 - write_seqcount_end(&sl->seqcount);
14485 + __raw_write_seqcount_end(&sl->seqcount);
14486 spin_unlock(&sl->lock);
14489 static inline void write_seqlock_bh(seqlock_t *sl)
14491 spin_lock_bh(&sl->lock);
14492 - write_seqcount_begin(&sl->seqcount);
14493 + __raw_write_seqcount_begin(&sl->seqcount);
14496 static inline void write_sequnlock_bh(seqlock_t *sl)
14498 - write_seqcount_end(&sl->seqcount);
14499 + __raw_write_seqcount_end(&sl->seqcount);
14500 spin_unlock_bh(&sl->lock);
14503 static inline void write_seqlock_irq(seqlock_t *sl)
14505 spin_lock_irq(&sl->lock);
14506 - write_seqcount_begin(&sl->seqcount);
14507 + __raw_write_seqcount_begin(&sl->seqcount);
14510 static inline void write_sequnlock_irq(seqlock_t *sl)
14512 - write_seqcount_end(&sl->seqcount);
14513 + __raw_write_seqcount_end(&sl->seqcount);
14514 spin_unlock_irq(&sl->lock);
14517 @@ -481,7 +513,7 @@
14518 unsigned long flags;
14520 spin_lock_irqsave(&sl->lock, flags);
14521 - write_seqcount_begin(&sl->seqcount);
14522 + __raw_write_seqcount_begin(&sl->seqcount);
14526 @@ -491,7 +523,7 @@
14528 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
14530 - write_seqcount_end(&sl->seqcount);
14531 + __raw_write_seqcount_end(&sl->seqcount);
14532 spin_unlock_irqrestore(&sl->lock, flags);
14535 diff -Nur linux-4.4.46.orig/include/linux/signal.h linux-4.4.46/include/linux/signal.h
14536 --- linux-4.4.46.orig/include/linux/signal.h 2017-02-01 08:31:11.000000000 +0100
14537 +++ linux-4.4.46/include/linux/signal.h 2017-02-03 17:18:10.911618440 +0100
14538 @@ -233,6 +233,7 @@
14541 extern void flush_sigqueue(struct sigpending *queue);
14542 +extern void flush_task_sigqueue(struct task_struct *tsk);
14544 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
14545 static inline int valid_signal(unsigned long sig)
14546 diff -Nur linux-4.4.46.orig/include/linux/skbuff.h linux-4.4.46/include/linux/skbuff.h
14547 --- linux-4.4.46.orig/include/linux/skbuff.h 2017-02-01 08:31:11.000000000 +0100
14548 +++ linux-4.4.46/include/linux/skbuff.h 2017-02-03 17:18:10.911618440 +0100
14549 @@ -203,6 +203,7 @@
14553 + raw_spinlock_t raw_lock;
14557 @@ -1465,6 +1466,12 @@
14558 __skb_queue_head_init(list);
14561 +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
14563 + raw_spin_lock_init(&list->raw_lock);
14564 + __skb_queue_head_init(list);
14567 static inline void skb_queue_head_init_class(struct sk_buff_head *list,
14568 struct lock_class_key *class)
14570 diff -Nur linux-4.4.46.orig/include/linux/smp.h linux-4.4.46/include/linux/smp.h
14571 --- linux-4.4.46.orig/include/linux/smp.h 2017-02-01 08:31:11.000000000 +0100
14572 +++ linux-4.4.46/include/linux/smp.h 2017-02-03 17:18:10.911618440 +0100
14573 @@ -185,6 +185,9 @@
14574 #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
14575 #define put_cpu() preempt_enable()
14577 +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
14578 +#define put_cpu_light() migrate_enable()
14581 * Callback to arch code if there's nosmp or maxcpus=0 on the
14582 * boot command line:
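An illustrative caller (hypothetical code, not part of the patch): get_cpu_light() gives a stable CPU number via migrate_disable() without disabling preemption, so sleeping locks, which is what spinlock_t becomes on RT, may still be taken inside the section.

    #include <linux/smp.h>
    #include <linux/percpu.h>
    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct demo_item {
    	struct list_head node;
    };

    /* Per-CPU queue; initialisation at boot is omitted in this sketch. */
    static DEFINE_PER_CPU(spinlock_t, demo_lock);
    static DEFINE_PER_CPU(struct list_head, demo_queue);

    static void demo_queue_on_this_cpu(struct demo_item *item)
    {
    	int cpu = get_cpu_light();	/* migrate_disable(): CPU stays stable */

    	/* spinlock_t may sleep on RT, so plain get_cpu() would be illegal here. */
    	spin_lock(&per_cpu(demo_lock, cpu));
    	list_add_tail(&item->node, &per_cpu(demo_queue, cpu));
    	spin_unlock(&per_cpu(demo_lock, cpu));

    	put_cpu_light();
    }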
14583 diff -Nur linux-4.4.46.orig/include/linux/spinlock_api_smp.h linux-4.4.46/include/linux/spinlock_api_smp.h
14584 --- linux-4.4.46.orig/include/linux/spinlock_api_smp.h 2017-02-01 08:31:11.000000000 +0100
14585 +++ linux-4.4.46/include/linux/spinlock_api_smp.h 2017-02-03 17:18:10.911618440 +0100
14586 @@ -189,6 +189,8 @@
14590 -#include <linux/rwlock_api_smp.h>
14591 +#ifndef CONFIG_PREEMPT_RT_FULL
14592 +# include <linux/rwlock_api_smp.h>
14595 #endif /* __LINUX_SPINLOCK_API_SMP_H */
14596 diff -Nur linux-4.4.46.orig/include/linux/spinlock.h linux-4.4.46/include/linux/spinlock.h
14597 --- linux-4.4.46.orig/include/linux/spinlock.h 2017-02-01 08:31:11.000000000 +0100
14598 +++ linux-4.4.46/include/linux/spinlock.h 2017-02-03 17:18:10.911618440 +0100
14599 @@ -271,7 +271,11 @@
14600 #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
14602 /* Include rwlock functions */
14603 -#include <linux/rwlock.h>
14604 +#ifdef CONFIG_PREEMPT_RT_FULL
14605 +# include <linux/rwlock_rt.h>
14607 +# include <linux/rwlock.h>
14611 * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
14612 @@ -282,6 +286,10 @@
14613 # include <linux/spinlock_api_up.h>
14616 +#ifdef CONFIG_PREEMPT_RT_FULL
14617 +# include <linux/spinlock_rt.h>
14618 +#else /* PREEMPT_RT_FULL */
14621 * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
14623 @@ -347,6 +355,12 @@
14624 raw_spin_unlock(&lock->rlock);
14627 +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock)
14629 + raw_spin_unlock(&lock->rlock);
14633 static __always_inline void spin_unlock_bh(spinlock_t *lock)
14635 raw_spin_unlock_bh(&lock->rlock);
14636 @@ -416,4 +430,6 @@
14637 #define atomic_dec_and_lock(atomic, lock) \
14638 __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
14640 +#endif /* !PREEMPT_RT_FULL */
14642 #endif /* __LINUX_SPINLOCK_H */
14643 diff -Nur linux-4.4.46.orig/include/linux/spinlock_rt.h linux-4.4.46/include/linux/spinlock_rt.h
14644 --- linux-4.4.46.orig/include/linux/spinlock_rt.h 1970-01-01 01:00:00.000000000 +0100
14645 +++ linux-4.4.46/include/linux/spinlock_rt.h 2017-02-03 17:18:10.911618440 +0100
14647 +#ifndef __LINUX_SPINLOCK_RT_H
14648 +#define __LINUX_SPINLOCK_RT_H
14650 +#ifndef __LINUX_SPINLOCK_H
14651 +#error Do not include directly. Use spinlock.h
14654 +#include <linux/bug.h>
14657 +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
14659 +#define spin_lock_init(slock) \
14661 + static struct lock_class_key __key; \
14663 + rt_mutex_init(&(slock)->lock); \
14664 + __rt_spin_lock_init(slock, #slock, &__key); \
14667 +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
14668 +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
14669 +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
14671 +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
14672 +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
14673 +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
14674 +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
14675 +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock);
14676 +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
14677 +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
14678 +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
14679 +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
14680 +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
14683 + * lockdep-less calls, for derived types like rwlock:
14684 + * (for trylock they can use rt_mutex_trylock() directly.
14686 +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
14687 +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
14688 +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
14689 +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
14691 +#define spin_lock(lock) rt_spin_lock(lock)
14693 +#define spin_lock_bh(lock) \
14695 + local_bh_disable(); \
14696 + rt_spin_lock(lock); \
14699 +#define spin_lock_irq(lock) spin_lock(lock)
14701 +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
14703 +#define spin_trylock(lock) \
14706 + __locked = spin_do_trylock(lock); \
14710 +#ifdef CONFIG_LOCKDEP
14711 +# define spin_lock_nested(lock, subclass) \
14713 + rt_spin_lock_nested(lock, subclass); \
14716 +#define spin_lock_bh_nested(lock, subclass) \
14718 + local_bh_disable(); \
14719 + rt_spin_lock_nested(lock, subclass); \
14722 +# define spin_lock_irqsave_nested(lock, flags, subclass) \
14724 + typecheck(unsigned long, flags); \
14726 + rt_spin_lock_nested(lock, subclass); \
14729 +# define spin_lock_nested(lock, subclass) spin_lock(lock)
14730 +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
14732 +# define spin_lock_irqsave_nested(lock, flags, subclass) \
14734 + typecheck(unsigned long, flags); \
14736 + spin_lock(lock); \
14740 +#define spin_lock_irqsave(lock, flags) \
14742 + typecheck(unsigned long, flags); \
14744 + spin_lock(lock); \
14747 +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
14749 + unsigned long flags = 0;
14750 +#ifdef CONFIG_TRACE_IRQFLAGS
14751 + flags = rt_spin_lock_trace_flags(lock);
14753 + spin_lock(lock); /* lock_local */
14758 +/* FIXME: we need rt_spin_lock_nest_lock */
14759 +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
14761 +#define spin_unlock(lock) rt_spin_unlock(lock)
14762 +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock)
14764 +#define spin_unlock_bh(lock) \
14766 + rt_spin_unlock(lock); \
14767 + local_bh_enable(); \
14770 +#define spin_unlock_irq(lock) spin_unlock(lock)
14772 +#define spin_unlock_irqrestore(lock, flags) \
14774 + typecheck(unsigned long, flags); \
14776 + spin_unlock(lock); \
14779 +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
14780 +#define spin_trylock_irq(lock) spin_trylock(lock)
14782 +#define spin_trylock_irqsave(lock, flags) \
14783 + rt_spin_trylock_irqsave(lock, &(flags))
14785 +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
14787 +#ifdef CONFIG_GENERIC_LOCKBREAK
14788 +# define spin_is_contended(lock) ((lock)->break_lock)
14790 +# define spin_is_contended(lock) (((void)(lock), 0))
14793 +static inline int spin_can_lock(spinlock_t *lock)
14795 + return !rt_mutex_is_locked(&lock->lock);
14798 +static inline int spin_is_locked(spinlock_t *lock)
14800 + return rt_mutex_is_locked(&lock->lock);
14803 +static inline void assert_spin_locked(spinlock_t *lock)
14805 + BUG_ON(!spin_is_locked(lock));
14808 +#define atomic_dec_and_lock(atomic, lock) \
14809 + atomic_dec_and_spin_lock(atomic, lock)
14812 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types.h linux-4.4.46/include/linux/spinlock_types.h
14813 --- linux-4.4.46.orig/include/linux/spinlock_types.h 2017-02-01 08:31:11.000000000 +0100
14814 +++ linux-4.4.46/include/linux/spinlock_types.h 2017-02-03 17:18:10.911618440 +0100
14816 * Released under the General Public License (GPL).
14819 -#if defined(CONFIG_SMP)
14820 -# include <asm/spinlock_types.h>
14822 -# include <linux/spinlock_types_up.h>
14825 -#include <linux/lockdep.h>
14827 -typedef struct raw_spinlock {
14828 - arch_spinlock_t raw_lock;
14829 -#ifdef CONFIG_GENERIC_LOCKBREAK
14830 - unsigned int break_lock;
14832 -#ifdef CONFIG_DEBUG_SPINLOCK
14833 - unsigned int magic, owner_cpu;
14836 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
14837 - struct lockdep_map dep_map;
14841 -#define SPINLOCK_MAGIC 0xdead4ead
14843 -#define SPINLOCK_OWNER_INIT ((void *)-1L)
14845 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
14846 -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
14848 -# define SPIN_DEP_MAP_INIT(lockname)
14850 +#include <linux/spinlock_types_raw.h>
14852 -#ifdef CONFIG_DEBUG_SPINLOCK
14853 -# define SPIN_DEBUG_INIT(lockname) \
14854 - .magic = SPINLOCK_MAGIC, \
14855 - .owner_cpu = -1, \
14856 - .owner = SPINLOCK_OWNER_INIT,
14857 +#ifndef CONFIG_PREEMPT_RT_FULL
14858 +# include <linux/spinlock_types_nort.h>
14859 +# include <linux/rwlock_types.h>
14861 -# define SPIN_DEBUG_INIT(lockname)
14862 +# include <linux/rtmutex.h>
14863 +# include <linux/spinlock_types_rt.h>
14864 +# include <linux/rwlock_types_rt.h>
14867 -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
14869 - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
14870 - SPIN_DEBUG_INIT(lockname) \
14871 - SPIN_DEP_MAP_INIT(lockname) }
14873 -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
14874 - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
14876 -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
14878 -typedef struct spinlock {
14880 - struct raw_spinlock rlock;
14882 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
14883 -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
14885 - u8 __padding[LOCK_PADSIZE];
14886 - struct lockdep_map dep_map;
14892 -#define __SPIN_LOCK_INITIALIZER(lockname) \
14893 - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
14895 -#define __SPIN_LOCK_UNLOCKED(lockname) \
14896 - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
14898 -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
14900 -#include <linux/rwlock_types.h>
14902 #endif /* __LINUX_SPINLOCK_TYPES_H */
14903 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types_nort.h linux-4.4.46/include/linux/spinlock_types_nort.h
14904 --- linux-4.4.46.orig/include/linux/spinlock_types_nort.h 1970-01-01 01:00:00.000000000 +0100
14905 +++ linux-4.4.46/include/linux/spinlock_types_nort.h 2017-02-03 17:18:10.911618440 +0100
14907 +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
14908 +#define __LINUX_SPINLOCK_TYPES_NORT_H
14910 +#ifndef __LINUX_SPINLOCK_TYPES_H
14911 +#error "Do not include directly. Include spinlock_types.h instead"
14915 + * The non RT version maps spinlocks to raw_spinlocks
14917 +typedef struct spinlock {
14919 + struct raw_spinlock rlock;
14921 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
14922 +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
14924 + u8 __padding[LOCK_PADSIZE];
14925 + struct lockdep_map dep_map;
14931 +#define __SPIN_LOCK_INITIALIZER(lockname) \
14932 + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
14934 +#define __SPIN_LOCK_UNLOCKED(lockname) \
14935 + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
14937 +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
14940 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types_raw.h linux-4.4.46/include/linux/spinlock_types_raw.h
14941 --- linux-4.4.46.orig/include/linux/spinlock_types_raw.h 1970-01-01 01:00:00.000000000 +0100
14942 +++ linux-4.4.46/include/linux/spinlock_types_raw.h 2017-02-03 17:18:10.911618440 +0100
14944 +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
14945 +#define __LINUX_SPINLOCK_TYPES_RAW_H
14947 +#if defined(CONFIG_SMP)
14948 +# include <asm/spinlock_types.h>
14950 +# include <linux/spinlock_types_up.h>
14953 +#include <linux/lockdep.h>
14955 +typedef struct raw_spinlock {
14956 + arch_spinlock_t raw_lock;
14957 +#ifdef CONFIG_GENERIC_LOCKBREAK
14958 + unsigned int break_lock;
14960 +#ifdef CONFIG_DEBUG_SPINLOCK
14961 + unsigned int magic, owner_cpu;
14964 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
14965 + struct lockdep_map dep_map;
14969 +#define SPINLOCK_MAGIC 0xdead4ead
14971 +#define SPINLOCK_OWNER_INIT ((void *)-1L)
14973 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
14974 +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
14976 +# define SPIN_DEP_MAP_INIT(lockname)
14979 +#ifdef CONFIG_DEBUG_SPINLOCK
14980 +# define SPIN_DEBUG_INIT(lockname) \
14981 + .magic = SPINLOCK_MAGIC, \
14982 + .owner_cpu = -1, \
14983 + .owner = SPINLOCK_OWNER_INIT,
14985 +# define SPIN_DEBUG_INIT(lockname)
14988 +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
14990 + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
14991 + SPIN_DEBUG_INIT(lockname) \
14992 + SPIN_DEP_MAP_INIT(lockname) }
14994 +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
14995 + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
14997 +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
15000 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types_rt.h linux-4.4.46/include/linux/spinlock_types_rt.h
15001 --- linux-4.4.46.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
15002 +++ linux-4.4.46/include/linux/spinlock_types_rt.h 2017-02-03 17:18:10.911618440 +0100
15004 +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
15005 +#define __LINUX_SPINLOCK_TYPES_RT_H
15007 +#ifndef __LINUX_SPINLOCK_TYPES_H
15008 +#error "Do not include directly. Include spinlock_types.h instead"
15011 +#include <linux/cache.h>
15014 + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
15016 +typedef struct spinlock {
15017 + struct rt_mutex lock;
15018 + unsigned int break_lock;
15019 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
15020 + struct lockdep_map dep_map;
15024 +#ifdef CONFIG_DEBUG_RT_MUTEXES
15025 +# define __RT_SPIN_INITIALIZER(name) \
15027 + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
15028 + .save_state = 1, \
15029 + .file = __FILE__, \
15030 + .line = __LINE__ , \
15033 +# define __RT_SPIN_INITIALIZER(name) \
15035 + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
15036 + .save_state = 1, \
15041 +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
15044 +#define __SPIN_LOCK_UNLOCKED(name) \
15045 + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
15046 + SPIN_DEP_MAP_INIT(name) }
15048 +#define __DEFINE_SPINLOCK(name) \
15049 + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
15051 +#define DEFINE_SPINLOCK(name) \
15052 + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
15055 diff -Nur linux-4.4.46.orig/include/linux/srcu.h linux-4.4.46/include/linux/srcu.h
15056 --- linux-4.4.46.orig/include/linux/srcu.h 2017-02-01 08:31:11.000000000 +0100
15057 +++ linux-4.4.46/include/linux/srcu.h 2017-02-03 17:18:10.911618440 +0100
15058 @@ -84,10 +84,10 @@
15060 void process_srcu(struct work_struct *work);
15062 -#define __SRCU_STRUCT_INIT(name) \
15063 +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
15065 .completed = -300, \
15066 - .per_cpu_ref = &name##_srcu_array, \
15067 + .per_cpu_ref = &pcpu_name, \
15068 .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
15069 .running = false, \
15070 .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
15071 @@ -104,7 +104,7 @@
15073 #define __DEFINE_SRCU(name, is_static) \
15074 static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
15075 - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
15076 + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
15077 #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
15078 #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
15080 diff -Nur linux-4.4.46.orig/include/linux/suspend.h linux-4.4.46/include/linux/suspend.h
15081 --- linux-4.4.46.orig/include/linux/suspend.h 2017-02-01 08:31:11.000000000 +0100
15082 +++ linux-4.4.46/include/linux/suspend.h 2017-02-03 17:18:10.911618440 +0100
15083 @@ -194,6 +194,12 @@
15087 +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
15088 +extern bool pm_in_action;
15090 +# define pm_in_action false
15093 #ifdef CONFIG_SUSPEND
15095 * suspend_set_ops - set platform dependent suspend operations
15096 diff -Nur linux-4.4.46.orig/include/linux/swait.h linux-4.4.46/include/linux/swait.h
15097 --- linux-4.4.46.orig/include/linux/swait.h 1970-01-01 01:00:00.000000000 +0100
15098 +++ linux-4.4.46/include/linux/swait.h 2017-02-03 17:18:10.915618595 +0100
15100 +#ifndef _LINUX_SWAIT_H
15101 +#define _LINUX_SWAIT_H
15103 +#include <linux/list.h>
15104 +#include <linux/stddef.h>
15105 +#include <linux/spinlock.h>
15106 +#include <asm/current.h>
15109 + * Simple wait queues
15111 + * While these are very similar to the other/complex wait queues (wait.h) the
15112 + * most important difference is that the simple waitqueue allows for
15113 + * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
15116 + * In order to make this so, we had to drop a fair number of features of the
15117 + * other waitqueue code; notably:
15119 + * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
15120 + * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
15123 + * - the exclusive mode; because this requires preserving the list order
15124 + * and this is hard.
15126 + * - custom wake functions; because you cannot give any guarantees about
15129 + * As a side effect of this; the data structures are slimmer.
15131 + * One would recommend using this wait queue where possible.
15134 +struct task_struct;
15136 +struct swait_queue_head {
15137 + raw_spinlock_t lock;
15138 + struct list_head task_list;
15141 +struct swait_queue {
15142 + struct task_struct *task;
15143 + struct list_head task_list;
15146 +#define __SWAITQUEUE_INITIALIZER(name) { \
15147 + .task = current, \
15148 + .task_list = LIST_HEAD_INIT((name).task_list), \
15151 +#define DECLARE_SWAITQUEUE(name) \
15152 + struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
15154 +#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \
15155 + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
15156 + .task_list = LIST_HEAD_INIT((name).task_list), \
15159 +#define DECLARE_SWAIT_QUEUE_HEAD(name) \
15160 + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
15162 +extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
15163 + struct lock_class_key *key);
15165 +#define init_swait_queue_head(q) \
15167 + static struct lock_class_key __key; \
15168 + __init_swait_queue_head((q), #q, &__key); \
15171 +#ifdef CONFIG_LOCKDEP
15172 +# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
15173 + ({ init_swait_queue_head(&name); name; })
15174 +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \
15175 + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
15177 +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \
15178 + DECLARE_SWAIT_QUEUE_HEAD(name)
15181 +static inline int swait_active(struct swait_queue_head *q)
15183 + return !list_empty(&q->task_list);
15186 +extern void swake_up(struct swait_queue_head *q);
15187 +extern void swake_up_all(struct swait_queue_head *q);
15188 +extern void swake_up_locked(struct swait_queue_head *q);
15189 +extern void swake_up_all_locked(struct swait_queue_head *q);
15191 +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
15192 +extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
15193 +extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
15195 +extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
15196 +extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
15198 +/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
15199 +#define ___swait_event(wq, condition, state, ret, cmd) \
15201 + struct swait_queue __wait; \
15202 + long __ret = ret; \
15204 + INIT_LIST_HEAD(&__wait.task_list); \
15206 + long __int = prepare_to_swait_event(&wq, &__wait, state);\
15211 + if (___wait_is_interruptible(state) && __int) { \
15218 + finish_swait(&wq, &__wait); \
15222 +#define __swait_event(wq, condition) \
15223 + (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
15226 +#define swait_event(wq, condition) \
15230 + __swait_event(wq, condition); \
15233 +#define __swait_event_timeout(wq, condition, timeout) \
15234 + ___swait_event(wq, ___wait_cond_timeout(condition), \
15235 + TASK_UNINTERRUPTIBLE, timeout, \
15236 + __ret = schedule_timeout(__ret))
15238 +#define swait_event_timeout(wq, condition, timeout) \
15240 + long __ret = timeout; \
15241 + if (!___wait_cond_timeout(condition)) \
15242 + __ret = __swait_event_timeout(wq, condition, timeout); \
15246 +#define __swait_event_interruptible(wq, condition) \
15247 + ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \
15250 +#define swait_event_interruptible(wq, condition) \
15253 + if (!(condition)) \
15254 + __ret = __swait_event_interruptible(wq, condition); \
15258 +#define __swait_event_interruptible_timeout(wq, condition, timeout) \
15259 + ___swait_event(wq, ___wait_cond_timeout(condition), \
15260 + TASK_INTERRUPTIBLE, timeout, \
15261 + __ret = schedule_timeout(__ret))
15263 +#define swait_event_interruptible_timeout(wq, condition, timeout) \
15265 + long __ret = timeout; \
15266 + if (!___wait_cond_timeout(condition)) \
15267 + __ret = __swait_event_interruptible_timeout(wq, \
15268 + condition, timeout); \
15272 +#endif /* _LINUX_SWAIT_H */
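A minimal usage sketch of the API declared above (hypothetical names, not part of the patch), in the usual wait-for-condition shape:

    #include <linux/swait.h>

    static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
    static bool demo_done;

    static void demo_complete(void)
    {
    	demo_done = true;
    	swake_up(&demo_wq);	/* wakes a single TASK_NORMAL waiter */
    }

    static int demo_wait(void)
    {
    	/* No exclusive waiters, no custom wake functions: bounded wakeup work. */
    	return swait_event_interruptible(demo_wq, demo_done);
    }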
15273 diff -Nur linux-4.4.46.orig/include/linux/swap.h linux-4.4.46/include/linux/swap.h
15274 --- linux-4.4.46.orig/include/linux/swap.h 2017-02-01 08:31:11.000000000 +0100
15275 +++ linux-4.4.46/include/linux/swap.h 2017-02-03 17:18:10.915618595 +0100
15277 #include <linux/fs.h>
15278 #include <linux/atomic.h>
15279 #include <linux/page-flags.h>
15280 +#include <linux/locallock.h>
15281 #include <asm/page.h>
15283 struct notifier_block;
15284 @@ -252,7 +253,8 @@
15285 void *workingset_eviction(struct address_space *mapping, struct page *page);
15286 bool workingset_refault(void *shadow);
15287 void workingset_activation(struct page *page);
15288 -extern struct list_lru workingset_shadow_nodes;
15289 +extern struct list_lru __workingset_shadow_nodes;
15290 +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
15292 static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
15294 @@ -298,6 +300,7 @@
15297 /* linux/mm/swap.c */
15298 +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
15299 extern void lru_cache_add(struct page *);
15300 extern void lru_cache_add_anon(struct page *page);
15301 extern void lru_cache_add_file(struct page *page);
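The DECLARE_LOCAL_IRQ_LOCK() lines above rely on the local-lock primitive introduced elsewhere in this series (include/linux/locallock.h, not shown in this excerpt). A rough usage sketch under that assumption, with an invented lock name:

    #include <linux/locallock.h>	/* provided elsewhere in this patch series */

    static DEFINE_LOCAL_IRQ_LOCK(demo_lock);

    static void demo_touch_this_cpus_cache(void)
    {
    	unsigned long flags;

    	/* !RT: behaves like local_irq_save(); RT: per-CPU sleeping lock. */
    	local_lock_irqsave(demo_lock, flags);
    	/* ... manipulate this CPU's private data ... */
    	local_unlock_irqrestore(demo_lock, flags);
    }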
15302 diff -Nur linux-4.4.46.orig/include/linux/swork.h linux-4.4.46/include/linux/swork.h
15303 --- linux-4.4.46.orig/include/linux/swork.h 1970-01-01 01:00:00.000000000 +0100
15304 +++ linux-4.4.46/include/linux/swork.h 2017-02-03 17:18:10.915618595 +0100
15306 +#ifndef _LINUX_SWORK_H
15307 +#define _LINUX_SWORK_H
15309 +#include <linux/list.h>
15311 +struct swork_event {
15312 + struct list_head item;
15313 + unsigned long flags;
15314 + void (*func)(struct swork_event *);
15317 +static inline void INIT_SWORK(struct swork_event *event,
15318 + void (*func)(struct swork_event *))
15320 + event->flags = 0;
15321 + event->func = func;
15324 +bool swork_queue(struct swork_event *sev);
15326 +int swork_get(void);
15327 +void swork_put(void);
15329 +#endif /* _LINUX_SWORK_H */
15330 diff -Nur linux-4.4.46.orig/include/linux/thread_info.h linux-4.4.46/include/linux/thread_info.h
15331 --- linux-4.4.46.orig/include/linux/thread_info.h 2017-02-01 08:31:11.000000000 +0100
15332 +++ linux-4.4.46/include/linux/thread_info.h 2017-02-03 17:18:10.915618595 +0100
15333 @@ -102,7 +102,17 @@
15334 #define test_thread_flag(flag) \
15335 test_ti_thread_flag(current_thread_info(), flag)
15337 -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
15338 +#ifdef CONFIG_PREEMPT_LAZY
15339 +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
15340 + test_thread_flag(TIF_NEED_RESCHED_LAZY))
15341 +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
15342 +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY))
15345 +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
15346 +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
15347 +#define tif_need_resched_lazy() 0
15350 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
15352 diff -Nur linux-4.4.46.orig/include/linux/timer.h linux-4.4.46/include/linux/timer.h
15353 --- linux-4.4.46.orig/include/linux/timer.h 2017-02-01 08:31:11.000000000 +0100
15354 +++ linux-4.4.46/include/linux/timer.h 2017-02-03 17:18:10.915618595 +0100
15355 @@ -225,7 +225,7 @@
15357 extern int try_to_del_timer_sync(struct timer_list *timer);
15360 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
15361 extern int del_timer_sync(struct timer_list *timer);
15363 # define del_timer_sync(t) del_timer(t)
15364 diff -Nur linux-4.4.46.orig/include/linux/trace_events.h linux-4.4.46/include/linux/trace_events.h
15365 --- linux-4.4.46.orig/include/linux/trace_events.h 2017-02-01 08:31:11.000000000 +0100
15366 +++ linux-4.4.46/include/linux/trace_events.h 2017-02-03 17:18:10.915618595 +0100
15368 unsigned char flags;
15369 unsigned char preempt_count;
15371 + unsigned short migrate_disable;
15372 + unsigned short padding;
15373 + unsigned char preempt_lazy_count;
15376 #define TRACE_EVENT_TYPE_MAX \
15377 diff -Nur linux-4.4.46.orig/include/linux/uaccess.h linux-4.4.46/include/linux/uaccess.h
15378 --- linux-4.4.46.orig/include/linux/uaccess.h 2017-02-01 08:31:11.000000000 +0100
15379 +++ linux-4.4.46/include/linux/uaccess.h 2017-02-03 17:18:10.915618595 +0100
15382 static inline void pagefault_disable(void)
15384 + migrate_disable();
15385 pagefault_disabled_inc();
15387 * make sure to have issued the store before a pagefault
15391 pagefault_disabled_dec();
15392 + migrate_enable();
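On PREEMPT_RT, disabling page faults no longer implies disabled preemption, so the section is additionally made migration-disabled to keep per-CPU state stable. A short sketch of a typical pagefault-disabled section after this change; the helper is hypothetical.

/* Illustrative only: atomic user access under pagefault_disable(). */
static int example_peek_user(const void __user *ptr, unsigned long *val)
{
	unsigned long ret;

	pagefault_disable();		/* now also migrate_disable() on RT */
	ret = __copy_from_user_inatomic(val, ptr, sizeof(*val));
	pagefault_enable();		/* pairs with migrate_enable() */

	return ret ? -EFAULT : 0;
}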
15396 diff -Nur linux-4.4.46.orig/include/linux/uprobes.h linux-4.4.46/include/linux/uprobes.h
15397 --- linux-4.4.46.orig/include/linux/uprobes.h 2017-02-01 08:31:11.000000000 +0100
15398 +++ linux-4.4.46/include/linux/uprobes.h 2017-02-03 17:18:10.915618595 +0100
15400 #include <linux/errno.h>
15401 #include <linux/rbtree.h>
15402 #include <linux/types.h>
15403 +#include <linux/wait.h>
15405 struct vm_area_struct;
15407 diff -Nur linux-4.4.46.orig/include/linux/vmstat.h linux-4.4.46/include/linux/vmstat.h
15408 --- linux-4.4.46.orig/include/linux/vmstat.h 2017-02-01 08:31:11.000000000 +0100
15409 +++ linux-4.4.46/include/linux/vmstat.h 2017-02-03 17:18:10.915618595 +0100
15412 static inline void __count_vm_event(enum vm_event_item item)
15414 + preempt_disable_rt();
15415 raw_cpu_inc(vm_event_states.event[item]);
15416 + preempt_enable_rt();
15419 static inline void count_vm_event(enum vm_event_item item)
15422 static inline void __count_vm_events(enum vm_event_item item, long delta)
15424 + preempt_disable_rt();
15425 raw_cpu_add(vm_event_states.event[item], delta);
15426 + preempt_enable_rt();
15429 static inline void count_vm_events(enum vm_event_item item, long delta)
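preempt_disable_rt()/preempt_enable_rt() come from the preempt.h part of this patch. A plausible sketch of their intent, stated as an assumption rather than the authoritative definition: on PREEMPT_RT they map to real preempt_disable()/preempt_enable() so the raw per-CPU counter update cannot be preempted mid-way, while on !RT they compile away.

/* Assumed semantics only; the real definitions live in the preempt.h hunk. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define preempt_disable_rt()	preempt_disable()
# define preempt_enable_rt()	preempt_enable()
#else
# define preempt_disable_rt()	barrier()
# define preempt_enable_rt()	barrier()
#endif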
15430 diff -Nur linux-4.4.46.orig/include/linux/wait.h linux-4.4.46/include/linux/wait.h
15431 --- linux-4.4.46.orig/include/linux/wait.h 2017-02-01 08:31:11.000000000 +0100
15432 +++ linux-4.4.46/include/linux/wait.h 2017-02-03 17:18:10.915618595 +0100
15434 #include <linux/spinlock.h>
15435 #include <asm/current.h>
15436 #include <uapi/linux/wait.h>
15437 +#include <linux/atomic.h>
15439 typedef struct __wait_queue wait_queue_t;
15440 typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
15441 diff -Nur linux-4.4.46.orig/include/net/dst.h linux-4.4.46/include/net/dst.h
15442 --- linux-4.4.46.orig/include/net/dst.h 2017-02-01 08:31:11.000000000 +0100
15443 +++ linux-4.4.46/include/net/dst.h 2017-02-03 17:18:10.915618595 +0100
15444 @@ -437,7 +437,7 @@
15445 static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
15446 struct sk_buff *skb)
15448 - const struct hh_cache *hh;
15449 + struct hh_cache *hh;
15451 if (dst->pending_confirm) {
15452 unsigned long now = jiffies;
15453 diff -Nur linux-4.4.46.orig/include/net/neighbour.h linux-4.4.46/include/net/neighbour.h
15454 --- linux-4.4.46.orig/include/net/neighbour.h 2017-02-01 08:31:11.000000000 +0100
15455 +++ linux-4.4.46/include/net/neighbour.h 2017-02-03 17:18:10.915618595 +0100
15456 @@ -446,7 +446,7 @@
15460 -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
15461 +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
15465 @@ -501,7 +501,7 @@
15467 #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
15469 -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
15470 +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
15471 const struct net_device *dev)
15474 diff -Nur linux-4.4.46.orig/include/net/netns/ipv4.h linux-4.4.46/include/net/netns/ipv4.h
15475 --- linux-4.4.46.orig/include/net/netns/ipv4.h 2017-02-01 08:31:11.000000000 +0100
15476 +++ linux-4.4.46/include/net/netns/ipv4.h 2017-02-03 17:18:10.915618595 +0100
15479 int sysctl_icmp_echo_ignore_all;
15480 int sysctl_icmp_echo_ignore_broadcasts;
15481 + int sysctl_icmp_echo_sysrq;
15482 int sysctl_icmp_ignore_bogus_error_responses;
15483 int sysctl_icmp_ratelimit;
15484 int sysctl_icmp_ratemask;
15485 diff -Nur linux-4.4.46.orig/include/trace/events/hist.h linux-4.4.46/include/trace/events/hist.h
15486 --- linux-4.4.46.orig/include/trace/events/hist.h 1970-01-01 01:00:00.000000000 +0100
15487 +++ linux-4.4.46/include/trace/events/hist.h 2017-02-03 17:18:10.915618595 +0100
15489 +#undef TRACE_SYSTEM
15490 +#define TRACE_SYSTEM hist
15492 +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
15493 +#define _TRACE_HIST_H
15495 +#include "latency_hist.h"
15496 +#include <linux/tracepoint.h>
15498 +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
15499 +#define trace_preemptirqsoff_hist(a, b)
15500 +#define trace_preemptirqsoff_hist_rcuidle(a, b)
15502 +TRACE_EVENT(preemptirqsoff_hist,
15504 + TP_PROTO(int reason, int starthist),
15506 + TP_ARGS(reason, starthist),
15508 + TP_STRUCT__entry(
15509 + __field(int, reason)
15510 + __field(int, starthist)
15514 + __entry->reason = reason;
15515 + __entry->starthist = starthist;
15518 + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
15519 + __entry->starthist ? "start" : "stop")
15523 +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
15524 +#define trace_hrtimer_interrupt(a, b, c, d)
15526 +TRACE_EVENT(hrtimer_interrupt,
15528 + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
15529 + struct task_struct *task),
15531 + TP_ARGS(cpu, offset, curr, task),
15533 + TP_STRUCT__entry(
15534 + __field(int, cpu)
15535 + __field(long long, offset)
15536 + __array(char, ccomm, TASK_COMM_LEN)
15537 + __field(int, cprio)
15538 + __array(char, tcomm, TASK_COMM_LEN)
15539 + __field(int, tprio)
15543 + __entry->cpu = cpu;
15544 + __entry->offset = offset;
15545 + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
15546 + __entry->cprio = curr->prio;
15547 + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
15548 + task != NULL ? TASK_COMM_LEN : 7);
15549 + __entry->tprio = task != NULL ? task->prio : -1;
15552 + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
15553 + __entry->cpu, __entry->offset, __entry->ccomm,
15554 + __entry->cprio, __entry->tcomm, __entry->tprio)
15558 +#endif /* _TRACE_HIST_H */
15560 +/* This part must be outside protection */
15561 +#include <trace/define_trace.h>
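The trace_preemptirqsoff_hist() tracepoint (or the empty stub when the histogram options are off) is meant to be called from the irqs-off/preempt-off tracer hooks. A hedged sketch of a call site; the function is hypothetical and the reason argument would be one of the enum hist_action values from latency_hist.h below.

/* Hypothetical call site for the tracepoint defined above. */
static void example_irqs_off_section(int reason)
{
	trace_preemptirqsoff_hist(reason, 1);	/* starthist != 0: start measuring */
	/* ... code executed with interrupts disabled ... */
	trace_preemptirqsoff_hist(reason, 0);	/* starthist == 0: stop measuring */
}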
15562 diff -Nur linux-4.4.46.orig/include/trace/events/latency_hist.h linux-4.4.46/include/trace/events/latency_hist.h
15563 --- linux-4.4.46.orig/include/trace/events/latency_hist.h 1970-01-01 01:00:00.000000000 +0100
15564 +++ linux-4.4.46/include/trace/events/latency_hist.h 2017-02-03 17:18:10.915618595 +0100
15566 +#ifndef _LATENCY_HIST_H
15567 +#define _LATENCY_HIST_H
15569 +enum hist_action {
15578 +static char *actions[] = {
15587 +static inline char *getaction(int action)
15589 + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
15590 + return actions[action];
15591 + return "unknown";
15594 +#endif /* _LATENCY_HIST_H */
15595 diff -Nur linux-4.4.46.orig/include/trace/events/writeback.h linux-4.4.46/include/trace/events/writeback.h
15596 --- linux-4.4.46.orig/include/trace/events/writeback.h 2017-02-01 08:31:11.000000000 +0100
15597 +++ linux-4.4.46/include/trace/events/writeback.h 2017-02-03 17:18:10.915618595 +0100
15598 @@ -134,58 +134,28 @@
15599 #ifdef CREATE_TRACE_POINTS
15600 #ifdef CONFIG_CGROUP_WRITEBACK
15602 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
15603 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
15605 - return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
15606 + return wb->memcg_css->cgroup->kn->ino;
15609 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
15611 - struct cgroup *cgrp = wb->memcg_css->cgroup;
15614 - path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
15615 - WARN_ON_ONCE(path != buf);
15618 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
15621 - return __trace_wb_cgroup_size(wbc->wb);
15626 -static inline void __trace_wbc_assign_cgroup(char *buf,
15627 - struct writeback_control *wbc)
15628 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
15631 - __trace_wb_assign_cgroup(buf, wbc->wb);
15632 + return __trace_wb_assign_cgroup(wbc->wb);
15634 - strcpy(buf, "/");
15638 #else /* CONFIG_CGROUP_WRITEBACK */
15640 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
15645 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
15647 - strcpy(buf, "/");
15650 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
15651 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
15657 -static inline void __trace_wbc_assign_cgroup(char *buf,
15658 - struct writeback_control *wbc)
15659 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
15661 - strcpy(buf, "/");
15665 #endif /* CONFIG_CGROUP_WRITEBACK */
15666 @@ -201,7 +171,7 @@
15667 __array(char, name, 32)
15668 __field(unsigned long, ino)
15669 __field(int, sync_mode)
15670 - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
15671 + __field(unsigned int, cgroup_ino)
15675 @@ -209,14 +179,14 @@
15676 dev_name(inode_to_bdi(inode)->dev), 32);
15677 __entry->ino = inode->i_ino;
15678 __entry->sync_mode = wbc->sync_mode;
15679 - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
15680 + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
15683 - TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
15684 + TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
15687 __entry->sync_mode,
15688 - __get_str(cgroup)
15689 + __entry->cgroup_ino
15693 @@ -246,7 +216,7 @@
15694 __field(int, range_cyclic)
15695 __field(int, for_background)
15696 __field(int, reason)
15697 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15698 + __field(unsigned int, cgroup_ino)
15701 strncpy(__entry->name,
15702 @@ -258,10 +228,10 @@
15703 __entry->range_cyclic = work->range_cyclic;
15704 __entry->for_background = work->for_background;
15705 __entry->reason = work->reason;
15706 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15707 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15709 TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
15710 - "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
15711 + "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
15713 MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
15715 @@ -270,7 +240,7 @@
15716 __entry->range_cyclic,
15717 __entry->for_background,
15718 __print_symbolic(__entry->reason, WB_WORK_REASON),
15719 - __get_str(cgroup)
15720 + __entry->cgroup_ino
15723 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
15724 @@ -300,15 +270,15 @@
15727 __array(char, name, 32)
15728 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15729 + __field(unsigned int, cgroup_ino)
15732 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
15733 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15734 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15736 - TP_printk("bdi %s: cgroup=%s",
15737 + TP_printk("bdi %s: cgroup_ino=%u",
15739 - __get_str(cgroup)
15740 + __entry->cgroup_ino
15743 #define DEFINE_WRITEBACK_EVENT(name) \
15744 @@ -347,7 +317,7 @@
15745 __field(int, range_cyclic)
15746 __field(long, range_start)
15747 __field(long, range_end)
15748 - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
15749 + __field(unsigned int, cgroup_ino)
15753 @@ -361,12 +331,12 @@
15754 __entry->range_cyclic = wbc->range_cyclic;
15755 __entry->range_start = (long)wbc->range_start;
15756 __entry->range_end = (long)wbc->range_end;
15757 - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
15758 + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
15761 TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
15762 "bgrd=%d reclm=%d cyclic=%d "
15763 - "start=0x%lx end=0x%lx cgroup=%s",
15764 + "start=0x%lx end=0x%lx cgroup_ino=%u",
15766 __entry->nr_to_write,
15767 __entry->pages_skipped,
15768 @@ -377,7 +347,7 @@
15769 __entry->range_cyclic,
15770 __entry->range_start,
15771 __entry->range_end,
15772 - __get_str(cgroup)
15773 + __entry->cgroup_ino
15777 @@ -398,7 +368,7 @@
15779 __field(int, moved)
15780 __field(int, reason)
15781 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15782 + __field(unsigned int, cgroup_ino)
15785 unsigned long *older_than_this = work->older_than_this;
15786 @@ -408,15 +378,15 @@
15787 (jiffies - *older_than_this) * 1000 / HZ : -1;
15788 __entry->moved = moved;
15789 __entry->reason = work->reason;
15790 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15791 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15793 - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
15794 + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
15796 __entry->older, /* older_than_this in jiffies */
15797 __entry->age, /* older_than_this in relative milliseconds */
15799 __print_symbolic(__entry->reason, WB_WORK_REASON),
15800 - __get_str(cgroup)
15801 + __entry->cgroup_ino
15805 @@ -484,7 +454,7 @@
15806 __field(unsigned long, dirty_ratelimit)
15807 __field(unsigned long, task_ratelimit)
15808 __field(unsigned long, balanced_dirty_ratelimit)
15809 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15810 + __field(unsigned int, cgroup_ino)
15814 @@ -496,13 +466,13 @@
15815 __entry->task_ratelimit = KBps(task_ratelimit);
15816 __entry->balanced_dirty_ratelimit =
15817 KBps(wb->balanced_dirty_ratelimit);
15818 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15819 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15822 TP_printk("bdi %s: "
15823 "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
15824 "dirty_ratelimit=%lu task_ratelimit=%lu "
15825 - "balanced_dirty_ratelimit=%lu cgroup=%s",
15826 + "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
15828 __entry->write_bw, /* write bandwidth */
15829 __entry->avg_write_bw, /* avg write bandwidth */
15830 @@ -510,7 +480,7 @@
15831 __entry->dirty_ratelimit, /* base ratelimit */
15832 __entry->task_ratelimit, /* ratelimit with position control */
15833 __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
15834 - __get_str(cgroup)
15835 + __entry->cgroup_ino
15839 @@ -548,7 +518,7 @@
15840 __field( long, pause)
15841 __field(unsigned long, period)
15842 __field( long, think)
15843 - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15844 + __field(unsigned int, cgroup_ino)
15848 @@ -571,7 +541,7 @@
15849 __entry->period = period * 1000 / HZ;
15850 __entry->pause = pause * 1000 / HZ;
15851 __entry->paused = (jiffies - start_time) * 1000 / HZ;
15852 - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15853 + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15857 @@ -580,7 +550,7 @@
15858 "bdi_setpoint=%lu bdi_dirty=%lu "
15859 "dirty_ratelimit=%lu task_ratelimit=%lu "
15860 "dirtied=%u dirtied_pause=%u "
15861 - "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
15862 + "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
15866 @@ -595,7 +565,7 @@
15867 __entry->pause, /* ms */
15868 __entry->period, /* ms */
15869 __entry->think, /* ms */
15870 - __get_str(cgroup)
15871 + __entry->cgroup_ino
15875 @@ -609,8 +579,7 @@
15876 __field(unsigned long, ino)
15877 __field(unsigned long, state)
15878 __field(unsigned long, dirtied_when)
15879 - __dynamic_array(char, cgroup,
15880 - __trace_wb_cgroup_size(inode_to_wb(inode)))
15881 + __field(unsigned int, cgroup_ino)
15885 @@ -619,16 +588,16 @@
15886 __entry->ino = inode->i_ino;
15887 __entry->state = inode->i_state;
15888 __entry->dirtied_when = inode->dirtied_when;
15889 - __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
15890 + __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
15893 - TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
15894 + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
15897 show_inode_state(__entry->state),
15898 __entry->dirtied_when,
15899 (jiffies - __entry->dirtied_when) / HZ,
15900 - __get_str(cgroup)
15901 + __entry->cgroup_ino
15905 @@ -684,7 +653,7 @@
15906 __field(unsigned long, writeback_index)
15907 __field(long, nr_to_write)
15908 __field(unsigned long, wrote)
15909 - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
15910 + __field(unsigned int, cgroup_ino)
15914 @@ -696,11 +665,11 @@
15915 __entry->writeback_index = inode->i_mapping->writeback_index;
15916 __entry->nr_to_write = nr_to_write;
15917 __entry->wrote = nr_to_write - wbc->nr_to_write;
15918 - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
15919 + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
15922 TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
15923 - "index=%lu to_write=%ld wrote=%lu cgroup=%s",
15924 + "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
15927 show_inode_state(__entry->state),
15928 @@ -709,7 +678,7 @@
15929 __entry->writeback_index,
15930 __entry->nr_to_write,
15932 - __get_str(cgroup)
15933 + __entry->cgroup_ino
15937 diff -Nur linux-4.4.46.orig/init/Kconfig linux-4.4.46/init/Kconfig
15938 --- linux-4.4.46.orig/init/Kconfig 2017-02-01 08:31:11.000000000 +0100
15939 +++ linux-4.4.46/init/Kconfig 2017-02-03 17:18:10.915618595 +0100
15940 @@ -498,7 +498,7 @@
15943 bool "Make expert-level adjustments to RCU configuration"
15945 + default y if PREEMPT_RT_FULL
15947 This option needs to be enabled if you wish to make
15948 expert-level adjustments to RCU configuration. By default,
15949 @@ -614,7 +614,7 @@
15951 config RCU_FAST_NO_HZ
15952 bool "Accelerate last non-dyntick-idle CPU's grace periods"
15953 - depends on NO_HZ_COMMON && SMP && RCU_EXPERT
15954 + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
15957 This option permits CPUs to enter dynticks-idle state even if
15958 @@ -641,7 +641,7 @@
15960 bool "Enable RCU priority boosting"
15961 depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
15963 + default y if PREEMPT_RT_FULL
15965 This option boosts the priority of preempted RCU readers that
15966 block the current preemptible RCU grace period for too long.
15967 @@ -1106,6 +1106,7 @@
15968 config RT_GROUP_SCHED
15969 bool "Group scheduling for SCHED_RR/FIFO"
15970 depends on CGROUP_SCHED
15971 + depends on !PREEMPT_RT_FULL
15974 This feature lets you explicitly allocate real CPU bandwidth
15975 @@ -1719,6 +1720,7 @@
15979 + depends on !PREEMPT_RT_FULL
15981 The regular slab allocator that is established and known to work
15982 well in all environments. It organizes cache hot objects in
15983 @@ -1737,6 +1739,7 @@
15986 bool "SLOB (Simple Allocator)"
15987 + depends on !PREEMPT_RT_FULL
15989 SLOB replaces the stock allocator with a drastically simpler
15990 allocator. SLOB is generally more space efficient but
15991 @@ -1746,7 +1749,7 @@
15993 config SLUB_CPU_PARTIAL
15995 - depends on SLUB && SMP
15996 + depends on SLUB && SMP && !PREEMPT_RT_FULL
15997 bool "SLUB per cpu partial cache"
15999 Per cpu partial caches accelerate object allocation and freeing
16000 diff -Nur linux-4.4.46.orig/init/main.c linux-4.4.46/init/main.c
16001 --- linux-4.4.46.orig/init/main.c 2017-02-01 08:31:11.000000000 +0100
16002 +++ linux-4.4.46/init/main.c 2017-02-03 17:18:10.915618595 +0100
16003 @@ -530,6 +530,7 @@
16004 setup_command_line(command_line);
16005 setup_nr_cpu_ids();
16006 setup_per_cpu_areas();
16007 + softirq_early_init();
16008 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
16010 build_all_zonelists(NULL, NULL);
16011 diff -Nur linux-4.4.46.orig/init/Makefile linux-4.4.46/init/Makefile
16012 --- linux-4.4.46.orig/init/Makefile 2017-02-01 08:31:11.000000000 +0100
16013 +++ linux-4.4.46/init/Makefile 2017-02-03 17:18:10.915618595 +0100
16015 include/generated/compile.h: FORCE
16016 @$($(quiet)chk_compile.h)
16017 $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
16018 - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
16019 + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
16020 diff -Nur linux-4.4.46.orig/ipc/msg.c linux-4.4.46/ipc/msg.c
16021 --- linux-4.4.46.orig/ipc/msg.c 2017-02-01 08:31:11.000000000 +0100
16022 +++ linux-4.4.46/ipc/msg.c 2017-02-03 17:18:10.919618749 +0100
16023 @@ -183,20 +183,14 @@
16027 -static void expunge_all(struct msg_queue *msq, int res)
16028 +static void expunge_all(struct msg_queue *msq, int res,
16029 + struct wake_q_head *wake_q)
16031 struct msg_receiver *msr, *t;
16033 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
16034 - msr->r_msg = NULL; /* initialize expunge ordering */
16035 - wake_up_process(msr->r_tsk);
16037 - * Ensure that the wakeup is visible before setting r_msg as
16038 - * the receiving end depends on it: either spinning on a nil,
16039 - * or dealing with -EAGAIN cases. See lockless receive part 1
16040 - * and 2 in do_msgrcv().
16042 - smp_wmb(); /* barrier (B) */
16044 + wake_q_add(wake_q, msr->r_tsk);
16045 msr->r_msg = ERR_PTR(res);
16048 @@ -213,11 +207,13 @@
16050 struct msg_msg *msg, *t;
16051 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
16054 - expunge_all(msq, -EIDRM);
16055 + expunge_all(msq, -EIDRM, &wake_q);
16056 ss_wakeup(&msq->q_senders, 1);
16058 ipc_unlock_object(&msq->q_perm);
16059 + wake_up_q(&wake_q);
16062 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
16063 @@ -342,6 +338,7 @@
16064 struct kern_ipc_perm *ipcp;
16065 struct msqid64_ds uninitialized_var(msqid64);
16066 struct msg_queue *msq;
16070 if (cmd == IPC_SET) {
16071 @@ -389,7 +386,7 @@
16072 /* sleeping receivers might be excluded by
16073 * stricter permissions.
16075 - expunge_all(msq, -EAGAIN);
16076 + expunge_all(msq, -EAGAIN, &wake_q);
16077 /* sleeping senders might be able to send
16078 * due to a larger queue size.
16080 @@ -402,6 +399,7 @@
16083 ipc_unlock_object(&msq->q_perm);
16084 + wake_up_q(&wake_q);
16088 @@ -566,7 +564,8 @@
16092 -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
16093 +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
16094 + struct wake_q_head *wake_q)
16096 struct msg_receiver *msr, *t;
16098 @@ -577,27 +576,13 @@
16100 list_del(&msr->r_list);
16101 if (msr->r_maxsize < msg->m_ts) {
16102 - /* initialize pipelined send ordering */
16103 - msr->r_msg = NULL;
16104 - wake_up_process(msr->r_tsk);
16105 - /* barrier (B) see barrier comment below */
16107 + wake_q_add(wake_q, msr->r_tsk);
16108 msr->r_msg = ERR_PTR(-E2BIG);
16110 - msr->r_msg = NULL;
16111 msq->q_lrpid = task_pid_vnr(msr->r_tsk);
16112 msq->q_rtime = get_seconds();
16113 - wake_up_process(msr->r_tsk);
16115 - * Ensure that the wakeup is visible before
16116 - * setting r_msg, as the receiving can otherwise
16117 - * exit - once r_msg is set, the receiver can
16118 - * continue. See lockless receive part 1 and 2
16119 - * in do_msgrcv(). Barrier (B).
16122 + wake_q_add(wake_q, msr->r_tsk);
16128 @@ -613,6 +598,7 @@
16129 struct msg_msg *msg;
16131 struct ipc_namespace *ns;
16134 ns = current->nsproxy->ipc_ns;
16136 @@ -698,7 +684,7 @@
16137 msq->q_lspid = task_tgid_vnr(current);
16138 msq->q_stime = get_seconds();
16140 - if (!pipelined_send(msq, msg)) {
16141 + if (!pipelined_send(msq, msg, &wake_q)) {
16142 /* no one is waiting for this message, enqueue it */
16143 list_add_tail(&msg->m_list, &msq->q_messages);
16144 msq->q_cbytes += msgsz;
16145 @@ -712,6 +698,7 @@
16148 ipc_unlock_object(&msq->q_perm);
16149 + wake_up_q(&wake_q);
16153 @@ -932,57 +919,25 @@
16156 /* Lockless receive, part 2:
16157 - * Wait until pipelined_send or expunge_all are outside of
16158 - * wake_up_process(). There is a race with exit(), see
16159 - * ipc/mqueue.c for the details. The correct serialization
16160 - * ensures that a receiver cannot continue without the wakeup
16161 - * being visible _before_ setting r_msg:
16162 + * The work in pipelined_send() and expunge_all():
16163 + * - Set pointer to message
16164 + * - Queue the receiver task for later wakeup
16165 + * - Wake up the process after the lock is dropped.
16168 - * <loop receiver>
16169 - * smp_rmb(); (A) <-- pair -. <waker thread>
16170 - * <load ->r_msg> | msr->r_msg = NULL;
16171 - * | wake_up_process();
16172 - * <continue> `------> smp_wmb(); (B)
16173 - * msr->r_msg = msg;
16175 - * Where (A) orders the message value read and where (B) orders
16176 - * the write to the r_msg -- done in both pipelined_send and
16178 + * Should the process wake up before this wakeup (due to a
16179 + * signal) it will either see the message and continue …
16183 - * Pairs with writer barrier in pipelined_send
16184 - * or expunge_all.
16186 - smp_rmb(); /* barrier (A) */
16187 - msg = (struct msg_msg *)msr_d.r_msg;
16192 - * The cpu_relax() call is a compiler barrier
16193 - * which forces everything in this loop to be
16199 - /* Lockless receive, part 3:
16200 - * If there is a message or an error then accept it without
16203 + msg = (struct msg_msg *)msr_d.r_msg;
16204 if (msg != ERR_PTR(-EAGAIN))
16207 - /* Lockless receive, part 3:
16208 - * Acquire the queue spinlock.
16211 + * … or see -EAGAIN, acquire the lock to check the message
16214 ipc_lock_object(&msq->q_perm);
16216 - /* Lockless receive, part 4:
16217 - * Repeat test after acquiring the spinlock.
16219 msg = (struct msg_msg *)msr_d.r_msg;
16220 if (msg != ERR_PTR(-EAGAIN))
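The ipc/msg.c changes above replace open-coded wake_up_process() calls and their ordering barriers with the stock wake_q mechanism, so all wakeups happen after the ipc object lock has been dropped. A minimal sketch of that pattern, using the existing WAKE_Q()/wake_q_add()/wake_up_q() helpers; the function and its arguments are illustrative only.

/* Minimal sketch of the wake_q pattern used throughout this hunk. */
static void example_wake_waiter(spinlock_t *object_lock,
				struct task_struct *waiter)
{
	WAKE_Q(wake_q);			/* on-stack wake queue */

	spin_lock(object_lock);
	wake_q_add(&wake_q, waiter);	/* only queues, no wakeup yet */
	/* ... publish the waiter's result, e.g. msr->r_msg ... */
	spin_unlock(object_lock);

	wake_up_q(&wake_q);		/* do the wakeups outside the lock */
}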
16222 diff -Nur linux-4.4.46.orig/ipc/sem.c linux-4.4.46/ipc/sem.c
16223 --- linux-4.4.46.orig/ipc/sem.c 2017-02-01 08:31:11.000000000 +0100
16224 +++ linux-4.4.46/ipc/sem.c 2017-02-03 17:18:10.919618749 +0100
16225 @@ -708,6 +708,13 @@
16226 static void wake_up_sem_queue_prepare(struct list_head *pt,
16227 struct sem_queue *q, int error)
16229 +#ifdef CONFIG_PREEMPT_RT_BASE
16230 + struct task_struct *p = q->sleeper;
16231 + get_task_struct(p);
16232 + q->status = error;
16233 + wake_up_process(p);
16234 + put_task_struct(p);
16236 if (list_empty(pt)) {
16238 * Hold preempt off so that we don't get preempted and have the
16239 @@ -719,6 +726,7 @@
16242 list_add_tail(&q->list, pt);
16247 @@ -732,6 +740,7 @@
16249 static void wake_up_sem_queue_do(struct list_head *pt)
16251 +#ifndef CONFIG_PREEMPT_RT_BASE
16252 struct sem_queue *q, *t;
16255 @@ -744,6 +753,7 @@
16262 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
16263 diff -Nur linux-4.4.46.orig/kernel/cgroup.c linux-4.4.46/kernel/cgroup.c
16264 --- linux-4.4.46.orig/kernel/cgroup.c 2017-02-01 08:31:11.000000000 +0100
16265 +++ linux-4.4.46/kernel/cgroup.c 2017-02-03 17:18:10.919618749 +0100
16266 @@ -4740,10 +4740,10 @@
16267 queue_work(cgroup_destroy_wq, &css->destroy_work);
16270 -static void css_release_work_fn(struct work_struct *work)
16271 +static void css_release_work_fn(struct swork_event *sev)
16273 struct cgroup_subsys_state *css =
16274 - container_of(work, struct cgroup_subsys_state, destroy_work);
16275 + container_of(sev, struct cgroup_subsys_state, destroy_swork);
16276 struct cgroup_subsys *ss = css->ss;
16277 struct cgroup *cgrp = css->cgroup;
16279 @@ -4782,8 +4782,8 @@
16280 struct cgroup_subsys_state *css =
16281 container_of(ref, struct cgroup_subsys_state, refcnt);
16283 - INIT_WORK(&css->destroy_work, css_release_work_fn);
16284 - queue_work(cgroup_destroy_wq, &css->destroy_work);
16285 + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
16286 + swork_queue(&css->destroy_swork);
16289 static void init_and_link_css(struct cgroup_subsys_state *css,
16290 @@ -5400,6 +5400,7 @@
16292 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
16293 BUG_ON(!cgroup_destroy_wq);
16294 + BUG_ON(swork_get());
16297 * Used to destroy pidlists and separate to serve as flush domain.
16298 diff -Nur linux-4.4.46.orig/kernel/cpu.c linux-4.4.46/kernel/cpu.c
16299 --- linux-4.4.46.orig/kernel/cpu.c 2017-02-01 08:31:11.000000000 +0100
16300 +++ linux-4.4.46/kernel/cpu.c 2017-02-03 17:18:10.919618749 +0100
16304 .active_writer = NULL,
16305 - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
16306 .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
16307 + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
16308 #ifdef CONFIG_DEBUG_LOCK_ALLOC
16309 .dep_map = {.name = "cpu_hotplug.lock" },
16311 @@ -89,6 +89,289 @@
16312 #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
16313 #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
16316 + * hotplug_pcp - per cpu hotplug descriptor
16317 + * @unplug: set when pin_current_cpu() needs to sync tasks
16318 + * @sync_tsk: the task that waits for tasks to finish pinned sections
16319 + * @refcount: counter of tasks in pinned sections
16320 + * @grab_lock: set when the tasks entering pinned sections should wait
16321 + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
16322 + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
16323 + * @mutex_init: zero if the mutex hasn't been initialized yet.
16325 + * Although @unplug and @sync_tsk may point to the same task, the @unplug
16326 + * is used as a flag and still exists after @sync_tsk has exited and
16327 + * @sync_tsk set to NULL.
16329 +struct hotplug_pcp {
16330 + struct task_struct *unplug;
16331 + struct task_struct *sync_tsk;
16334 + struct completion synced;
16335 + struct completion unplug_wait;
16336 +#ifdef CONFIG_PREEMPT_RT_FULL
16338 + * Note, on PREEMPT_RT, the hotplug lock must save the state of
16339 + * the task, otherwise the mutex will cause the task to fail
16340 + * to sleep when required. (Because it's called from migrate_disable())
16342 + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
16347 + struct mutex mutex;
16352 +#ifdef CONFIG_PREEMPT_RT_FULL
16353 +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
16354 +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
16356 +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
16357 +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
16360 +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
16363 + * pin_current_cpu - Prevent the current cpu from being unplugged
16365 + * Lightweight version of get_online_cpus() to prevent cpu from being
16366 + * unplugged when code runs in a migration disabled region.
16368 + * Must be called with preemption disabled (preempt_count = 1)!
16370 +void pin_current_cpu(void)
16372 + struct hotplug_pcp *hp;
16376 + hp = this_cpu_ptr(&hotplug_pcp);
16378 + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
16379 + hp->unplug == current) {
16383 + if (hp->grab_lock) {
16384 + preempt_enable();
16385 + hotplug_lock(hp);
16386 + hotplug_unlock(hp);
16388 + preempt_enable();
16390 + * Try to push this task off of this CPU.
16392 + if (!migrate_me()) {
16393 + preempt_disable();
16394 + hp = this_cpu_ptr(&hotplug_pcp);
16395 + if (!hp->grab_lock) {
16397 + * Just let it continue, it's already pinned
16398 + * or about to sleep.
16403 + preempt_enable();
16406 + preempt_disable();
16411 + * unpin_current_cpu - Allow unplug of current cpu
16413 + * Must be called with preemption or interrupts disabled!
16415 +void unpin_current_cpu(void)
16417 + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
16419 + WARN_ON(hp->refcount <= 0);
16421 + /* This is safe. sync_unplug_thread is pinned to this cpu */
16422 + if (!--hp->refcount && hp->unplug && hp->unplug != current)
16423 + wake_up_process(hp->unplug);
16426 +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
16428 + set_current_state(TASK_UNINTERRUPTIBLE);
16429 + while (hp->refcount) {
16430 + schedule_preempt_disabled();
16431 + set_current_state(TASK_UNINTERRUPTIBLE);
16435 +static int sync_unplug_thread(void *data)
16437 + struct hotplug_pcp *hp = data;
16439 + wait_for_completion(&hp->unplug_wait);
16440 + preempt_disable();
16441 + hp->unplug = current;
16442 + wait_for_pinned_cpus(hp);
16445 + * This thread will synchronize the cpu_down() with threads
16446 + * that have pinned the CPU. When the pinned CPU count reaches
16447 + * zero, we inform the cpu_down code to continue to the next step.
16449 + set_current_state(TASK_UNINTERRUPTIBLE);
16450 + preempt_enable();
16451 + complete(&hp->synced);
16454 + * If all succeeds, the next step will need tasks to wait till
16455 + * the CPU is offline before continuing. To do this, the grab_lock
16456 + * is set and tasks going into pin_current_cpu() will block on the
16457 + * mutex. But we still need to wait for those that are already in
16458 + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
16459 + * will kick this thread out.
16461 + while (!hp->grab_lock && !kthread_should_stop()) {
16463 + set_current_state(TASK_UNINTERRUPTIBLE);
16466 + /* Make sure grab_lock is seen before we see a stale completion */
16470 + * Now just before cpu_down() enters stop machine, we need to make
16471 + * sure all tasks that are in pinned CPU sections are out, and new
16472 + * tasks will now grab the lock, keeping them from entering pinned
16475 + if (!kthread_should_stop()) {
16476 + preempt_disable();
16477 + wait_for_pinned_cpus(hp);
16478 + preempt_enable();
16479 + complete(&hp->synced);
16482 + set_current_state(TASK_UNINTERRUPTIBLE);
16483 + while (!kthread_should_stop()) {
16485 + set_current_state(TASK_UNINTERRUPTIBLE);
16487 + set_current_state(TASK_RUNNING);
16490 + * Force this thread off this CPU as it's going down and
16491 + * we don't want any more work on this CPU.
16493 + current->flags &= ~PF_NO_SETAFFINITY;
16494 + set_cpus_allowed_ptr(current, cpu_present_mask);
16499 +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
16501 + wake_up_process(hp->sync_tsk);
16502 + wait_for_completion(&hp->synced);
16505 +static void __cpu_unplug_wait(unsigned int cpu)
16507 + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16509 + complete(&hp->unplug_wait);
16510 + wait_for_completion(&hp->synced);
16514 + * Start the sync_unplug_thread on the target cpu and wait for it to
16517 +static int cpu_unplug_begin(unsigned int cpu)
16519 + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16522 + /* Protected by cpu_hotplug.lock */
16523 + if (!hp->mutex_init) {
16524 +#ifdef CONFIG_PREEMPT_RT_FULL
16525 + spin_lock_init(&hp->lock);
16527 + mutex_init(&hp->mutex);
16529 + hp->mutex_init = 1;
16532 + /* Inform the scheduler to migrate tasks off this CPU */
16533 + tell_sched_cpu_down_begin(cpu);
16535 + init_completion(&hp->synced);
16536 + init_completion(&hp->unplug_wait);
16538 + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
16539 + if (IS_ERR(hp->sync_tsk)) {
16540 + err = PTR_ERR(hp->sync_tsk);
16541 + hp->sync_tsk = NULL;
16544 + kthread_bind(hp->sync_tsk, cpu);
16547 + * Wait for tasks to get out of the pinned sections,
16548 + * it's still OK if new tasks enter. Some CPU notifiers will
16549 + * wait for tasks that are going to enter these sections and
16550 + * we must not have them block.
16552 + wake_up_process(hp->sync_tsk);
16556 +static void cpu_unplug_sync(unsigned int cpu)
16558 + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16560 + init_completion(&hp->synced);
16561 + /* The completion needs to be initialized before setting grab_lock */
16564 + /* Grab the mutex before setting grab_lock */
16565 + hotplug_lock(hp);
16566 + hp->grab_lock = 1;
16569 + * The CPU notifiers have been completed.
16570 + * Wait for tasks to get out of pinned CPU sections and have new
16571 + * tasks block until the CPU is completely down.
16573 + __cpu_unplug_sync(hp);
16575 + /* All done with the sync thread */
16576 + kthread_stop(hp->sync_tsk);
16577 + hp->sync_tsk = NULL;
16580 +static void cpu_unplug_done(unsigned int cpu)
16582 + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16584 + hp->unplug = NULL;
16585 + /* Let all tasks know cpu unplug is finished before cleaning up */
16588 + if (hp->sync_tsk)
16589 + kthread_stop(hp->sync_tsk);
16591 + if (hp->grab_lock) {
16592 + hotplug_unlock(hp);
16593 + /* protected by cpu_hotplug.lock */
16594 + hp->grab_lock = 0;
16596 + tell_sched_cpu_down_done(cpu);
16599 void get_online_cpus(void)
16601 @@ -338,13 +621,15 @@
16602 /* Requires cpu_add_remove_lock to be held */
16603 static int _cpu_down(unsigned int cpu, int tasks_frozen)
16605 - int err, nr_calls = 0;
16606 + int mycpu, err, nr_calls = 0;
16607 void *hcpu = (void *)(long)cpu;
16608 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
16609 struct take_cpu_down_param tcd_param = {
16613 + cpumask_var_t cpumask;
16614 + cpumask_var_t cpumask_org;
16616 if (num_online_cpus() == 1)
16618 @@ -352,7 +637,34 @@
16619 if (!cpu_online(cpu))
16622 + /* Move the downtaker off the unplug cpu */
16623 + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
16625 + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
16626 + free_cpumask_var(cpumask);
16630 + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
16631 + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
16632 + set_cpus_allowed_ptr(current, cpumask);
16633 + free_cpumask_var(cpumask);
16634 + migrate_disable();
16635 + mycpu = smp_processor_id();
16636 + if (mycpu == cpu) {
16637 + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
16638 + migrate_enable();
16640 + goto restore_cpus;
16642 + migrate_enable();
16644 cpu_hotplug_begin();
16645 + err = cpu_unplug_begin(cpu);
16647 + printk("cpu_unplug_begin(%d) failed\n", cpu);
16651 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
16653 @@ -378,8 +690,12 @@
16657 + __cpu_unplug_wait(cpu);
16658 smpboot_park_threads(cpu);
16660 + /* Notifiers are done. Don't let any more tasks pin this CPU. */
16661 + cpu_unplug_sync(cpu);
16664 * Prevent irq alloc/free while the dying cpu reorganizes the
16665 * interrupt affinities.
16666 @@ -424,9 +740,14 @@
16667 check_for_tasks(cpu);
16670 + cpu_unplug_done(cpu);
16672 cpu_hotplug_done();
16674 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
16676 + set_cpus_allowed_ptr(current, cpumask_org);
16677 + free_cpumask_var(cpumask_org);
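pin_current_cpu()/unpin_current_cpu() above are intended to be driven from the migrate_disable()/migrate_enable() implementation added elsewhere in this patch, so that a migration-disabled task keeps its CPU online without taking the full hotplug mutex. A hedged sketch of that pairing; the two functions below are illustrative, not the patch's actual scheduler code.

/* Sketch of the assumed pairing with migrate_disable()/migrate_enable(). */
static void example_migrate_disable(void)
{
	preempt_disable();
	pin_current_cpu();		/* blocks cpu_down() of this CPU */
	/* ... mark the task migration-disabled ... */
	preempt_enable();
}

static void example_migrate_enable(void)
{
	preempt_disable();
	/* ... clear the migration-disabled state ... */
	unpin_current_cpu();		/* may wake the sync_unplug thread */
	preempt_enable();
}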
16681 diff -Nur linux-4.4.46.orig/kernel/debug/kdb/kdb_io.c linux-4.4.46/kernel/debug/kdb/kdb_io.c
16682 --- linux-4.4.46.orig/kernel/debug/kdb/kdb_io.c 2017-02-01 08:31:11.000000000 +0100
16683 +++ linux-4.4.46/kernel/debug/kdb/kdb_io.c 2017-02-03 17:18:10.919618749 +0100
16684 @@ -554,7 +554,6 @@
16687 int logging, saved_loglevel = 0;
16688 - int saved_trap_printk;
16689 int got_printf_lock = 0;
16692 @@ -565,8 +564,6 @@
16693 unsigned long uninitialized_var(flags);
16696 - saved_trap_printk = kdb_trap_printk;
16697 - kdb_trap_printk = 0;
16699 /* Serialize kdb_printf if multiple cpus try to write at once.
16700 * But if any cpu goes recursive in kdb, just print the output,
16701 @@ -855,7 +852,6 @@
16703 __release(kdb_printf_lock);
16705 - kdb_trap_printk = saved_trap_printk;
16709 @@ -865,9 +861,11 @@
16713 + kdb_trap_printk++;
16715 r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
16717 + kdb_trap_printk--;
16721 diff -Nur linux-4.4.46.orig/kernel/events/core.c linux-4.4.46/kernel/events/core.c
16722 --- linux-4.4.46.orig/kernel/events/core.c 2017-02-01 08:31:11.000000000 +0100
16723 +++ linux-4.4.46/kernel/events/core.c 2017-02-03 17:18:10.919618749 +0100
16724 @@ -802,6 +802,7 @@
16725 raw_spin_lock_init(&cpuctx->hrtimer_lock);
16726 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
16727 timer->function = perf_mux_hrtimer_handler;
16728 + timer->irqsafe = 1;
16731 static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
16732 @@ -7240,6 +7241,7 @@
16734 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
16735 hwc->hrtimer.function = perf_swevent_hrtimer;
16736 + hwc->hrtimer.irqsafe = 1;
16739 * Since hrtimers have a fixed rate, we can do a static freq->period
16740 diff -Nur linux-4.4.46.orig/kernel/exit.c linux-4.4.46/kernel/exit.c
16741 --- linux-4.4.46.orig/kernel/exit.c 2017-02-01 08:31:11.000000000 +0100
16742 +++ linux-4.4.46/kernel/exit.c 2017-02-03 17:18:10.923618903 +0100
16743 @@ -144,7 +144,7 @@
16744 * Do this under ->siglock, we can race with another thread
16745 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
16747 - flush_sigqueue(&tsk->pending);
16748 + flush_task_sigqueue(tsk);
16749 tsk->sighand = NULL;
16750 spin_unlock(&sighand->siglock);
16752 diff -Nur linux-4.4.46.orig/kernel/fork.c linux-4.4.46/kernel/fork.c
16753 --- linux-4.4.46.orig/kernel/fork.c 2017-02-01 08:31:11.000000000 +0100
16754 +++ linux-4.4.46/kernel/fork.c 2017-02-03 17:18:10.923618903 +0100
16755 @@ -108,7 +108,7 @@
16757 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
16759 -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
16760 +DEFINE_RWLOCK(tasklist_lock); /* outer */
16762 #ifdef CONFIG_PROVE_RCU
16763 int lockdep_tasklist_lock_is_held(void)
16764 @@ -244,7 +244,9 @@
16765 if (atomic_dec_and_test(&sig->sigcnt))
16766 free_signal_struct(sig);
16769 +#ifdef CONFIG_PREEMPT_RT_BASE
16772 void __put_task_struct(struct task_struct *tsk)
16774 WARN_ON(!tsk->exit_state);
16775 @@ -261,7 +263,18 @@
16776 if (!profile_handoff_task(tsk))
16779 +#ifndef CONFIG_PREEMPT_RT_BASE
16780 EXPORT_SYMBOL_GPL(__put_task_struct);
16782 +void __put_task_struct_cb(struct rcu_head *rhp)
16784 + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
16786 + __put_task_struct(tsk);
16789 +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
16792 void __init __weak arch_task_cache_init(void) { }
16794 @@ -692,6 +705,19 @@
16796 EXPORT_SYMBOL_GPL(__mmdrop);
16798 +#ifdef CONFIG_PREEMPT_RT_BASE
16800 + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
16801 + * want another facility to make this work.
16803 +void __mmdrop_delayed(struct rcu_head *rhp)
16805 + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
16812 * Decrement the use count and release all resources for an mm.
16814 @@ -1242,6 +1268,9 @@
16816 static void posix_cpu_timers_init(struct task_struct *tsk)
16818 +#ifdef CONFIG_PREEMPT_RT_BASE
16819 + tsk->posix_timer_list = NULL;
16821 tsk->cputime_expires.prof_exp = 0;
16822 tsk->cputime_expires.virt_exp = 0;
16823 tsk->cputime_expires.sched_exp = 0;
16824 @@ -1367,15 +1396,16 @@
16825 spin_lock_init(&p->alloc_lock);
16827 init_sigpending(&p->pending);
16828 + p->sigqueue_cache = NULL;
16830 p->utime = p->stime = p->gtime = 0;
16831 p->utimescaled = p->stimescaled = 0;
16832 prev_cputime_init(&p->prev_cputime);
16834 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
16835 - seqlock_init(&p->vtime_seqlock);
16836 + seqcount_init(&p->vtime_seqcount);
16838 - p->vtime_snap_whence = VTIME_SLEEPING;
16839 + p->vtime_snap_whence = VTIME_INACTIVE;
16842 #if defined(SPLIT_RSS_COUNTING)
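On PREEMPT_RT the final put of a task_struct can happen from contexts that must not free it synchronously, so the patch adds __put_task_struct_cb() as an RCU callback. A sketch of how the sched.h side (changed elsewhere in this patch) is expected to defer the free; treat the exact definition as an assumption.

/* Assumed RT variant of put_task_struct(); the real one is in the sched.h hunk. */
static inline void example_put_task_struct(struct task_struct *t)
{
	if (atomic_dec_and_test(&t->usage))
		call_rcu(&t->put_rcu, __put_task_struct_cb);	/* freed later, preemptibly */
}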
16843 diff -Nur linux-4.4.46.orig/kernel/futex.c linux-4.4.46/kernel/futex.c
16844 --- linux-4.4.46.orig/kernel/futex.c 2017-02-01 08:31:11.000000000 +0100
16845 +++ linux-4.4.46/kernel/futex.c 2017-02-03 17:18:10.923618903 +0100
16846 @@ -815,7 +815,9 @@
16847 * task still owns the PI-state:
16849 if (head->next != next) {
16850 + raw_spin_unlock_irq(&curr->pi_lock);
16851 spin_unlock(&hb->lock);
16852 + raw_spin_lock_irq(&curr->pi_lock);
16856 @@ -1210,6 +1212,7 @@
16857 struct futex_pi_state *pi_state = this->pi_state;
16858 u32 uninitialized_var(curval), newval;
16860 + WAKE_Q(wake_sleeper_q);
16864 @@ -1223,7 +1226,7 @@
16865 if (pi_state->owner != current)
16868 - raw_spin_lock(&pi_state->pi_mutex.wait_lock);
16869 + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
16870 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
16873 @@ -1259,24 +1262,25 @@
16877 - raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
16878 + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
16882 - raw_spin_lock_irq(&pi_state->owner->pi_lock);
16883 + raw_spin_lock(&pi_state->owner->pi_lock);
16884 WARN_ON(list_empty(&pi_state->list));
16885 list_del_init(&pi_state->list);
16886 - raw_spin_unlock_irq(&pi_state->owner->pi_lock);
16887 + raw_spin_unlock(&pi_state->owner->pi_lock);
16889 - raw_spin_lock_irq(&new_owner->pi_lock);
16890 + raw_spin_lock(&new_owner->pi_lock);
16891 WARN_ON(!list_empty(&pi_state->list));
16892 list_add(&pi_state->list, &new_owner->pi_state_list);
16893 pi_state->owner = new_owner;
16894 - raw_spin_unlock_irq(&new_owner->pi_lock);
16895 + raw_spin_unlock(&new_owner->pi_lock);
16897 - raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
16898 + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
16900 - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
16901 + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
16902 + &wake_sleeper_q);
16905 * First unlock HB so the waiter does not spin on it once he got woken
16906 @@ -1284,8 +1288,9 @@
16907 * deboost first (and lose our higher priority), then the task might get
16908 * scheduled away before the wake up can take place.
16910 - spin_unlock(&hb->lock);
16911 + deboost |= spin_unlock_no_deboost(&hb->lock);
16912 wake_up_q(&wake_q);
16913 + wake_up_q_sleeper(&wake_sleeper_q);
16915 rt_mutex_adjust_prio(current);
16917 @@ -1822,6 +1827,16 @@
16918 requeue_pi_wake_futex(this, &key2, hb2);
16921 + } else if (ret == -EAGAIN) {
16923 + * Waiter was woken by timeout or
16924 + * signal and has set pi_blocked_on to
16925 + * PI_WAKEUP_INPROGRESS before we
16926 + * tried to enqueue it on the rtmutex.
16928 + this->pi_state = NULL;
16929 + free_pi_state(pi_state);
16933 this->pi_state = NULL;
16934 @@ -2139,11 +2154,11 @@
16935 * we returned due to timeout or signal without taking the
16936 * rt_mutex. Too late.
16938 - raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
16939 + raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
16940 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
16942 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
16943 - raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
16944 + raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
16945 ret = fixup_pi_state_owner(uaddr, q, owner);
16948 @@ -2691,7 +2706,7 @@
16949 struct hrtimer_sleeper timeout, *to = NULL;
16950 struct rt_mutex_waiter rt_waiter;
16951 struct rt_mutex *pi_mutex = NULL;
16952 - struct futex_hash_bucket *hb;
16953 + struct futex_hash_bucket *hb, *hb2;
16954 union futex_key key2 = FUTEX_KEY_INIT;
16955 struct futex_q q = futex_q_init;
16957 @@ -2716,10 +2731,7 @@
16958 * The waiter is allocated on our stack, manipulated by the requeue
16959 * code while we sleep on uaddr.
16961 - debug_rt_mutex_init_waiter(&rt_waiter);
16962 - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
16963 - RB_CLEAR_NODE(&rt_waiter.tree_entry);
16964 - rt_waiter.task = NULL;
16965 + rt_mutex_init_waiter(&rt_waiter, false);
16967 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
16968 if (unlikely(ret != 0))
16969 @@ -2750,20 +2762,55 @@
16970 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
16971 futex_wait_queue_me(hb, &q, to);
16973 - spin_lock(&hb->lock);
16974 - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
16975 - spin_unlock(&hb->lock);
16977 - goto out_put_keys;
16979 + * On RT we must avoid races with requeue and trying to block
16980 + * on two mutexes (hb->lock and uaddr2's rtmutex) by
16981 + * serializing access to pi_blocked_on with pi_lock.
16983 + raw_spin_lock_irq(&current->pi_lock);
16984 + if (current->pi_blocked_on) {
16986 + * We have been requeued or are in the process of
16987 + * being requeued.
16989 + raw_spin_unlock_irq(&current->pi_lock);
16992 + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
16993 + * prevents a concurrent requeue from moving us to the
16994 + * uaddr2 rtmutex. After that we can safely acquire
16995 + * (and possibly block on) hb->lock.
16997 + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
16998 + raw_spin_unlock_irq(&current->pi_lock);
17000 + spin_lock(&hb->lock);
17003 + * Clean up pi_blocked_on. We might leak it otherwise
17004 + * when we succeeded with the hb->lock in the fast
17007 + raw_spin_lock_irq(&current->pi_lock);
17008 + current->pi_blocked_on = NULL;
17009 + raw_spin_unlock_irq(&current->pi_lock);
17011 + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
17012 + spin_unlock(&hb->lock);
17014 + goto out_put_keys;
17018 - * In order for us to be here, we know our q.key == key2, and since
17019 - * we took the hb->lock above, we also know that futex_requeue() has
17020 - * completed and we no longer have to concern ourselves with a wakeup
17021 - * race with the atomic proxy lock acquisition by the requeue code. The
17022 - * futex_requeue dropped our key1 reference and incremented our key2
17023 - * reference count.
17024 + * In order to be here, we have either been requeued, are in
17025 + * the process of being requeued, or requeue successfully
17026 + * acquired uaddr2 on our behalf. If pi_blocked_on was
17027 + * non-null above, we may be racing with a requeue. Do not
17028 + * rely on q->lock_ptr to be hb2->lock until after blocking on
17029 + * hb->lock or hb2->lock. The futex_requeue dropped our key1
17030 + * reference and incremented our key2 reference count.
17032 + hb2 = hash_futex(&key2);
17034 /* Check if the requeue code acquired the second futex for us. */
17035 if (!q.rt_waiter) {
17036 @@ -2772,14 +2819,15 @@
17037 * did a lock-steal - fix up the PI-state in that case.
17039 if (q.pi_state && (q.pi_state->owner != current)) {
17040 - spin_lock(q.lock_ptr);
17041 + spin_lock(&hb2->lock);
17042 + BUG_ON(&hb2->lock != q.lock_ptr);
17043 ret = fixup_pi_state_owner(uaddr2, &q, current);
17045 * Drop the reference to the pi state which
17046 * the requeue_pi() code acquired for us.
17048 free_pi_state(q.pi_state);
17049 - spin_unlock(q.lock_ptr);
17050 + spin_unlock(&hb2->lock);
17054 @@ -2792,7 +2840,8 @@
17055 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
17056 debug_rt_mutex_free_waiter(&rt_waiter);
17058 - spin_lock(q.lock_ptr);
17059 + spin_lock(&hb2->lock);
17060 + BUG_ON(&hb2->lock != q.lock_ptr);
17062 * Fixup the pi_state owner and possibly acquire the lock if we
17064 diff -Nur linux-4.4.46.orig/kernel/irq/handle.c linux-4.4.46/kernel/irq/handle.c
17065 --- linux-4.4.46.orig/kernel/irq/handle.c 2017-02-01 08:31:11.000000000 +0100
17066 +++ linux-4.4.46/kernel/irq/handle.c 2017-02-03 17:18:10.923618903 +0100
17067 @@ -134,6 +134,8 @@
17069 irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
17071 + struct pt_regs *regs = get_irq_regs();
17072 + u64 ip = regs ? instruction_pointer(regs) : 0;
17073 irqreturn_t retval = IRQ_NONE;
17074 unsigned int flags = 0, irq = desc->irq_data.irq;
17075 struct irqaction *action = desc->action;
17076 @@ -176,7 +178,11 @@
17077 action = action->next;
17080 - add_interrupt_randomness(irq, flags);
17081 +#ifdef CONFIG_PREEMPT_RT_FULL
17082 + desc->random_ip = ip;
17084 + add_interrupt_randomness(irq, flags, ip);
17088 note_interrupt(desc, retval);
17089 diff -Nur linux-4.4.46.orig/kernel/irq/irqdesc.c linux-4.4.46/kernel/irq/irqdesc.c
17090 --- linux-4.4.46.orig/kernel/irq/irqdesc.c 2017-02-01 08:31:11.000000000 +0100
17091 +++ linux-4.4.46/kernel/irq/irqdesc.c 2017-02-03 17:18:10.923618903 +0100
17092 @@ -24,10 +24,27 @@
17093 static struct lock_class_key irq_desc_lock_class;
17095 #if defined(CONFIG_SMP)
17096 +static int __init irq_affinity_setup(char *str)
17098 + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
17099 + cpulist_parse(str, irq_default_affinity);
17101 + * Set at least the boot cpu. We don't want to end up with
17102 + * bugreports caused by random comandline masks
17104 + cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
17107 +__setup("irqaffinity=", irq_affinity_setup);
17109 static void __init init_irq_default_affinity(void)
17111 - alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
17112 - cpumask_setall(irq_default_affinity);
17113 +#ifdef CONFIG_CPUMASK_OFFSTACK
17114 + if (!irq_default_affinity)
17115 + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
17117 + if (cpumask_empty(irq_default_affinity))
17118 + cpumask_setall(irq_default_affinity);
17121 static void __init init_irq_default_affinity(void)
17122 diff -Nur linux-4.4.46.orig/kernel/irq/manage.c linux-4.4.46/kernel/irq/manage.c
17123 --- linux-4.4.46.orig/kernel/irq/manage.c 2017-02-01 08:31:11.000000000 +0100
17124 +++ linux-4.4.46/kernel/irq/manage.c 2017-02-03 17:18:10.923618903 +0100
17126 #include "internals.h"
17128 #ifdef CONFIG_IRQ_FORCED_THREADING
17129 +# ifndef CONFIG_PREEMPT_RT_BASE
17130 __read_mostly bool force_irqthreads;
17132 static int __init setup_forced_irqthreads(char *arg)
17136 early_param("threadirqs", setup_forced_irqthreads);
17140 static void __synchronize_hardirq(struct irq_desc *desc)
17141 @@ -181,6 +183,62 @@
17142 irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
17145 +#ifdef CONFIG_PREEMPT_RT_FULL
17146 +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
17147 +static struct task_struct *set_affinity_helper;
17148 +static LIST_HEAD(affinity_list);
17149 +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
17151 +static int set_affinity_thread(void *unused)
17154 + struct irq_affinity_notify *notify;
17157 + set_current_state(TASK_INTERRUPTIBLE);
17159 + raw_spin_lock_irq(&affinity_list_lock);
17160 + empty = list_empty(&affinity_list);
17161 + raw_spin_unlock_irq(&affinity_list_lock);
17165 + if (kthread_should_stop())
17167 + set_current_state(TASK_RUNNING);
17171 + raw_spin_lock_irq(&affinity_list_lock);
17172 + if (!list_empty(&affinity_list)) {
17173 + notify = list_first_entry(&affinity_list,
17174 + struct irq_affinity_notify, list);
17175 + list_del_init(&notify->list);
17177 + raw_spin_unlock_irq(&affinity_list_lock);
17181 + _irq_affinity_notify(notify);
17187 +static void init_helper_thread(void)
17189 + if (set_affinity_helper)
17191 + set_affinity_helper = kthread_run(set_affinity_thread, NULL,
17193 + WARN_ON(IS_ERR(set_affinity_helper));
17197 +static inline void init_helper_thread(void) { }
17201 int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
17204 @@ -220,7 +278,17 @@
17206 if (desc->affinity_notify) {
17207 kref_get(&desc->affinity_notify->kref);
17209 +#ifdef CONFIG_PREEMPT_RT_FULL
17210 + raw_spin_lock(&affinity_list_lock);
17211 + if (list_empty(&desc->affinity_notify->list))
17212 + list_add_tail(&affinity_list,
17213 + &desc->affinity_notify->list);
17214 + raw_spin_unlock(&affinity_list_lock);
17215 + wake_up_process(set_affinity_helper);
17217 schedule_work(&desc->affinity_notify->work);
17220 irqd_set(data, IRQD_AFFINITY_SET);
17222 @@ -258,10 +326,8 @@
17224 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
17226 -static void irq_affinity_notify(struct work_struct *work)
17227 +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
17229 - struct irq_affinity_notify *notify =
17230 - container_of(work, struct irq_affinity_notify, work);
17231 struct irq_desc *desc = irq_to_desc(notify->irq);
17232 cpumask_var_t cpumask;
17233 unsigned long flags;
17234 @@ -283,6 +349,13 @@
17235 kref_put(&notify->kref, notify->release);
17238 +static void irq_affinity_notify(struct work_struct *work)
17240 + struct irq_affinity_notify *notify =
17241 + container_of(work, struct irq_affinity_notify, work);
17242 + _irq_affinity_notify(notify);
17246 * irq_set_affinity_notifier - control notification of IRQ affinity changes
17247 * @irq: Interrupt for which to enable/disable notification
17248 @@ -312,6 +385,8 @@
17250 kref_init(&notify->kref);
17251 INIT_WORK(&notify->work, irq_affinity_notify);
17252 + INIT_LIST_HEAD(&notify->list);
17253 + init_helper_thread();
17256 raw_spin_lock_irqsave(&desc->lock, flags);
17257 @@ -865,7 +940,15 @@
17258 local_bh_disable();
17259 ret = action->thread_fn(action->irq, action->dev_id);
17260 irq_finalize_oneshot(desc, action);
17261 - local_bh_enable();
17263 + * Interrupts which have real time requirements can be set up
17264 + * to avoid softirq processing in the thread handler. This is
17265 + * safe as these interrupts do not raise soft interrupts.
17267 + if (irq_settings_no_softirq_call(desc))
17268 + _local_bh_enable();
17270 + local_bh_enable();
17274 @@ -962,6 +1045,12 @@
17275 if (action_ret == IRQ_WAKE_THREAD)
17276 irq_wake_secondary(desc, action);
17278 +#ifdef CONFIG_PREEMPT_RT_FULL
17279 + migrate_disable();
17280 + add_interrupt_randomness(action->irq, 0,
17281 + desc->random_ip ^ (unsigned long) action);
17282 + migrate_enable();
17284 wake_threads_waitq(desc);
17287 @@ -1315,6 +1404,9 @@
17288 irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
17291 + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
17292 + irq_settings_set_no_softirq_call(desc);
17294 /* Set default affinity mask once everything is setup */
17295 setup_affinity(desc, mask);
17297 @@ -1968,7 +2060,7 @@
17298 * This call sets the internal irqchip state of an interrupt,
17299 * depending on the value of @which.
17301 - * This function should be called with preemption disabled if the
17302 + * This function should be called with migration disabled if the
17303 * interrupt controller has per-cpu registers.
17305 int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
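With the IRQF_NO_SOFTIRQ_CALL handling above, a latency-sensitive driver can request a threaded handler whose bottom-half re-enable skips softirq processing (safe only because such handlers raise no softirqs themselves). A hedged sketch of such a request; the driver names are hypothetical.

/* Hypothetical driver requesting a "no softirq" threaded handler. */
static irqreturn_t example_thread_fn(int irq, void *dev_id)
{
	/* must not raise softirqs, per the comment above */
	return IRQ_HANDLED;
}

static int example_request_irq(int irq, void *dev)
{
	return request_threaded_irq(irq, NULL, example_thread_fn,
				    IRQF_ONESHOT | IRQF_NO_SOFTIRQ_CALL,
				    "example-dev", dev);
}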
17306 diff -Nur linux-4.4.46.orig/kernel/irq/settings.h linux-4.4.46/kernel/irq/settings.h
17307 --- linux-4.4.46.orig/kernel/irq/settings.h 2017-02-01 08:31:11.000000000 +0100
17308 +++ linux-4.4.46/kernel/irq/settings.h 2017-02-03 17:18:10.923618903 +0100
17310 _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
17311 _IRQ_IS_POLLED = IRQ_IS_POLLED,
17312 _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
17313 + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
17314 _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
17318 #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
17319 #define IRQ_IS_POLLED GOT_YOU_MORON
17320 #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
17321 +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
17322 #undef IRQF_MODIFY_MASK
17323 #define IRQF_MODIFY_MASK GOT_YOU_MORON
17326 desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
17329 +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
17331 + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
17334 +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
17336 + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
17339 static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
17341 return desc->status_use_accessors & _IRQ_PER_CPU;
17342 diff -Nur linux-4.4.46.orig/kernel/irq/spurious.c linux-4.4.46/kernel/irq/spurious.c
17343 --- linux-4.4.46.orig/kernel/irq/spurious.c 2017-02-01 08:31:11.000000000 +0100
17344 +++ linux-4.4.46/kernel/irq/spurious.c 2017-02-03 17:18:10.923618903 +0100
17345 @@ -444,6 +444,10 @@
17347 static int __init irqfixup_setup(char *str)
17349 +#ifdef CONFIG_PREEMPT_RT_BASE
17350 + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
17354 printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
17355 printk(KERN_WARNING "This may impact system performance.\n");
17356 @@ -456,6 +460,10 @@
17358 static int __init irqpoll_setup(char *str)
17360 +#ifdef CONFIG_PREEMPT_RT_BASE
17361 + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
17365 printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
17367 diff -Nur linux-4.4.46.orig/kernel/irq_work.c linux-4.4.46/kernel/irq_work.c
17368 --- linux-4.4.46.orig/kernel/irq_work.c 2017-02-01 08:31:11.000000000 +0100
17369 +++ linux-4.4.46/kernel/irq_work.c 2017-02-03 17:18:10.923618903 +0100
17371 #include <linux/cpu.h>
17372 #include <linux/notifier.h>
17373 #include <linux/smp.h>
17374 +#include <linux/interrupt.h>
17375 #include <asm/processor.h>
17380 bool irq_work_queue_on(struct irq_work *work, int cpu)
17382 + struct llist_head *list;
17384 /* All work should have been flushed before going offline */
17385 WARN_ON_ONCE(cpu_is_offline(cpu));
17388 if (!irq_work_claim(work))
17391 - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
17392 + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
17393 + list = &per_cpu(lazy_list, cpu);
17395 + list = &per_cpu(raised_list, cpu);
17397 + if (llist_add(&work->llnode, list))
17398 arch_send_call_function_single_ipi(cpu);
17402 /* Enqueue the irq work @work on the current CPU */
17403 bool irq_work_queue(struct irq_work *work)
17405 + struct llist_head *list;
17406 + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
17408 /* Only queue if not already pending */
17409 if (!irq_work_claim(work))
17411 @@ -93,13 +104,15 @@
17412 /* Queue the entry and raise the IPI if needed. */
17415 - /* If the work is "lazy", handle it from next tick if any */
17416 - if (work->flags & IRQ_WORK_LAZY) {
17417 - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
17418 - tick_nohz_tick_stopped())
17419 - arch_irq_work_raise();
17421 - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
17422 + lazy_work = work->flags & IRQ_WORK_LAZY;
17424 + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
17425 + list = this_cpu_ptr(&lazy_list);
17427 + list = this_cpu_ptr(&raised_list);
17429 + if (llist_add(&work->llnode, list)) {
17430 + if (!lazy_work || tick_nohz_tick_stopped())
17431 arch_irq_work_raise();
17434 @@ -116,9 +129,8 @@
17435 raised = this_cpu_ptr(&raised_list);
17436 lazy = this_cpu_ptr(&lazy_list);
17438 - if (llist_empty(raised) || arch_irq_work_has_interrupt())
17439 - if (llist_empty(lazy))
17441 + if (llist_empty(raised) && llist_empty(lazy))
17444 /* All work should have been flushed before going offline */
17445 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
17446 @@ -132,7 +144,7 @@
17447 struct irq_work *work;
17448 struct llist_node *llnode;
17450 - BUG_ON(!irqs_disabled());
17451 + BUG_ON_NONRT(!irqs_disabled());
17453 if (llist_empty(list))
17455 @@ -169,7 +181,16 @@
17456 void irq_work_run(void)
17458 irq_work_run_list(this_cpu_ptr(&raised_list));
17459 - irq_work_run_list(this_cpu_ptr(&lazy_list));
17460 + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
17462 + * NOTE: we raise softirq via IPI for safety,
17463 + * and execute in irq_work_tick() to move the
17464 + * overhead from hard to soft irq context.
17466 + if (!llist_empty(this_cpu_ptr(&lazy_list)))
17467 + raise_softirq(TIMER_SOFTIRQ);
17469 + irq_work_run_list(this_cpu_ptr(&lazy_list));
17471 EXPORT_SYMBOL_GPL(irq_work_run);
17473 @@ -179,8 +200,17 @@
17475 if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
17476 irq_work_run_list(raised);
17478 + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
17479 + irq_work_run_list(this_cpu_ptr(&lazy_list));
17482 +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
17483 +void irq_work_tick_soft(void)
17485 irq_work_run_list(this_cpu_ptr(&lazy_list));
17490 * Synchronize against the irq_work @entry, ensures the entry is not
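
Under PREEMPT_RT_FULL the irq_work changes above split queued items across two per-CPU lists: only items marked IRQ_WORK_HARD_IRQ stay on raised_list and run from the hard-interrupt/IPI path, everything else is moved to lazy_list and processed from irq_work_tick_soft() via TIMER_SOFTIRQ. A minimal sketch of marking a callback that must still run in hard-irq context on RT; my_hardirq_cb, my_work and the two helpers are illustrative, and IRQ_WORK_HARD_IRQ is provided by this patch set, not by a vanilla 4.4 kernel:

#include <linux/irq_work.h>

static void my_hardirq_cb(struct irq_work *work)
{
	/* Runs from the interrupt/IPI path even on PREEMPT_RT_FULL. */
}

static struct irq_work my_work;

static void my_work_init(void)
{
	init_irq_work(&my_work, my_hardirq_cb);
	my_work.flags = IRQ_WORK_HARD_IRQ;	/* keep it on raised_list under RT */
}

static void my_post_event(void)
{
	/* Safe to call from any context that cannot sleep. */
	irq_work_queue(&my_work);
}
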
17491 diff -Nur linux-4.4.46.orig/kernel/Kconfig.locks linux-4.4.46/kernel/Kconfig.locks
17492 --- linux-4.4.46.orig/kernel/Kconfig.locks 2017-02-01 08:31:11.000000000 +0100
17493 +++ linux-4.4.46/kernel/Kconfig.locks 2017-02-03 17:18:10.919618749 +0100
17494 @@ -225,11 +225,11 @@
17496 config MUTEX_SPIN_ON_OWNER
17498 - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
17499 + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
17501 config RWSEM_SPIN_ON_OWNER
17503 - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
17504 + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
17506 config LOCK_SPIN_ON_OWNER
17508 diff -Nur linux-4.4.46.orig/kernel/Kconfig.preempt linux-4.4.46/kernel/Kconfig.preempt
17509 --- linux-4.4.46.orig/kernel/Kconfig.preempt 2017-02-01 08:31:11.000000000 +0100
17510 +++ linux-4.4.46/kernel/Kconfig.preempt 2017-02-03 17:18:10.919618749 +0100
17514 + select PREEMPT_COUNT
17516 +config PREEMPT_RT_BASE
17520 +config HAVE_PREEMPT_LAZY
17523 +config PREEMPT_LAZY
17524 + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
17527 prompt "Preemption Model"
17530 Select this if you are building a kernel for a desktop system.
17533 +config PREEMPT__LL
17534 bool "Preemptible Kernel (Low-Latency Desktop)"
17535 - select PREEMPT_COUNT
17537 select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
17539 This option reduces the latency of the kernel by making
17541 embedded system with latency requirements in the milliseconds
17544 +config PREEMPT_RTB
17545 + bool "Preemptible Kernel (Basic RT)"
17546 + select PREEMPT_RT_BASE
17548 + This option is basically the same as (Low-Latency Desktop) but
17549 + enables changes which are preliminary for the full preemptible
17552 +config PREEMPT_RT_FULL
17553 + bool "Fully Preemptible Kernel (RT)"
17554 + depends on IRQ_FORCED_THREADING
17555 + select PREEMPT_RT_BASE
17556 + select PREEMPT_RCU
17558 + All and everything
17562 config PREEMPT_COUNT
17563 diff -Nur linux-4.4.46.orig/kernel/ksysfs.c linux-4.4.46/kernel/ksysfs.c
17564 --- linux-4.4.46.orig/kernel/ksysfs.c 2017-02-01 08:31:11.000000000 +0100
17565 +++ linux-4.4.46/kernel/ksysfs.c 2017-02-03 17:18:10.923618903 +0100
17566 @@ -136,6 +136,15 @@
17568 #endif /* CONFIG_KEXEC_CORE */
17570 +#if defined(CONFIG_PREEMPT_RT_FULL)
17571 +static ssize_t realtime_show(struct kobject *kobj,
17572 + struct kobj_attribute *attr, char *buf)
17574 + return sprintf(buf, "%d\n", 1);
17576 +KERNEL_ATTR_RO(realtime);
17579 /* whether file capabilities are enabled */
17580 static ssize_t fscaps_show(struct kobject *kobj,
17581 struct kobj_attribute *attr, char *buf)
17582 @@ -203,6 +212,9 @@
17583 &vmcoreinfo_attr.attr,
17585 &rcu_expedited_attr.attr,
17586 +#ifdef CONFIG_PREEMPT_RT_FULL
17587 + &realtime_attr.attr,
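
The ksysfs.c hunk exports a read-only /sys/kernel/realtime attribute that simply prints "1" on a PREEMPT_RT_FULL kernel, giving user space a cheap run-time check for an RT kernel. A self-contained user-space probe (not part of the patch) could look like this:

#include <stdio.h>

/* Returns 1 on a PREEMPT_RT_FULL kernel, 0 otherwise (file absent or not "1"). */
static int kernel_is_rt(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int val = 0;

	if (!f)
		return 0;
	if (fscanf(f, "%d", &val) != 1)
		val = 0;
	fclose(f);
	return val == 1;
}

int main(void)
{
	printf("PREEMPT_RT_FULL: %s\n", kernel_is_rt() ? "yes" : "no");
	return 0;
}
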
17592 diff -Nur linux-4.4.46.orig/kernel/locking/lglock.c linux-4.4.46/kernel/locking/lglock.c
17593 --- linux-4.4.46.orig/kernel/locking/lglock.c 2017-02-01 08:31:11.000000000 +0100
17594 +++ linux-4.4.46/kernel/locking/lglock.c 2017-02-03 17:18:10.923618903 +0100
17596 #include <linux/cpu.h>
17597 #include <linux/string.h>
17599 +#ifndef CONFIG_PREEMPT_RT_FULL
17600 +# define lg_lock_ptr arch_spinlock_t
17601 +# define lg_do_lock(l) arch_spin_lock(l)
17602 +# define lg_do_unlock(l) arch_spin_unlock(l)
17604 +# define lg_lock_ptr struct rt_mutex
17605 +# define lg_do_lock(l) __rt_spin_lock__no_mg(l)
17606 +# define lg_do_unlock(l) __rt_spin_unlock(l)
17609 * Note there is no uninit, so lglocks cannot be defined in
17610 * modules (but it's fine to use them from there)
17611 @@ -12,51 +21,60 @@
17613 void lg_lock_init(struct lglock *lg, char *name)
17615 +#ifdef CONFIG_PREEMPT_RT_FULL
17618 + for_each_possible_cpu(i) {
17619 + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
17621 + rt_mutex_init(lock);
17624 LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
17626 EXPORT_SYMBOL(lg_lock_init);
17628 void lg_local_lock(struct lglock *lg)
17630 - arch_spinlock_t *lock;
17631 + lg_lock_ptr *lock;
17633 - preempt_disable();
17634 + migrate_disable();
17635 lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17636 lock = this_cpu_ptr(lg->lock);
17637 - arch_spin_lock(lock);
17638 + lg_do_lock(lock);
17640 EXPORT_SYMBOL(lg_local_lock);
17642 void lg_local_unlock(struct lglock *lg)
17644 - arch_spinlock_t *lock;
17645 + lg_lock_ptr *lock;
17647 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17648 lock = this_cpu_ptr(lg->lock);
17649 - arch_spin_unlock(lock);
17650 - preempt_enable();
17651 + lg_do_unlock(lock);
17652 + migrate_enable();
17654 EXPORT_SYMBOL(lg_local_unlock);
17656 void lg_local_lock_cpu(struct lglock *lg, int cpu)
17658 - arch_spinlock_t *lock;
17659 + lg_lock_ptr *lock;
17661 - preempt_disable();
17662 + preempt_disable_nort();
17663 lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17664 lock = per_cpu_ptr(lg->lock, cpu);
17665 - arch_spin_lock(lock);
17666 + lg_do_lock(lock);
17668 EXPORT_SYMBOL(lg_local_lock_cpu);
17670 void lg_local_unlock_cpu(struct lglock *lg, int cpu)
17672 - arch_spinlock_t *lock;
17673 + lg_lock_ptr *lock;
17675 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17676 lock = per_cpu_ptr(lg->lock, cpu);
17677 - arch_spin_unlock(lock);
17678 - preempt_enable();
17679 + lg_do_unlock(lock);
17680 + preempt_enable_nort();
17682 EXPORT_SYMBOL(lg_local_unlock_cpu);
17684 @@ -68,30 +86,30 @@
17688 - preempt_disable();
17689 + preempt_disable_nort();
17690 lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17691 - arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
17692 - arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
17693 + lg_do_lock(per_cpu_ptr(lg->lock, cpu1));
17694 + lg_do_lock(per_cpu_ptr(lg->lock, cpu2));
17697 void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
17699 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17700 - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
17701 - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
17702 - preempt_enable();
17703 + lg_do_unlock(per_cpu_ptr(lg->lock, cpu1));
17704 + lg_do_unlock(per_cpu_ptr(lg->lock, cpu2));
17705 + preempt_enable_nort();
17708 void lg_global_lock(struct lglock *lg)
17712 - preempt_disable();
17713 + preempt_disable_nort();
17714 lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17715 for_each_possible_cpu(i) {
17716 - arch_spinlock_t *lock;
17717 + lg_lock_ptr *lock;
17718 lock = per_cpu_ptr(lg->lock, i);
17719 - arch_spin_lock(lock);
17720 + lg_do_lock(lock);
17723 EXPORT_SYMBOL(lg_global_lock);
17724 @@ -102,10 +120,35 @@
17726 lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17727 for_each_possible_cpu(i) {
17728 - arch_spinlock_t *lock;
17729 + lg_lock_ptr *lock;
17730 lock = per_cpu_ptr(lg->lock, i);
17731 - arch_spin_unlock(lock);
17732 + lg_do_unlock(lock);
17734 - preempt_enable();
17735 + preempt_enable_nort();
17737 EXPORT_SYMBOL(lg_global_unlock);
17739 +#ifdef CONFIG_PREEMPT_RT_FULL
17741 + * HACK: If you use this, you get to keep the pieces.
17742 + * Used in queue_stop_cpus_work() when stop machinery
17743 + * is called from inactive CPU, so we can't schedule.
17745 +# define lg_do_trylock_relax(l) \
17747 + while (!__rt_spin_trylock(l)) \
17751 +void lg_global_trylock_relax(struct lglock *lg)
17755 + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17756 + for_each_possible_cpu(i) {
17757 + lg_lock_ptr *lock;
17758 + lock = per_cpu_ptr(lg->lock, i);
17759 + lg_do_trylock_relax(lock);
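
On PREEMPT_RT_FULL the lglock changes above turn each per-CPU arch_spinlock_t into an rt_mutex and replace the preempt_disable()/preempt_enable() pairs with migrate_disable()/migrate_enable(), so lglock sections stay preemptible but remain pinned to their CPU. The caller-visible API is unchanged; a sketch of typical usage follows, where my_lglock, my_counter and the helper functions are illustrative names, not part of the patch:

#include <linux/init.h>
#include <linux/lglock.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

DEFINE_LGLOCK(my_lglock);			/* illustrative lock */
static DEFINE_PER_CPU(unsigned long, my_counter);

static int __init my_lglock_setup(void)
{
	/* Under RT this rt_mutex_init()s every per-CPU lock, see above. */
	lg_lock_init(&my_lglock, "my_lglock");
	return 0;
}

static void my_count_event(void)
{
	/* Common per-CPU path: sleeps under contention on RT, never spins. */
	lg_local_lock(&my_lglock);
	__this_cpu_inc(my_counter);
	lg_local_unlock(&my_lglock);
}

static unsigned long my_count_total(void)
{
	unsigned long sum = 0;
	int cpu;

	/* Rare global path: takes every per-CPU lock. */
	lg_global_lock(&my_lglock);
	for_each_possible_cpu(cpu)
		sum += per_cpu(my_counter, cpu);
	lg_global_unlock(&my_lglock);
	return sum;
}
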
17763 diff -Nur linux-4.4.46.orig/kernel/locking/lockdep.c linux-4.4.46/kernel/locking/lockdep.c
17764 --- linux-4.4.46.orig/kernel/locking/lockdep.c 2017-02-01 08:31:11.000000000 +0100
17765 +++ linux-4.4.46/kernel/locking/lockdep.c 2017-02-03 17:18:10.923618903 +0100
17766 @@ -3525,6 +3525,7 @@
17770 +#ifndef CONFIG_PREEMPT_RT_FULL
17772 * We dont accurately track softirq state in e.g.
17773 * hardirq contexts (such as on 4KSTACKS), so only
17774 @@ -3539,6 +3540,7 @@
17775 DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
17781 print_irqtrace_events(current);
17782 diff -Nur linux-4.4.46.orig/kernel/locking/locktorture.c linux-4.4.46/kernel/locking/locktorture.c
17783 --- linux-4.4.46.orig/kernel/locking/locktorture.c 2017-02-01 08:31:11.000000000 +0100
17784 +++ linux-4.4.46/kernel/locking/locktorture.c 2017-02-03 17:18:10.923618903 +0100
17786 #include <linux/kthread.h>
17787 #include <linux/sched/rt.h>
17788 #include <linux/spinlock.h>
17789 -#include <linux/rwlock.h>
17790 #include <linux/mutex.h>
17791 #include <linux/rwsem.h>
17792 #include <linux/smp.h>
17793 diff -Nur linux-4.4.46.orig/kernel/locking/Makefile linux-4.4.46/kernel/locking/Makefile
17794 --- linux-4.4.46.orig/kernel/locking/Makefile 2017-02-01 08:31:11.000000000 +0100
17795 +++ linux-4.4.46/kernel/locking/Makefile 2017-02-03 17:18:10.923618903 +0100
17798 -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
17799 +obj-y += semaphore.o percpu-rwsem.o
17801 ifdef CONFIG_FUNCTION_TRACER
17802 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
17804 CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
17807 +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
17809 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
17812 obj-$(CONFIG_LOCKDEP) += lockdep.o
17813 ifeq ($(CONFIG_PROC_FS),y)
17814 obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
17816 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
17817 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
17818 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
17819 +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
17820 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
17821 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
17823 +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
17824 obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
17825 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
17826 diff -Nur linux-4.4.46.orig/kernel/locking/rt.c linux-4.4.46/kernel/locking/rt.c
17827 --- linux-4.4.46.orig/kernel/locking/rt.c 1970-01-01 01:00:00.000000000 +0100
17828 +++ linux-4.4.46/kernel/locking/rt.c 2017-02-03 17:18:10.923618903 +0100
17833 + * Real-Time Preemption Support
17835 + * started by Ingo Molnar:
17837 + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
17838 + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
17840 + * historic credit for proving that Linux spinlocks can be implemented via
17841 + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
17842 + * and others) who prototyped it on 2.4 and did lots of comparative
17843 + * research and analysis; TimeSys, for proving that you can implement a
17844 + * fully preemptible kernel via the use of IRQ threading and mutexes;
17845 + * Bill Huey for persuasively arguing on lkml that the mutex model is the
17846 + * right one; and to MontaVista, who ported pmutexes to 2.6.
17848 + * This code is a from-scratch implementation and is not based on pmutexes,
17849 + * but the idea of converting spinlocks to mutexes is used here too.
17851 + * lock debugging, locking tree, deadlock detection:
17853 + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
17854 + * Released under the General Public License (GPL).
17856 + * Includes portions of the generic R/W semaphore implementation from:
17858 + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
17859 + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
17860 + * - Derived also from comments by Linus
17862 + * Pending ownership of locks and ownership stealing:
17864 + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
17866 + * (also by Steven Rostedt)
17867 + * - Converted single pi_lock to individual task locks.
17869 + * By Esben Nielsen:
17870 + * Doing priority inheritance with help of the scheduler.
17872 + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
17873 + * - major rework based on Esben Nielsens initial patch
17874 + * - replaced thread_info references by task_struct refs
17875 + * - removed task->pending_owner dependency
17876 + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
17877 + * in the scheduler return path as discussed with Steven Rostedt
17879 + * Copyright (C) 2006, Kihon Technologies Inc.
17880 + * Steven Rostedt <rostedt@goodmis.org>
17881 + * - debugged and patched Thomas Gleixner's rework.
17882 + * - added back the cmpxchg to the rework.
17883 + * - turned atomic require back on for SMP.
17886 +#include <linux/spinlock.h>
17887 +#include <linux/rtmutex.h>
17888 +#include <linux/sched.h>
17889 +#include <linux/delay.h>
17890 +#include <linux/module.h>
17891 +#include <linux/kallsyms.h>
17892 +#include <linux/syscalls.h>
17893 +#include <linux/interrupt.h>
17894 +#include <linux/plist.h>
17895 +#include <linux/fs.h>
17896 +#include <linux/futex.h>
17897 +#include <linux/hrtimer.h>
17899 +#include "rtmutex_common.h"
17902 + * struct mutex functions
17904 +void __mutex_do_init(struct mutex *mutex, const char *name,
17905 + struct lock_class_key *key)
17907 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
17909 + * Make sure we are not reinitializing a held lock:
17911 + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
17912 + lockdep_init_map(&mutex->dep_map, name, key, 0);
17914 + mutex->lock.save_state = 0;
17916 +EXPORT_SYMBOL(__mutex_do_init);
17918 +void __lockfunc _mutex_lock(struct mutex *lock)
17920 + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17921 + rt_mutex_lock(&lock->lock);
17923 +EXPORT_SYMBOL(_mutex_lock);
17925 +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
17929 + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17930 + ret = rt_mutex_lock_interruptible(&lock->lock);
17932 + mutex_release(&lock->dep_map, 1, _RET_IP_);
17935 +EXPORT_SYMBOL(_mutex_lock_interruptible);
17937 +int __lockfunc _mutex_lock_killable(struct mutex *lock)
17941 + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17942 + ret = rt_mutex_lock_killable(&lock->lock);
17944 + mutex_release(&lock->dep_map, 1, _RET_IP_);
17947 +EXPORT_SYMBOL(_mutex_lock_killable);
17949 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
17950 +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
17952 + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
17953 + rt_mutex_lock(&lock->lock);
17955 +EXPORT_SYMBOL(_mutex_lock_nested);
17957 +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
17959 + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
17960 + rt_mutex_lock(&lock->lock);
17962 +EXPORT_SYMBOL(_mutex_lock_nest_lock);
17964 +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
17968 + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
17969 + ret = rt_mutex_lock_interruptible(&lock->lock);
17971 + mutex_release(&lock->dep_map, 1, _RET_IP_);
17974 +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
17976 +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
17980 + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
17981 + ret = rt_mutex_lock_killable(&lock->lock);
17983 + mutex_release(&lock->dep_map, 1, _RET_IP_);
17986 +EXPORT_SYMBOL(_mutex_lock_killable_nested);
17989 +int __lockfunc _mutex_trylock(struct mutex *lock)
17991 + int ret = rt_mutex_trylock(&lock->lock);
17994 + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
17998 +EXPORT_SYMBOL(_mutex_trylock);
18000 +void __lockfunc _mutex_unlock(struct mutex *lock)
18002 + mutex_release(&lock->dep_map, 1, _RET_IP_);
18003 + rt_mutex_unlock(&lock->lock);
18005 +EXPORT_SYMBOL(_mutex_unlock);
18008 + * rwlock_t functions
18010 +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
18014 + migrate_disable();
18015 + ret = rt_mutex_trylock(&rwlock->lock);
18017 + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
18019 + migrate_enable();
18023 +EXPORT_SYMBOL(rt_write_trylock);
18025 +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
18030 + ret = rt_write_trylock(rwlock);
18033 +EXPORT_SYMBOL(rt_write_trylock_irqsave);
18035 +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
18037 + struct rt_mutex *lock = &rwlock->lock;
18041 + * recursive read locks succeed when current owns the lock,
18042 + * but not when read_depth == 0 which means that the lock is
18043 + * write locked.
18044 + */
18045 + if (rt_mutex_owner(lock) != current) {
18046 + migrate_disable();
18047 + ret = rt_mutex_trylock(lock);
18049 + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
18051 + migrate_enable();
18053 + } else if (!rwlock->read_depth) {
18058 + rwlock->read_depth++;
18062 +EXPORT_SYMBOL(rt_read_trylock);
18064 +void __lockfunc rt_write_lock(rwlock_t *rwlock)
18066 + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
18067 + __rt_spin_lock(&rwlock->lock);
18069 +EXPORT_SYMBOL(rt_write_lock);
18071 +void __lockfunc rt_read_lock(rwlock_t *rwlock)
18073 + struct rt_mutex *lock = &rwlock->lock;
18077 + * recursive read locks succeed when current owns the lock
18079 + if (rt_mutex_owner(lock) != current) {
18080 + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
18081 + __rt_spin_lock(lock);
18083 + rwlock->read_depth++;
18086 +EXPORT_SYMBOL(rt_read_lock);
18088 +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
18090 + /* NOTE: we always pass in '1' for nested, for simplicity */
18091 + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
18092 + __rt_spin_unlock(&rwlock->lock);
18093 + migrate_enable();
18095 +EXPORT_SYMBOL(rt_write_unlock);
18097 +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
18099 + /* Release the lock only when read_depth is down to 0 */
18100 + if (--rwlock->read_depth == 0) {
18101 + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
18102 + __rt_spin_unlock(&rwlock->lock);
18103 + migrate_enable();
18106 +EXPORT_SYMBOL(rt_read_unlock);
18108 +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
18110 + rt_write_lock(rwlock);
18114 +EXPORT_SYMBOL(rt_write_lock_irqsave);
18116 +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
18118 + rt_read_lock(rwlock);
18122 +EXPORT_SYMBOL(rt_read_lock_irqsave);
18124 +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
18126 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
18128 + * Make sure we are not reinitializing a held lock:
18130 + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
18131 + lockdep_init_map(&rwlock->dep_map, name, key, 0);
18133 + rwlock->lock.save_state = 1;
18134 + rwlock->read_depth = 0;
18136 +EXPORT_SYMBOL(__rt_rwlock_init);
18142 +void rt_up_write(struct rw_semaphore *rwsem)
18144 + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
18145 + rt_mutex_unlock(&rwsem->lock);
18147 +EXPORT_SYMBOL(rt_up_write);
18149 +void __rt_up_read(struct rw_semaphore *rwsem)
18151 + if (--rwsem->read_depth == 0)
18152 + rt_mutex_unlock(&rwsem->lock);
18155 +void rt_up_read(struct rw_semaphore *rwsem)
18157 + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
18158 + __rt_up_read(rwsem);
18160 +EXPORT_SYMBOL(rt_up_read);
18163 + * downgrade a write lock into a read lock
18164 + * - just wake up any readers at the front of the queue
18166 +void rt_downgrade_write(struct rw_semaphore *rwsem)
18168 + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
18169 + rwsem->read_depth = 1;
18171 +EXPORT_SYMBOL(rt_downgrade_write);
18173 +int rt_down_write_trylock(struct rw_semaphore *rwsem)
18175 + int ret = rt_mutex_trylock(&rwsem->lock);
18178 + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
18181 +EXPORT_SYMBOL(rt_down_write_trylock);
18183 +void rt_down_write(struct rw_semaphore *rwsem)
18185 + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
18186 + rt_mutex_lock(&rwsem->lock);
18188 +EXPORT_SYMBOL(rt_down_write);
18190 +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
18192 + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
18193 + rt_mutex_lock(&rwsem->lock);
18195 +EXPORT_SYMBOL(rt_down_write_nested);
18197 +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
18198 + struct lockdep_map *nest)
18200 + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
18201 + rt_mutex_lock(&rwsem->lock);
18203 +EXPORT_SYMBOL(rt_down_write_nested_lock);
18205 +int rt__down_read_trylock(struct rw_semaphore *rwsem)
18207 + struct rt_mutex *lock = &rwsem->lock;
18211 + * recursive read locks succeed when current owns the rwsem,
18212 + * but not when read_depth == 0 which means that the rwsem is
18213 + * write locked.
18214 + */
18215 + if (rt_mutex_owner(lock) != current)
18216 + ret = rt_mutex_trylock(&rwsem->lock);
18217 + else if (!rwsem->read_depth)
18221 + rwsem->read_depth++;
18226 +int rt_down_read_trylock(struct rw_semaphore *rwsem)
18230 + ret = rt__down_read_trylock(rwsem);
18232 + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
18236 +EXPORT_SYMBOL(rt_down_read_trylock);
18238 +void rt__down_read(struct rw_semaphore *rwsem)
18240 + struct rt_mutex *lock = &rwsem->lock;
18242 + if (rt_mutex_owner(lock) != current)
18243 + rt_mutex_lock(&rwsem->lock);
18244 + rwsem->read_depth++;
18246 +EXPORT_SYMBOL(rt__down_read);
18248 +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
18250 + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
18251 + rt__down_read(rwsem);
18254 +void rt_down_read(struct rw_semaphore *rwsem)
18256 + __rt_down_read(rwsem, 0);
18258 +EXPORT_SYMBOL(rt_down_read);
18260 +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
18262 + __rt_down_read(rwsem, subclass);
18264 +EXPORT_SYMBOL(rt_down_read_nested);
18266 +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
18267 + struct lock_class_key *key)
18269 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
18271 + * Make sure we are not reinitializing a held lock:
18273 + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
18274 + lockdep_init_map(&rwsem->dep_map, name, key, 0);
18276 + rwsem->read_depth = 0;
18277 + rwsem->lock.save_state = 0;
18279 +EXPORT_SYMBOL(__rt_rwsem_init);
18282 + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
18283 + * @cnt: the atomic which we are to dec
18284 + * @lock: the mutex to return holding if we dec to 0
18286 + * return true and hold lock if we dec to 0, return false otherwise
18288 +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
18290 + /* dec if we can't possibly hit 0 */
18291 + if (atomic_add_unless(cnt, -1, 1))
18293 + /* we might hit 0, so take the lock */
18294 + mutex_lock(lock);
18295 + if (!atomic_dec_and_test(cnt)) {
18296 + /* when we actually did the dec, we didn't hit 0 */
18297 + mutex_unlock(lock);
18300 + /* we hit 0, and we hold the lock */
18303 +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
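
kernel/locking/rt.c above substitutes rt_mutex-based, priority-inheriting sleeping locks for struct mutex, rwlock_t and rw_semaphore on PREEMPT_RT_FULL; read-side recursion is emulated with the read_depth counter, which is why only the current owner may re-enter a read lock. The file ends with atomic_dec_and_mutex_lock(), the mutex counterpart of atomic_dec_and_lock(); a sketch of the refcount-teardown pattern it supports, with my_obj, my_list_lock and my_obj_put() being illustrative names only:

#include <linux/mutex.h>
#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/slab.h>

struct my_obj {				/* illustrative object, not in this patch */
	atomic_t refcnt;
	struct list_head node;
};

static DEFINE_MUTEX(my_list_lock);

static void my_obj_put(struct my_obj *obj)
{
	/* Fast path: drop the reference while it cannot reach zero.
	 * Slow path: the helper takes the mutex so the final put and the
	 * list removal are atomic with respect to concurrent lookups. */
	if (!atomic_dec_and_mutex_lock(&obj->refcnt, &my_list_lock))
		return;
	list_del(&obj->node);
	mutex_unlock(&my_list_lock);
	kfree(obj);
}
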
18304 diff -Nur linux-4.4.46.orig/kernel/locking/rtmutex.c linux-4.4.46/kernel/locking/rtmutex.c
18305 --- linux-4.4.46.orig/kernel/locking/rtmutex.c 2017-02-01 08:31:11.000000000 +0100
18306 +++ linux-4.4.46/kernel/locking/rtmutex.c 2017-02-03 17:18:10.927619058 +0100
18308 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
18309 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
18310 * Copyright (C) 2006 Esben Nielsen
18311 + * Adaptive Spinlocks:
18312 + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
18313 + * and Peter Morreale,
18314 + * Adaptive Spinlocks simplification:
18315 + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
18317 * See Documentation/locking/rt-mutex-design.txt for details.
18320 #include <linux/sched/rt.h>
18321 #include <linux/sched/deadline.h>
18322 #include <linux/timer.h>
18323 +#include <linux/ww_mutex.h>
18325 #include "rtmutex_common.h"
18327 @@ -133,6 +139,12 @@
18328 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
18331 +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
18333 + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
18334 + waiter != PI_REQUEUE_INPROGRESS;
18338 * We can speed up the acquire/release, if there's no debugging state to be
18340 @@ -163,13 +175,14 @@
18341 * 2) Drop lock->wait_lock
18342 * 3) Try to unlock the lock with cmpxchg
18344 -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
18345 +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
18346 + unsigned long flags)
18347 __releases(lock->wait_lock)
18349 struct task_struct *owner = rt_mutex_owner(lock);
18351 clear_rt_mutex_waiters(lock);
18352 - raw_spin_unlock(&lock->wait_lock);
18353 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18355 * If a new waiter comes in between the unlock and the cmpxchg
18356 * we have two situations:
18357 @@ -211,11 +224,12 @@
18359 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
18361 -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
18362 +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
18363 + unsigned long flags)
18364 __releases(lock->wait_lock)
18366 lock->owner = NULL;
18367 - raw_spin_unlock(&lock->wait_lock);
18368 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18372 @@ -412,6 +426,14 @@
18373 return debug_rt_mutex_detect_deadlock(waiter, chwalk);
18376 +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
18378 + if (waiter->savestate)
18379 + wake_up_lock_sleeper(waiter->task);
18381 + wake_up_process(waiter->task);
18385 * Max number of times we'll walk the boosting chain:
18387 @@ -419,7 +441,8 @@
18389 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
18391 - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
18392 + return rt_mutex_real_waiter(p->pi_blocked_on) ?
18393 + p->pi_blocked_on->lock : NULL;
18397 @@ -497,7 +520,6 @@
18398 int ret = 0, depth = 0;
18399 struct rt_mutex *lock;
18400 bool detect_deadlock;
18401 - unsigned long flags;
18402 bool requeue = true;
18404 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
18405 @@ -540,7 +562,7 @@
18407 * [1] Task cannot go away as we did a get_task() before !
18409 - raw_spin_lock_irqsave(&task->pi_lock, flags);
18410 + raw_spin_lock_irq(&task->pi_lock);
18413 * [2] Get the waiter on which @task is blocked on.
18414 @@ -556,7 +578,7 @@
18415 * reached or the state of the chain has changed while we
18416 * dropped the locks.
18419 + if (!rt_mutex_real_waiter(waiter))
18420 goto out_unlock_pi;
18423 @@ -624,7 +646,7 @@
18426 if (!raw_spin_trylock(&lock->wait_lock)) {
18427 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18428 + raw_spin_unlock_irq(&task->pi_lock);
18432 @@ -655,7 +677,7 @@
18434 * No requeue[7] here. Just release @task [8]
18436 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18437 + raw_spin_unlock(&task->pi_lock);
18438 put_task_struct(task);
18441 @@ -663,14 +685,14 @@
18442 * If there is no owner of the lock, end of chain.
18444 if (!rt_mutex_owner(lock)) {
18445 - raw_spin_unlock(&lock->wait_lock);
18446 + raw_spin_unlock_irq(&lock->wait_lock);
18450 /* [10] Grab the next task, i.e. owner of @lock */
18451 task = rt_mutex_owner(lock);
18452 get_task_struct(task);
18453 - raw_spin_lock_irqsave(&task->pi_lock, flags);
18454 + raw_spin_lock(&task->pi_lock);
18457 * No requeue [11] here. We just do deadlock detection.
18458 @@ -685,8 +707,8 @@
18459 top_waiter = rt_mutex_top_waiter(lock);
18461 /* [13] Drop locks */
18462 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18463 - raw_spin_unlock(&lock->wait_lock);
18464 + raw_spin_unlock(&task->pi_lock);
18465 + raw_spin_unlock_irq(&lock->wait_lock);
18467 /* If owner is not blocked, end of chain. */
18469 @@ -707,7 +729,7 @@
18470 rt_mutex_enqueue(lock, waiter);
18472 /* [8] Release the task */
18473 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18474 + raw_spin_unlock(&task->pi_lock);
18475 put_task_struct(task);
18478 @@ -718,21 +740,24 @@
18479 * follow here. This is the end of the chain we are walking.
18481 if (!rt_mutex_owner(lock)) {
18482 + struct rt_mutex_waiter *lock_top_waiter;
18485 * If the requeue [7] above changed the top waiter,
18486 * then we need to wake the new top waiter up to try
18489 - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
18490 - wake_up_process(rt_mutex_top_waiter(lock)->task);
18491 - raw_spin_unlock(&lock->wait_lock);
18492 + lock_top_waiter = rt_mutex_top_waiter(lock);
18493 + if (prerequeue_top_waiter != lock_top_waiter)
18494 + rt_mutex_wake_waiter(lock_top_waiter);
18495 + raw_spin_unlock_irq(&lock->wait_lock);
18499 /* [10] Grab the next task, i.e. the owner of @lock */
18500 task = rt_mutex_owner(lock);
18501 get_task_struct(task);
18502 - raw_spin_lock_irqsave(&task->pi_lock, flags);
18503 + raw_spin_lock(&task->pi_lock);
18505 /* [11] requeue the pi waiters if necessary */
18506 if (waiter == rt_mutex_top_waiter(lock)) {
18507 @@ -786,8 +811,8 @@
18508 top_waiter = rt_mutex_top_waiter(lock);
18510 /* [13] Drop the locks */
18511 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18512 - raw_spin_unlock(&lock->wait_lock);
18513 + raw_spin_unlock(&task->pi_lock);
18514 + raw_spin_unlock_irq(&lock->wait_lock);
18517 * Make the actual exit decisions [12], based on the stored
18518 @@ -810,28 +835,46 @@
18522 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18523 + raw_spin_unlock_irq(&task->pi_lock);
18525 put_task_struct(task);
18531 +#define STEAL_NORMAL 0
18532 +#define STEAL_LATERAL 1
18535 + * Note that RT tasks are excluded from lateral-steals to prevent the
18536 + * introduction of an unbounded latency
18538 +static inline int lock_is_stealable(struct task_struct *task,
18539 + struct task_struct *pendowner, int mode)
18541 + if (mode == STEAL_NORMAL || rt_task(task)) {
18542 + if (task->prio >= pendowner->prio)
18544 + } else if (task->prio > pendowner->prio)
18550 * Try to take an rt-mutex
18552 - * Must be called with lock->wait_lock held.
18553 + * Must be called with lock->wait_lock held and interrupts disabled
18555 * @lock: The lock to be acquired.
18556 * @task: The task which wants to acquire the lock
18557 * @waiter: The waiter that is queued to the lock's wait tree if the
18558 * callsite called task_blocked_on_lock(), otherwise NULL
18560 -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
18561 - struct rt_mutex_waiter *waiter)
18562 +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
18563 + struct task_struct *task,
18564 + struct rt_mutex_waiter *waiter, int mode)
18566 - unsigned long flags;
18569 * Before testing whether we can acquire @lock, we set the
18570 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
18571 @@ -867,8 +910,10 @@
18572 * If waiter is not the highest priority waiter of
18575 - if (waiter != rt_mutex_top_waiter(lock))
18576 + if (waiter != rt_mutex_top_waiter(lock)) {
18577 + /* XXX lock_is_stealable() ? */
18582 * We can acquire the lock. Remove the waiter from the
18583 @@ -886,14 +931,10 @@
18584 * not need to be dequeued.
18586 if (rt_mutex_has_waiters(lock)) {
18588 - * If @task->prio is greater than or equal to
18589 - * the top waiter priority (kernel view),
18592 - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
18594 + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
18596 + if (task != pown && !lock_is_stealable(task, pown, mode))
18599 * The current top waiter stays enqueued. We
18600 * don't have to change anything in the lock
18601 @@ -916,7 +957,7 @@
18602 * case, but conditionals are more expensive than a redundant
18605 - raw_spin_lock_irqsave(&task->pi_lock, flags);
18606 + raw_spin_lock(&task->pi_lock);
18607 task->pi_blocked_on = NULL;
18609 * Finish the lock acquisition. @task is the new owner. If
18610 @@ -925,7 +966,7 @@
18612 if (rt_mutex_has_waiters(lock))
18613 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
18614 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18615 + raw_spin_unlock(&task->pi_lock);
18618 /* We got the lock. */
18619 @@ -942,12 +983,444 @@
18623 +#ifdef CONFIG_PREEMPT_RT_FULL
18625 + * preemptible spin_lock functions:
18627 +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
18628 + void (*slowfn)(struct rt_mutex *lock,
18632 + might_sleep_no_state_check();
18635 + migrate_disable();
18637 + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
18638 + rt_mutex_deadlock_account_lock(lock, current);
18640 + slowfn(lock, do_mig_dis);
18643 +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
18644 + int (*slowfn)(struct rt_mutex *lock))
18646 + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
18647 + rt_mutex_deadlock_account_unlock(current);
18650 + return slowfn(lock);
18654 + * Note that owner is a speculative pointer and dereferencing relies
18655 + * on rcu_read_lock() and the check against the lock owner.
18657 +static int adaptive_wait(struct rt_mutex *lock,
18658 + struct task_struct *owner)
18664 + if (owner != rt_mutex_owner(lock))
18667 + * Ensure that owner->on_cpu is dereferenced _after_
18668 + * checking the above to be valid.
18671 + if (!owner->on_cpu) {
18677 + rcu_read_unlock();
18681 +static int adaptive_wait(struct rt_mutex *lock,
18682 + struct task_struct *orig_owner)
18688 +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
18689 + struct rt_mutex_waiter *waiter,
18690 + struct task_struct *task,
18691 + enum rtmutex_chainwalk chwalk);
18693 + * Slow path lock function spin_lock style: this variant is very
18694 + * careful not to miss any non-lock wakeups.
18696 + * We store the current state under p->pi_lock in p->saved_state and
18697 + * the try_to_wake_up() code handles this accordingly.
18699 +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
18702 + struct task_struct *lock_owner, *self = current;
18703 + struct rt_mutex_waiter waiter, *top_waiter;
18704 + unsigned long flags;
18707 + rt_mutex_init_waiter(&waiter, true);
18709 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
18711 + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
18712 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18716 + BUG_ON(rt_mutex_owner(lock) == self);
18719 + * We save whatever state the task is in and we'll restore it
18720 + * after acquiring the lock taking real wakeups into account
18721 + * as well. We are serialized via pi_lock against wakeups. See
18722 + * try_to_wake_up().
18724 + raw_spin_lock(&self->pi_lock);
18725 + self->saved_state = self->state;
18726 + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
18727 + raw_spin_unlock(&self->pi_lock);
18729 + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
18733 + /* Try to acquire the lock again. */
18734 + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
18737 + top_waiter = rt_mutex_top_waiter(lock);
18738 + lock_owner = rt_mutex_owner(lock);
18740 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18742 + debug_rt_mutex_print_deadlock(&waiter);
18744 + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
18746 + migrate_enable();
18749 + migrate_disable();
18752 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
18754 + raw_spin_lock(&self->pi_lock);
18755 + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
18756 + raw_spin_unlock(&self->pi_lock);
18760 + * Restore the task state to current->saved_state. We set it
18761 + * to the original state above and the try_to_wake_up() code
18762 + * has possibly updated it when a real (non-rtmutex) wakeup
18763 + * happened while we were blocked. Clear saved_state so
18764 + * try_to_wake_up() does not get confused.
18766 + raw_spin_lock(&self->pi_lock);
18767 + __set_current_state_no_track(self->saved_state);
18768 + self->saved_state = TASK_RUNNING;
18769 + raw_spin_unlock(&self->pi_lock);
18772 + * try_to_take_rt_mutex() sets the waiter bit
18773 + * unconditionally. We might have to fix that up:
18775 + fixup_rt_mutex_waiters(lock);
18777 + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
18778 + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
18780 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18782 + debug_rt_mutex_free_waiter(&waiter);
18785 +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
18786 + struct wake_q_head *wake_sleeper_q,
18787 + struct rt_mutex *lock);
18789 + * Slow path to release a rt_mutex spin_lock style
18791 +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
18793 + unsigned long flags;
18795 + WAKE_Q(wake_sleeper_q);
18797 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
18799 + debug_rt_mutex_unlock(lock);
18801 + rt_mutex_deadlock_account_unlock(current);
18803 + if (!rt_mutex_has_waiters(lock)) {
18804 + lock->owner = NULL;
18805 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18809 + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
18811 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18812 + wake_up_q(&wake_q);
18813 + wake_up_q_sleeper(&wake_sleeper_q);
17815 + /* Undo pi boosting when necessary */
18816 + rt_mutex_adjust_prio(current);
18820 +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
18822 + unsigned long flags;
18824 + WAKE_Q(wake_sleeper_q);
18826 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
18828 + debug_rt_mutex_unlock(lock);
18830 + rt_mutex_deadlock_account_unlock(current);
18832 + if (!rt_mutex_has_waiters(lock)) {
18833 + lock->owner = NULL;
18834 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18838 + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
18840 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18841 + wake_up_q(&wake_q);
18842 + wake_up_q_sleeper(&wake_sleeper_q);
18846 +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
18848 + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
18849 + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
18851 +EXPORT_SYMBOL(rt_spin_lock__no_mg);
18853 +void __lockfunc rt_spin_lock(spinlock_t *lock)
18855 + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
18856 + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
18858 +EXPORT_SYMBOL(rt_spin_lock);
18860 +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
18862 + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
18864 +EXPORT_SYMBOL(__rt_spin_lock);
18866 +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
18868 + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
18870 +EXPORT_SYMBOL(__rt_spin_lock__no_mg);
18872 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
18873 +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
18875 + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
18876 + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
18878 +EXPORT_SYMBOL(rt_spin_lock_nested);
18881 +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
18883 + /* NOTE: we always pass in '1' for nested, for simplicity */
18884 + spin_release(&lock->dep_map, 1, _RET_IP_);
18885 + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
18887 +EXPORT_SYMBOL(rt_spin_unlock__no_mg);
18889 +void __lockfunc rt_spin_unlock(spinlock_t *lock)
18891 + /* NOTE: we always pass in '1' for nested, for simplicity */
18892 + spin_release(&lock->dep_map, 1, _RET_IP_);
18893 + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
18894 + migrate_enable();
18896 +EXPORT_SYMBOL(rt_spin_unlock);
18898 +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
18902 + /* NOTE: we always pass in '1' for nested, for simplicity */
18903 + spin_release(&lock->dep_map, 1, _RET_IP_);
18904 + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
18905 + migrate_enable();
18909 +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
18911 + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
18913 +EXPORT_SYMBOL(__rt_spin_unlock);
18916 + * Wait for the lock to get unlocked: instead of polling for an unlock
18917 + * (like raw spinlocks do), we lock and unlock, to force the kernel to
18918 + * schedule if there's contention:
18920 +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
18923 + spin_unlock(lock);
18925 +EXPORT_SYMBOL(rt_spin_unlock_wait);
18927 +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
18929 + return rt_mutex_trylock(lock);
18932 +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
18936 + ret = rt_mutex_trylock(&lock->lock);
18938 + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18941 +EXPORT_SYMBOL(rt_spin_trylock__no_mg);
18943 +int __lockfunc rt_spin_trylock(spinlock_t *lock)
18947 + migrate_disable();
18948 + ret = rt_mutex_trylock(&lock->lock);
18950 + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18952 + migrate_enable();
18955 +EXPORT_SYMBOL(rt_spin_trylock);
18957 +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
18961 + local_bh_disable();
18962 + ret = rt_mutex_trylock(&lock->lock);
18964 + migrate_disable();
18965 + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18967 + local_bh_enable();
18970 +EXPORT_SYMBOL(rt_spin_trylock_bh);
18972 +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
18977 + ret = rt_mutex_trylock(&lock->lock);
18979 + migrate_disable();
18980 + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18984 +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
18986 +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
18988 + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
18989 + if (atomic_add_unless(atomic, -1, 1))
18991 + rt_spin_lock(lock);
18992 + if (atomic_dec_and_test(atomic))
18994 + rt_spin_unlock(lock);
18997 +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
19000 +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
19002 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
19004 + * Make sure we are not reinitializing a held lock:
19006 + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
19007 + lockdep_init_map(&lock->dep_map, name, key, 0);
19010 +EXPORT_SYMBOL(__rt_spin_lock_init);
19012 +#endif /* PREEMPT_RT_FULL */
19014 +#ifdef CONFIG_PREEMPT_RT_FULL
19015 + static inline int __sched
19016 +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
19018 + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
19019 + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
19024 + if (unlikely(ctx == hold_ctx))
19025 + return -EALREADY;
19027 + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
19028 + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
19029 +#ifdef CONFIG_DEBUG_MUTEXES
19030 + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
19031 + ctx->contending_lock = ww;
19039 + static inline int __sched
19040 +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
19049 +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
19050 + struct rt_mutex_waiter *waiter)
19052 + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
19056 * Task blocks on lock.
19058 * Prepare waiter and propagate pi chain
19060 - * This must be called with lock->wait_lock held.
19061 + * This must be called with lock->wait_lock held and interrupts disabled
19063 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
19064 struct rt_mutex_waiter *waiter,
19065 @@ -958,7 +1431,6 @@
19066 struct rt_mutex_waiter *top_waiter = waiter;
19067 struct rt_mutex *next_lock;
19068 int chain_walk = 0, res;
19069 - unsigned long flags;
19072 * Early deadlock detection. We really don't want the task to
19073 @@ -972,7 +1444,24 @@
19077 - raw_spin_lock_irqsave(&task->pi_lock, flags);
19078 + raw_spin_lock(&task->pi_lock);
19081 + * In the case of futex requeue PI, this will be a proxy
19082 + * lock. The task will wake unaware that it is enqueued on
19083 + * this lock. Avoid blocking on two locks and corrupting
19084 + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
19085 + * flag. futex_wait_requeue_pi() sets this when it wakes up
19086 + * before requeue (due to a signal or timeout). Do not enqueue
19087 + * the task if PI_WAKEUP_INPROGRESS is set.
19089 + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
19090 + raw_spin_unlock(&task->pi_lock);
19094 + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
19096 __rt_mutex_adjust_prio(task);
19097 waiter->task = task;
19098 waiter->lock = lock;
19099 @@ -985,18 +1474,18 @@
19101 task->pi_blocked_on = waiter;
19103 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19104 + raw_spin_unlock(&task->pi_lock);
19109 - raw_spin_lock_irqsave(&owner->pi_lock, flags);
19110 + raw_spin_lock(&owner->pi_lock);
19111 if (waiter == rt_mutex_top_waiter(lock)) {
19112 rt_mutex_dequeue_pi(owner, top_waiter);
19113 rt_mutex_enqueue_pi(owner, waiter);
19115 __rt_mutex_adjust_prio(owner);
19116 - if (owner->pi_blocked_on)
19117 + if (rt_mutex_real_waiter(owner->pi_blocked_on))
19119 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
19121 @@ -1005,7 +1494,7 @@
19122 /* Store the lock on which owner is blocked or NULL */
19123 next_lock = task_blocked_on_lock(owner);
19125 - raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
19126 + raw_spin_unlock(&owner->pi_lock);
19128 * Even if full deadlock detection is on, if the owner is not
19129 * blocked itself, we can avoid finding this out in the chain
19130 @@ -1021,12 +1510,12 @@
19132 get_task_struct(owner);
19134 - raw_spin_unlock(&lock->wait_lock);
19135 + raw_spin_unlock_irq(&lock->wait_lock);
19137 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
19138 next_lock, waiter, task);
19140 - raw_spin_lock(&lock->wait_lock);
19141 + raw_spin_lock_irq(&lock->wait_lock);
19145 @@ -1035,15 +1524,15 @@
19146 * Remove the top waiter from the current tasks pi waiter tree and
19149 - * Called with lock->wait_lock held.
19150 + * Called with lock->wait_lock held and interrupts disabled.
19152 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
19153 + struct wake_q_head *wake_sleeper_q,
19154 struct rt_mutex *lock)
19156 struct rt_mutex_waiter *waiter;
19157 - unsigned long flags;
19159 - raw_spin_lock_irqsave(&current->pi_lock, flags);
19160 + raw_spin_lock(&current->pi_lock);
19162 waiter = rt_mutex_top_waiter(lock);
19164 @@ -1065,15 +1554,18 @@
19166 lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
19168 - raw_spin_unlock_irqrestore(&current->pi_lock, flags);
19169 + raw_spin_unlock(&current->pi_lock);
19171 - wake_q_add(wake_q, waiter->task);
19172 + if (waiter->savestate)
19173 + wake_q_add(wake_sleeper_q, waiter->task);
19175 + wake_q_add(wake_q, waiter->task);
19179 * Remove a waiter from a lock and give up
19181 - * Must be called with lock->wait_lock held and
19182 + * Must be called with lock->wait_lock held and interrupts disabled. I must
19183 * have just failed to try_to_take_rt_mutex().
19185 static void remove_waiter(struct rt_mutex *lock,
19186 @@ -1081,13 +1573,12 @@
19188 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
19189 struct task_struct *owner = rt_mutex_owner(lock);
19190 - struct rt_mutex *next_lock;
19191 - unsigned long flags;
19192 + struct rt_mutex *next_lock = NULL;
19194 - raw_spin_lock_irqsave(&current->pi_lock, flags);
19195 + raw_spin_lock(&current->pi_lock);
19196 rt_mutex_dequeue(lock, waiter);
19197 current->pi_blocked_on = NULL;
19198 - raw_spin_unlock_irqrestore(&current->pi_lock, flags);
19199 + raw_spin_unlock(&current->pi_lock);
19202 * Only update priority if the waiter was the highest priority
19203 @@ -1096,7 +1587,7 @@
19204 if (!owner || !is_top_waiter)
19207 - raw_spin_lock_irqsave(&owner->pi_lock, flags);
19208 + raw_spin_lock(&owner->pi_lock);
19210 rt_mutex_dequeue_pi(owner, waiter);
19212 @@ -1106,9 +1597,10 @@
19213 __rt_mutex_adjust_prio(owner);
19215 /* Store the lock on which owner is blocked or NULL */
19216 - next_lock = task_blocked_on_lock(owner);
19217 + if (rt_mutex_real_waiter(owner->pi_blocked_on))
19218 + next_lock = task_blocked_on_lock(owner);
19220 - raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
19221 + raw_spin_unlock(&owner->pi_lock);
19224 * Don't walk the chain, if the owner task is not blocked
19225 @@ -1120,12 +1612,12 @@
19226 /* gets dropped in rt_mutex_adjust_prio_chain()! */
19227 get_task_struct(owner);
19229 - raw_spin_unlock(&lock->wait_lock);
19230 + raw_spin_unlock_irq(&lock->wait_lock);
19232 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
19233 next_lock, NULL, current);
19235 - raw_spin_lock(&lock->wait_lock);
19236 + raw_spin_lock_irq(&lock->wait_lock);
19240 @@ -1142,17 +1634,17 @@
19241 raw_spin_lock_irqsave(&task->pi_lock, flags);
19243 waiter = task->pi_blocked_on;
19244 - if (!waiter || (waiter->prio == task->prio &&
19245 + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
19246 !dl_prio(task->prio))) {
19247 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19250 next_lock = waiter->lock;
19251 - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19253 /* gets dropped in rt_mutex_adjust_prio_chain()! */
19254 get_task_struct(task);
19256 + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19257 rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
19258 next_lock, NULL, task);
19260 @@ -1161,16 +1653,17 @@
19261 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
19262 * @lock: the rt_mutex to take
19263 * @state: the state the task should block in (TASK_INTERRUPTIBLE
19264 - * or TASK_UNINTERRUPTIBLE)
19265 + * or TASK_UNINTERRUPTIBLE)
19266 * @timeout: the pre-initialized and started timer, or NULL for none
19267 * @waiter: the pre-initialized rt_mutex_waiter
19269 - * lock->wait_lock must be held by the caller.
19270 + * Must be called with lock->wait_lock held and interrupts disabled
19273 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
19274 struct hrtimer_sleeper *timeout,
19275 - struct rt_mutex_waiter *waiter)
19276 + struct rt_mutex_waiter *waiter,
19277 + struct ww_acquire_ctx *ww_ctx)
19281 @@ -1193,13 +1686,19 @@
19285 - raw_spin_unlock(&lock->wait_lock);
19286 + if (ww_ctx && ww_ctx->acquired > 0) {
19287 + ret = __mutex_lock_check_stamp(lock, ww_ctx);
19292 + raw_spin_unlock_irq(&lock->wait_lock);
19294 debug_rt_mutex_print_deadlock(waiter);
19298 - raw_spin_lock(&lock->wait_lock);
19299 + raw_spin_lock_irq(&lock->wait_lock);
19300 set_current_state(state);
19303 @@ -1227,26 +1726,112 @@
19307 +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
19308 + struct ww_acquire_ctx *ww_ctx)
19310 +#ifdef CONFIG_DEBUG_MUTEXES
19312 + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
19313 + * but released with a normal mutex_unlock in this call.
19315 + * This should never happen, always use ww_mutex_unlock.
19317 + DEBUG_LOCKS_WARN_ON(ww->ctx);
19320 + * Not quite done after calling ww_acquire_done() ?
19322 + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
19324 + if (ww_ctx->contending_lock) {
19326 + * After -EDEADLK you tried to
19327 + * acquire a different ww_mutex? Bad!
19329 + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
19332 + * You called ww_mutex_lock after receiving -EDEADLK,
19333 + * but 'forgot' to unlock everything else first?
19335 + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
19336 + ww_ctx->contending_lock = NULL;
19340 + * Naughty, using a different class will lead to undefined behavior!
19342 + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
19344 + ww_ctx->acquired++;
19347 +#ifdef CONFIG_PREEMPT_RT_FULL
19348 +static void ww_mutex_account_lock(struct rt_mutex *lock,
19349 + struct ww_acquire_ctx *ww_ctx)
19351 + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
19352 + struct rt_mutex_waiter *waiter, *n;
19355 + * This branch gets optimized out for the common case,
19356 + * and is only important for ww_mutex_lock.
19358 + ww_mutex_lock_acquired(ww, ww_ctx);
19359 + ww->ctx = ww_ctx;
19362 + * Give any possible sleeping processes the chance to wake up,
19363 + * so they can recheck if they have to back off.
19365 + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
19367 + /* XXX debug rt mutex waiter wakeup */
19369 + BUG_ON(waiter->lock != lock);
19370 + rt_mutex_wake_waiter(waiter);
19376 +static void ww_mutex_account_lock(struct rt_mutex *lock,
19377 + struct ww_acquire_ctx *ww_ctx)
19384 * Slow path lock function:
19387 rt_mutex_slowlock(struct rt_mutex *lock, int state,
19388 struct hrtimer_sleeper *timeout,
19389 - enum rtmutex_chainwalk chwalk)
19390 + enum rtmutex_chainwalk chwalk,
19391 + struct ww_acquire_ctx *ww_ctx)
19393 struct rt_mutex_waiter waiter;
19394 + unsigned long flags;
19397 - debug_rt_mutex_init_waiter(&waiter);
19398 - RB_CLEAR_NODE(&waiter.pi_tree_entry);
19399 - RB_CLEAR_NODE(&waiter.tree_entry);
19400 + rt_mutex_init_waiter(&waiter, false);
19402 - raw_spin_lock(&lock->wait_lock);
19404 + * Technically we could use raw_spin_[un]lock_irq() here, but this can
19405 + * be called in early boot if the cmpxchg() fast path is disabled
19406 + * (debug, no architecture support). In this case we will acquire the
19407 + * rtmutex with lock->wait_lock held. But we cannot unconditionally
19408 + * enable interrupts in that early boot case. So we need to use the
19409 + * irqsave/restore variants.
19411 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
19413 /* Try to acquire the lock again: */
19414 if (try_to_take_rt_mutex(lock, current, NULL)) {
19415 - raw_spin_unlock(&lock->wait_lock);
19417 + ww_mutex_account_lock(lock, ww_ctx);
19418 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19422 @@ -1260,13 +1845,23 @@
19425 /* sleep on the mutex */
19426 - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
19427 + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
19429 + else if (ww_ctx) {
19430 + /* ww_mutex received EDEADLK, let it become EALREADY */
19431 + ret = __mutex_lock_check_stamp(lock, ww_ctx);
19435 if (unlikely(ret)) {
19436 __set_current_state(TASK_RUNNING);
19437 if (rt_mutex_has_waiters(lock))
19438 remove_waiter(lock, &waiter);
19439 - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
19440 +	/* ww_mutex wants to report EDEADLK/EALREADY, let them through */
19442 + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
19443 + } else if (ww_ctx) {
19444 + ww_mutex_account_lock(lock, ww_ctx);
19448 @@ -1275,7 +1870,7 @@
19450 fixup_rt_mutex_waiters(lock);
19452 - raw_spin_unlock(&lock->wait_lock);
19453 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19455 /* Remove pending timer: */
19456 if (unlikely(timeout))
19457 @@ -1291,6 +1886,7 @@
19459 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
19461 + unsigned long flags;
19465 @@ -1302,10 +1898,10 @@
19469 - * The mutex has currently no owner. Lock the wait lock and
19470 - * try to acquire the lock.
19471 + * The mutex has currently no owner. Lock the wait lock and try to
19472 + * acquire the lock. We use irqsave here to support early boot calls.
19474 - raw_spin_lock(&lock->wait_lock);
19475 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
19477 ret = try_to_take_rt_mutex(lock, current, NULL);
19479 @@ -1315,7 +1911,7 @@
19481 fixup_rt_mutex_waiters(lock);
19483 - raw_spin_unlock(&lock->wait_lock);
19484 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19488 @@ -1325,9 +1921,13 @@
19489 * Return whether the current task needs to undo a potential priority boosting.
19491 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
19492 - struct wake_q_head *wake_q)
19493 + struct wake_q_head *wake_q,
19494 + struct wake_q_head *wake_sleeper_q)
19496 - raw_spin_lock(&lock->wait_lock);
19497 + unsigned long flags;
19499 + /* irqsave required to support early boot calls */
19500 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
19502 debug_rt_mutex_unlock(lock);
19504 @@ -1366,10 +1966,10 @@
19506 while (!rt_mutex_has_waiters(lock)) {
19507 /* Drops lock->wait_lock ! */
19508 - if (unlock_rt_mutex_safe(lock) == true)
19509 + if (unlock_rt_mutex_safe(lock, flags) == true)
19511 /* Relock the rtmutex and try again */
19512 - raw_spin_lock(&lock->wait_lock);
19513 + raw_spin_lock_irqsave(&lock->wait_lock, flags);
19517 @@ -1378,9 +1978,9 @@
19519 * Queue the next waiter for wakeup once we release the wait_lock.
19521 - mark_wakeup_next_waiter(wake_q, lock);
19522 + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
19524 - raw_spin_unlock(&lock->wait_lock);
19525 + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19527 /* check PI boosting */
19529 @@ -1394,31 +1994,36 @@
19532 rt_mutex_fastlock(struct rt_mutex *lock, int state,
19533 + struct ww_acquire_ctx *ww_ctx,
19534 int (*slowfn)(struct rt_mutex *lock, int state,
19535 struct hrtimer_sleeper *timeout,
19536 - enum rtmutex_chainwalk chwalk))
19537 + enum rtmutex_chainwalk chwalk,
19538 + struct ww_acquire_ctx *ww_ctx))
19540 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
19541 rt_mutex_deadlock_account_lock(lock, current);
19544 - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
19545 + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
19550 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
19551 struct hrtimer_sleeper *timeout,
19552 enum rtmutex_chainwalk chwalk,
19553 + struct ww_acquire_ctx *ww_ctx,
19554 int (*slowfn)(struct rt_mutex *lock, int state,
19555 struct hrtimer_sleeper *timeout,
19556 - enum rtmutex_chainwalk chwalk))
19557 + enum rtmutex_chainwalk chwalk,
19558 + struct ww_acquire_ctx *ww_ctx))
19560 if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
19561 likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
19562 rt_mutex_deadlock_account_lock(lock, current);
19565 - return slowfn(lock, state, timeout, chwalk);
19566 + return slowfn(lock, state, timeout, chwalk, ww_ctx);
19570 @@ -1435,17 +2040,20 @@
19572 rt_mutex_fastunlock(struct rt_mutex *lock,
19573 bool (*slowfn)(struct rt_mutex *lock,
19574 - struct wake_q_head *wqh))
19575 + struct wake_q_head *wqh,
19576 + struct wake_q_head *wq_sleeper))
19579 + WAKE_Q(wake_sleeper_q);
19581 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
19582 rt_mutex_deadlock_account_unlock(current);
19585 - bool deboost = slowfn(lock, &wake_q);
19586 + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
19588 wake_up_q(&wake_q);
19589 + wake_up_q_sleeper(&wake_sleeper_q);
19591 /* Undo pi boosting if necessary: */
19593 @@ -1462,7 +2070,7 @@
19597 - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
19598 + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
19600 EXPORT_SYMBOL_GPL(rt_mutex_lock);
19602 @@ -1479,7 +2087,7 @@
19606 - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
19607 + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
19609 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
19611 @@ -1492,11 +2100,30 @@
19614 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
19615 - RT_MUTEX_FULL_CHAINWALK,
19616 + RT_MUTEX_FULL_CHAINWALK, NULL,
19617 rt_mutex_slowlock);
19621 + * rt_mutex_lock_killable - lock a rt_mutex killable
19623 + * @lock: the rt_mutex to be locked
19624 + * @detect_deadlock: deadlock detection on/off
19628 + * -EINTR when interrupted by a signal
19629 + * -EDEADLK when the lock would deadlock (when deadlock detection is on)
19631 +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
19635 + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
19637 +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
19640 * rt_mutex_timed_lock - lock a rt_mutex interruptible
19641 * the timeout structure is provided
19643 @@ -1516,6 +2143,7 @@
19645 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
19646 RT_MUTEX_MIN_CHAINWALK,
19648 rt_mutex_slowlock);
19650 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
19651 @@ -1533,7 +2161,11 @@
19653 int __sched rt_mutex_trylock(struct rt_mutex *lock)
19655 +#ifdef CONFIG_PREEMPT_RT_FULL
19656 + if (WARN_ON_ONCE(in_irq() || in_nmi()))
19658 if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
19662 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
19663 @@ -1559,13 +2191,14 @@
19666 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
19667 - struct wake_q_head *wqh)
19668 + struct wake_q_head *wqh,
19669 + struct wake_q_head *wq_sleeper)
19671 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
19672 rt_mutex_deadlock_account_unlock(current);
19675 - return rt_mutex_slowunlock(lock, wqh);
19676 + return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
19680 @@ -1598,13 +2231,12 @@
19681 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
19683 lock->owner = NULL;
19684 - raw_spin_lock_init(&lock->wait_lock);
19685 lock->waiters = RB_ROOT;
19686 lock->waiters_leftmost = NULL;
19688 debug_rt_mutex_init(lock, name);
19690 -EXPORT_SYMBOL_GPL(__rt_mutex_init);
19691 +EXPORT_SYMBOL(__rt_mutex_init);
19694 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
19695 @@ -1619,7 +2251,7 @@
19696 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
19697 struct task_struct *proxy_owner)
19699 - __rt_mutex_init(lock, NULL);
19700 + rt_mutex_init(lock);
19701 debug_rt_mutex_proxy_lock(lock, proxy_owner);
19702 rt_mutex_set_owner(lock, proxy_owner);
19703 rt_mutex_deadlock_account_lock(lock, proxy_owner);
19704 @@ -1660,13 +2292,42 @@
19708 - raw_spin_lock(&lock->wait_lock);
19709 + raw_spin_lock_irq(&lock->wait_lock);
19711 if (try_to_take_rt_mutex(lock, task, NULL)) {
19712 - raw_spin_unlock(&lock->wait_lock);
19713 + raw_spin_unlock_irq(&lock->wait_lock);
19717 +#ifdef CONFIG_PREEMPT_RT_FULL
19719 + * In PREEMPT_RT there's an added race.
19720 + * If the task, that we are about to requeue, times out,
19721 + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
19722 + * to skip this task. But right after the task sets
19723 + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
19724 + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
19725 + * This will replace the PI_WAKEUP_INPROGRESS with the actual
19726 + * lock that it blocks on. We *must not* place this task
19727 + * on this proxy lock in that case.
19729 + * To prevent this race, we first take the task's pi_lock
19730 + * and check if it has updated its pi_blocked_on. If it has,
19731 + * we assume that it woke up and we return -EAGAIN.
19732 + * Otherwise, we set the task's pi_blocked_on to
19733 + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
19734 + * it will know that we are in the process of requeuing it.
19736 + raw_spin_lock(&task->pi_lock);
19737 + if (task->pi_blocked_on) {
19738 + raw_spin_unlock(&task->pi_lock);
19739 + raw_spin_unlock_irq(&lock->wait_lock);
19742 + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
19743 + raw_spin_unlock(&task->pi_lock);
19746 /* We enforce deadlock detection for futexes */
19747 ret = task_blocks_on_rt_mutex(lock, waiter, task,
19748 RT_MUTEX_FULL_CHAINWALK);
19749 @@ -1681,10 +2342,10 @@
19753 - if (unlikely(ret))
19754 + if (ret && rt_mutex_has_waiters(lock))
19755 remove_waiter(lock, waiter);
19757 - raw_spin_unlock(&lock->wait_lock);
19758 + raw_spin_unlock_irq(&lock->wait_lock);
19760 debug_rt_mutex_print_deadlock(waiter);
19762 @@ -1732,12 +2393,12 @@
19766 - raw_spin_lock(&lock->wait_lock);
19767 + raw_spin_lock_irq(&lock->wait_lock);
19769 set_current_state(TASK_INTERRUPTIBLE);
19771 /* sleep on the mutex */
19772 - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
19773 + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
19776 remove_waiter(lock, waiter);
19777 @@ -1748,7 +2409,93 @@
19779 fixup_rt_mutex_waiters(lock);
19781 - raw_spin_unlock(&lock->wait_lock);
19782 + raw_spin_unlock_irq(&lock->wait_lock);
19788 +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
19790 +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
19793 + if (ctx->deadlock_inject_countdown-- == 0) {
19794 + tmp = ctx->deadlock_inject_interval;
19795 + if (tmp > UINT_MAX/4)
19798 + tmp = tmp*2 + tmp + tmp/2;
19800 + ctx->deadlock_inject_interval = tmp;
19801 + ctx->deadlock_inject_countdown = tmp;
19802 + ctx->contending_lock = lock;
19804 + ww_mutex_unlock(lock);
19813 +#ifdef CONFIG_PREEMPT_RT_FULL
19815 +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
19821 + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
19822 + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
19824 + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
19825 + else if (!ret && ww_ctx->acquired > 1)
19826 + return ww_mutex_deadlock_injection(lock, ww_ctx);
19830 +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
19833 +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
19839 + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
19840 + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
19842 + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
19843 + else if (!ret && ww_ctx->acquired > 1)
19844 + return ww_mutex_deadlock_injection(lock, ww_ctx);
19848 +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
19850 +void __sched ww_mutex_unlock(struct ww_mutex *lock)
19852 + int nest = !!lock->ctx;
19855 + * The unlocking fastpath is the 0->1 transition from 'locked'
19856 + * into 'unlocked' state:
19859 +#ifdef CONFIG_DEBUG_MUTEXES
19860 + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
19862 + if (lock->ctx->acquired > 0)
19863 + lock->ctx->acquired--;
19864 + lock->ctx = NULL;
19867 + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
19868 + rt_mutex_unlock(&lock->base.lock);
19870 +EXPORT_SYMBOL(ww_mutex_unlock);
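
The CONFIG_DEBUG_WW_MUTEX_SLOWPATH hunk above injects an artificial -EDEADLK once per interval and then grows the interval by roughly 3.5x (tmp = tmp*2 + tmp + tmp/2). A minimal user-space sketch of that countdown/interval bookkeeping, not kernel code; the struct, the helper name and the starting interval of 8 are invented for illustration:

#include <stdio.h>
#include <limits.h>

/* Illustrative only: mirrors the interval update done by
 * ww_mutex_deadlock_injection() above (grow ~3.5x, clamp near UINT_MAX/4). */
struct inject_state {
	unsigned int interval;
	unsigned int countdown;
};

static int should_inject(struct inject_state *s)
{
	if (s->countdown-- == 0) {
		unsigned int tmp = s->interval;

		if (tmp > UINT_MAX / 4)
			tmp = UINT_MAX / 4;		/* avoid overflow on the next step */
		else
			tmp = tmp * 2 + tmp + tmp / 2;	/* ~3.5x growth */

		s->interval = tmp;
		s->countdown = tmp;
		return 1;				/* caller would return -EDEADLK here */
	}
	return 0;
}

int main(void)
{
	struct inject_state s = { .interval = 8, .countdown = 8 };

	for (unsigned long attempt = 1; attempt <= 200; attempt++)
		if (should_inject(&s))
			printf("inject at attempt %lu, next interval %u\n",
			       attempt, s.interval);
	return 0;
}

Compiled standalone, this prints injections at attempts 9, 38 and 137 for these starting values, showing how the forced back-off points spread out over time.
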
19872 diff -Nur linux-4.4.46.orig/kernel/locking/rtmutex_common.h linux-4.4.46/kernel/locking/rtmutex_common.h
19873 --- linux-4.4.46.orig/kernel/locking/rtmutex_common.h 2017-02-01 08:31:11.000000000 +0100
19874 +++ linux-4.4.46/kernel/locking/rtmutex_common.h 2017-02-03 17:18:10.927619058 +0100
19876 struct rb_node pi_tree_entry;
19877 struct task_struct *task;
19878 struct rt_mutex *lock;
19880 #ifdef CONFIG_DEBUG_RT_MUTEXES
19882 struct pid *deadlock_task_pid;
19885 * PI-futex support (proxy locking functions, etc.):
19887 +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
19888 +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
19890 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
19891 extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
19892 struct task_struct *proxy_owner);
19893 @@ -111,7 +115,8 @@
19894 struct rt_mutex_waiter *waiter);
19895 extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
19896 extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
19897 - struct wake_q_head *wqh);
19898 + struct wake_q_head *wqh,
19899 + struct wake_q_head *wq_sleeper);
19900 extern void rt_mutex_adjust_prio(struct task_struct *task);
19902 #ifdef CONFIG_DEBUG_RT_MUTEXES
19903 @@ -120,4 +125,14 @@
19904 # include "rtmutex.h"
19907 +static inline void
19908 +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
19910 + debug_rt_mutex_init_waiter(waiter);
19911 + waiter->task = NULL;
19912 + waiter->savestate = savestate;
19913 + RB_CLEAR_NODE(&waiter->pi_tree_entry);
19914 + RB_CLEAR_NODE(&waiter->tree_entry);
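
PI_WAKEUP_INPROGRESS and PI_REQUEUE_INPROGRESS above are small integers cast to waiter pointers: they share the task's pi_blocked_on field with real waiters but only mark a wakeup or requeue in flight, which is why checks earlier in the patch use rt_mutex_real_waiter() rather than a plain NULL test. A self-contained user-space sketch of that encoding; the predicate shown is an assumption about how the helper is defined, not copied from the patch:

#include <stdio.h>

struct rt_mutex_waiter;	/* opaque for this sketch */

/* Sentinels live in the same pointer field as real waiters but never point
 * at memory; they only flag an in-progress wakeup/requeue. */
#define PI_WAKEUP_INPROGRESS	((struct rt_mutex_waiter *) 1)
#define PI_REQUEUE_INPROGRESS	((struct rt_mutex_waiter *) 2)

/* Hypothetical helper mirroring the rt_mutex_real_waiter() test used above. */
static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
{
	return waiter && waiter != PI_WAKEUP_INPROGRESS &&
	       waiter != PI_REQUEUE_INPROGRESS;
}

int main(void)
{
	struct rt_mutex_waiter *samples[] = {
		NULL, PI_WAKEUP_INPROGRESS, PI_REQUEUE_INPROGRESS,
		(struct rt_mutex_waiter *) &samples,	/* stands in for a real waiter */
	};

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("pi_blocked_on=%p real=%d\n",
		       (void *) samples[i], rt_mutex_real_waiter(samples[i]));
	return 0;
}
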
19918 diff -Nur linux-4.4.46.orig/kernel/locking/spinlock.c linux-4.4.46/kernel/locking/spinlock.c
19919 --- linux-4.4.46.orig/kernel/locking/spinlock.c 2017-02-01 08:31:11.000000000 +0100
19920 +++ linux-4.4.46/kernel/locking/spinlock.c 2017-02-03 17:18:10.927619058 +0100
19921 @@ -124,8 +124,11 @@
19922 * __[spin|read|write]_lock_bh()
19924 BUILD_LOCK_OPS(spin, raw_spinlock);
19926 +#ifndef CONFIG_PREEMPT_RT_FULL
19927 BUILD_LOCK_OPS(read, rwlock);
19928 BUILD_LOCK_OPS(write, rwlock);
19933 @@ -209,6 +212,8 @@
19934 EXPORT_SYMBOL(_raw_spin_unlock_bh);
19937 +#ifndef CONFIG_PREEMPT_RT_FULL
19939 #ifndef CONFIG_INLINE_READ_TRYLOCK
19940 int __lockfunc _raw_read_trylock(rwlock_t *lock)
19942 @@ -353,6 +358,8 @@
19943 EXPORT_SYMBOL(_raw_write_unlock_bh);
19946 +#endif /* !PREEMPT_RT_FULL */
19948 #ifdef CONFIG_DEBUG_LOCK_ALLOC
19950 void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
19951 diff -Nur linux-4.4.46.orig/kernel/locking/spinlock_debug.c linux-4.4.46/kernel/locking/spinlock_debug.c
19952 --- linux-4.4.46.orig/kernel/locking/spinlock_debug.c 2017-02-01 08:31:11.000000000 +0100
19953 +++ linux-4.4.46/kernel/locking/spinlock_debug.c 2017-02-03 17:18:10.927619058 +0100
19956 EXPORT_SYMBOL(__raw_spin_lock_init);
19958 +#ifndef CONFIG_PREEMPT_RT_FULL
19959 void __rwlock_init(rwlock_t *lock, const char *name,
19960 struct lock_class_key *key)
19965 EXPORT_SYMBOL(__rwlock_init);
19968 static void spin_dump(raw_spinlock_t *lock, const char *msg)
19970 @@ -159,6 +161,7 @@
19971 arch_spin_unlock(&lock->raw_lock);
19974 +#ifndef CONFIG_PREEMPT_RT_FULL
19975 static void rwlock_bug(rwlock_t *lock, const char *msg)
19977 if (!debug_locks_off())
19978 @@ -300,3 +303,5 @@
19979 debug_write_unlock(lock);
19980 arch_write_unlock(&lock->raw_lock);
19984 diff -Nur linux-4.4.46.orig/kernel/panic.c linux-4.4.46/kernel/panic.c
19985 --- linux-4.4.46.orig/kernel/panic.c 2017-02-01 08:31:11.000000000 +0100
19986 +++ linux-4.4.46/kernel/panic.c 2017-02-03 17:18:10.927619058 +0100
19992 + * Stop ourselves in NMI context if another CPU has already panicked. Arch code
19993 + * may override this to prepare for crash dumping, e.g. save regs info.
19995 +void __weak nmi_panic_self_stop(struct pt_regs *regs)
19997 + panic_smp_self_stop();
20000 +atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
20003 + * A variant of panic() called from NMI context. We return if we've already
20004 + * panicked on this CPU. If another CPU already panicked, loop in
20005 + * nmi_panic_self_stop() which can provide architecture dependent code such
20006 + * as saving register state for crash dump.
20008 +void nmi_panic(struct pt_regs *regs, const char *msg)
20010 + int old_cpu, cpu;
20012 + cpu = raw_smp_processor_id();
20013 + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);
20015 + if (old_cpu == PANIC_CPU_INVALID)
20016 + panic("%s", msg);
20017 + else if (old_cpu != cpu)
20018 + nmi_panic_self_stop(regs);
20020 +EXPORT_SYMBOL(nmi_panic);
20023 * panic - halt the system
20024 * @fmt: The text string to print
20025 @@ -71,17 +102,17 @@
20027 void panic(const char *fmt, ...)
20029 - static DEFINE_SPINLOCK(panic_lock);
20030 static char buf[1024];
20032 long i, i_next = 0;
20034 + int old_cpu, this_cpu;
20037 * Disable local interrupts. This will prevent panic_smp_self_stop
20038 * from deadlocking the first cpu that invokes the panic, since
20039 * there is nothing to prevent an interrupt handler (that runs
20040 - * after the panic_lock is acquired) from invoking panic again.
20041 + * after setting panic_cpu) from invoking panic() again.
20043 local_irq_disable();
20045 @@ -94,8 +125,16 @@
20046 * multiple parallel invocations of panic, all other CPUs either
20047 * stop themself or will wait until they are stopped by the 1st CPU
20048 * with smp_send_stop().
20050 + * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
20051 + * comes here, so go ahead.
20052 + * `old_cpu == this_cpu' means we came from nmi_panic() which sets
20053 + * panic_cpu to this CPU. In this case, this is also the 1st CPU.
20055 - if (!spin_trylock(&panic_lock))
20056 + this_cpu = raw_smp_processor_id();
20057 + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
20059 + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
20060 panic_smp_self_stop();
20063 @@ -400,9 +439,11 @@
20065 static int init_oops_id(void)
20067 +#ifndef CONFIG_PREEMPT_RT_FULL
20069 get_random_bytes(&oops_id, sizeof(oops_id));
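
The panic() rework above replaces panic_lock with an atomic panic_cpu holding the id of the first CPU to panic, so that nmi_panic() can re-enter on the same CPU while every other CPU parks itself. A user-space analogue using C11 atomics and threads (thread indices stand in for CPU ids; build with -pthread); it only shows the compare-and-swap gating, not the arch stop or crash-dump hooks:

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

#define PANIC_CPU_INVALID	-1

static atomic_int panic_cpu = PANIC_CPU_INVALID;

/* First caller wins and "panics"; a later call on the same id is let
 * through (the nmi_panic() case); everyone else just stops. */
static void panic_once(int cpu)
{
	int old = PANIC_CPU_INVALID;

	if (atomic_compare_exchange_strong(&panic_cpu, &old, cpu) || old == cpu)
		printf("cpu %d owns the panic\n", cpu);
	else
		printf("cpu %d stops itself (cpu %d panicked first)\n", cpu, old);
}

static void *cpu_thread(void *arg)
{
	panic_once((int)(long) arg);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, cpu_thread, (void *) i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
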
20075 diff -Nur linux-4.4.46.orig/kernel/power/hibernate.c linux-4.4.46/kernel/power/hibernate.c
20076 --- linux-4.4.46.orig/kernel/power/hibernate.c 2017-02-01 08:31:11.000000000 +0100
20077 +++ linux-4.4.46/kernel/power/hibernate.c 2017-02-03 17:18:10.927619058 +0100
20078 @@ -285,6 +285,8 @@
20080 local_irq_disable();
20082 + system_state = SYSTEM_SUSPEND;
20084 error = syscore_suspend();
20086 printk(KERN_ERR "PM: Some system devices failed to power down, "
20087 @@ -314,6 +316,7 @@
20091 + system_state = SYSTEM_RUNNING;
20092 local_irq_enable();
20095 @@ -438,6 +441,7 @@
20098 local_irq_disable();
20099 + system_state = SYSTEM_SUSPEND;
20101 error = syscore_suspend();
20103 @@ -471,6 +475,7 @@
20107 + system_state = SYSTEM_RUNNING;
20108 local_irq_enable();
20111 @@ -556,6 +561,7 @@
20114 local_irq_disable();
20115 + system_state = SYSTEM_SUSPEND;
20117 if (pm_wakeup_pending()) {
20119 @@ -568,6 +574,7 @@
20123 + system_state = SYSTEM_RUNNING;
20124 local_irq_enable();
20127 @@ -642,6 +649,10 @@
20131 +#ifndef CONFIG_SUSPEND
20132 +bool pm_in_action;
20136 * hibernate - Carry out system hibernation, including saving the image.
20138 @@ -654,6 +665,8 @@
20142 + pm_in_action = true;
20144 lock_system_sleep();
20145 /* The snapshot device should not be opened while we're running */
20146 if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
20147 @@ -719,6 +732,7 @@
20148 atomic_inc(&snapshot_device_available);
20150 unlock_system_sleep();
20151 + pm_in_action = false;
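
The hibernate hunks bracket the irqs-off syscore phase with system_state = SYSTEM_SUSPEND / SYSTEM_RUNNING and expose a pm_in_action flag, presumably so RT-specific paths can tell the suspend window apart from normal operation. A small sketch of the bracket pattern itself, with invented names; it does not claim to show which kernel checks actually consult the flag:

#include <stdio.h>

enum system_states { SYSTEM_RUNNING, SYSTEM_SUSPEND };

static enum system_states system_state = SYSTEM_RUNNING;

/* Stand-in for code that behaves differently while the machine is inside
 * the low-level suspend/resume window. */
static void maybe_warn_sleeping_call(void)
{
	if (system_state == SYSTEM_SUSPEND)
		printf("suppressing check: in suspend window\n");
	else
		printf("normal path\n");
}

static int enter_suspend(void)
{
	system_state = SYSTEM_SUSPEND;	/* mirrors the hunks above */
	maybe_warn_sleeping_call();	/* syscore_suspend() would run here */
	system_state = SYSTEM_RUNNING;
	return 0;
}

int main(void)
{
	maybe_warn_sleeping_call();
	enter_suspend();
	return 0;
}
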
20155 diff -Nur linux-4.4.46.orig/kernel/power/suspend.c linux-4.4.46/kernel/power/suspend.c
20156 --- linux-4.4.46.orig/kernel/power/suspend.c 2017-02-01 08:31:11.000000000 +0100
20157 +++ linux-4.4.46/kernel/power/suspend.c 2017-02-03 17:18:10.927619058 +0100
20158 @@ -359,6 +359,8 @@
20159 arch_suspend_disable_irqs();
20160 BUG_ON(!irqs_disabled());
20162 + system_state = SYSTEM_SUSPEND;
20164 error = syscore_suspend();
20166 *wakeup = pm_wakeup_pending();
20167 @@ -375,6 +377,8 @@
20171 + system_state = SYSTEM_RUNNING;
20173 arch_suspend_enable_irqs();
20174 BUG_ON(irqs_disabled());
20176 @@ -518,6 +522,8 @@
20180 +bool pm_in_action;
20183 * pm_suspend - Externally visible function for suspending the system.
20184 * @state: System sleep state to enter.
20185 @@ -532,6 +538,8 @@
20186 if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
20189 + pm_in_action = true;
20191 error = enter_state(state);
20193 suspend_stats.fail++;
20194 @@ -539,6 +547,7 @@
20196 suspend_stats.success++;
20198 + pm_in_action = false;
20201 EXPORT_SYMBOL(pm_suspend);
20202 diff -Nur linux-4.4.46.orig/kernel/printk/printk.c linux-4.4.46/kernel/printk/printk.c
20203 --- linux-4.4.46.orig/kernel/printk/printk.c 2017-02-01 08:31:11.000000000 +0100
20204 +++ linux-4.4.46/kernel/printk/printk.c 2017-02-03 17:18:10.927619058 +0100
20205 @@ -241,6 +241,65 @@
20207 static DEFINE_RAW_SPINLOCK(logbuf_lock);
20209 +#ifdef CONFIG_EARLY_PRINTK
20210 +struct console *early_console;
20212 +static void early_vprintk(const char *fmt, va_list ap)
20214 + if (early_console) {
20216 + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
20218 + early_console->write(early_console, buf, n);
20222 +asmlinkage void early_printk(const char *fmt, ...)
20226 + va_start(ap, fmt);
20227 + early_vprintk(fmt, ap);
20232 + * This is independent of any log levels - a global
20233 + * kill switch that turns off all of printk.
20235 + * Used by the NMI watchdog if early-printk is enabled.
20237 +static bool __read_mostly printk_killswitch;
20239 +static int __init force_early_printk_setup(char *str)
20241 + printk_killswitch = true;
20244 +early_param("force_early_printk", force_early_printk_setup);
20246 +void printk_kill(void)
20248 + printk_killswitch = true;
20251 +#ifdef CONFIG_PRINTK
20252 +static int forced_early_printk(const char *fmt, va_list ap)
20254 + if (!printk_killswitch)
20256 + early_vprintk(fmt, ap);
20262 +static inline int forced_early_printk(const char *fmt, va_list ap)
20268 #ifdef CONFIG_PRINTK
20269 DECLARE_WAIT_QUEUE_HEAD(log_wait);
20270 /* the next printk record to read by syslog(READ) or /proc/kmsg */
20271 @@ -1203,6 +1262,7 @@
20275 + int attempts = 0;
20277 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
20279 @@ -1214,7 +1274,14 @@
20282 enum log_flags prev;
20287 + if (attempts > 10) {
20292 if (clear_seq < log_first_seq) {
20293 /* messages are gone, move to first available one */
20294 clear_seq = log_first_seq;
20295 @@ -1235,6 +1302,14 @@
20297 idx = log_next(idx);
20300 + if (num_msg > 5) {
20302 + raw_spin_unlock_irq(&logbuf_lock);
20303 + raw_spin_lock_irq(&logbuf_lock);
20304 + if (clear_seq < log_first_seq)
20309 /* move first record forward until length fits into the buffer */
20310 @@ -1248,6 +1323,14 @@
20312 idx = log_next(idx);
20315 + if (num_msg > 5) {
20317 + raw_spin_unlock_irq(&logbuf_lock);
20318 + raw_spin_lock_irq(&logbuf_lock);
20319 + if (clear_seq < log_first_seq)
20324 /* last message fitting into this dump */
20325 @@ -1288,6 +1371,7 @@
20326 clear_seq = log_next_seq;
20327 clear_idx = log_next_idx;
20330 raw_spin_unlock_irq(&logbuf_lock);
20333 @@ -1443,6 +1527,12 @@
20334 if (!console_drivers)
20337 + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
20338 + if (in_irq() || in_nmi())
20342 + migrate_disable();
20343 for_each_console(con) {
20344 if (exclusive_console && con != exclusive_console)
20346 @@ -1458,6 +1548,7 @@
20348 con->write(con, text, len);
20350 + migrate_enable();
20354 @@ -1518,6 +1609,15 @@
20355 static int console_trylock_for_printk(void)
20357 unsigned int cpu = smp_processor_id();
20358 +#ifdef CONFIG_PREEMPT_RT_FULL
20359 + int lock = !early_boot_irqs_disabled && (preempt_count() == 0) &&
20360 + !irqs_disabled();
20368 if (!console_trylock())
20370 @@ -1672,6 +1772,13 @@
20371 /* cpu currently holding logbuf_lock in this function */
20372 static unsigned int logbuf_cpu = UINT_MAX;
20375 + * Fall back to early_printk if a debugging subsystem has
20376 + * killed printk output
20378 + if (unlikely(forced_early_printk(fmt, args)))
20381 if (level == LOGLEVEL_SCHED) {
20382 level = LOGLEVEL_DEFAULT;
20384 @@ -1813,8 +1920,7 @@
20385 * console_sem which would prevent anyone from printing to
20388 - preempt_disable();
20390 + migrate_disable();
20392 * Try to acquire and then immediately release the console
20393 * semaphore. The release will print out buffers and wake up
20394 @@ -1822,7 +1928,7 @@
20396 if (console_trylock_for_printk())
20398 - preempt_enable();
20399 + migrate_enable();
20403 @@ -1961,26 +2067,6 @@
20405 #endif /* CONFIG_PRINTK */
20407 -#ifdef CONFIG_EARLY_PRINTK
20408 -struct console *early_console;
20410 -asmlinkage __visible void early_printk(const char *fmt, ...)
20416 - if (!early_console)
20419 - va_start(ap, fmt);
20420 - n = vscnprintf(buf, sizeof(buf), fmt, ap);
20423 - early_console->write(early_console, buf, n);
20427 static int __add_preferred_console(char *name, int idx, char *options,
20430 @@ -2202,11 +2288,16 @@
20433 len = cont_print_text(text, size);
20434 +#ifdef CONFIG_PREEMPT_RT_FULL
20435 + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
20436 + call_console_drivers(cont.level, NULL, 0, text, len);
20438 raw_spin_unlock(&logbuf_lock);
20439 stop_critical_timings();
20440 call_console_drivers(cont.level, NULL, 0, text, len);
20441 start_critical_timings();
20442 local_irq_restore(flags);
20446 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
20447 @@ -2316,13 +2407,17 @@
20448 console_idx = log_next(console_idx);
20450 console_prev = msg->flags;
20451 +#ifdef CONFIG_PREEMPT_RT_FULL
20452 + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
20453 + call_console_drivers(level, ext_text, ext_len, text, len);
20455 raw_spin_unlock(&logbuf_lock);
20457 stop_critical_timings(); /* don't trace print latency */
20458 call_console_drivers(level, ext_text, ext_len, text, len);
20459 start_critical_timings();
20460 local_irq_restore(flags);
20463 if (do_cond_resched)
20466 @@ -2374,6 +2469,11 @@
20470 + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
20471 + if (in_irq() || in_nmi())
20476 * console_unblank can no longer be called in interrupt context unless
20477 * oops_in_progress is set to 1..
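
The printk changes add a global killswitch: once printk_kill() (or the force_early_printk boot parameter) flips it, output is formatted locally and pushed straight to the early console instead of taking the normal locking path. A user-space analogue of that routing decision; early_console_write() and the 512-byte buffer are stand-ins, and the real code of course keeps many more details (log levels, per-CPU state, console semantics):

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>

static bool printk_killswitch;

static void early_console_write(const char *buf, int len)
{
	fwrite(buf, 1, len, stderr);	/* stands in for early_console->write() */
}

static int my_vprintk(const char *fmt, va_list ap)
{
	char buf[512];
	int n = vsnprintf(buf, sizeof(buf), fmt, ap);

	if (n > (int) sizeof(buf) - 1)
		n = sizeof(buf) - 1;		/* vsnprintf may report truncation */

	if (printk_killswitch) {		/* the forced_early_printk() path */
		early_console_write(buf, n);
		return n;
	}
	fwrite(buf, 1, n, stdout);		/* stands in for the normal path */
	return n;
}

static int my_printk(const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = my_vprintk(fmt, ap);
	va_end(ap);
	return n;
}

int main(void)
{
	my_printk("normal path\n");
	printk_killswitch = true;		/* what printk_kill() does */
	my_printk("early console path\n");
	return 0;
}
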
20478 diff -Nur linux-4.4.46.orig/kernel/ptrace.c linux-4.4.46/kernel/ptrace.c
20479 --- linux-4.4.46.orig/kernel/ptrace.c 2017-02-01 08:31:11.000000000 +0100
20480 +++ linux-4.4.46/kernel/ptrace.c 2017-02-03 17:18:10.927619058 +0100
20481 @@ -136,7 +136,14 @@
20483 spin_lock_irq(&task->sighand->siglock);
20484 if (task_is_traced(task) && !__fatal_signal_pending(task)) {
20485 - task->state = __TASK_TRACED;
20486 + unsigned long flags;
20488 + raw_spin_lock_irqsave(&task->pi_lock, flags);
20489 + if (task->state & __TASK_TRACED)
20490 + task->state = __TASK_TRACED;
20492 + task->saved_state = __TASK_TRACED;
20493 + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
20496 spin_unlock_irq(&task->sighand->siglock);
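
The ptrace hunk reflects that on PREEMPT_RT a task sleeping on a converted spinlock keeps its "real" state in saved_state, so __TASK_TRACED has to be stored in whichever field currently carries it, under pi_lock. A toy model of the two-field idea; the state values and the scenario in main() are invented for illustration and ignore locking entirely:

#include <stdio.h>

#define TASK_INTERRUPTIBLE	0x0001
#define __TASK_TRACED		0x0008

struct task {
	unsigned long state;		/* what the scheduler sees right now */
	unsigned long saved_state;	/* real state while sleeping on an rtmutex */
};

/* Mirrors the intent of the hunk above: narrow the traced state wherever
 * it currently lives, so it is not lost when the rtmutex sleep ends. */
static void freeze_traced(struct task *t)
{
	if (t->state & __TASK_TRACED)
		t->state = __TASK_TRACED;
	else
		t->saved_state = __TASK_TRACED;
}

int main(void)
{
	/* traced task in the ordinary case */
	struct task normal = { .state = __TASK_TRACED | TASK_INTERRUPTIBLE };
	/* traced task that happens to be blocked on an rtmutex right now */
	struct task on_rtmutex = { .state = TASK_INTERRUPTIBLE,
				   .saved_state = __TASK_TRACED | TASK_INTERRUPTIBLE };

	freeze_traced(&normal);
	freeze_traced(&on_rtmutex);
	printf("normal:     state=%#lx saved_state=%#lx\n",
	       normal.state, normal.saved_state);
	printf("on rtmutex: state=%#lx saved_state=%#lx\n",
	       on_rtmutex.state, on_rtmutex.saved_state);
	return 0;
}
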
20497 diff -Nur linux-4.4.46.orig/kernel/rcu/rcutorture.c linux-4.4.46/kernel/rcu/rcutorture.c
20498 --- linux-4.4.46.orig/kernel/rcu/rcutorture.c 2017-02-01 08:31:11.000000000 +0100
20499 +++ linux-4.4.46/kernel/rcu/rcutorture.c 2017-02-03 17:18:10.927619058 +0100
20500 @@ -390,6 +390,7 @@
20504 +#ifndef CONFIG_PREEMPT_RT_FULL
20506 * Definitions for rcu_bh torture testing.
20508 @@ -429,6 +430,12 @@
20513 +static struct rcu_torture_ops rcu_bh_ops = {
20514 + .ttype = INVALID_RCU_FLAVOR,
20519 * Don't even think about trying any of these in real life!!!
20520 * The names includes "busted", and they really means it!
20521 diff -Nur linux-4.4.46.orig/kernel/rcu/tree.c linux-4.4.46/kernel/rcu/tree.c
20522 --- linux-4.4.46.orig/kernel/rcu/tree.c 2017-02-01 08:31:11.000000000 +0100
20523 +++ linux-4.4.46/kernel/rcu/tree.c 2017-02-03 17:18:10.931619212 +0100
20525 #include <linux/random.h>
20526 #include <linux/trace_events.h>
20527 #include <linux/suspend.h>
20528 +#include <linux/delay.h>
20529 +#include <linux/gfp.h>
20530 +#include <linux/oom.h>
20531 +#include <linux/smpboot.h>
20532 +#include "../time/tick-internal.h"
20536 @@ -266,6 +271,19 @@
20540 +#ifdef CONFIG_PREEMPT_RT_FULL
20541 +static void rcu_preempt_qs(void);
20543 +void rcu_bh_qs(void)
20545 + unsigned long flags;
20547 + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
20548 + local_irq_save(flags);
20549 + rcu_preempt_qs();
20550 + local_irq_restore(flags);
20553 void rcu_bh_qs(void)
20555 if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
20556 @@ -275,6 +293,7 @@
20557 __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
20562 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
20564 @@ -435,11 +454,13 @@
20566 * Return the number of RCU BH batches started thus far for debug & stats.
20568 +#ifndef CONFIG_PREEMPT_RT_FULL
20569 unsigned long rcu_batches_started_bh(void)
20571 return rcu_bh_state.gpnum;
20573 EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
20577 * Return the number of RCU batches completed thus far for debug & stats.
20578 @@ -459,6 +480,7 @@
20580 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
20582 +#ifndef CONFIG_PREEMPT_RT_FULL
20584 * Return the number of RCU BH batches completed thus far for debug & stats.
20586 @@ -486,6 +508,13 @@
20588 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
20591 +void rcu_force_quiescent_state(void)
20594 +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
20598 * Force a quiescent state for RCU-sched.
20600 @@ -536,9 +565,11 @@
20604 +#ifndef CONFIG_PREEMPT_RT_FULL
20605 case RCU_BH_FLAVOR:
20606 rsp = &rcu_bh_state;
20609 case RCU_SCHED_FLAVOR:
20610 rsp = &rcu_sched_state;
20612 @@ -1590,7 +1621,6 @@
20614 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
20616 - rcu_nocb_gp_cleanup(rsp, rnp);
20617 rnp->need_future_gp[c & 0x1] = 0;
20618 needmore = rnp->need_future_gp[(c + 1) & 0x1];
20619 trace_rcu_future_gp(rnp, rdp, c,
20620 @@ -1611,7 +1641,7 @@
20621 !READ_ONCE(rsp->gp_flags) ||
20624 - wake_up(&rsp->gp_wq);
20625 + swake_up(&rsp->gp_wq);
20629 @@ -1991,6 +2021,7 @@
20631 struct rcu_data *rdp;
20632 struct rcu_node *rnp = rcu_get_root(rsp);
20633 + struct swait_queue_head *sq;
20635 WRITE_ONCE(rsp->gp_activity, jiffies);
20636 raw_spin_lock_irq(&rnp->lock);
20637 @@ -2029,7 +2060,9 @@
20638 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
20639 /* smp_mb() provided by prior unlock-lock pair. */
20640 nocb += rcu_future_gp_cleanup(rsp, rnp);
20641 + sq = rcu_nocb_gp_get(rnp);
20642 raw_spin_unlock_irq(&rnp->lock);
20643 + rcu_nocb_gp_cleanup(sq);
20644 cond_resched_rcu_qs();
20645 WRITE_ONCE(rsp->gp_activity, jiffies);
20646 rcu_gp_slow(rsp, gp_cleanup_delay);
20647 @@ -2076,7 +2109,7 @@
20648 READ_ONCE(rsp->gpnum),
20650 rsp->gp_state = RCU_GP_WAIT_GPS;
20651 - wait_event_interruptible(rsp->gp_wq,
20652 + swait_event_interruptible(rsp->gp_wq,
20653 READ_ONCE(rsp->gp_flags) &
20655 rsp->gp_state = RCU_GP_DONE_GPS;
20656 @@ -2106,7 +2139,7 @@
20657 READ_ONCE(rsp->gpnum),
20659 rsp->gp_state = RCU_GP_WAIT_FQS;
20660 - ret = wait_event_interruptible_timeout(rsp->gp_wq,
20661 + ret = swait_event_interruptible_timeout(rsp->gp_wq,
20662 rcu_gp_fqs_check_wake(rsp, &gf), j);
20663 rsp->gp_state = RCU_GP_DOING_FQS;
20664 /* Locking provides needed memory barriers. */
20665 @@ -2230,7 +2263,7 @@
20666 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
20667 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
20668 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
20669 - rcu_gp_kthread_wake(rsp);
20670 + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
20674 @@ -2891,7 +2924,7 @@
20676 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
20677 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
20678 - rcu_gp_kthread_wake(rsp);
20679 + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
20683 @@ -2934,18 +2967,17 @@
20685 * Do RCU core processing for the current CPU.
20687 -static void rcu_process_callbacks(struct softirq_action *unused)
20688 +static void rcu_process_callbacks(void)
20690 struct rcu_state *rsp;
20692 if (cpu_is_offline(smp_processor_id()))
20694 - trace_rcu_utilization(TPS("Start RCU core"));
20695 for_each_rcu_flavor(rsp)
20696 __rcu_process_callbacks(rsp);
20697 - trace_rcu_utilization(TPS("End RCU core"));
20700 +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
20702 * Schedule RCU callback invocation. If the specified type of RCU
20703 * does not support RCU priority boosting, just do a direct call,
20704 @@ -2957,18 +2989,105 @@
20706 if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
20708 - if (likely(!rsp->boost)) {
20709 - rcu_do_batch(rsp, rdp);
20710 + rcu_do_batch(rsp, rdp);
20713 +static void rcu_wake_cond(struct task_struct *t, int status)
20716 + * If the thread is yielding, only wake it when this
20717 + * is invoked from idle
20719 + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
20720 + wake_up_process(t);
20724 + * Wake up this CPU's rcuc kthread to do RCU core processing.
20726 +static void invoke_rcu_core(void)
20728 + unsigned long flags;
20729 + struct task_struct *t;
20731 + if (!cpu_online(smp_processor_id()))
20733 + local_irq_save(flags);
20734 + __this_cpu_write(rcu_cpu_has_work, 1);
20735 + t = __this_cpu_read(rcu_cpu_kthread_task);
20736 + if (t != NULL && current != t)
20737 + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
20738 + local_irq_restore(flags);
20741 +static void rcu_cpu_kthread_park(unsigned int cpu)
20743 + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
20746 +static int rcu_cpu_kthread_should_run(unsigned int cpu)
20748 + return __this_cpu_read(rcu_cpu_has_work);
20752 + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
20753 + * RCU softirq used in flavors and configurations of RCU that do not
20754 + * support RCU priority boosting.
20756 +static void rcu_cpu_kthread(unsigned int cpu)
20758 + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
20759 + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
20762 + for (spincnt = 0; spincnt < 10; spincnt++) {
20763 + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
20764 + local_bh_disable();
20765 + *statusp = RCU_KTHREAD_RUNNING;
20766 + this_cpu_inc(rcu_cpu_kthread_loops);
20767 + local_irq_disable();
20770 + local_irq_enable();
20772 + rcu_process_callbacks();
20773 + local_bh_enable();
20774 + if (*workp == 0) {
20775 + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
20776 + *statusp = RCU_KTHREAD_WAITING;
20780 - invoke_rcu_callbacks_kthread();
20781 + *statusp = RCU_KTHREAD_YIELDING;
20782 + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
20783 + schedule_timeout_interruptible(2);
20784 + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
20785 + *statusp = RCU_KTHREAD_WAITING;
20788 -static void invoke_rcu_core(void)
20789 +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
20790 + .store = &rcu_cpu_kthread_task,
20791 + .thread_should_run = rcu_cpu_kthread_should_run,
20792 + .thread_fn = rcu_cpu_kthread,
20793 + .thread_comm = "rcuc/%u",
20794 + .setup = rcu_cpu_kthread_setup,
20795 + .park = rcu_cpu_kthread_park,
20799 + * Spawn per-CPU RCU core processing kthreads.
20801 +static int __init rcu_spawn_core_kthreads(void)
20803 - if (cpu_online(smp_processor_id()))
20804 - raise_softirq(RCU_SOFTIRQ);
20807 + for_each_possible_cpu(cpu)
20808 + per_cpu(rcu_cpu_has_work, cpu) = 0;
20809 + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
20812 +early_initcall(rcu_spawn_core_kthreads);
20815 * Handle any core-RCU processing required by a call_rcu() invocation.
20816 @@ -3114,6 +3233,7 @@
20818 EXPORT_SYMBOL_GPL(call_rcu_sched);
20820 +#ifndef CONFIG_PREEMPT_RT_FULL
20822 * Queue an RCU callback for invocation after a quicker grace period.
20824 @@ -3122,6 +3242,7 @@
20825 __call_rcu(head, func, &rcu_bh_state, -1, 0);
20827 EXPORT_SYMBOL_GPL(call_rcu_bh);
20831 * Queue an RCU callback for lazy invocation after a grace period.
20832 @@ -3213,6 +3334,7 @@
20834 EXPORT_SYMBOL_GPL(synchronize_sched);
20836 +#ifndef CONFIG_PREEMPT_RT_FULL
20838 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
20840 @@ -3239,6 +3361,7 @@
20841 wait_rcu_gp(call_rcu_bh);
20843 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
20847 * get_state_synchronize_rcu - Snapshot current RCU state
20848 @@ -3524,7 +3647,7 @@
20849 raw_spin_unlock_irqrestore(&rnp->lock, flags);
20851 smp_mb(); /* EGP done before wake_up(). */
20852 - wake_up(&rsp->expedited_wq);
20853 + swake_up(&rsp->expedited_wq);
20857 @@ -3781,7 +3904,7 @@
20858 jiffies_start = jiffies;
20861 - ret = wait_event_interruptible_timeout(
20862 + ret = swait_event_timeout(
20864 sync_rcu_preempt_exp_done(rnp_root),
20866 @@ -3789,7 +3912,7 @@
20869 /* Hit a signal, disable CPU stall warnings. */
20870 - wait_event(rsp->expedited_wq,
20871 + swait_event(rsp->expedited_wq,
20872 sync_rcu_preempt_exp_done(rnp_root));
20875 @@ -4101,6 +4224,7 @@
20876 mutex_unlock(&rsp->barrier_mutex);
20879 +#ifndef CONFIG_PREEMPT_RT_FULL
20881 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
20883 @@ -4109,6 +4233,7 @@
20884 _rcu_barrier(&rcu_bh_state);
20886 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
20890 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
20891 @@ -4455,8 +4580,8 @@
20895 - init_waitqueue_head(&rsp->gp_wq);
20896 - init_waitqueue_head(&rsp->expedited_wq);
20897 + init_swait_queue_head(&rsp->gp_wq);
20898 + init_swait_queue_head(&rsp->expedited_wq);
20899 rnp = rsp->level[rcu_num_lvls - 1];
20900 for_each_possible_cpu(i) {
20901 while (i > rnp->grphi)
20902 @@ -4576,12 +4701,13 @@
20904 rcu_bootup_announce();
20905 rcu_init_geometry();
20906 +#ifndef CONFIG_PREEMPT_RT_FULL
20907 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
20909 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
20911 rcu_dump_rcu_node_tree(&rcu_sched_state);
20912 __rcu_init_preempt();
20913 - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
20916 * We don't need protection against CPU-hotplug here because
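
The largest change above retires RCU_SOFTIRQ and moves callback processing into per-CPU rcuc kthreads registered through smpboot_register_percpu_thread(); invoke_rcu_core() now just sets rcu_cpu_has_work and wakes the thread. A rough user-space analogue of that work loop (bounded polling, then yield) using pthreads and an atomic flag; it models only the control flow, not softirq/bh semantics or RCU itself (build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int cpu_has_work;
static atomic_int done;

static void process_callbacks(void)
{
	printf("processing callbacks\n");
}

static void *rcuc_thread(void *unused)
{
	while (!atomic_load(&done)) {
		for (int spincnt = 0; spincnt < 10; spincnt++) {
			if (atomic_exchange(&cpu_has_work, 0))
				process_callbacks();
			else
				break;		/* nothing queued: back to waiting */
		}
		usleep(1000);			/* stands in for the kthread wait/yield */
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, rcuc_thread, NULL);
	for (int i = 0; i < 3; i++) {		/* what invoke_rcu_core() would do */
		atomic_store(&cpu_has_work, 1);
		usleep(5000);
	}
	atomic_store(&done, 1);
	pthread_join(t, NULL);
	return 0;
}
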
20917 diff -Nur linux-4.4.46.orig/kernel/rcu/tree.h linux-4.4.46/kernel/rcu/tree.h
20918 --- linux-4.4.46.orig/kernel/rcu/tree.h 2017-02-01 08:31:11.000000000 +0100
20919 +++ linux-4.4.46/kernel/rcu/tree.h 2017-02-03 17:18:10.931619212 +0100
20921 #include <linux/threads.h>
20922 #include <linux/cpumask.h>
20923 #include <linux/seqlock.h>
20924 +#include <linux/swait.h>
20925 #include <linux/stop_machine.h>
20928 @@ -241,7 +242,7 @@
20929 /* Refused to boost: not sure why, though. */
20930 /* This can happen due to race conditions. */
20931 #ifdef CONFIG_RCU_NOCB_CPU
20932 - wait_queue_head_t nocb_gp_wq[2];
20933 + struct swait_queue_head nocb_gp_wq[2];
20934 /* Place for rcu_nocb_kthread() to wait GP. */
20935 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
20936 int need_future_gp[2];
20937 @@ -393,7 +394,7 @@
20938 atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
20939 struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
20940 struct rcu_head **nocb_follower_tail;
20941 - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
20942 + struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
20943 struct task_struct *nocb_kthread;
20944 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
20946 @@ -472,7 +473,7 @@
20947 unsigned long gpnum; /* Current gp number. */
20948 unsigned long completed; /* # of last completed gp. */
20949 struct task_struct *gp_kthread; /* Task for grace periods. */
20950 - wait_queue_head_t gp_wq; /* Where GP task waits. */
20951 + struct swait_queue_head gp_wq; /* Where GP task waits. */
20952 short gp_flags; /* Commands for GP task. */
20953 short gp_state; /* GP kthread sleep state. */
20955 @@ -504,7 +505,7 @@
20956 atomic_long_t expedited_workdone3; /* # done by others #3. */
20957 atomic_long_t expedited_normal; /* # fallbacks to normal. */
20958 atomic_t expedited_need_qs; /* # CPUs left to check in. */
20959 - wait_queue_head_t expedited_wq; /* Wait for check-ins. */
20960 + struct swait_queue_head expedited_wq; /* Wait for check-ins. */
20961 int ncpus_snap; /* # CPUs seen last time. */
20963 unsigned long jiffies_force_qs; /* Time at which to invoke */
20964 @@ -556,18 +557,18 @@
20966 extern struct rcu_state rcu_sched_state;
20968 +#ifndef CONFIG_PREEMPT_RT_FULL
20969 extern struct rcu_state rcu_bh_state;
20972 #ifdef CONFIG_PREEMPT_RCU
20973 extern struct rcu_state rcu_preempt_state;
20974 #endif /* #ifdef CONFIG_PREEMPT_RCU */
20976 -#ifdef CONFIG_RCU_BOOST
20977 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
20978 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
20979 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
20980 DECLARE_PER_CPU(char, rcu_cpu_has_work);
20981 -#endif /* #ifdef CONFIG_RCU_BOOST */
20983 #ifndef RCU_TREE_NONCORE
20985 @@ -587,10 +588,9 @@
20986 static void __init __rcu_init_preempt(void);
20987 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
20988 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
20989 -static void invoke_rcu_callbacks_kthread(void);
20990 static bool rcu_is_callbacks_kthread(void);
20991 +static void rcu_cpu_kthread_setup(unsigned int cpu);
20992 #ifdef CONFIG_RCU_BOOST
20993 -static void rcu_preempt_do_callbacks(void);
20994 static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
20995 struct rcu_node *rnp);
20996 #endif /* #ifdef CONFIG_RCU_BOOST */
20997 @@ -607,7 +607,8 @@
20998 static void increment_cpu_stall_ticks(void);
20999 static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
21000 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
21001 -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
21002 +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
21003 +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
21004 static void rcu_init_one_nocb(struct rcu_node *rnp);
21005 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
21006 bool lazy, unsigned long flags);
21007 diff -Nur linux-4.4.46.orig/kernel/rcu/tree_plugin.h linux-4.4.46/kernel/rcu/tree_plugin.h
21008 --- linux-4.4.46.orig/kernel/rcu/tree_plugin.h 2017-02-01 08:31:11.000000000 +0100
21009 +++ linux-4.4.46/kernel/rcu/tree_plugin.h 2017-02-03 17:18:10.931619212 +0100
21010 @@ -24,25 +24,10 @@
21011 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21014 -#include <linux/delay.h>
21015 -#include <linux/gfp.h>
21016 -#include <linux/oom.h>
21017 -#include <linux/smpboot.h>
21018 -#include "../time/tick-internal.h"
21020 #ifdef CONFIG_RCU_BOOST
21022 #include "../locking/rtmutex_common.h"
21025 - * Control variables for per-CPU and per-rcu_node kthreads. These
21026 - * handle all flavors of RCU.
21028 -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
21029 -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
21030 -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
21031 -DEFINE_PER_CPU(char, rcu_cpu_has_work);
21033 #else /* #ifdef CONFIG_RCU_BOOST */
21038 #endif /* #else #ifdef CONFIG_RCU_BOOST */
21041 + * Control variables for per-CPU and per-rcu_node kthreads. These
21042 + * handle all flavors of RCU.
21044 +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
21045 +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
21046 +DEFINE_PER_CPU(char, rcu_cpu_has_work);
21048 #ifdef CONFIG_RCU_NOCB_CPU
21049 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
21050 static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
21051 @@ -432,7 +425,7 @@
21054 /* Hardware IRQ handlers cannot block, complain if they get here. */
21055 - if (in_irq() || in_serving_softirq()) {
21056 + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
21057 lockdep_rcu_suspicious(__FILE__, __LINE__,
21058 "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
21059 pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
21060 @@ -645,15 +638,6 @@
21061 t->rcu_read_unlock_special.b.need_qs = true;
21064 -#ifdef CONFIG_RCU_BOOST
21066 -static void rcu_preempt_do_callbacks(void)
21068 - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
21071 -#endif /* #ifdef CONFIG_RCU_BOOST */
21074 * Queue a preemptible-RCU callback for invocation after a grace period.
21076 @@ -930,6 +914,19 @@
21078 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
21081 + * If boosting, set rcuc kthreads to realtime priority.
21083 +static void rcu_cpu_kthread_setup(unsigned int cpu)
21085 +#ifdef CONFIG_RCU_BOOST
21086 + struct sched_param sp;
21088 + sp.sched_priority = kthread_prio;
21089 + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
21090 +#endif /* #ifdef CONFIG_RCU_BOOST */
21093 #ifdef CONFIG_RCU_BOOST
21095 #include "../locking/rtmutex_common.h"
21096 @@ -961,16 +958,6 @@
21098 #endif /* #else #ifdef CONFIG_RCU_TRACE */
21100 -static void rcu_wake_cond(struct task_struct *t, int status)
21103 - * If the thread is yielding, only wake it when this
21104 - * is invoked from idle
21106 - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
21107 - wake_up_process(t);
21111 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
21112 * or ->boost_tasks, advancing the pointer to the next task in the
21113 @@ -1115,23 +1102,6 @@
21117 - * Wake up the per-CPU kthread to invoke RCU callbacks.
21119 -static void invoke_rcu_callbacks_kthread(void)
21121 - unsigned long flags;
21123 - local_irq_save(flags);
21124 - __this_cpu_write(rcu_cpu_has_work, 1);
21125 - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
21126 - current != __this_cpu_read(rcu_cpu_kthread_task)) {
21127 - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
21128 - __this_cpu_read(rcu_cpu_kthread_status));
21130 - local_irq_restore(flags);
21134 * Is the current CPU running the RCU-callbacks kthread?
21135 * Caller must have preemption disabled.
21137 @@ -1186,67 +1156,6 @@
21141 -static void rcu_kthread_do_work(void)
21143 - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
21144 - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
21145 - rcu_preempt_do_callbacks();
21148 -static void rcu_cpu_kthread_setup(unsigned int cpu)
21150 - struct sched_param sp;
21152 - sp.sched_priority = kthread_prio;
21153 - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
21156 -static void rcu_cpu_kthread_park(unsigned int cpu)
21158 - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
21161 -static int rcu_cpu_kthread_should_run(unsigned int cpu)
21163 - return __this_cpu_read(rcu_cpu_has_work);
21167 - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
21168 - * RCU softirq used in flavors and configurations of RCU that do not
21169 - * support RCU priority boosting.
21171 -static void rcu_cpu_kthread(unsigned int cpu)
21173 - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
21174 - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
21177 - for (spincnt = 0; spincnt < 10; spincnt++) {
21178 - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
21179 - local_bh_disable();
21180 - *statusp = RCU_KTHREAD_RUNNING;
21181 - this_cpu_inc(rcu_cpu_kthread_loops);
21182 - local_irq_disable();
21185 - local_irq_enable();
21187 - rcu_kthread_do_work();
21188 - local_bh_enable();
21189 - if (*workp == 0) {
21190 - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
21191 - *statusp = RCU_KTHREAD_WAITING;
21195 - *statusp = RCU_KTHREAD_YIELDING;
21196 - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
21197 - schedule_timeout_interruptible(2);
21198 - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
21199 - *statusp = RCU_KTHREAD_WAITING;
21203 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
21204 * served by the rcu_node in question. The CPU hotplug lock is still
21205 @@ -1276,26 +1185,12 @@
21206 free_cpumask_var(cm);
21209 -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
21210 - .store = &rcu_cpu_kthread_task,
21211 - .thread_should_run = rcu_cpu_kthread_should_run,
21212 - .thread_fn = rcu_cpu_kthread,
21213 - .thread_comm = "rcuc/%u",
21214 - .setup = rcu_cpu_kthread_setup,
21215 - .park = rcu_cpu_kthread_park,
21219 * Spawn boost kthreads -- called as soon as the scheduler is running.
21221 static void __init rcu_spawn_boost_kthreads(void)
21223 struct rcu_node *rnp;
21226 - for_each_possible_cpu(cpu)
21227 - per_cpu(rcu_cpu_has_work, cpu) = 0;
21228 - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
21229 rcu_for_each_leaf_node(rcu_state_p, rnp)
21230 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
21232 @@ -1318,11 +1213,6 @@
21233 raw_spin_unlock_irqrestore(&rnp->lock, flags);
21236 -static void invoke_rcu_callbacks_kthread(void)
21241 static bool rcu_is_callbacks_kthread(void)
21244 @@ -1346,7 +1236,7 @@
21246 #endif /* #else #ifdef CONFIG_RCU_BOOST */
21248 -#if !defined(CONFIG_RCU_FAST_NO_HZ)
21249 +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
21252 * Check to see if any future RCU-related work will need to be done
21253 @@ -1363,7 +1253,9 @@
21254 return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
21255 ? 0 : rcu_cpu_has_callbacks(NULL);
21257 +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
21259 +#if !defined(CONFIG_RCU_FAST_NO_HZ)
21261 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
21263 @@ -1459,6 +1351,8 @@
21267 +#ifndef CONFIG_PREEMPT_RT_FULL
21270 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
21271 * to invoke. If the CPU has callbacks, try to advance them. Tell the
21272 @@ -1504,6 +1398,7 @@
21273 *nextevt = basemono + dj * TICK_NSEC;
21276 +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
21279 * Prepare a CPU for idle from an RCU perspective. The first major task
21280 @@ -1822,9 +1717,9 @@
21281 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
21284 -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
21285 +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
21287 - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
21288 + swake_up_all(sq);
21292 @@ -1840,10 +1735,15 @@
21293 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
21296 +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
21298 + return &rnp->nocb_gp_wq[rnp->completed & 0x1];
21301 static void rcu_init_one_nocb(struct rcu_node *rnp)
21303 - init_waitqueue_head(&rnp->nocb_gp_wq[0]);
21304 - init_waitqueue_head(&rnp->nocb_gp_wq[1]);
21305 + init_swait_queue_head(&rnp->nocb_gp_wq[0]);
21306 + init_swait_queue_head(&rnp->nocb_gp_wq[1]);
21309 #ifndef CONFIG_RCU_NOCB_CPU_ALL
21310 @@ -1868,7 +1768,7 @@
21311 if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
21312 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
21313 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
21314 - wake_up(&rdp_leader->nocb_wq);
21315 + swake_up(&rdp_leader->nocb_wq);
21319 @@ -2081,7 +1981,7 @@
21321 trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
21323 - wait_event_interruptible(
21324 + swait_event_interruptible(
21325 rnp->nocb_gp_wq[c & 0x1],
21326 (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
21328 @@ -2109,7 +2009,7 @@
21329 /* Wait for callbacks to appear. */
21330 if (!rcu_nocb_poll) {
21331 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
21332 - wait_event_interruptible(my_rdp->nocb_wq,
21333 + swait_event_interruptible(my_rdp->nocb_wq,
21334 !READ_ONCE(my_rdp->nocb_leader_sleep));
21335 /* Memory barrier handled by smp_mb() calls below and repoll. */
21336 } else if (firsttime) {
21337 @@ -2184,7 +2084,7 @@
21338 * List was empty, wake up the follower.
21339 * Memory barriers supplied by atomic_long_add().
21341 - wake_up(&rdp->nocb_wq);
21342 + swake_up(&rdp->nocb_wq);
21346 @@ -2205,7 +2105,7 @@
21347 if (!rcu_nocb_poll) {
21348 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
21350 - wait_event_interruptible(rdp->nocb_wq,
21351 + swait_event_interruptible(rdp->nocb_wq,
21352 READ_ONCE(rdp->nocb_follower_head));
21353 } else if (firsttime) {
21354 /* Don't drown trace log with "Poll"! */
21355 @@ -2365,7 +2265,7 @@
21356 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
21358 rdp->nocb_tail = &rdp->nocb_head;
21359 - init_waitqueue_head(&rdp->nocb_wq);
21360 + init_swait_queue_head(&rdp->nocb_wq);
21361 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
21364 @@ -2515,7 +2415,7 @@
21368 -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
21369 +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
21373 @@ -2523,6 +2423,11 @@
21377 +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
21382 static void rcu_init_one_nocb(struct rcu_node *rnp)
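
rcu_nocb_gp_cleanup() now takes the wait-queue pointer that rcu_nocb_gp_get() captured while rnp->lock was still held, so the actual swake_up_all() happens after the lock is dropped. A user-space illustration of that capture-then-wake-unlocked pattern with a pthread mutex and condition variables; the two-element queue indexed by completed & 0x1 mirrors the kernel structure, everything else is simplified:

#include <pthread.h>
#include <stdio.h>

struct gp_node {
	pthread_mutex_t lock;
	pthread_cond_t wq[2];
	unsigned long completed;
};

static pthread_cond_t *gp_get(struct gp_node *rnp)
{
	/* caller holds rnp->lock */
	return &rnp->wq[rnp->completed & 0x1];
}

static void gp_cleanup(pthread_cond_t *sq)
{
	pthread_cond_broadcast(sq);	/* swake_up_all() stand-in, lock not held */
}

int main(void)
{
	struct gp_node rnp = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wq = { PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER },
	};
	pthread_cond_t *sq;

	pthread_mutex_lock(&rnp.lock);
	rnp.completed++;		/* grace period ends */
	sq = gp_get(&rnp);		/* remember who to wake ... */
	pthread_mutex_unlock(&rnp.lock);
	gp_cleanup(sq);			/* ... and wake them after unlocking */
	printf("woke waiters for gp %lu\n", rnp.completed);
	return 0;
}
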
21385 diff -Nur linux-4.4.46.orig/kernel/rcu/update.c linux-4.4.46/kernel/rcu/update.c
21386 --- linux-4.4.46.orig/kernel/rcu/update.c 2017-02-01 08:31:11.000000000 +0100
21387 +++ linux-4.4.46/kernel/rcu/update.c 2017-02-03 17:18:10.931619212 +0100
21388 @@ -276,6 +276,7 @@
21390 EXPORT_SYMBOL_GPL(rcu_read_lock_held);
21392 +#ifndef CONFIG_PREEMPT_RT_FULL
21394 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
21396 @@ -302,6 +303,7 @@
21397 return in_softirq() || irqs_disabled();
21399 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
21402 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
21404 diff -Nur linux-4.4.46.orig/kernel/relay.c linux-4.4.46/kernel/relay.c
21405 --- linux-4.4.46.orig/kernel/relay.c 2017-02-01 08:31:11.000000000 +0100
21406 +++ linux-4.4.46/kernel/relay.c 2017-02-03 17:18:10.931619212 +0100
21407 @@ -336,6 +336,10 @@
21409 struct rchan_buf *buf = (struct rchan_buf *)data;
21410 wake_up_interruptible(&buf->read_wait);
21412 + * Stupid polling for now:
21414 + mod_timer(&buf->timer, jiffies + 1);
21418 @@ -353,6 +357,7 @@
21419 init_waitqueue_head(&buf->read_wait);
21420 kref_init(&buf->kref);
21421 setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
21422 + mod_timer(&buf->timer, jiffies + 1);
21424 del_timer_sync(&buf->timer);
21426 @@ -736,15 +741,6 @@
21428 buf->early_bytes += buf->chan->subbuf_size -
21429 buf->padding[old_subbuf];
21431 - if (waitqueue_active(&buf->read_wait))
21433 - * Calling wake_up_interruptible() from here
21434 - * will deadlock if we happen to be logging
21435 - * from the scheduler (trying to re-grab
21436 - * rq->lock), so defer it.
21438 - mod_timer(&buf->timer, jiffies + 1);
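
The relay change stops waking readers from the write path (which, as the removed comment notes, can deadlock when logging from under rq->lock) and instead lets the buffer timer re-arm itself every jiffy and do the wakeup. A small pthread sketch of that split, where the producer only sets a flag and a periodic tick performs the wakeup; the 1 ms tick and the flag are illustrative only (build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int data_ready;

static void *tick_thread(void *unused)
{
	for (int i = 0; i < 50; i++) {		/* mod_timer(..., jiffies + 1) loop */
		if (atomic_exchange(&data_ready, 0))
			printf("tick: waking readers\n");
		usleep(1000);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, tick_thread, NULL);
	atomic_store(&data_ready, 1);		/* producer just sets a flag */
	usleep(20000);
	atomic_store(&data_ready, 1);
	pthread_join(t, NULL);
	return 0;
}
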
21442 diff -Nur linux-4.4.46.orig/kernel/sched/completion.c linux-4.4.46/kernel/sched/completion.c
21443 --- linux-4.4.46.orig/kernel/sched/completion.c 2017-02-01 08:31:11.000000000 +0100
21444 +++ linux-4.4.46/kernel/sched/completion.c 2017-02-03 17:18:10.931619212 +0100
21445 @@ -30,10 +30,10 @@
21447 unsigned long flags;
21449 - spin_lock_irqsave(&x->wait.lock, flags);
21450 + raw_spin_lock_irqsave(&x->wait.lock, flags);
21452 - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
21453 - spin_unlock_irqrestore(&x->wait.lock, flags);
21454 + swake_up_locked(&x->wait);
21455 + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
21457 EXPORT_SYMBOL(complete);
21459 @@ -50,10 +50,10 @@
21461 unsigned long flags;
21463 - spin_lock_irqsave(&x->wait.lock, flags);
21464 + raw_spin_lock_irqsave(&x->wait.lock, flags);
21465 x->done += UINT_MAX/2;
21466 - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
21467 - spin_unlock_irqrestore(&x->wait.lock, flags);
21468 + swake_up_all_locked(&x->wait);
21469 + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
21471 EXPORT_SYMBOL(complete_all);
21473 @@ -62,20 +62,20 @@
21474 long (*action)(long), long timeout, int state)
21477 - DECLARE_WAITQUEUE(wait, current);
21478 + DECLARE_SWAITQUEUE(wait);
21480 - __add_wait_queue_tail_exclusive(&x->wait, &wait);
21481 + __prepare_to_swait(&x->wait, &wait);
21483 if (signal_pending_state(state, current)) {
21484 timeout = -ERESTARTSYS;
21487 __set_current_state(state);
21488 - spin_unlock_irq(&x->wait.lock);
21489 + raw_spin_unlock_irq(&x->wait.lock);
21490 timeout = action(timeout);
21491 - spin_lock_irq(&x->wait.lock);
21492 + raw_spin_lock_irq(&x->wait.lock);
21493 } while (!x->done && timeout);
21494 - __remove_wait_queue(&x->wait, &wait);
21495 + __finish_swait(&x->wait, &wait);
21503 - spin_lock_irq(&x->wait.lock);
21504 + raw_spin_lock_irq(&x->wait.lock);
21505 timeout = do_wait_for_common(x, action, timeout, state);
21506 - spin_unlock_irq(&x->wait.lock);
21507 + raw_spin_unlock_irq(&x->wait.lock);
21511 @@ -277,12 +277,12 @@
21512 if (!READ_ONCE(x->done))
21515 - spin_lock_irqsave(&x->wait.lock, flags);
21516 + raw_spin_lock_irqsave(&x->wait.lock, flags);
21521 - spin_unlock_irqrestore(&x->wait.lock, flags);
21522 + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
21525 EXPORT_SYMBOL(try_wait_for_completion);
21526 @@ -311,7 +311,7 @@
21527 * after it's acquired the lock.
21530 - spin_unlock_wait(&x->wait.lock);
21531 + raw_spin_unlock_wait(&x->wait.lock);
21534 EXPORT_SYMBOL(completion_done);
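The completion.c conversion above rebuilds completions on a raw spinlock plus a simple waitqueue, so complete() remains usable from contexts that stay atomic on PREEMPT_RT_FULL (hard interrupt handlers, raw-spinlock sections), where ordinary spinlocks become sleeping locks. A hedged caller-side sketch using only the unchanged completion API; the demo_* names are invented.

#include <linux/completion.h>
#include <linux/interrupt.h>

static DECLARE_COMPLETION(demo_done);

/* hard interrupt context: with the raw-spinlock/swait based completion this
 * remains legal on PREEMPT_RT_FULL */
static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	complete(&demo_done);
	return IRQ_HANDLED;
}

/* process context: sleeps on the swait queue embedded in struct completion,
 * via the __prepare_to_swait()/__finish_swait() loop shown in the hunk */
static int demo_wait_thread(void *unused)
{
	wait_for_completion(&demo_done);
	return 0;
}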
21535 diff -Nur linux-4.4.46.orig/kernel/sched/core.c linux-4.4.46/kernel/sched/core.c
21536 --- linux-4.4.46.orig/kernel/sched/core.c 2017-02-01 08:31:11.000000000 +0100
21537 +++ linux-4.4.46/kernel/sched/core.c 2017-02-03 17:18:10.931619212 +0100
21538 @@ -260,7 +260,11 @@
21539 * Number of tasks to iterate in a single balance run.
21540 * Limited because this is done with IRQs disabled.
21542 +#ifndef CONFIG_PREEMPT_RT_FULL
21543 const_debug unsigned int sysctl_sched_nr_migrate = 32;
21545 +const_debug unsigned int sysctl_sched_nr_migrate = 8;
21549 * period over which we average the RT time consumption, measured
21550 @@ -438,6 +442,7 @@
21552 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
21553 rq->hrtick_timer.function = hrtick;
21554 + rq->hrtick_timer.irqsafe = 1;
21556 #else /* CONFIG_SCHED_HRTICK */
21557 static inline void hrtick_clear(struct rq *rq)
21558 @@ -542,7 +547,7 @@
21559 head->lastp = &node->next;
21562 -void wake_up_q(struct wake_q_head *head)
21563 +void __wake_up_q(struct wake_q_head *head, bool sleeper)
21565 struct wake_q_node *node = head->first;
21567 @@ -559,7 +564,10 @@
21568 * wake_up_process() implies a wmb() to pair with the queueing
21569 * in wake_q_add() so as not to miss wakeups.
21571 - wake_up_process(task);
21573 + wake_up_lock_sleeper(task);
21575 + wake_up_process(task);
21576 put_task_struct(task);
21579 @@ -595,6 +603,38 @@
21580 trace_sched_wake_idle_without_ipi(cpu);
21583 +#ifdef CONFIG_PREEMPT_LAZY
21584 +void resched_curr_lazy(struct rq *rq)
21586 + struct task_struct *curr = rq->curr;
21589 + if (!sched_feat(PREEMPT_LAZY)) {
21590 + resched_curr(rq);
21594 + lockdep_assert_held(&rq->lock);
21596 + if (test_tsk_need_resched(curr))
21599 + if (test_tsk_need_resched_lazy(curr))
21602 + set_tsk_need_resched_lazy(curr);
21604 + cpu = cpu_of(rq);
21605 + if (cpu == smp_processor_id())
21608 + /* NEED_RESCHED_LAZY must be visible before we test polling */
21610 + if (!tsk_is_polling(curr))
21611 + smp_send_reschedule(cpu);
21615 void resched_cpu(int cpu)
21617 struct rq *rq = cpu_rq(cpu);
21618 @@ -618,11 +658,14 @@
21620 int get_nohz_timer_target(void)
21622 - int i, cpu = smp_processor_id();
21624 struct sched_domain *sd;
21626 + preempt_disable_rt();
21627 + cpu = smp_processor_id();
21629 if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
21631 + goto preempt_en_rt;
21634 for_each_domain(cpu, sd) {
21635 @@ -641,6 +684,8 @@
21636 cpu = housekeeping_any_cpu();
21640 + preempt_enable_rt();
21644 @@ -1174,6 +1219,11 @@
21646 lockdep_assert_held(&p->pi_lock);
21648 + if (__migrate_disabled(p)) {
21649 + cpumask_copy(&p->cpus_allowed, new_mask);
21653 queued = task_on_rq_queued(p);
21654 running = task_current(rq, p);
21656 @@ -1196,6 +1246,84 @@
21657 enqueue_task(rq, p, ENQUEUE_RESTORE);
21660 +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
21661 +static DEFINE_MUTEX(sched_down_mutex);
21662 +static cpumask_t sched_down_cpumask;
21664 +void tell_sched_cpu_down_begin(int cpu)
21666 + mutex_lock(&sched_down_mutex);
21667 + cpumask_set_cpu(cpu, &sched_down_cpumask);
21668 + mutex_unlock(&sched_down_mutex);
21671 +void tell_sched_cpu_down_done(int cpu)
21673 + mutex_lock(&sched_down_mutex);
21674 + cpumask_clear_cpu(cpu, &sched_down_cpumask);
21675 + mutex_unlock(&sched_down_mutex);
21679 + * migrate_me - try to move the current task off this cpu
21681 + * Used by the pin_current_cpu() code to try to get tasks
21682 + * to move off the current CPU as it is going down.
21683 + * It will only move the task if the task isn't pinned to
21684 + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
21685 + * and the task is in a RUNNING state. Otherwise moving the
21686 + * task would wake it up (change its state to running) when
21687 + * the task did not expect it.
21689 + * Returns 1 if it succeeded in moving the current task
21692 +int migrate_me(void)
21694 + struct task_struct *p = current;
21695 + struct migration_arg arg;
21696 + struct cpumask *cpumask;
21697 + struct cpumask *mask;
21698 + unsigned long flags;
21699 + unsigned int dest_cpu;
21703 + * We cannot migrate tasks bound to a CPU or tasks that are not
21704 + * running. Moving such a task would wake it up.
21706 + if (p->flags & PF_NO_SETAFFINITY || p->state)
21709 + mutex_lock(&sched_down_mutex);
21710 + rq = task_rq_lock(p, &flags);
21712 + cpumask = this_cpu_ptr(&sched_cpumasks);
21713 + mask = &p->cpus_allowed;
21715 + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
21717 + if (!cpumask_weight(cpumask)) {
21718 + /* It's only on this CPU? */
21719 + task_rq_unlock(rq, p, &flags);
21720 + mutex_unlock(&sched_down_mutex);
21724 + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
21727 + arg.dest_cpu = dest_cpu;
21729 + task_rq_unlock(rq, p, &flags);
21731 + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
21732 + tlb_migrate_finish(p->mm);
21733 + mutex_unlock(&sched_down_mutex);
21739 * Change a given task's CPU affinity. Migrate the thread to a
21740 * proper CPU and schedule it away if the CPU it's executing on
21741 @@ -1235,7 +1363,7 @@
21742 do_set_cpus_allowed(p, new_mask);
21744 /* Can the task run on the task's current CPU? If so, we're done */
21745 - if (cpumask_test_cpu(task_cpu(p), new_mask))
21746 + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
21749 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
21750 @@ -1411,6 +1539,18 @@
21754 +static bool check_task_state(struct task_struct *p, long match_state)
21756 + bool match = false;
21758 + raw_spin_lock_irq(&p->pi_lock);
21759 + if (p->state == match_state || p->saved_state == match_state)
21761 + raw_spin_unlock_irq(&p->pi_lock);
21767 * wait_task_inactive - wait for a thread to unschedule.
21769 @@ -1455,7 +1595,7 @@
21770 * is actually now running somewhere else!
21772 while (task_running(rq, p)) {
21773 - if (match_state && unlikely(p->state != match_state))
21774 + if (match_state && !check_task_state(p, match_state))
21778 @@ -1470,7 +1610,8 @@
21779 running = task_running(rq, p);
21780 queued = task_on_rq_queued(p);
21782 - if (!match_state || p->state == match_state)
21783 + if (!match_state || p->state == match_state ||
21784 + p->saved_state == match_state)
21785 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
21786 task_rq_unlock(rq, p, &flags);
21788 @@ -1627,7 +1768,7 @@
21790 lockdep_assert_held(&p->pi_lock);
21792 - if (p->nr_cpus_allowed > 1)
21793 + if (tsk_nr_cpus_allowed(p) > 1)
21794 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
21797 @@ -1707,10 +1848,6 @@
21799 activate_task(rq, p, en_flags);
21800 p->on_rq = TASK_ON_RQ_QUEUED;
21802 - /* if a worker is waking up, notify workqueue */
21803 - if (p->flags & PF_WQ_WORKER)
21804 - wq_worker_waking_up(p, cpu_of(rq));
21808 @@ -1937,8 +2074,27 @@
21810 smp_mb__before_spinlock();
21811 raw_spin_lock_irqsave(&p->pi_lock, flags);
21812 - if (!(p->state & state))
21813 + if (!(p->state & state)) {
21815 + * The task might be running due to a spinlock sleeper
21816 + * wakeup. Check the saved state and set it to running
21817 + * if the wakeup condition is true.
21819 + if (!(wake_flags & WF_LOCK_SLEEPER)) {
21820 + if (p->saved_state & state) {
21821 + p->saved_state = TASK_RUNNING;
21829 + * If this is a regular wakeup, then we can unconditionally
21830 + * clear the saved state of a "lock sleeper".
21832 + if (!(wake_flags & WF_LOCK_SLEEPER))
21833 + p->saved_state = TASK_RUNNING;
21835 trace_sched_waking(p);
21837 @@ -2030,52 +2186,6 @@
21841 - * try_to_wake_up_local - try to wake up a local task with rq lock held
21842 - * @p: the thread to be awakened
21844 - * Put @p on the run-queue if it's not already there. The caller must
21845 - * ensure that this_rq() is locked, @p is bound to this_rq() and not
21846 - * the current task.
21848 -static void try_to_wake_up_local(struct task_struct *p)
21850 - struct rq *rq = task_rq(p);
21852 - if (WARN_ON_ONCE(rq != this_rq()) ||
21853 - WARN_ON_ONCE(p == current))
21856 - lockdep_assert_held(&rq->lock);
21858 - if (!raw_spin_trylock(&p->pi_lock)) {
21860 - * This is OK, because current is on_cpu, which avoids it being
21861 - * picked for load-balance and preemption/IRQs are still
21862 - * disabled avoiding further scheduler activity on it and we've
21863 - * not yet picked a replacement task.
21865 - lockdep_unpin_lock(&rq->lock);
21866 - raw_spin_unlock(&rq->lock);
21867 - raw_spin_lock(&p->pi_lock);
21868 - raw_spin_lock(&rq->lock);
21869 - lockdep_pin_lock(&rq->lock);
21872 - if (!(p->state & TASK_NORMAL))
21875 - trace_sched_waking(p);
21877 - if (!task_on_rq_queued(p))
21878 - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
21880 - ttwu_do_wakeup(rq, p, 0);
21881 - ttwu_stat(p, smp_processor_id(), 0);
21883 - raw_spin_unlock(&p->pi_lock);
21887 * wake_up_process - Wake up a specific process
21888 * @p: The process to be woken up.
21890 @@ -2093,6 +2203,18 @@
21892 EXPORT_SYMBOL(wake_up_process);
21895 + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
21896 + * @p: The process to be woken up.
21898 + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
21899 + * the nature of the wakeup.
21901 +int wake_up_lock_sleeper(struct task_struct *p)
21903 + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
21906 int wake_up_state(struct task_struct *p, unsigned int state)
21908 return try_to_wake_up(p, state, 0);
21909 @@ -2279,6 +2401,9 @@
21912 init_task_preempt_count(p);
21913 +#ifdef CONFIG_HAVE_PREEMPT_LAZY
21914 + task_thread_info(p)->preempt_lazy_count = 0;
21917 plist_node_init(&p->pushable_tasks, MAX_PRIO);
21918 RB_CLEAR_NODE(&p->pushable_dl_tasks);
21919 @@ -2603,8 +2728,12 @@
21920 finish_arch_post_lock_switch();
21922 fire_sched_in_preempt_notifiers(current);
21924 + * We use mmdrop_delayed() here so we don't have to do the
21925 + * full __mmdrop() when we are the last user.
21929 + mmdrop_delayed(mm);
21930 if (unlikely(prev_state == TASK_DEAD)) {
21931 if (prev->sched_class->task_dead)
21932 prev->sched_class->task_dead(prev);
21933 @@ -2935,16 +3064,6 @@
21937 -notrace unsigned long get_parent_ip(unsigned long addr)
21939 - if (in_lock_functions(addr)) {
21940 - addr = CALLER_ADDR2;
21941 - if (in_lock_functions(addr))
21942 - addr = CALLER_ADDR3;
21947 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
21948 defined(CONFIG_PREEMPT_TRACER))
21950 @@ -2966,7 +3085,7 @@
21951 PREEMPT_MASK - 10);
21953 if (preempt_count() == val) {
21954 - unsigned long ip = get_parent_ip(CALLER_ADDR1);
21955 + unsigned long ip = get_lock_parent_ip();
21956 #ifdef CONFIG_DEBUG_PREEMPT
21957 current->preempt_disable_ip = ip;
21959 @@ -2993,7 +3112,7 @@
21962 if (preempt_count() == val)
21963 - trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
21964 + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
21965 __preempt_count_sub(val);
21967 EXPORT_SYMBOL(preempt_count_sub);
21968 @@ -3048,6 +3167,77 @@
21969 schedstat_inc(this_rq(), sched_count);
21972 +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
21974 +void migrate_disable(void)
21976 + struct task_struct *p = current;
21978 + if (in_atomic() || irqs_disabled()) {
21979 +#ifdef CONFIG_SCHED_DEBUG
21980 + p->migrate_disable_atomic++;
21985 +#ifdef CONFIG_SCHED_DEBUG
21986 + if (unlikely(p->migrate_disable_atomic)) {
21992 + if (p->migrate_disable) {
21993 + p->migrate_disable++;
21997 + preempt_disable();
21998 + preempt_lazy_disable();
21999 + pin_current_cpu();
22000 + p->migrate_disable = 1;
22001 + preempt_enable();
22003 +EXPORT_SYMBOL(migrate_disable);
22005 +void migrate_enable(void)
22007 + struct task_struct *p = current;
22009 + if (in_atomic() || irqs_disabled()) {
22010 +#ifdef CONFIG_SCHED_DEBUG
22011 + p->migrate_disable_atomic--;
22016 +#ifdef CONFIG_SCHED_DEBUG
22017 + if (unlikely(p->migrate_disable_atomic)) {
22022 + WARN_ON_ONCE(p->migrate_disable <= 0);
22024 + if (p->migrate_disable > 1) {
22025 + p->migrate_disable--;
22029 + preempt_disable();
22031 + * Clearing migrate_disable causes tsk_cpus_allowed to
22032 + * show the task's original cpu affinity.
22034 + p->migrate_disable = 0;
22036 + unpin_current_cpu();
22037 + preempt_enable();
22038 + preempt_lazy_enable();
22040 +EXPORT_SYMBOL(migrate_enable);
22044 * Pick up the highest-prio task:
22046 @@ -3172,19 +3362,6 @@
22048 deactivate_task(rq, prev, DEQUEUE_SLEEP);
22052 - * If a worker went to sleep, notify and ask workqueue
22053 - * whether it wants to wake up a task to maintain
22056 - if (prev->flags & PF_WQ_WORKER) {
22057 - struct task_struct *to_wakeup;
22059 - to_wakeup = wq_worker_sleeping(prev, cpu);
22061 - try_to_wake_up_local(to_wakeup);
22064 switch_count = &prev->nvcsw;
22066 @@ -3194,6 +3371,7 @@
22068 next = pick_next_task(rq, prev);
22069 clear_tsk_need_resched(prev);
22070 + clear_tsk_need_resched_lazy(prev);
22071 clear_preempt_need_resched();
22072 rq->clock_skip_update = 0;
22074 @@ -3215,9 +3393,20 @@
22076 static inline void sched_submit_work(struct task_struct *tsk)
22078 - if (!tsk->state || tsk_is_pi_blocked(tsk))
22082 + * If a worker went to sleep, notify and ask workqueue whether
22083 + * it wants to wake up a task to maintain concurrency.
22085 + if (tsk->flags & PF_WQ_WORKER)
22086 + wq_worker_sleeping(tsk);
22089 + if (tsk_is_pi_blocked(tsk))
22093 * If we are going to sleep and we have plugged IO queued,
22094 * make sure to submit it to avoid deadlocks.
22096 @@ -3225,6 +3414,12 @@
22097 blk_schedule_flush_plug(tsk);
22100 +static void sched_update_worker(struct task_struct *tsk)
22102 + if (tsk->flags & PF_WQ_WORKER)
22103 + wq_worker_running(tsk);
22106 asmlinkage __visible void __sched schedule(void)
22108 struct task_struct *tsk = current;
22109 @@ -3235,6 +3430,7 @@
22111 sched_preempt_enable_no_resched();
22112 } while (need_resched());
22113 + sched_update_worker(tsk);
22115 EXPORT_SYMBOL(schedule);
22117 @@ -3283,6 +3479,30 @@
22118 } while (need_resched());
22121 +#ifdef CONFIG_PREEMPT_LAZY
22123 + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since
22124 + * that is set by an RT task. Otherwise we try to avoid being scheduled
22125 + * out as long as the preempt_lazy_count counter is > 0.
22127 +static __always_inline int preemptible_lazy(void)
22129 + if (test_thread_flag(TIF_NEED_RESCHED))
22131 + if (current_thread_info()->preempt_lazy_count)
22138 +static inline int preemptible_lazy(void)
22145 #ifdef CONFIG_PREEMPT
22147 * this is the entry point to schedule() from in-kernel preemption
22148 @@ -3297,6 +3517,8 @@
22150 if (likely(!preemptible()))
22152 + if (!preemptible_lazy())
22155 preempt_schedule_common();
22157 @@ -3323,6 +3545,8 @@
22159 if (likely(!preemptible()))
22161 + if (!preemptible_lazy())
22165 preempt_disable_notrace();
22166 @@ -3332,7 +3556,16 @@
22167 * an infinite recursion.
22169 prev_ctx = exception_enter();
22171 + * The add/subtract must not be traced by the function
22172 + * tracer. But we still want to account for the
22173 + * preempt off latency tracer. Since the _notrace versions
22174 + * of add/subtract skip the accounting for the latency tracer
22175 + * we must force it manually.
22177 + start_critical_timings();
22179 + stop_critical_timings();
22180 exception_exit(prev_ctx);
22182 preempt_enable_no_resched_notrace();
22183 @@ -4676,6 +4909,7 @@
22185 EXPORT_SYMBOL(__cond_resched_lock);
22187 +#ifndef CONFIG_PREEMPT_RT_FULL
22188 int __sched __cond_resched_softirq(void)
22190 BUG_ON(!in_softirq());
22191 @@ -4689,6 +4923,7 @@
22194 EXPORT_SYMBOL(__cond_resched_softirq);
22198 * yield - yield the current processor to other threads.
22199 @@ -5055,7 +5290,9 @@
22201 /* Set the preempt count _outside_ the spinlocks! */
22202 init_idle_preempt_count(idle, cpu);
22204 +#ifdef CONFIG_HAVE_PREEMPT_LAZY
22205 + task_thread_info(idle)->preempt_lazy_count = 0;
22208 * The idle tasks have their own, simple scheduling class:
22210 @@ -5196,6 +5433,8 @@
22211 #endif /* CONFIG_NUMA_BALANCING */
22213 #ifdef CONFIG_HOTPLUG_CPU
22214 +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
22217 * Ensures that the idle task is using init_mm right before its cpu goes
22219 @@ -5210,7 +5449,11 @@
22220 switch_mm(mm, &init_mm, current);
22221 finish_arch_post_lock_switch();
22225 + * Defer the cleanup to a live CPU. On RT we can neither
22226 + * call mmdrop() nor mmdrop_delayed() from here.
22228 + per_cpu(idle_last_mm, smp_processor_id()) = mm;
22232 @@ -5583,6 +5826,10 @@
22235 calc_load_migrate(rq);
22236 + if (per_cpu(idle_last_mm, cpu)) {
22237 + mmdrop(per_cpu(idle_last_mm, cpu));
22238 + per_cpu(idle_last_mm, cpu) = NULL;
22243 @@ -7566,7 +7813,7 @@
22244 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
22245 static inline int preempt_count_equals(int preempt_offset)
22247 - int nested = preempt_count() + rcu_preempt_depth();
22248 + int nested = preempt_count() + sched_rcu_preempt_depth();
22250 return (nested == preempt_offset);
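Among the core.c additions above, migrate_disable()/migrate_enable() give RT a way to keep a task on its current CPU without disabling preemption, and the calls nest via the per-task migrate_disable counter. A small usage sketch; the demo_* names are invented, and on !PREEMPT_RT_FULL builds the calls reduce to the stubs provided elsewhere in this patch.

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

/*
 * migrate_disable() pins current to the CPU it runs on (pin_current_cpu())
 * and increments the per-task migrate_disable counter, so calls nest.
 * Unlike preempt_disable(), the task may still block on sleeping locks.
 */
static void demo_touch_this_cpu(void)
{
	migrate_disable();
	this_cpu_inc(demo_counter);	/* the CPU cannot change underneath us */
	migrate_enable();
}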
22252 diff -Nur linux-4.4.46.orig/kernel/sched/cpudeadline.c linux-4.4.46/kernel/sched/cpudeadline.c
22253 --- linux-4.4.46.orig/kernel/sched/cpudeadline.c 2017-02-01 08:31:11.000000000 +0100
22254 +++ linux-4.4.46/kernel/sched/cpudeadline.c 2017-02-03 17:18:10.931619212 +0100
22255 @@ -103,10 +103,10 @@
22256 const struct sched_dl_entity *dl_se = &p->dl;
22259 - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
22260 + cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
22261 best_cpu = cpumask_any(later_mask);
22263 - } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
22264 + } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
22265 dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
22266 best_cpu = cpudl_maximum(cp);
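This hunk, like the cpupri.c, deadline.c and rt.c hunks that follow, replaces direct p->cpus_allowed / p->nr_cpus_allowed accesses with the tsk_cpus_allowed()/tsk_nr_cpus_allowed() helpers. Their definitions are added to include/linux/sched.h elsewhere in this patch and are not shown here; roughly, they collapse the affinity to the current CPU while the task is migrate-disabled, along these lines (a sketch, not the verbatim patch text):

/*
 * Sketch of the accessors used above.  __migrate_disabled() is the helper
 * already visible in the core.c hunks; everything else here is illustrative.
 */
static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
{
	if (__migrate_disabled(p))
		return cpumask_of(task_cpu(p));
	return &p->cpus_allowed;
}

static inline int tsk_nr_cpus_allowed(struct task_struct *p)
{
	if (__migrate_disabled(p))
		return 1;
	return p->nr_cpus_allowed;
}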
22268 diff -Nur linux-4.4.46.orig/kernel/sched/cpupri.c linux-4.4.46/kernel/sched/cpupri.c
22269 --- linux-4.4.46.orig/kernel/sched/cpupri.c 2017-02-01 08:31:11.000000000 +0100
22270 +++ linux-4.4.46/kernel/sched/cpupri.c 2017-02-03 17:18:10.931619212 +0100
22271 @@ -103,11 +103,11 @@
22275 - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
22276 + if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
22280 - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
22281 + cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
22284 * We have to ensure that we have at least one bit
22285 diff -Nur linux-4.4.46.orig/kernel/sched/cputime.c linux-4.4.46/kernel/sched/cputime.c
22286 --- linux-4.4.46.orig/kernel/sched/cputime.c 2017-02-01 08:31:11.000000000 +0100
22287 +++ linux-4.4.46/kernel/sched/cputime.c 2017-02-03 17:18:10.931619212 +0100
22288 @@ -685,7 +685,7 @@
22290 unsigned long long delta = vtime_delta(tsk);
22292 - WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
22293 + WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
22294 tsk->vtime_snap += delta;
22296 /* CHECKME: always safe to convert nsecs to cputime? */
22297 @@ -701,37 +701,37 @@
22299 void vtime_account_system(struct task_struct *tsk)
22301 - write_seqlock(&tsk->vtime_seqlock);
22302 + write_seqcount_begin(&tsk->vtime_seqcount);
22303 __vtime_account_system(tsk);
22304 - write_sequnlock(&tsk->vtime_seqlock);
22305 + write_seqcount_end(&tsk->vtime_seqcount);
22308 void vtime_gen_account_irq_exit(struct task_struct *tsk)
22310 - write_seqlock(&tsk->vtime_seqlock);
22311 + write_seqcount_begin(&tsk->vtime_seqcount);
22312 __vtime_account_system(tsk);
22313 if (context_tracking_in_user())
22314 tsk->vtime_snap_whence = VTIME_USER;
22315 - write_sequnlock(&tsk->vtime_seqlock);
22316 + write_seqcount_end(&tsk->vtime_seqcount);
22319 void vtime_account_user(struct task_struct *tsk)
22321 cputime_t delta_cpu;
22323 - write_seqlock(&tsk->vtime_seqlock);
22324 + write_seqcount_begin(&tsk->vtime_seqcount);
22325 delta_cpu = get_vtime_delta(tsk);
22326 tsk->vtime_snap_whence = VTIME_SYS;
22327 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
22328 - write_sequnlock(&tsk->vtime_seqlock);
22329 + write_seqcount_end(&tsk->vtime_seqcount);
22332 void vtime_user_enter(struct task_struct *tsk)
22334 - write_seqlock(&tsk->vtime_seqlock);
22335 + write_seqcount_begin(&tsk->vtime_seqcount);
22336 __vtime_account_system(tsk);
22337 tsk->vtime_snap_whence = VTIME_USER;
22338 - write_sequnlock(&tsk->vtime_seqlock);
22339 + write_seqcount_end(&tsk->vtime_seqcount);
22342 void vtime_guest_enter(struct task_struct *tsk)
22343 @@ -743,19 +743,19 @@
22344 * synchronization against the reader (task_gtime())
22345 * that can thus safely catch up with a tickless delta.
22347 - write_seqlock(&tsk->vtime_seqlock);
22348 + write_seqcount_begin(&tsk->vtime_seqcount);
22349 __vtime_account_system(tsk);
22350 current->flags |= PF_VCPU;
22351 - write_sequnlock(&tsk->vtime_seqlock);
22352 + write_seqcount_end(&tsk->vtime_seqcount);
22354 EXPORT_SYMBOL_GPL(vtime_guest_enter);
22356 void vtime_guest_exit(struct task_struct *tsk)
22358 - write_seqlock(&tsk->vtime_seqlock);
22359 + write_seqcount_begin(&tsk->vtime_seqcount);
22360 __vtime_account_system(tsk);
22361 current->flags &= ~PF_VCPU;
22362 - write_sequnlock(&tsk->vtime_seqlock);
22363 + write_seqcount_end(&tsk->vtime_seqcount);
22365 EXPORT_SYMBOL_GPL(vtime_guest_exit);
22367 @@ -768,24 +768,26 @@
22369 void arch_vtime_task_switch(struct task_struct *prev)
22371 - write_seqlock(&prev->vtime_seqlock);
22372 - prev->vtime_snap_whence = VTIME_SLEEPING;
22373 - write_sequnlock(&prev->vtime_seqlock);
22374 + write_seqcount_begin(&prev->vtime_seqcount);
22375 + prev->vtime_snap_whence = VTIME_INACTIVE;
22376 + write_seqcount_end(&prev->vtime_seqcount);
21378 - write_seqlock(&current->vtime_seqlock);
21379 + write_seqcount_begin(&current->vtime_seqcount);
22380 current->vtime_snap_whence = VTIME_SYS;
22381 current->vtime_snap = sched_clock_cpu(smp_processor_id());
21382 - write_sequnlock(&current->vtime_seqlock);
21383 + write_seqcount_end(&current->vtime_seqcount);
22386 void vtime_init_idle(struct task_struct *t, int cpu)
22388 unsigned long flags;
22390 - write_seqlock_irqsave(&t->vtime_seqlock, flags);
22391 + local_irq_save(flags);
22392 + write_seqcount_begin(&t->vtime_seqcount);
22393 t->vtime_snap_whence = VTIME_SYS;
22394 t->vtime_snap = sched_clock_cpu(cpu);
22395 - write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
22396 + write_seqcount_end(&t->vtime_seqcount);
22397 + local_irq_restore(flags);
22400 cputime_t task_gtime(struct task_struct *t)
22401 @@ -797,13 +799,13 @@
22405 - seq = read_seqbegin(&t->vtime_seqlock);
22406 + seq = read_seqcount_begin(&t->vtime_seqcount);
22409 if (t->flags & PF_VCPU)
22410 gtime += vtime_delta(t);
22412 - } while (read_seqretry(&t->vtime_seqlock, seq));
22413 + } while (read_seqcount_retry(&t->vtime_seqcount, seq));
22417 @@ -826,7 +828,7 @@
22421 - seq = read_seqbegin(&t->vtime_seqlock);
22422 + seq = read_seqcount_begin(&t->vtime_seqcount);
22426 @@ -834,7 +836,7 @@
22429 /* Task is sleeping, nothing to add */
22430 - if (t->vtime_snap_whence == VTIME_SLEEPING ||
22431 + if (t->vtime_snap_whence == VTIME_INACTIVE ||
22435 @@ -850,7 +852,7 @@
22436 if (t->vtime_snap_whence == VTIME_SYS)
22439 - } while (read_seqretry(&t->vtime_seqlock, seq));
22440 + } while (read_seqcount_retry(&t->vtime_seqcount, seq));
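The cputime.c changes above replace the per-task vtime seqlock with a plain seqcount: writers now provide their own serialization (interrupts disabled, as vtime_init_idle() does explicitly), and readers retry locklessly. A self-contained sketch of that writer/reader pairing; the demo_* names are invented.

#include <linux/seqlock.h>
#include <linux/types.h>

static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static u64 demo_stamp;

/* writer: serialization against other writers is the caller's business;
 * the seqcount only orders lockless readers against the update */
static void demo_update(u64 now)
{
	write_seqcount_begin(&demo_seq);
	demo_stamp = now;
	write_seqcount_end(&demo_seq);
}

/* lockless reader, retried when it raced with a writer */
static u64 demo_read(void)
{
	unsigned int seq;
	u64 val;

	do {
		seq = read_seqcount_begin(&demo_seq);
		val = demo_stamp;
	} while (read_seqcount_retry(&demo_seq, seq));

	return val;
}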
22444 diff -Nur linux-4.4.46.orig/kernel/sched/deadline.c linux-4.4.46/kernel/sched/deadline.c
22445 --- linux-4.4.46.orig/kernel/sched/deadline.c 2017-02-01 08:31:11.000000000 +0100
22446 +++ linux-4.4.46/kernel/sched/deadline.c 2017-02-03 17:18:10.935619367 +0100
22447 @@ -134,7 +134,7 @@
22449 struct task_struct *p = dl_task_of(dl_se);
22451 - if (p->nr_cpus_allowed > 1)
22452 + if (tsk_nr_cpus_allowed(p) > 1)
22453 dl_rq->dl_nr_migratory++;
22455 update_dl_migration(dl_rq);
22456 @@ -144,7 +144,7 @@
22458 struct task_struct *p = dl_task_of(dl_se);
22460 - if (p->nr_cpus_allowed > 1)
22461 + if (tsk_nr_cpus_allowed(p) > 1)
22462 dl_rq->dl_nr_migratory--;
22464 update_dl_migration(dl_rq);
22465 @@ -697,6 +697,7 @@
22467 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
22468 timer->function = dl_task_timer;
22469 + timer->irqsafe = 1;
22473 @@ -989,7 +990,7 @@
22475 enqueue_dl_entity(&p->dl, pi_se, flags);
22477 - if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
22478 + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
22479 enqueue_pushable_dl_task(rq, p);
22482 @@ -1067,9 +1068,9 @@
22483 * try to make it stay here, it might be important.
22485 if (unlikely(dl_task(curr)) &&
22486 - (curr->nr_cpus_allowed < 2 ||
22487 + (tsk_nr_cpus_allowed(curr) < 2 ||
22488 !dl_entity_preempt(&p->dl, &curr->dl)) &&
22489 - (p->nr_cpus_allowed > 1)) {
22490 + (tsk_nr_cpus_allowed(p) > 1)) {
22491 int target = find_later_rq(p);
22493 if (target != -1 &&
22494 @@ -1090,7 +1091,7 @@
22495 * Current can't be migrated, useless to reschedule,
22496 * let's hope p can move out.
22498 - if (rq->curr->nr_cpus_allowed == 1 ||
22499 + if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
22500 cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
22503 @@ -1098,7 +1099,7 @@
22504 * p is migratable, so let's not schedule it and
22505 * see if it is pushed or pulled somewhere else.
22507 - if (p->nr_cpus_allowed != 1 &&
22508 + if (tsk_nr_cpus_allowed(p) != 1 &&
22509 cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
22512 @@ -1212,7 +1213,7 @@
22514 update_curr_dl(rq);
22516 - if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
22517 + if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
22518 enqueue_pushable_dl_task(rq, p);
22521 @@ -1335,7 +1336,7 @@
22522 if (unlikely(!later_mask))
22525 - if (task->nr_cpus_allowed == 1)
22526 + if (tsk_nr_cpus_allowed(task) == 1)
22530 @@ -1441,7 +1442,7 @@
22531 if (double_lock_balance(rq, later_rq)) {
22532 if (unlikely(task_rq(task) != rq ||
22533 !cpumask_test_cpu(later_rq->cpu,
22534 - &task->cpus_allowed) ||
22535 + tsk_cpus_allowed(task)) ||
22536 task_running(rq, task) ||
22537 !task_on_rq_queued(task))) {
22538 double_unlock_balance(rq, later_rq);
22539 @@ -1480,7 +1481,7 @@
22541 BUG_ON(rq->cpu != task_cpu(p));
22542 BUG_ON(task_current(rq, p));
22543 - BUG_ON(p->nr_cpus_allowed <= 1);
22544 + BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
22546 BUG_ON(!task_on_rq_queued(p));
22547 BUG_ON(!dl_task(p));
22548 @@ -1519,7 +1520,7 @@
22550 if (dl_task(rq->curr) &&
22551 dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
22552 - rq->curr->nr_cpus_allowed > 1) {
22553 + tsk_nr_cpus_allowed(rq->curr) > 1) {
22557 @@ -1666,9 +1667,9 @@
22559 if (!task_running(rq, p) &&
22560 !test_tsk_need_resched(rq->curr) &&
22561 - p->nr_cpus_allowed > 1 &&
22562 + tsk_nr_cpus_allowed(p) > 1 &&
22563 dl_task(rq->curr) &&
22564 - (rq->curr->nr_cpus_allowed < 2 ||
22565 + (tsk_nr_cpus_allowed(rq->curr) < 2 ||
22566 !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
22569 @@ -1769,7 +1770,7 @@
22571 if (task_on_rq_queued(p) && rq->curr != p) {
22573 - if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
22574 + if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
22575 queue_push_tasks(rq);
22577 if (dl_task(rq->curr))
22578 diff -Nur linux-4.4.46.orig/kernel/sched/debug.c linux-4.4.46/kernel/sched/debug.c
22579 --- linux-4.4.46.orig/kernel/sched/debug.c 2017-02-01 08:31:11.000000000 +0100
22580 +++ linux-4.4.46/kernel/sched/debug.c 2017-02-03 17:18:10.935619367 +0100
22581 @@ -251,6 +251,9 @@
22586 + P(rt_nr_migratory);
22591 @@ -635,6 +638,10 @@
22595 +#ifdef CONFIG_PREEMPT_RT_FULL
22596 + P(migrate_disable);
22598 + P(nr_cpus_allowed);
22602 diff -Nur linux-4.4.46.orig/kernel/sched/fair.c linux-4.4.46/kernel/sched/fair.c
22603 --- linux-4.4.46.orig/kernel/sched/fair.c 2017-02-01 08:31:11.000000000 +0100
22604 +++ linux-4.4.46/kernel/sched/fair.c 2017-02-03 17:18:10.935619367 +0100
22605 @@ -3166,7 +3166,7 @@
22606 ideal_runtime = sched_slice(cfs_rq, curr);
22607 delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
22608 if (delta_exec > ideal_runtime) {
22609 - resched_curr(rq_of(cfs_rq));
22610 + resched_curr_lazy(rq_of(cfs_rq));
22612 * The current task ran long enough, ensure it doesn't get
22613 * re-elected due to buddy favours.
22614 @@ -3190,7 +3190,7 @@
22617 if (delta > ideal_runtime)
22618 - resched_curr(rq_of(cfs_rq));
22619 + resched_curr_lazy(rq_of(cfs_rq));
22623 @@ -3330,7 +3330,7 @@
22624 * validating it and just reschedule.
22627 - resched_curr(rq_of(cfs_rq));
22628 + resched_curr_lazy(rq_of(cfs_rq));
22632 @@ -3512,7 +3512,7 @@
22633 * hierarchy can be throttled
22635 if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
22636 - resched_curr(rq_of(cfs_rq));
22637 + resched_curr_lazy(rq_of(cfs_rq));
22640 static __always_inline
22641 @@ -4124,7 +4124,7 @@
22645 - resched_curr(rq);
22646 + resched_curr_lazy(rq);
22649 hrtick_start(rq, delta);
22650 @@ -5213,7 +5213,7 @@
22654 - resched_curr(rq);
22655 + resched_curr_lazy(rq);
22657 * Only set the backward buddy when the current task is still
22658 * on the rq. This can happen when a wakeup gets interleaved
22659 @@ -7964,7 +7964,7 @@
22660 * 'current' within the tree based on its new key value.
22662 swap(curr->vruntime, se->vruntime);
22663 - resched_curr(rq);
22664 + resched_curr_lazy(rq);
22667 se->vruntime -= cfs_rq->min_vruntime;
22668 @@ -7989,7 +7989,7 @@
22670 if (rq->curr == p) {
22671 if (p->prio > oldprio)
22672 - resched_curr(rq);
22673 + resched_curr_lazy(rq);
22675 check_preempt_curr(rq, p, 0);
22677 diff -Nur linux-4.4.46.orig/kernel/sched/features.h linux-4.4.46/kernel/sched/features.h
22678 --- linux-4.4.46.orig/kernel/sched/features.h 2017-02-01 08:31:11.000000000 +0100
22679 +++ linux-4.4.46/kernel/sched/features.h 2017-02-03 17:18:10.935619367 +0100
22680 @@ -45,11 +45,19 @@
22682 SCHED_FEAT(NONTASK_CAPACITY, true)
22684 +#ifdef CONFIG_PREEMPT_RT_FULL
22685 +SCHED_FEAT(TTWU_QUEUE, false)
22686 +# ifdef CONFIG_PREEMPT_LAZY
22687 +SCHED_FEAT(PREEMPT_LAZY, true)
22692 * Queue remote wakeups on the target CPU and process them
22693 * using the scheduler IPI. Reduces rq->lock contention/bounces.
22695 SCHED_FEAT(TTWU_QUEUE, true)
22698 #ifdef HAVE_RT_PUSH_IPI
22700 diff -Nur linux-4.4.46.orig/kernel/sched/Makefile linux-4.4.46/kernel/sched/Makefile
22701 --- linux-4.4.46.orig/kernel/sched/Makefile 2017-02-01 08:31:11.000000000 +0100
22702 +++ linux-4.4.46/kernel/sched/Makefile 2017-02-03 17:18:10.931619212 +0100
22705 obj-y += core.o loadavg.o clock.o cputime.o
22706 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
22707 -obj-y += wait.o completion.o idle.o
22708 +obj-y += wait.o swait.o swork.o completion.o idle.o
22709 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
22710 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
22711 obj-$(CONFIG_SCHEDSTATS) += stats.o
22712 diff -Nur linux-4.4.46.orig/kernel/sched/rt.c linux-4.4.46/kernel/sched/rt.c
22713 --- linux-4.4.46.orig/kernel/sched/rt.c 2017-02-01 08:31:11.000000000 +0100
22714 +++ linux-4.4.46/kernel/sched/rt.c 2017-02-03 17:18:10.935619367 +0100
22717 hrtimer_init(&rt_b->rt_period_timer,
22718 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
22719 + rt_b->rt_period_timer.irqsafe = 1;
22720 rt_b->rt_period_timer.function = sched_rt_period_timer;
22724 rt_rq->push_cpu = nr_cpu_ids;
22725 raw_spin_lock_init(&rt_rq->push_lock);
22726 init_irq_work(&rt_rq->push_work, push_irq_work_func);
22727 + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
22729 #endif /* CONFIG_SMP */
22730 /* We start in dequeued state, because no RT tasks are queued */
22731 @@ -326,7 +328,7 @@
22732 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
22734 rt_rq->rt_nr_total++;
22735 - if (p->nr_cpus_allowed > 1)
22736 + if (tsk_nr_cpus_allowed(p) > 1)
22737 rt_rq->rt_nr_migratory++;
22739 update_rt_migration(rt_rq);
22740 @@ -343,7 +345,7 @@
22741 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
22743 rt_rq->rt_nr_total--;
22744 - if (p->nr_cpus_allowed > 1)
22745 + if (tsk_nr_cpus_allowed(p) > 1)
22746 rt_rq->rt_nr_migratory--;
22748 update_rt_migration(rt_rq);
22749 @@ -1262,7 +1264,7 @@
22751 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
22753 - if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
22754 + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
22755 enqueue_pushable_task(rq, p);
22758 @@ -1351,7 +1353,7 @@
22759 * will have to sort it out.
22761 if (curr && unlikely(rt_task(curr)) &&
22762 - (curr->nr_cpus_allowed < 2 ||
22763 + (tsk_nr_cpus_allowed(curr) < 2 ||
22764 curr->prio <= p->prio)) {
22765 int target = find_lowest_rq(p);
22767 @@ -1375,7 +1377,7 @@
22768 * Current can't be migrated, useless to reschedule,
22769 * let's hope p can move out.
22771 - if (rq->curr->nr_cpus_allowed == 1 ||
22772 + if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
22773 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
22776 @@ -1383,7 +1385,7 @@
22777 * p is migratable, so let's not schedule it and
22778 * see if it is pushed or pulled somewhere else.
22780 - if (p->nr_cpus_allowed != 1
22781 + if (tsk_nr_cpus_allowed(p) != 1
22782 && cpupri_find(&rq->rd->cpupri, p, NULL))
22785 @@ -1517,7 +1519,7 @@
22786 * The previous task needs to be made eligible for pushing
22787 * if it is still active
22789 - if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
22790 + if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
22791 enqueue_pushable_task(rq, p);
22794 @@ -1567,7 +1569,7 @@
22795 if (unlikely(!lowest_mask))
22798 - if (task->nr_cpus_allowed == 1)
22799 + if (tsk_nr_cpus_allowed(task) == 1)
22800 return -1; /* No other targets possible */
22802 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
22803 @@ -1699,7 +1701,7 @@
22805 BUG_ON(rq->cpu != task_cpu(p));
22806 BUG_ON(task_current(rq, p));
22807 - BUG_ON(p->nr_cpus_allowed <= 1);
22808 + BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
22810 BUG_ON(!task_on_rq_queued(p));
22811 BUG_ON(!rt_task(p));
22812 @@ -2059,9 +2061,9 @@
22814 if (!task_running(rq, p) &&
22815 !test_tsk_need_resched(rq->curr) &&
22816 - p->nr_cpus_allowed > 1 &&
22817 + tsk_nr_cpus_allowed(p) > 1 &&
22818 (dl_task(rq->curr) || rt_task(rq->curr)) &&
22819 - (rq->curr->nr_cpus_allowed < 2 ||
22820 + (tsk_nr_cpus_allowed(rq->curr) < 2 ||
22821 rq->curr->prio <= p->prio))
22824 @@ -2134,7 +2136,7 @@
22826 if (task_on_rq_queued(p) && rq->curr != p) {
22828 - if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
22829 + if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
22830 queue_push_tasks(rq);
22832 if (p->prio < rq->curr->prio)
22833 diff -Nur linux-4.4.46.orig/kernel/sched/sched.h linux-4.4.46/kernel/sched/sched.h
22834 --- linux-4.4.46.orig/kernel/sched/sched.h 2017-02-01 08:31:11.000000000 +0100
22835 +++ linux-4.4.46/kernel/sched/sched.h 2017-02-03 17:18:10.935619367 +0100
22836 @@ -1100,6 +1100,7 @@
22837 #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
22838 #define WF_FORK 0x02 /* child wakeup after fork */
22839 #define WF_MIGRATED 0x4 /* internal use, task got migrated */
22840 +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
22843 * To aid in avoiding the subversion of "niceness" due to uneven distribution
22844 @@ -1299,6 +1300,15 @@
22845 extern void resched_curr(struct rq *rq);
22846 extern void resched_cpu(int cpu);
22848 +#ifdef CONFIG_PREEMPT_LAZY
22849 +extern void resched_curr_lazy(struct rq *rq);
22851 +static inline void resched_curr_lazy(struct rq *rq)
22853 + resched_curr(rq);
22857 extern struct rt_bandwidth def_rt_bandwidth;
22858 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
22860 diff -Nur linux-4.4.46.orig/kernel/sched/swait.c linux-4.4.46/kernel/sched/swait.c
22861 --- linux-4.4.46.orig/kernel/sched/swait.c 1970-01-01 01:00:00.000000000 +0100
22862 +++ linux-4.4.46/kernel/sched/swait.c 2017-02-03 17:18:10.935619367 +0100
22864 +#include <linux/sched.h>
22865 +#include <linux/swait.h>
22866 +#include <linux/suspend.h>
22868 +void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
22869 + struct lock_class_key *key)
22871 + raw_spin_lock_init(&q->lock);
22872 + lockdep_set_class_and_name(&q->lock, key, name);
22873 + INIT_LIST_HEAD(&q->task_list);
22875 +EXPORT_SYMBOL(__init_swait_queue_head);
22878 + * The thing about the wake_up_state() return value; I think we can ignore it.
22880 + * If for some reason it would return 0, that means the previously waiting
22881 + * task is already running, so it will observe condition true (or has already).
22883 +void swake_up_locked(struct swait_queue_head *q)
22885 + struct swait_queue *curr;
22887 + if (list_empty(&q->task_list))
22890 + curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
22891 + wake_up_process(curr->task);
22892 + list_del_init(&curr->task_list);
22894 +EXPORT_SYMBOL(swake_up_locked);
22896 +void swake_up_all_locked(struct swait_queue_head *q)
22898 + struct swait_queue *curr;
22901 + while (!list_empty(&q->task_list)) {
22903 + curr = list_first_entry(&q->task_list, typeof(*curr),
22905 + wake_up_process(curr->task);
22906 + list_del_init(&curr->task_list);
22909 + if (pm_in_action)
22911 + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
22913 +EXPORT_SYMBOL(swake_up_all_locked);
22915 +void swake_up(struct swait_queue_head *q)
22917 + unsigned long flags;
22919 + if (!swait_active(q))
22922 + raw_spin_lock_irqsave(&q->lock, flags);
22923 + swake_up_locked(q);
22924 + raw_spin_unlock_irqrestore(&q->lock, flags);
22926 +EXPORT_SYMBOL(swake_up);
22929 + * Does not allow usage from IRQ disabled, since we must be able to
22930 + * release IRQs to guarantee bounded hold time.
22932 +void swake_up_all(struct swait_queue_head *q)
22934 + struct swait_queue *curr;
22937 + if (!swait_active(q))
22940 + raw_spin_lock_irq(&q->lock);
22941 + list_splice_init(&q->task_list, &tmp);
22942 + while (!list_empty(&tmp)) {
22943 + curr = list_first_entry(&tmp, typeof(*curr), task_list);
22945 + wake_up_state(curr->task, TASK_NORMAL);
22946 + list_del_init(&curr->task_list);
22948 + if (list_empty(&tmp))
22951 + raw_spin_unlock_irq(&q->lock);
22952 + raw_spin_lock_irq(&q->lock);
22954 + raw_spin_unlock_irq(&q->lock);
22956 +EXPORT_SYMBOL(swake_up_all);
22958 +void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
22960 + wait->task = current;
22961 + if (list_empty(&wait->task_list))
22962 + list_add(&wait->task_list, &q->task_list);
22965 +void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
22967 + unsigned long flags;
22969 + raw_spin_lock_irqsave(&q->lock, flags);
22970 + __prepare_to_swait(q, wait);
22971 + set_current_state(state);
22972 + raw_spin_unlock_irqrestore(&q->lock, flags);
22974 +EXPORT_SYMBOL(prepare_to_swait);
22976 +long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
22978 + if (signal_pending_state(state, current))
22979 + return -ERESTARTSYS;
22981 + prepare_to_swait(q, wait, state);
22985 +EXPORT_SYMBOL(prepare_to_swait_event);
22987 +void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
22989 + __set_current_state(TASK_RUNNING);
22990 + if (!list_empty(&wait->task_list))
22991 + list_del_init(&wait->task_list);
22994 +void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
22996 + unsigned long flags;
22998 + __set_current_state(TASK_RUNNING);
23000 + if (!list_empty_careful(&wait->task_list)) {
23001 + raw_spin_lock_irqsave(&q->lock, flags);
23002 + list_del_init(&wait->task_list);
23003 + raw_spin_unlock_irqrestore(&q->lock, flags);
23006 +EXPORT_SYMBOL(finish_swait);
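For reference, a minimal usage sketch of the simple waitqueue API added above: one waiter, one waker, a raw-spinlock-protected queue head. The demo_* names are invented; the DECLARE_SWAIT_QUEUE_HEAD() and swait_event_interruptible() helpers from the accompanying include/linux/swait.h are assumed.

#include <linux/swait.h>
#include <linux/compiler.h>
#include <linux/types.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
static bool demo_ready;

/* waiter: sleeps until demo_ready is observed true (or a signal arrives) */
static int demo_wait(void)
{
	return swait_event_interruptible(demo_wq, READ_ONCE(demo_ready));
}

/* waker: swake_up() only takes the raw q->lock and wakes a single task,
 * so it is usable from hard irq context or under a raw spinlock on RT */
static void demo_signal(void)
{
	WRITE_ONCE(demo_ready, true);
	swake_up(&demo_wq);
}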
23007 diff -Nur linux-4.4.46.orig/kernel/sched/swork.c linux-4.4.46/kernel/sched/swork.c
23008 --- linux-4.4.46.orig/kernel/sched/swork.c 1970-01-01 01:00:00.000000000 +0100
23009 +++ linux-4.4.46/kernel/sched/swork.c 2017-02-03 17:18:10.935619367 +0100
23012 + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
23014 + * Provides a framework for enqueuing callbacks from irq context in a
23015 + * PREEMPT_RT_FULL safe way. The callbacks are executed in kthread context.
23018 +#include <linux/swait.h>
23019 +#include <linux/swork.h>
23020 +#include <linux/kthread.h>
23021 +#include <linux/slab.h>
23022 +#include <linux/spinlock.h>
23023 +#include <linux/export.h>
23025 +#define SWORK_EVENT_PENDING (1 << 0)
23027 +static DEFINE_MUTEX(worker_mutex);
23028 +static struct sworker *glob_worker;
23031 + struct list_head events;
23032 + struct swait_queue_head wq;
23034 + raw_spinlock_t lock;
23036 + struct task_struct *task;
23040 +static bool swork_readable(struct sworker *worker)
23044 + if (kthread_should_stop())
23047 + raw_spin_lock_irq(&worker->lock);
23048 + r = !list_empty(&worker->events);
23049 + raw_spin_unlock_irq(&worker->lock);
23054 +static int swork_kthread(void *arg)
23056 + struct sworker *worker = arg;
23059 + swait_event_interruptible(worker->wq,
23060 + swork_readable(worker));
23061 + if (kthread_should_stop())
23064 + raw_spin_lock_irq(&worker->lock);
23065 + while (!list_empty(&worker->events)) {
23066 + struct swork_event *sev;
23068 + sev = list_first_entry(&worker->events,
23069 + struct swork_event, item);
23070 + list_del(&sev->item);
23071 + raw_spin_unlock_irq(&worker->lock);
23073 + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
23076 + raw_spin_lock_irq(&worker->lock);
23078 + raw_spin_unlock_irq(&worker->lock);
23083 +static struct sworker *swork_create(void)
23085 + struct sworker *worker;
23087 + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
23089 + return ERR_PTR(-ENOMEM);
23091 + INIT_LIST_HEAD(&worker->events);
23092 + raw_spin_lock_init(&worker->lock);
23093 + init_swait_queue_head(&worker->wq);
23095 + worker->task = kthread_run(swork_kthread, worker, "kswork");
23096 + if (IS_ERR(worker->task)) {
23098 + return ERR_PTR(-ENOMEM);
23104 +static void swork_destroy(struct sworker *worker)
23106 + kthread_stop(worker->task);
23108 + WARN_ON(!list_empty(&worker->events));
23113 + * swork_queue - queue swork
23115 + * Returns %false if @sev was already on a queue, %true otherwise.
23117 + * The work is queued and processed on a random CPU.
23119 +bool swork_queue(struct swork_event *sev)
23121 + unsigned long flags;
23123 + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
23126 + raw_spin_lock_irqsave(&glob_worker->lock, flags);
23127 + list_add_tail(&sev->item, &glob_worker->events);
23128 + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
23130 + swake_up(&glob_worker->wq);
23133 +EXPORT_SYMBOL_GPL(swork_queue);
23136 + * swork_get - get an instance of the sworker
23138 + * Returns a negative error code if the initialization of the worker
23139 + * failed, %0 otherwise.
23142 +int swork_get(void)
23144 + struct sworker *worker;
23146 + mutex_lock(&worker_mutex);
23147 + if (!glob_worker) {
23148 + worker = swork_create();
23149 + if (IS_ERR(worker)) {
23150 + mutex_unlock(&worker_mutex);
23154 + glob_worker = worker;
23157 + glob_worker->refs++;
23158 + mutex_unlock(&worker_mutex);
23162 +EXPORT_SYMBOL_GPL(swork_get);
23165 + * swork_put - puts an instance of the sworker
23167 + * Will destroy the sworker thread. This function must not be called until all
23168 + * queued events have been completed.
23170 +void swork_put(void)
23172 + mutex_lock(&worker_mutex);
23174 + glob_worker->refs--;
23175 + if (glob_worker->refs > 0)
23178 + swork_destroy(glob_worker);
23179 + glob_worker = NULL;
23181 + mutex_unlock(&worker_mutex);
23183 +EXPORT_SYMBOL_GPL(swork_put);
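A hedged usage sketch for the swork framework above: a client takes a reference on the global kswork thread with swork_get(), queues events from IRQ context with swork_queue(), and drops the reference with swork_put(). The INIT_SWORK() initializer and the func member of struct swork_event come from include/linux/swork.h, which is added elsewhere in this patch and assumed here.

#include <linux/swork.h>
#include <linux/printk.h>

/* runs in the kswork kthread, i.e. full preemptible process context */
static void demo_swork_handler(struct swork_event *ev)
{
	pr_info("deferred work ran\n");
}

static struct swork_event demo_event;

static int demo_setup(void)
{
	int ret;

	ret = swork_get();	/* create/refcount the global kswork thread */
	if (ret)
		return ret;
	INIT_SWORK(&demo_event, demo_swork_handler);
	return 0;
}

/* may be called from hard irq context, even with PREEMPT_RT_FULL */
static void demo_raise(void)
{
	swork_queue(&demo_event);
}

static void demo_teardown(void)
{
	swork_put();	/* only after all queued events have completed */
}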
23184 diff -Nur linux-4.4.46.orig/kernel/signal.c linux-4.4.46/kernel/signal.c
23185 --- linux-4.4.46.orig/kernel/signal.c 2017-02-01 08:31:11.000000000 +0100
23186 +++ linux-4.4.46/kernel/signal.c 2017-02-03 17:18:10.935619367 +0100
23188 #include <linux/export.h>
23189 #include <linux/init.h>
23190 #include <linux/sched.h>
23191 +#include <linux/sched/rt.h>
23192 #include <linux/fs.h>
23193 #include <linux/tty.h>
23194 #include <linux/binfmts.h>
23195 @@ -352,13 +353,30 @@
23199 +static inline struct sigqueue *get_task_cache(struct task_struct *t)
23201 + struct sigqueue *q = t->sigqueue_cache;
23203 + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
23208 +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
23210 + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
23216 * allocate a new signal queue record
23217 * - this may be called without locks if and only if t == current, otherwise an
23218 * appropriate lock must be held to stop the target task from exiting
23220 static struct sigqueue *
23221 -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
23222 +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
23223 + int override_rlimit, int fromslab)
23225 struct sigqueue *q = NULL;
23226 struct user_struct *user;
23227 @@ -375,7 +393,10 @@
23228 if (override_rlimit ||
23229 atomic_read(&user->sigpending) <=
23230 task_rlimit(t, RLIMIT_SIGPENDING)) {
23231 - q = kmem_cache_alloc(sigqueue_cachep, flags);
23233 + q = get_task_cache(t);
23235 + q = kmem_cache_alloc(sigqueue_cachep, flags);
23237 print_dropped_signal(sig);
23239 @@ -392,6 +413,13 @@
23243 +static struct sigqueue *
23244 +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
23245 + int override_rlimit)
23247 + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
23250 static void __sigqueue_free(struct sigqueue *q)
23252 if (q->flags & SIGQUEUE_PREALLOC)
23253 @@ -401,6 +429,21 @@
23254 kmem_cache_free(sigqueue_cachep, q);
23257 +static void sigqueue_free_current(struct sigqueue *q)
23259 + struct user_struct *up;
23261 + if (q->flags & SIGQUEUE_PREALLOC)
23265 + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
23266 + atomic_dec(&up->sigpending);
23269 + __sigqueue_free(q);
23272 void flush_sigqueue(struct sigpending *queue)
23274 struct sigqueue *q;
23275 @@ -414,6 +457,21 @@
23279 + * Called from __exit_signal. Flush tsk->pending and
23280 + * tsk->sigqueue_cache
23282 +void flush_task_sigqueue(struct task_struct *tsk)
23284 + struct sigqueue *q;
23286 + flush_sigqueue(&tsk->pending);
23288 + q = get_task_cache(tsk);
23290 + kmem_cache_free(sigqueue_cachep, q);
23294 * Flush all pending signals for this kthread.
23296 void flush_signals(struct task_struct *t)
23297 @@ -525,7 +583,7 @@
23299 list_del_init(&first->list);
23300 copy_siginfo(info, &first->info);
23301 - __sigqueue_free(first);
23302 + sigqueue_free_current(first);
23305 * Ok, it wasn't in the queue. This must be
23306 @@ -560,6 +618,8 @@
23310 + WARN_ON_ONCE(tsk != current);
23312 /* We only dequeue private signals from ourselves, we don't let
23313 * signalfd steal them
23315 @@ -1156,8 +1216,8 @@
23316 * We don't want to have recursive SIGSEGV's etc, for example,
23317 * that is why we also clear SIGNAL_UNKILLABLE.
23320 -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
23322 +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
23324 unsigned long int flags;
23325 int ret, blocked, ignored;
23326 @@ -1182,6 +1242,39 @@
23330 +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
23333 + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
23334 + * since it can not enable preemption, and the signal code's spin_locks
23335 + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
23336 + * send the signal on exit of the trap.
23338 +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
23339 + if (in_atomic()) {
23340 + if (WARN_ON_ONCE(t != current))
23342 + if (WARN_ON_ONCE(t->forced_info.si_signo))
23345 + if (is_si_special(info)) {
23346 + WARN_ON_ONCE(info != SEND_SIG_PRIV);
23347 + t->forced_info.si_signo = sig;
23348 + t->forced_info.si_errno = 0;
23349 + t->forced_info.si_code = SI_KERNEL;
23350 + t->forced_info.si_pid = 0;
23351 + t->forced_info.si_uid = 0;
23353 + t->forced_info = *info;
23356 + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
23360 + return do_force_sig_info(sig, info, t);
23364 * Nuke all other threads in the group.
23366 @@ -1216,12 +1309,12 @@
23367 * Disable interrupts early to avoid deadlocks.
23368 * See rcu_read_unlock() comment header for details.
23370 - local_irq_save(*flags);
23371 + local_irq_save_nort(*flags);
23373 sighand = rcu_dereference(tsk->sighand);
23374 if (unlikely(sighand == NULL)) {
23376 - local_irq_restore(*flags);
23377 + local_irq_restore_nort(*flags);
23381 @@ -1242,7 +1335,7 @@
23383 spin_unlock(&sighand->siglock);
23385 - local_irq_restore(*flags);
23386 + local_irq_restore_nort(*flags);
23390 @@ -1485,7 +1578,8 @@
23392 struct sigqueue *sigqueue_alloc(void)
23394 - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
23395 + /* Preallocated sigqueue objects always come from the slabcache! */
23396 + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
23399 q->flags |= SIGQUEUE_PREALLOC;
23400 @@ -1846,15 +1940,7 @@
23401 if (gstop_done && ptrace_reparented(current))
23402 do_notify_parent_cldstop(current, false, why);
23405 - * Don't want to allow preemption here, because
23406 - * sys_ptrace() needs this task to be inactive.
23408 - * XXX: implement read_unlock_no_resched().
23410 - preempt_disable();
23411 read_unlock(&tasklist_lock);
23412 - preempt_enable_no_resched();
23413 freezable_schedule();
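The force_sig_info() split above defers trap-context signals by stashing the siginfo in task_struct::forced_info and setting TIF_NOTIFY_RESUME. The delivery side lives in the architecture's signal-return path elsewhere in this patch (not shown here); it is assumed to look roughly like this hypothetical helper.

#include <linux/sched.h>
#include <linux/signal.h>

/* hypothetical helper: what the arch signal-return path is expected to do
 * with a signal that force_sig_info() had to defer from trap context */
static void demo_deliver_deferred_signal(void)
{
#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
	struct task_struct *t = current;

	if (unlikely(t->forced_info.si_signo)) {
		force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
		t->forced_info.si_signo = 0;
	}
#endif
}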
23416 diff -Nur linux-4.4.46.orig/kernel/softirq.c linux-4.4.46/kernel/softirq.c
23417 --- linux-4.4.46.orig/kernel/softirq.c 2017-02-01 08:31:11.000000000 +0100
23418 +++ linux-4.4.46/kernel/softirq.c 2017-02-03 17:18:10.935619367 +0100
23419 @@ -21,10 +21,12 @@
23420 #include <linux/freezer.h>
23421 #include <linux/kthread.h>
23422 #include <linux/rcupdate.h>
23423 +#include <linux/delay.h>
23424 #include <linux/ftrace.h>
23425 #include <linux/smp.h>
23426 #include <linux/smpboot.h>
23427 #include <linux/tick.h>
23428 +#include <linux/locallock.h>
23429 #include <linux/irq.h>
23431 #define CREATE_TRACE_POINTS
23432 @@ -56,12 +58,108 @@
23433 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
23435 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
23436 +#ifdef CONFIG_PREEMPT_RT_FULL
23437 +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
23438 +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
23441 const char * const softirq_to_name[NR_SOFTIRQS] = {
23442 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
23443 "TASKLET", "SCHED", "HRTIMER", "RCU"
23446 +#ifdef CONFIG_NO_HZ_COMMON
23447 +# ifdef CONFIG_PREEMPT_RT_FULL
23449 +struct softirq_runner {
23450 + struct task_struct *runner[NR_SOFTIRQS];
23453 +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
23455 +static inline void softirq_set_runner(unsigned int sirq)
23457 + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
23459 + sr->runner[sirq] = current;
23462 +static inline void softirq_clr_runner(unsigned int sirq)
23464 + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
23466 + sr->runner[sirq] = NULL;
23470 + * On preempt-rt a softirq running context might be blocked on a
23471 + * lock. There might be no other runnable task on this CPU because the
23472 + * lock owner runs on some other CPU. So we have to go into idle with
23473 + * the pending bit set. Therefore we need to check this, otherwise we
23474 + * would warn about false positives which confuse users and defeat the
23475 + * whole purpose of this test.
23477 + * This code is called with interrupts disabled.
23479 +void softirq_check_pending_idle(void)
23481 + static int rate_limit;
23482 + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
23486 + if (rate_limit >= 10)
23489 + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
23490 + for (i = 0; i < NR_SOFTIRQS; i++) {
23491 + struct task_struct *tsk = sr->runner[i];
23494 + * The wakeup code in rtmutex.c wakes up the task
23495 + * _before_ it sets pi_blocked_on to NULL under
23496 + * tsk->pi_lock. So we need to check for both: state
23497 + * and pi_blocked_on.
23500 + raw_spin_lock(&tsk->pi_lock);
23501 + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
23502 + /* Clear all bits pending in that task */
23503 + warnpending &= ~(tsk->softirqs_raised);
23504 + warnpending &= ~(1 << i);
23506 + raw_spin_unlock(&tsk->pi_lock);
23510 + if (warnpending) {
23511 + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
23518 + * On !PREEMPT_RT we just printk rate limited:
23520 +void softirq_check_pending_idle(void)
23522 + static int rate_limit;
23524 + if (rate_limit < 10 &&
23525 + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
23526 + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
23527 + local_softirq_pending());
23533 +#else /* !CONFIG_NO_HZ_COMMON */
23534 +static inline void softirq_set_runner(unsigned int sirq) { }
23535 +static inline void softirq_clr_runner(unsigned int sirq) { }
23539 * we cannot loop indefinitely here to avoid userspace starvation,
23540 * but we also don't want to introduce a worst case 1/HZ latency
23541 @@ -77,6 +175,79 @@
23542 wake_up_process(tsk);
23545 +#ifdef CONFIG_PREEMPT_RT_FULL
23546 +static void wakeup_timer_softirqd(void)
23548 + /* Interrupts are disabled: no need to stop preemption */
23549 + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
23551 + if (tsk && tsk->state != TASK_RUNNING)
23552 + wake_up_process(tsk);
23556 +static void handle_softirq(unsigned int vec_nr)
23558 + struct softirq_action *h = softirq_vec + vec_nr;
23561 + prev_count = preempt_count();
23563 + kstat_incr_softirqs_this_cpu(vec_nr);
23565 + trace_softirq_entry(vec_nr);
23567 + trace_softirq_exit(vec_nr);
23568 + if (unlikely(prev_count != preempt_count())) {
23569 + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
23570 + vec_nr, softirq_to_name[vec_nr], h->action,
23571 + prev_count, preempt_count());
23572 + preempt_count_set(prev_count);
23576 +#ifndef CONFIG_PREEMPT_RT_FULL
23577 +static inline int ksoftirqd_softirq_pending(void)
23579 + return local_softirq_pending();
23582 +static void handle_pending_softirqs(u32 pending)
23584 + struct softirq_action *h = softirq_vec;
23587 + local_irq_enable();
23591 + while ((softirq_bit = ffs(pending))) {
23592 + unsigned int vec_nr;
23594 + h += softirq_bit - 1;
23595 + vec_nr = h - softirq_vec;
23596 + handle_softirq(vec_nr);
23599 + pending >>= softirq_bit;
23603 + local_irq_disable();
23606 +static void run_ksoftirqd(unsigned int cpu)
23608 + local_irq_disable();
23609 + if (ksoftirqd_softirq_pending()) {
23611 + local_irq_enable();
23612 + cond_resched_rcu_qs();
23615 + local_irq_enable();
23619 * preempt_count and SOFTIRQ_OFFSET usage:
23620 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
23621 @@ -116,9 +287,9 @@
23623 if (preempt_count() == cnt) {
23624 #ifdef CONFIG_DEBUG_PREEMPT
23625 - current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
23626 + current->preempt_disable_ip = get_lock_parent_ip();
23628 - trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
23629 + trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
23632 EXPORT_SYMBOL(__local_bh_disable_ip);
23633 @@ -232,10 +403,8 @@
23634 unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
23635 unsigned long old_flags = current->flags;
23636 int max_restart = MAX_SOFTIRQ_RESTART;
23637 - struct softirq_action *h;
23643 * Mask out PF_MEMALLOC as current task context is borrowed for the
23644 @@ -254,36 +423,7 @@
23645 /* Reset the pending bitmask before enabling irqs */
23646 set_softirq_pending(0);
23648 - local_irq_enable();
23652 - while ((softirq_bit = ffs(pending))) {
23653 - unsigned int vec_nr;
23656 - h += softirq_bit - 1;
23658 - vec_nr = h - softirq_vec;
23659 - prev_count = preempt_count();
23661 - kstat_incr_softirqs_this_cpu(vec_nr);
23663 - trace_softirq_entry(vec_nr);
23665 - trace_softirq_exit(vec_nr);
23666 - if (unlikely(prev_count != preempt_count())) {
23667 - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
23668 - vec_nr, softirq_to_name[vec_nr], h->action,
23669 - prev_count, preempt_count());
23670 - preempt_count_set(prev_count);
23673 - pending >>= softirq_bit;
23677 - local_irq_disable();
23678 + handle_pending_softirqs(pending);
23680 pending = local_softirq_pending();
23682 @@ -320,6 +460,310 @@
23686 + * This function must run with irqs disabled!
23688 +void raise_softirq_irqoff(unsigned int nr)
23690 + __raise_softirq_irqoff(nr);
23693 + * If we're in an interrupt or softirq, we're done
23694 + * (this also catches softirq-disabled code). We will
23695 + * actually run the softirq once we return from
23696 + * the irq or softirq.
23698 + * Otherwise we wake up ksoftirqd to make sure we
23699 + * schedule the softirq soon.
23701 + if (!in_interrupt())
23702 + wakeup_softirqd();
23705 +void __raise_softirq_irqoff(unsigned int nr)
23707 + trace_softirq_raise(nr);
23708 + or_softirq_pending(1UL << nr);
23711 +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
23712 +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
23713 +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
23715 +#else /* !PREEMPT_RT_FULL */
23718 + * On RT we serialize softirq execution with a cpu local lock per softirq
23720 +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
23722 +void __init softirq_early_init(void)
23726 + for (i = 0; i < NR_SOFTIRQS; i++)
23727 + local_irq_lock_init(local_softirq_locks[i]);
23730 +static void lock_softirq(int which)
23732 + local_lock(local_softirq_locks[which]);
23735 +static void unlock_softirq(int which)
23737 + local_unlock(local_softirq_locks[which]);
23740 +static void do_single_softirq(int which)
23742 + unsigned long old_flags = current->flags;
23744 + current->flags &= ~PF_MEMALLOC;
23745 + vtime_account_irq_enter(current);
23746 + current->flags |= PF_IN_SOFTIRQ;
23747 + lockdep_softirq_enter();
23748 + local_irq_enable();
23749 + handle_softirq(which);
23750 + local_irq_disable();
23751 + lockdep_softirq_exit();
23752 + current->flags &= ~PF_IN_SOFTIRQ;
23753 + vtime_account_irq_enter(current);
23754 + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
23758 + * Called with interrupts disabled. Process softirqs which were raised
23759 + * in current context (or on behalf of ksoftirqd).
23761 +static void do_current_softirqs(void)
23763 + while (current->softirqs_raised) {
23764 + int i = __ffs(current->softirqs_raised);
23765 + unsigned int pending, mask = (1U << i);
23767 + current->softirqs_raised &= ~mask;
23768 + local_irq_enable();
23771 + * If the lock is contended, we boost the owner to
23772 + * process the softirq or leave the critical section
23776 + local_irq_disable();
23777 + softirq_set_runner(i);
23779 + * Check with the local_softirq_pending() bits,
23780 + * whether we need to process this still or if someone
23781 + * else took care of it.
23783 + pending = local_softirq_pending();
23784 + if (pending & mask) {
23785 + set_softirq_pending(pending & ~mask);
23786 + do_single_softirq(i);
23788 + softirq_clr_runner(i);
23789 + WARN_ON(current->softirq_nestcnt != 1);
23790 + local_irq_enable();
23791 + unlock_softirq(i);
23792 + local_irq_disable();
23796 +void __local_bh_disable(void)
23798 + if (++current->softirq_nestcnt == 1)
23799 + migrate_disable();
23801 +EXPORT_SYMBOL(__local_bh_disable);
23803 +void __local_bh_enable(void)
23805 + if (WARN_ON(current->softirq_nestcnt == 0))
23808 + local_irq_disable();
23809 + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
23810 + do_current_softirqs();
23811 + local_irq_enable();
23813 + if (--current->softirq_nestcnt == 0)
23814 + migrate_enable();
23816 +EXPORT_SYMBOL(__local_bh_enable);
23818 +void _local_bh_enable(void)
23820 + if (WARN_ON(current->softirq_nestcnt == 0))
23822 + if (--current->softirq_nestcnt == 0)
23823 + migrate_enable();
23825 +EXPORT_SYMBOL(_local_bh_enable);
23827 +int in_serving_softirq(void)
23829 + return current->flags & PF_IN_SOFTIRQ;
23831 +EXPORT_SYMBOL(in_serving_softirq);
23833 +/* Called with preemption disabled */
23834 +static void run_ksoftirqd(unsigned int cpu)
23836 + local_irq_disable();
23837 + current->softirq_nestcnt++;
23839 + do_current_softirqs();
23840 + current->softirq_nestcnt--;
23841 + local_irq_enable();
23842 + cond_resched_rcu_qs();
23846 + * Called from netif_rx_ni(). Preemption enabled, but migration
23847 + * disabled. So the cpu can't go away under us.
23849 +void thread_do_softirq(void)
23851 + if (!in_serving_softirq() && current->softirqs_raised) {
23852 + current->softirq_nestcnt++;
23853 + do_current_softirqs();
23854 + current->softirq_nestcnt--;
23858 +static void do_raise_softirq_irqoff(unsigned int nr)
23860 + unsigned int mask;
23862 + mask = 1UL << nr;
23864 + trace_softirq_raise(nr);
23865 + or_softirq_pending(mask);
23868 + * If we are not in a hard interrupt and inside a bh disabled
23869 + * region, we simply raise the flag on current. local_bh_enable()
23870 + * will make sure that the softirq is executed. Otherwise we
23871 + * delegate it to ksoftirqd.
23873 + if (!in_irq() && current->softirq_nestcnt)
23874 + current->softirqs_raised |= mask;
23875 + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
23878 + if (mask & TIMER_SOFTIRQS)
23879 + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
23881 + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
23884 +static void wakeup_proper_softirq(unsigned int nr)
23886 + if ((1UL << nr) & TIMER_SOFTIRQS)
23887 + wakeup_timer_softirqd();
23889 + wakeup_softirqd();
23893 +void __raise_softirq_irqoff(unsigned int nr)
23895 + do_raise_softirq_irqoff(nr);
23896 + if (!in_irq() && !current->softirq_nestcnt)
23897 + wakeup_proper_softirq(nr);
23901 + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
23903 +void __raise_softirq_irqoff_ksoft(unsigned int nr)
23905 + unsigned int mask;
23907 + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
23908 + !__this_cpu_read(ktimer_softirqd)))
23910 + mask = 1UL << nr;
23912 + trace_softirq_raise(nr);
23913 + or_softirq_pending(mask);
23914 + if (mask & TIMER_SOFTIRQS)
23915 + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
23917 + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
23918 + wakeup_proper_softirq(nr);
23922 + * This function must run with irqs disabled!
23924 +void raise_softirq_irqoff(unsigned int nr)
23926 + do_raise_softirq_irqoff(nr);
23929 + * If we're in an hard interrupt we let irq return code deal
23930 + * with the wakeup of ksoftirqd.
23935 + * If we are in thread context but outside of a bh disabled
23936 + * region, we need to wake ksoftirqd as well.
23938 + * CHECKME: Some of the places which do that could be wrapped
23939 + * into local_bh_disable/enable pairs. Though it's unclear
23940 + * whether this is worth the effort. To find those places just
23941 + * raise a WARN() if the condition is met.
23943 + if (!current->softirq_nestcnt)
23944 + wakeup_proper_softirq(nr);
23947 +static inline int ksoftirqd_softirq_pending(void)
23949 + return current->softirqs_raised;
23952 +static inline void local_bh_disable_nort(void) { }
23953 +static inline void _local_bh_enable_nort(void) { }
23955 +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
23957 + /* Take over all but timer pending softirqs when starting */
23958 + local_irq_disable();
23959 + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
23960 + local_irq_enable();
23963 +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
23965 + struct sched_param param = { .sched_priority = 1 };
23967 + sched_setscheduler(current, SCHED_FIFO, &param);
23969 + /* Take over timer pending softirqs when starting */
23970 + local_irq_disable();
23971 + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
23972 + local_irq_enable();
23975 +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
23978 + struct sched_param param = { .sched_priority = 0 };
23980 + sched_setscheduler(current, SCHED_NORMAL, &param);
23983 +static int ktimer_softirqd_should_run(unsigned int cpu)
23985 + return current->softirqs_raised;
23988 +#endif /* PREEMPT_RT_FULL */
23990 * Enter an interrupt context.
23992 void irq_enter(void)
23993 @@ -330,9 +774,9 @@
23994 * Prevent raise_softirq from needlessly waking up ksoftirqd
23995 * here, as softirq will be serviced on return from interrupt.
23997 - local_bh_disable();
23998 + local_bh_disable_nort();
24000 - _local_bh_enable();
24001 + _local_bh_enable_nort();
24005 @@ -340,6 +784,7 @@
24007 static inline void invoke_softirq(void)
24009 +#ifndef CONFIG_PREEMPT_RT_FULL
24010 if (!force_irqthreads) {
24011 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
24013 @@ -359,6 +804,18 @@
24017 +#else /* PREEMPT_RT_FULL */
24018 + unsigned long flags;
24020 + local_irq_save(flags);
24021 + if (__this_cpu_read(ksoftirqd) &&
24022 + __this_cpu_read(ksoftirqd)->softirqs_raised)
24023 + wakeup_softirqd();
24024 + if (__this_cpu_read(ktimer_softirqd) &&
24025 + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
24026 + wakeup_timer_softirqd();
24027 + local_irq_restore(flags);
24031 static inline void tick_irq_exit(void)
24032 @@ -395,26 +852,6 @@
24033 trace_hardirq_exit(); /* must be last! */
24037 - * This function must run with irqs disabled!
24039 -inline void raise_softirq_irqoff(unsigned int nr)
24041 - __raise_softirq_irqoff(nr);
24044 - * If we're in an interrupt or softirq, we're done
24045 - * (this also catches softirq-disabled code). We will
24046 - * actually run the softirq once we return from
24047 - * the irq or softirq.
24049 - * Otherwise we wake up ksoftirqd to make sure we
24050 - * schedule the softirq soon.
24052 - if (!in_interrupt())
24053 - wakeup_softirqd();
24056 void raise_softirq(unsigned int nr)
24058 unsigned long flags;
24059 @@ -424,12 +861,6 @@
24060 local_irq_restore(flags);
24063 -void __raise_softirq_irqoff(unsigned int nr)
24065 - trace_softirq_raise(nr);
24066 - or_softirq_pending(1UL << nr);
24069 void open_softirq(int nr, void (*action)(struct softirq_action *))
24071 softirq_vec[nr].action = action;
24072 @@ -446,15 +877,45 @@
24073 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
24074 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
24076 +static inline void
24077 +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
24079 + if (tasklet_trylock(t)) {
24081 + /* We may have been preempted before tasklet_trylock
24082 + * and __tasklet_action may have already run.
24083 + * So double check the sched bit while the tasklet
24084 + * is locked before adding it to the list.
24086 + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
24089 + head->tail = &(t->next);
24090 + raise_softirq_irqoff(nr);
24091 + tasklet_unlock(t);
24093 + /* This is subtle. If we hit the corner case above,
24094 + * it is possible that we get preempted right here,
24095 + * and another task has successfully called
24096 + * tasklet_schedule(), then this function, and
24097 + * failed on the trylock. Thus we must be sure
24098 + * before releasing the tasklet lock, that the
24099 + * SCHED_BIT is clear. Otherwise the tasklet
24100 + * may get its SCHED_BIT set, but not added to the
24103 + if (!tasklet_tryunlock(t))
24109 void __tasklet_schedule(struct tasklet_struct *t)
24111 unsigned long flags;
24113 local_irq_save(flags);
24115 - *__this_cpu_read(tasklet_vec.tail) = t;
24116 - __this_cpu_write(tasklet_vec.tail, &(t->next));
24117 - raise_softirq_irqoff(TASKLET_SOFTIRQ);
24118 + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
24119 local_irq_restore(flags);
24121 EXPORT_SYMBOL(__tasklet_schedule);
24122 @@ -464,10 +925,7 @@
24123 unsigned long flags;
24125 local_irq_save(flags);
24127 - *__this_cpu_read(tasklet_hi_vec.tail) = t;
24128 - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
24129 - raise_softirq_irqoff(HI_SOFTIRQ);
24130 + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
24131 local_irq_restore(flags);
24133 EXPORT_SYMBOL(__tasklet_hi_schedule);
24134 @@ -476,82 +934,122 @@
24136 BUG_ON(!irqs_disabled());
24138 - t->next = __this_cpu_read(tasklet_hi_vec.head);
24139 - __this_cpu_write(tasklet_hi_vec.head, t);
24140 - __raise_softirq_irqoff(HI_SOFTIRQ);
24141 + __tasklet_hi_schedule(t);
24143 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
24145 -static void tasklet_action(struct softirq_action *a)
24146 +void tasklet_enable(struct tasklet_struct *t)
24148 - struct tasklet_struct *list;
24149 + if (!atomic_dec_and_test(&t->count))
24151 + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
24152 + tasklet_schedule(t);
24154 +EXPORT_SYMBOL(tasklet_enable);
24156 - local_irq_disable();
24157 - list = __this_cpu_read(tasklet_vec.head);
24158 - __this_cpu_write(tasklet_vec.head, NULL);
24159 - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
24160 - local_irq_enable();
24161 +static void __tasklet_action(struct softirq_action *a,
24162 + struct tasklet_struct *list)
24164 + int loops = 1000000;
24167 struct tasklet_struct *t = list;
24171 - if (tasklet_trylock(t)) {
24172 - if (!atomic_read(&t->count)) {
24173 - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
24176 - t->func(t->data);
24177 - tasklet_unlock(t);
24180 - tasklet_unlock(t);
24182 + * Should always succeed - after a tasklet got on the
24183 + * list (after getting the SCHED bit set from 0 to 1),
24184 + * nothing but the tasklet softirq it got queued to can
24187 + if (!tasklet_trylock(t)) {
24192 - local_irq_disable();
24194 - *__this_cpu_read(tasklet_vec.tail) = t;
24195 - __this_cpu_write(tasklet_vec.tail, &(t->next));
24196 - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
24197 - local_irq_enable();
24200 + * If we cannot handle the tasklet because it's disabled,
24201 + * mark it as pending. tasklet_enable() will later
24202 + * re-schedule the tasklet.
24204 + if (unlikely(atomic_read(&t->count))) {
24206 + /* implicit unlock: */
24208 + t->state = TASKLET_STATEF_PENDING;
24213 + * After this point on the tasklet might be rescheduled
24214 + * on another CPU, but it can only be added to another
24215 + * CPU's tasklet list if we unlock the tasklet (which we
24218 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
24222 + t->func(t->data);
24225 + * Try to unlock the tasklet. We must use cmpxchg, because
24226 + * another CPU might have scheduled or disabled the tasklet.
24227 + * We only allow the STATE_RUN -> 0 transition here.
24229 + while (!tasklet_tryunlock(t)) {
24231 + * If it got disabled meanwhile, bail out:
24233 + if (atomic_read(&t->count))
24234 + goto out_disabled;
24236 + * If it got scheduled meanwhile, re-execute
24237 + * the tasklet function:
24239 + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
24242 + printk("hm, tasklet state: %08lx\n", t->state);
24244 + tasklet_unlock(t);
24251 +static void tasklet_action(struct softirq_action *a)
24253 + struct tasklet_struct *list;
24255 + local_irq_disable();
24257 + list = __this_cpu_read(tasklet_vec.head);
24258 + __this_cpu_write(tasklet_vec.head, NULL);
24259 + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
24261 + local_irq_enable();
24263 + __tasklet_action(a, list);
24266 static void tasklet_hi_action(struct softirq_action *a)
24268 struct tasklet_struct *list;
24270 local_irq_disable();
24272 list = __this_cpu_read(tasklet_hi_vec.head);
24273 __this_cpu_write(tasklet_hi_vec.head, NULL);
24274 __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
24275 - local_irq_enable();
24278 - struct tasklet_struct *t = list;
24280 - list = list->next;
24282 - if (tasklet_trylock(t)) {
24283 - if (!atomic_read(&t->count)) {
24284 - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
24287 - t->func(t->data);
24288 - tasklet_unlock(t);
24291 - tasklet_unlock(t);
24293 + local_irq_enable();
24295 - local_irq_disable();
24297 - *__this_cpu_read(tasklet_hi_vec.tail) = t;
24298 - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
24299 - __raise_softirq_irqoff(HI_SOFTIRQ);
24300 - local_irq_enable();
24302 + __tasklet_action(a, list);
24305 void tasklet_init(struct tasklet_struct *t,
24306 @@ -572,7 +1070,7 @@
24308 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
24312 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
24314 tasklet_unlock_wait(t);
24315 @@ -646,25 +1144,26 @@
24316 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
24319 -static int ksoftirqd_should_run(unsigned int cpu)
24321 - return local_softirq_pending();
24324 -static void run_ksoftirqd(unsigned int cpu)
24325 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
24326 +void tasklet_unlock_wait(struct tasklet_struct *t)
24328 - local_irq_disable();
24329 - if (local_softirq_pending()) {
24330 + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
24332 - * We can safely run softirq on inline stack, as we are not deep
24333 - * in the task stack here.
24334 + * Hack for now to avoid this busy-loop:
24337 - local_irq_enable();
24338 - cond_resched_rcu_qs();
24340 +#ifdef CONFIG_PREEMPT_RT_FULL
24346 - local_irq_enable();
24348 +EXPORT_SYMBOL(tasklet_unlock_wait);
24351 +static int ksoftirqd_should_run(unsigned int cpu)
24353 + return ksoftirqd_softirq_pending();
24356 #ifdef CONFIG_HOTPLUG_CPU
24357 @@ -746,16 +1245,31 @@
24359 static struct smp_hotplug_thread softirq_threads = {
24360 .store = &ksoftirqd,
24361 + .setup = ksoftirqd_set_sched_params,
24362 .thread_should_run = ksoftirqd_should_run,
24363 .thread_fn = run_ksoftirqd,
24364 .thread_comm = "ksoftirqd/%u",
24367 +#ifdef CONFIG_PREEMPT_RT_FULL
24368 +static struct smp_hotplug_thread softirq_timer_threads = {
24369 + .store = &ktimer_softirqd,
24370 + .setup = ktimer_softirqd_set_sched_params,
24371 + .cleanup = ktimer_softirqd_clr_sched_params,
24372 + .thread_should_run = ktimer_softirqd_should_run,
24373 + .thread_fn = run_ksoftirqd,
24374 + .thread_comm = "ktimersoftd/%u",
24378 static __init int spawn_ksoftirqd(void)
24380 register_cpu_notifier(&cpu_nfb);
24382 BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
24383 +#ifdef CONFIG_PREEMPT_RT_FULL
24384 + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
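For orientation, the PREEMPT_RT softirq rework above hinges on two per-task fields: softirq_nestcnt (how deeply bottom halves are disabled) and softirqs_raised (which softirqs were raised while they were disabled). A softirq raised inside a bh-disabled section is only recorded on the task and processed at the outermost local_bh_enable(); otherwise it is handed to ksoftirqd/ktimersoftd. The following is a minimal user-space model of that bookkeeping only - plain C, invented names, not kernel code and not part of the patch:

#include <stdio.h>

/* Toy stand-ins for the fields the patch adds to task_struct. */
static unsigned int softirq_nestcnt;	/* like current->softirq_nestcnt */
static unsigned int softirqs_raised;	/* like current->softirqs_raised */

static void handle_softirq(int nr)
{
	printf("running softirq %d\n", nr);
}

static void raise_softirq_model(int nr)
{
	if (softirq_nestcnt)			/* inside a bh-disabled section: */
		softirqs_raised |= 1U << nr;	/* just record it on the task */
	else
		handle_softirq(nr);		/* the kernel would delegate to ksoftirqd here */
}

static void local_bh_disable_model(void)
{
	softirq_nestcnt++;
}

static void local_bh_enable_model(void)
{
	if (softirq_nestcnt == 1) {		/* outermost enable runs what was recorded */
		while (softirqs_raised) {
			int nr = __builtin_ctz(softirqs_raised);

			softirqs_raised &= ~(1U << nr);
			handle_softirq(nr);
		}
	}
	softirq_nestcnt--;
}

int main(void)
{
	local_bh_disable_model();
	raise_softirq_model(3);		/* recorded, not run */
	raise_softirq_model(6);		/* recorded, not run */
	local_bh_enable_model();	/* runs 3, then 6 */
	raise_softirq_model(1);		/* no bh section: runs immediately */
	return 0;
}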
24389 diff -Nur linux-4.4.46.orig/kernel/stop_machine.c linux-4.4.46/kernel/stop_machine.c
24390 --- linux-4.4.46.orig/kernel/stop_machine.c 2017-02-01 08:31:11.000000000 +0100
24391 +++ linux-4.4.46/kernel/stop_machine.c 2017-02-03 17:18:10.935619367 +0100
24393 struct cpu_stopper {
24394 struct task_struct *thread;
24397 + raw_spinlock_t lock;
24398 bool enabled; /* is this stopper enabled? */
24399 struct list_head works; /* list of pending works */
24401 @@ -86,12 +86,12 @@
24402 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
24403 unsigned long flags;
24405 - spin_lock_irqsave(&stopper->lock, flags);
24406 + raw_spin_lock_irqsave(&stopper->lock, flags);
24407 if (stopper->enabled)
24408 __cpu_stop_queue_work(stopper, work);
24410 cpu_stop_signal_done(work->done, false);
24411 - spin_unlock_irqrestore(&stopper->lock, flags);
24412 + raw_spin_unlock_irqrestore(&stopper->lock, flags);
24416 @@ -224,8 +224,8 @@
24419 lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
24420 - spin_lock_irq(&stopper1->lock);
24421 - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
24422 + raw_spin_lock_irq(&stopper1->lock);
24423 + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
24426 if (!stopper1->enabled || !stopper2->enabled)
24427 @@ -235,8 +235,8 @@
24428 __cpu_stop_queue_work(stopper1, work1);
24429 __cpu_stop_queue_work(stopper2, work2);
24431 - spin_unlock(&stopper2->lock);
24432 - spin_unlock_irq(&stopper1->lock);
24433 + raw_spin_unlock(&stopper2->lock);
24434 + raw_spin_unlock_irq(&stopper1->lock);
24435 lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
24438 @@ -258,7 +258,7 @@
24439 struct cpu_stop_work work1, work2;
24440 struct multi_stop_data msdata;
24442 - preempt_disable();
24443 + preempt_disable_nort();
24444 msdata = (struct multi_stop_data){
24447 @@ -278,11 +278,11 @@
24450 if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) {
24451 - preempt_enable();
24452 + preempt_enable_nort();
24456 - preempt_enable();
24457 + preempt_enable_nort();
24459 wait_for_completion(&done.completion);
24461 @@ -315,17 +315,20 @@
24463 static void queue_stop_cpus_work(const struct cpumask *cpumask,
24464 cpu_stop_fn_t fn, void *arg,
24465 - struct cpu_stop_done *done)
24466 + struct cpu_stop_done *done, bool inactive)
24468 struct cpu_stop_work *work;
24472 - * Disable preemption while queueing to avoid getting
24473 - * preempted by a stopper which might wait for other stoppers
24474 - * to enter @fn which can lead to deadlock.
24475 + * Make sure that all work is queued on all cpus before
24476 + * any of the cpus can execute it.
24478 - lg_global_lock(&stop_cpus_lock);
24480 + lg_global_lock(&stop_cpus_lock);
24482 + lg_global_trylock_relax(&stop_cpus_lock);
24484 for_each_cpu(cpu, cpumask) {
24485 work = &per_cpu(cpu_stopper.stop_work, cpu);
24487 @@ -342,7 +345,7 @@
24488 struct cpu_stop_done done;
24490 cpu_stop_init_done(&done, cpumask_weight(cpumask));
24491 - queue_stop_cpus_work(cpumask, fn, arg, &done);
24492 + queue_stop_cpus_work(cpumask, fn, arg, &done, false);
24493 wait_for_completion(&done.completion);
24494 return done.executed ? done.ret : -ENOENT;
24496 @@ -422,9 +425,9 @@
24497 unsigned long flags;
24500 - spin_lock_irqsave(&stopper->lock, flags);
24501 + raw_spin_lock_irqsave(&stopper->lock, flags);
24502 run = !list_empty(&stopper->works);
24503 - spin_unlock_irqrestore(&stopper->lock, flags);
24504 + raw_spin_unlock_irqrestore(&stopper->lock, flags);
24508 @@ -436,13 +439,13 @@
24512 - spin_lock_irq(&stopper->lock);
24513 + raw_spin_lock_irq(&stopper->lock);
24514 if (!list_empty(&stopper->works)) {
24515 work = list_first_entry(&stopper->works,
24516 struct cpu_stop_work, list);
24517 list_del_init(&work->list);
24519 - spin_unlock_irq(&stopper->lock);
24520 + raw_spin_unlock_irq(&stopper->lock);
24523 cpu_stop_fn_t fn = work->fn;
24524 @@ -450,6 +453,16 @@
24525 struct cpu_stop_done *done = work->done;
24526 char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
24529 + * Wait until the stopper finished scheduling on all
24532 + lg_global_lock(&stop_cpus_lock);
24534 + * Let other cpu threads continue as well
24536 + lg_global_unlock(&stop_cpus_lock);
24538 /* cpu stop callbacks are not allowed to sleep */
24541 @@ -520,10 +533,12 @@
24542 for_each_possible_cpu(cpu) {
24543 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
24545 - spin_lock_init(&stopper->lock);
24546 + raw_spin_lock_init(&stopper->lock);
24547 INIT_LIST_HEAD(&stopper->works);
24550 + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
24552 BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
24553 stop_machine_unpark(raw_smp_processor_id());
24554 stop_machine_initialized = true;
24555 @@ -620,7 +635,7 @@
24556 set_state(&msdata, MULTI_STOP_PREPARE);
24557 cpu_stop_init_done(&done, num_active_cpus());
24558 queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
24561 ret = multi_cpu_stop(&msdata);
24563 /* Busy wait for completion. */
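The stop_machine hunks follow the usual PREEMPT_RT conversion rule: spinlock_t becomes a sleeping rtmutex on RT, but the stopper lock is taken from queueing and wake-up paths that run with interrupts off and must not sleep, so it has to stay a genuinely spinning raw_spinlock_t. A hedged sketch of that rule with invented names (my_lock, my_irq_path), not code taken from the patch:

/* Hedged sketch of the rule, not code from the patch; my_lock and
 * my_irq_path() are invented names.
 */
static DEFINE_RAW_SPINLOCK(my_lock);		/* stays a spinning lock on RT */

static void my_irq_path(void)			/* may run with interrupts off */
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_lock, flags);	/* must never sleep here */
	/* ... short, bounded critical section ... */
	raw_spin_unlock_irqrestore(&my_lock, flags);
}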
24564 diff -Nur linux-4.4.46.orig/kernel/time/hrtimer.c linux-4.4.46/kernel/time/hrtimer.c
24565 --- linux-4.4.46.orig/kernel/time/hrtimer.c 2017-02-01 08:31:11.000000000 +0100
24566 +++ linux-4.4.46/kernel/time/hrtimer.c 2017-02-03 17:18:10.939619522 +0100
24567 @@ -48,11 +48,13 @@
24568 #include <linux/sched/rt.h>
24569 #include <linux/sched/deadline.h>
24570 #include <linux/timer.h>
24571 +#include <linux/kthread.h>
24572 #include <linux/freezer.h>
24574 #include <asm/uaccess.h>
24576 #include <trace/events/timer.h>
24577 +#include <trace/events/hist.h>
24579 #include "tick-internal.h"
24581 @@ -717,6 +719,44 @@
24583 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
24585 +#ifdef CONFIG_PREEMPT_RT_FULL
24587 + * RT can not call schedule_work from real interrupt context.
24588 + * Need to make a thread to do the real work.
24590 +static struct task_struct *clock_set_delay_thread;
24591 +static bool do_clock_set_delay;
24593 +static int run_clock_set_delay(void *ignore)
24595 + while (!kthread_should_stop()) {
24596 + set_current_state(TASK_INTERRUPTIBLE);
24597 + if (do_clock_set_delay) {
24598 + do_clock_set_delay = false;
24599 + schedule_work(&hrtimer_work);
24603 + __set_current_state(TASK_RUNNING);
24607 +void clock_was_set_delayed(void)
24609 + do_clock_set_delay = true;
24610 + /* Make visible before waking up process */
24612 + wake_up_process(clock_set_delay_thread);
24615 +static __init int create_clock_set_delay_thread(void)
24617 + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
24618 + BUG_ON(!clock_set_delay_thread);
24621 +early_initcall(create_clock_set_delay_thread);
24622 +#else /* PREEMPT_RT_FULL */
24624 * Called from timekeeping and resume code to reprogram the hrtimer
24625 * interrupt device on all cpus.
24626 @@ -725,6 +765,7 @@
24628 schedule_work(&hrtimer_work);
24634 @@ -734,11 +775,8 @@
24635 static inline void hrtimer_switch_to_hres(void) { }
24637 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
24638 -static inline int hrtimer_reprogram(struct hrtimer *timer,
24639 - struct hrtimer_clock_base *base)
24643 +static inline void hrtimer_reprogram(struct hrtimer *timer,
24644 + struct hrtimer_clock_base *base) { }
24645 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
24646 static inline void retrigger_next_event(void *arg) { }
24648 @@ -870,6 +908,32 @@
24650 EXPORT_SYMBOL_GPL(hrtimer_forward);
24652 +#ifdef CONFIG_PREEMPT_RT_BASE
24653 +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
24656 + * hrtimer_wait_for_timer - Wait for a running timer
24658 + * @timer: timer to wait for
24660 + * The function waits in case the timer's callback function is
24661 + * currently executed on the waitqueue of the timer base. The
24662 + * waitqueue is woken up after the timer callback function has
24663 + * finished execution.
24665 +void hrtimer_wait_for_timer(const struct hrtimer *timer)
24667 + struct hrtimer_clock_base *base = timer->base;
24669 + if (base && base->cpu_base && !timer->irqsafe)
24670 + wait_event(base->cpu_base->wait,
24671 + !(hrtimer_callback_running(timer)));
24675 +# define wake_up_timer_waiters(b) do { } while (0)
24679 * enqueue_hrtimer - internal function to (re)start a timer
24681 @@ -911,6 +975,11 @@
24682 if (!(state & HRTIMER_STATE_ENQUEUED))
24685 + if (unlikely(!list_empty(&timer->cb_entry))) {
24686 + list_del_init(&timer->cb_entry);
24690 if (!timerqueue_del(&base->active, &timer->node))
24691 cpu_base->active_bases &= ~(1 << base->index);
24693 @@ -1006,7 +1075,16 @@
24694 new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
24696 timer_stats_hrtimer_set_start_info(timer);
24697 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
24699 + ktime_t now = new_base->get_time();
24701 + if (ktime_to_ns(tim) < ktime_to_ns(now))
24702 + timer->praecox = now;
24704 + timer->praecox = ktime_set(0, 0);
24707 leftmost = enqueue_hrtimer(timer, new_base);
24710 @@ -1078,7 +1156,7 @@
24715 + hrtimer_wait_for_timer(timer);
24718 EXPORT_SYMBOL_GPL(hrtimer_cancel);
24719 @@ -1142,6 +1220,7 @@
24721 base = hrtimer_clockid_to_base(clock_id);
24722 timer->base = &cpu_base->clock_base[base];
24723 + INIT_LIST_HEAD(&timer->cb_entry);
24724 timerqueue_init(&timer->node);
24726 #ifdef CONFIG_TIMER_STATS
24727 @@ -1182,6 +1261,7 @@
24728 seq = raw_read_seqcount_begin(&cpu_base->seq);
24730 if (timer->state != HRTIMER_STATE_INACTIVE ||
24731 + cpu_base->running_soft == timer ||
24732 cpu_base->running == timer)
24735 @@ -1280,10 +1360,112 @@
24736 cpu_base->running = NULL;
24739 +#ifdef CONFIG_PREEMPT_RT_BASE
24740 +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
24741 + struct hrtimer_clock_base *base)
24745 + if (restart != HRTIMER_NORESTART &&
24746 + !(timer->state & HRTIMER_STATE_ENQUEUED)) {
24748 + leftmost = enqueue_hrtimer(timer, base);
24751 +#ifdef CONFIG_HIGH_RES_TIMERS
24752 + if (!hrtimer_is_hres_active(timer)) {
24754 + * Kick to reschedule the next tick to handle the new timer
24755 + * on dynticks target.
24757 + if (base->cpu_base->nohz_active)
24758 + wake_up_nohz_cpu(base->cpu_base->cpu);
24761 + hrtimer_reprogram(timer, base);
24768 + * The changes in mainline which removed the callback modes from
24769 + * hrtimer are not yet working with -rt. The non wakeup_process()
24770 + * based callbacks which involve sleeping locks need to be treated
24773 +static void hrtimer_rt_run_pending(void)
24775 + enum hrtimer_restart (*fn)(struct hrtimer *);
24776 + struct hrtimer_cpu_base *cpu_base;
24777 + struct hrtimer_clock_base *base;
24778 + struct hrtimer *timer;
24779 + int index, restart;
24781 + local_irq_disable();
24782 + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
24784 + raw_spin_lock(&cpu_base->lock);
24786 + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
24787 + base = &cpu_base->clock_base[index];
24789 + while (!list_empty(&base->expired)) {
24790 + timer = list_first_entry(&base->expired,
24791 + struct hrtimer, cb_entry);
24794 + * Same as the above __run_hrtimer function, except that
24795 + * here we run with interrupts enabled.
24797 + debug_deactivate(timer);
24798 + cpu_base->running_soft = timer;
24799 + raw_write_seqcount_barrier(&cpu_base->seq);
24801 + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
24802 + timer_stats_account_hrtimer(timer);
24803 + fn = timer->function;
24805 + raw_spin_unlock_irq(&cpu_base->lock);
24806 + restart = fn(timer);
24807 + raw_spin_lock_irq(&cpu_base->lock);
24809 + hrtimer_rt_reprogram(restart, timer, base);
24810 + raw_write_seqcount_barrier(&cpu_base->seq);
24812 + WARN_ON_ONCE(cpu_base->running_soft != timer);
24813 + cpu_base->running_soft = NULL;
24817 + raw_spin_unlock_irq(&cpu_base->lock);
24819 + wake_up_timer_waiters(cpu_base);
24822 +static int hrtimer_rt_defer(struct hrtimer *timer)
24824 + if (timer->irqsafe)
24827 + __remove_hrtimer(timer, timer->base, timer->state, 0);
24828 + list_add_tail(&timer->cb_entry, &timer->base->expired);
24834 +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
24838 +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
24840 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
24842 struct hrtimer_clock_base *base = cpu_base->clock_base;
24843 unsigned int active = cpu_base->active_bases;
24846 for (; active; base++, active >>= 1) {
24847 struct timerqueue_node *node;
24848 @@ -1299,6 +1481,15 @@
24850 timer = container_of(node, struct hrtimer, node);
24852 + trace_hrtimer_interrupt(raw_smp_processor_id(),
24853 + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
24854 + timer->praecox : hrtimer_get_expires(timer),
24857 + timer->function == hrtimer_wakeup ?
24858 + container_of(timer, struct hrtimer_sleeper,
24859 + timer)->task : NULL);
24862 * The immediate goal for using the softexpires is
24863 * minimizing wakeups, not running timers at the
24864 @@ -1314,9 +1505,14 @@
24865 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
24868 - __run_hrtimer(cpu_base, base, timer, &basenow);
24869 + if (!hrtimer_rt_defer(timer))
24870 + __run_hrtimer(cpu_base, base, timer, &basenow);
24876 + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
24879 #ifdef CONFIG_HIGH_RES_TIMERS
24880 @@ -1479,16 +1675,18 @@
24881 void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
24883 sl->timer.function = hrtimer_wakeup;
24884 + sl->timer.irqsafe = 1;
24887 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
24889 -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
24890 +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
24891 + unsigned long state)
24893 hrtimer_init_sleeper(t, current);
24896 - set_current_state(TASK_INTERRUPTIBLE);
24897 + set_current_state(state);
24898 hrtimer_start_expires(&t->timer, mode);
24900 if (likely(t->task))
24901 @@ -1530,7 +1728,8 @@
24903 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
24905 - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
24906 + /* cpu_chill() does not care about restart state. */
24907 + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
24910 rmtp = restart->nanosleep.rmtp;
24911 @@ -1547,8 +1746,10 @@
24915 -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
24916 - const enum hrtimer_mode mode, const clockid_t clockid)
24918 +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
24919 + const enum hrtimer_mode mode, const clockid_t clockid,
24920 + unsigned long state)
24922 struct restart_block *restart;
24923 struct hrtimer_sleeper t;
24924 @@ -1561,7 +1762,7 @@
24926 hrtimer_init_on_stack(&t.timer, clockid, mode);
24927 hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
24928 - if (do_nanosleep(&t, mode))
24929 + if (do_nanosleep(&t, mode, state))
24932 /* Absolute timers do not update the rmtp value and restart: */
24933 @@ -1588,6 +1789,12 @@
24937 +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
24938 + const enum hrtimer_mode mode, const clockid_t clockid)
24940 + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
24943 SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
24944 struct timespec __user *, rmtp)
24946 @@ -1602,6 +1809,26 @@
24947 return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
24950 +#ifdef CONFIG_PREEMPT_RT_FULL
24952 + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
24954 +void cpu_chill(void)
24956 + struct timespec tu = {
24957 + .tv_nsec = NSEC_PER_MSEC,
24959 + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
24961 + current->flags |= PF_NOFREEZE;
24962 + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
24963 + TASK_UNINTERRUPTIBLE);
24964 + if (!freeze_flag)
24965 + current->flags &= ~PF_NOFREEZE;
24967 +EXPORT_SYMBOL(cpu_chill);
24971 * Functions related to boot-time initialization:
24973 @@ -1613,10 +1840,14 @@
24974 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
24975 cpu_base->clock_base[i].cpu_base = cpu_base;
24976 timerqueue_init_head(&cpu_base->clock_base[i].active);
24977 + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
24980 cpu_base->cpu = cpu;
24981 hrtimer_init_hres(cpu_base);
24982 +#ifdef CONFIG_PREEMPT_RT_BASE
24983 + init_waitqueue_head(&cpu_base->wait);
24987 #ifdef CONFIG_HOTPLUG_CPU
24988 @@ -1714,11 +1945,21 @@
24989 .notifier_call = hrtimer_cpu_notify,
24992 +#ifdef CONFIG_PREEMPT_RT_BASE
24993 +static void run_hrtimer_softirq(struct softirq_action *h)
24995 + hrtimer_rt_run_pending();
24999 void __init hrtimers_init(void)
25001 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
25002 (void *)(long)smp_processor_id());
25003 register_cpu_notifier(&hrtimers_nb);
25004 +#ifdef CONFIG_PREEMPT_RT_BASE
25005 + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
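cpu_chill(), added above, is meant for trylock/retry loops that would otherwise busy-wait: on RT the lock owner may itself be a preempted thread, so spinning with cpu_relax() can livelock, while sleeping for a millisecond lets the owner run. A hedged sketch of a typical call site - try_grab_resource() is an invented placeholder, not an API from this patch:

/* Hedged sketch of a call site; try_grab_resource() is an invented
 * placeholder for whatever trylock/retry primitive the caller uses.
 */
	while (!try_grab_resource()) {
#ifdef CONFIG_PREEMPT_RT_FULL
		cpu_chill();	/* sleep ~1 ms so a preempted owner can run */
#else
		cpu_relax();	/* ordinary busy-wait hint */
#endif
	}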
25010 diff -Nur linux-4.4.46.orig/kernel/time/itimer.c linux-4.4.46/kernel/time/itimer.c
25011 --- linux-4.4.46.orig/kernel/time/itimer.c 2017-02-01 08:31:11.000000000 +0100
25012 +++ linux-4.4.46/kernel/time/itimer.c 2017-02-03 17:18:10.939619522 +0100
25013 @@ -213,6 +213,7 @@
25014 /* We are sharing ->siglock with it_real_fn() */
25015 if (hrtimer_try_to_cancel(timer) < 0) {
25016 spin_unlock_irq(&tsk->sighand->siglock);
25017 + hrtimer_wait_for_timer(&tsk->signal->real_timer);
25020 expires = timeval_to_ktime(value->it_value);
25021 diff -Nur linux-4.4.46.orig/kernel/time/jiffies.c linux-4.4.46/kernel/time/jiffies.c
25022 --- linux-4.4.46.orig/kernel/time/jiffies.c 2017-02-01 08:31:11.000000000 +0100
25023 +++ linux-4.4.46/kernel/time/jiffies.c 2017-02-03 17:18:10.939619522 +0100
25028 -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
25029 +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
25030 +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
25032 #if (BITS_PER_LONG < 64)
25033 u64 get_jiffies_64(void)
25038 - seq = read_seqbegin(&jiffies_lock);
25039 + seq = read_seqcount_begin(&jiffies_seq);
25041 - } while (read_seqretry(&jiffies_lock, seq));
25042 + } while (read_seqcount_retry(&jiffies_seq, seq));
25045 EXPORT_SYMBOL(get_jiffies_64);
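The seqlock is split because write_seqlock() embeds a spinlock_t, which turns into a sleeping lock on RT, while jiffies is updated from hard interrupt context. The patch therefore pairs a raw spinlock (writer serialization) with a bare seqcount (reader consistency). The resulting pattern, mirroring the tick-common.c, tick-sched.c and timekeeping.c hunks further down (code fragments only, not a standalone function):

/* Writer side (cf. tick_periodic() below): serialize with the raw lock,
 * publish with the seqcount.
 */
	raw_spin_lock(&jiffies_lock);
	write_seqcount_begin(&jiffies_seq);
	/* ... advance jiffies_64 / tick_next_period ... */
	write_seqcount_end(&jiffies_seq);
	raw_spin_unlock(&jiffies_lock);

/* Reader side (cf. get_jiffies_64() above): lockless retry loop. */
	unsigned int seq;
	u64 ret;

	do {
		seq = read_seqcount_begin(&jiffies_seq);
		ret = jiffies_64;
	} while (read_seqcount_retry(&jiffies_seq, seq));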
25046 diff -Nur linux-4.4.46.orig/kernel/time/ntp.c linux-4.4.46/kernel/time/ntp.c
25047 --- linux-4.4.46.orig/kernel/time/ntp.c 2017-02-01 08:31:11.000000000 +0100
25048 +++ linux-4.4.46/kernel/time/ntp.c 2017-02-03 17:18:10.939619522 +0100
25050 #include <linux/workqueue.h>
25051 #include <linux/hrtimer.h>
25052 #include <linux/jiffies.h>
25053 +#include <linux/kthread.h>
25054 #include <linux/math64.h>
25055 #include <linux/timex.h>
25056 #include <linux/time.h>
25057 @@ -562,10 +563,52 @@
25058 &sync_cmos_work, timespec64_to_jiffies(&next));
25061 +#ifdef CONFIG_PREEMPT_RT_FULL
25063 + * RT cannot call schedule_delayed_work from real interrupt context.
25064 + * Need to make a thread to do the real work.
25066 +static struct task_struct *cmos_delay_thread;
25067 +static bool do_cmos_delay;
25069 +static int run_cmos_delay(void *ignore)
25071 + while (!kthread_should_stop()) {
25072 + set_current_state(TASK_INTERRUPTIBLE);
25073 + if (do_cmos_delay) {
25074 + do_cmos_delay = false;
25075 + queue_delayed_work(system_power_efficient_wq,
25076 + &sync_cmos_work, 0);
25080 + __set_current_state(TASK_RUNNING);
25084 +void ntp_notify_cmos_timer(void)
25086 + do_cmos_delay = true;
25087 + /* Make visible before waking up process */
25089 + wake_up_process(cmos_delay_thread);
25092 +static __init int create_cmos_delay_thread(void)
25094 + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
25095 + BUG_ON(!cmos_delay_thread);
25098 +early_initcall(create_cmos_delay_thread);
25102 void ntp_notify_cmos_timer(void)
25104 queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
25106 +#endif /* CONFIG_PREEMPT_RT_FULL */
25109 void ntp_notify_cmos_timer(void) { }
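Both this ntp.c hunk and the earlier hrtimer.c hunk use the same delegation trick: schedule_work()/schedule_delayed_work() may take sleeping locks, so on RT they cannot be called from real (hard) interrupt context; instead a flag is set and a dedicated kthread, woken from the interrupt, queues the work from task context. A hedged sketch of that shape with invented names (my_flag, my_thread, do_the_work); the barrier assumes the elided lines contain an smp_wmb(), as the "Make visible before waking up process" comments suggest:

/* Invented names throughout; not code from the patch. */
static struct task_struct *my_thread;
static bool my_flag;

static int my_thread_fn(void *unused)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (my_flag) {
			my_flag = false;
			do_the_work();	/* task context: sleeping locks are fine */
		}
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

void poke_from_irq(void)		/* safe to call from hard interrupt context */
{
	my_flag = true;
	smp_wmb();			/* make the flag visible before the wakeup */
	wake_up_process(my_thread);
}

/* set up once at boot: my_thread = kthread_run(my_thread_fn, NULL, "mythreadd"); */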
25110 diff -Nur linux-4.4.46.orig/kernel/time/posix-cpu-timers.c linux-4.4.46/kernel/time/posix-cpu-timers.c
25111 --- linux-4.4.46.orig/kernel/time/posix-cpu-timers.c 2017-02-01 08:31:11.000000000 +0100
25112 +++ linux-4.4.46/kernel/time/posix-cpu-timers.c 2017-02-03 17:18:10.939619522 +0100
25116 #include <linux/sched.h>
25117 +#include <linux/sched/rt.h>
25118 #include <linux/posix-timers.h>
25119 #include <linux/errno.h>
25120 #include <linux/math64.h>
25121 @@ -650,7 +651,7 @@
25123 * Disarm any old timer after extracting its expiry time.
25125 - WARN_ON_ONCE(!irqs_disabled());
25126 + WARN_ON_ONCE_NONRT(!irqs_disabled());
25129 old_incr = timer->it.cpu.incr;
25130 @@ -1092,7 +1093,7 @@
25132 * Now re-arm for the new expiry time.
25134 - WARN_ON_ONCE(!irqs_disabled());
25135 + WARN_ON_ONCE_NONRT(!irqs_disabled());
25137 unlock_task_sighand(p, &flags);
25139 @@ -1183,13 +1184,13 @@
25140 * already updated our counts. We need to check if any timers fire now.
25141 * Interrupts are disabled.
25143 -void run_posix_cpu_timers(struct task_struct *tsk)
25144 +static void __run_posix_cpu_timers(struct task_struct *tsk)
25147 struct k_itimer *timer, *next;
25148 unsigned long flags;
25150 - WARN_ON_ONCE(!irqs_disabled());
25151 + WARN_ON_ONCE_NONRT(!irqs_disabled());
25154 * The fast path checks that there are no expired thread or thread
25155 @@ -1243,6 +1244,190 @@
25159 +#ifdef CONFIG_PREEMPT_RT_BASE
25160 +#include <linux/kthread.h>
25161 +#include <linux/cpu.h>
25162 +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
25163 +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
25165 +static int posix_cpu_timers_thread(void *data)
25167 + int cpu = (long)data;
25169 + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
25171 + while (!kthread_should_stop()) {
25172 + struct task_struct *tsk = NULL;
25173 + struct task_struct *next = NULL;
25175 + if (cpu_is_offline(cpu))
25176 + goto wait_to_die;
25178 + /* grab task list */
25179 + raw_local_irq_disable();
25180 + tsk = per_cpu(posix_timer_tasklist, cpu);
25181 + per_cpu(posix_timer_tasklist, cpu) = NULL;
25182 + raw_local_irq_enable();
25184 + /* it's possible the list is empty, just return */
25186 + set_current_state(TASK_INTERRUPTIBLE);
25188 + __set_current_state(TASK_RUNNING);
25192 + /* Process task list */
25195 + next = tsk->posix_timer_list;
25197 + /* run the task timers, clear its ptr and
25200 + __run_posix_cpu_timers(tsk);
25201 + tsk->posix_timer_list = NULL;
25202 + put_task_struct(tsk);
25204 + /* check if this is the last on the list */
25213 + /* Wait for kthread_stop */
25214 + set_current_state(TASK_INTERRUPTIBLE);
25215 + while (!kthread_should_stop()) {
25217 + set_current_state(TASK_INTERRUPTIBLE);
25219 + __set_current_state(TASK_RUNNING);
25223 +static inline int __fastpath_timer_check(struct task_struct *tsk)
25225 + /* tsk == current, ensure it is safe to use ->signal/sighand */
25226 + if (unlikely(tsk->exit_state))
25229 + if (!task_cputime_zero(&tsk->cputime_expires))
25232 + if (!task_cputime_zero(&tsk->signal->cputime_expires))
25238 +void run_posix_cpu_timers(struct task_struct *tsk)
25240 + unsigned long cpu = smp_processor_id();
25241 + struct task_struct *tasklist;
25243 + BUG_ON(!irqs_disabled());
25244 + if(!per_cpu(posix_timer_task, cpu))
25246 + /* get per-cpu references */
25247 + tasklist = per_cpu(posix_timer_tasklist, cpu);
25249 + /* check to see if we're already queued */
25250 + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
25251 + get_task_struct(tsk);
25253 + tsk->posix_timer_list = tasklist;
25256 + * The list is terminated by a self-pointing
25259 + tsk->posix_timer_list = tsk;
25261 + per_cpu(posix_timer_tasklist, cpu) = tsk;
25263 + wake_up_process(per_cpu(posix_timer_task, cpu));
25268 + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
25269 + * Here we can start up the necessary migration thread for the new CPU.
25271 +static int posix_cpu_thread_call(struct notifier_block *nfb,
25272 + unsigned long action, void *hcpu)
25274 + int cpu = (long)hcpu;
25275 + struct task_struct *p;
25276 + struct sched_param param;
25278 + switch (action) {
25279 + case CPU_UP_PREPARE:
25280 + p = kthread_create(posix_cpu_timers_thread, hcpu,
25281 + "posixcputmr/%d",cpu);
25283 + return NOTIFY_BAD;
25284 + p->flags |= PF_NOFREEZE;
25285 + kthread_bind(p, cpu);
25286 + /* Must be high prio to avoid getting starved */
25287 + param.sched_priority = MAX_RT_PRIO-1;
25288 + sched_setscheduler(p, SCHED_FIFO, &param);
25289 + per_cpu(posix_timer_task,cpu) = p;
25292 + /* Strictly unnecessary, as first user will wake it. */
25293 + wake_up_process(per_cpu(posix_timer_task,cpu));
25295 +#ifdef CONFIG_HOTPLUG_CPU
25296 + case CPU_UP_CANCELED:
25297 + /* Unbind it from offline cpu so it can run. Fall thru. */
25298 + kthread_bind(per_cpu(posix_timer_task, cpu),
25299 + cpumask_any(cpu_online_mask));
25300 + kthread_stop(per_cpu(posix_timer_task,cpu));
25301 + per_cpu(posix_timer_task,cpu) = NULL;
25304 + kthread_stop(per_cpu(posix_timer_task,cpu));
25305 + per_cpu(posix_timer_task,cpu) = NULL;
25309 + return NOTIFY_OK;
25312 +/* Register at highest priority so that task migration (migrate_all_tasks)
25313 + * happens before everything else.
25315 +static struct notifier_block posix_cpu_thread_notifier = {
25316 + .notifier_call = posix_cpu_thread_call,
25320 +static int __init posix_cpu_thread_init(void)
25322 + void *hcpu = (void *)(long)smp_processor_id();
25323 + /* Start one for boot CPU. */
25324 + unsigned long cpu;
25326 + /* init the per-cpu posix_timer_tasklets */
25327 + for_each_possible_cpu(cpu)
25328 + per_cpu(posix_timer_tasklist, cpu) = NULL;
25330 + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
25331 + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
25332 + register_cpu_notifier(&posix_cpu_thread_notifier);
25335 +early_initcall(posix_cpu_thread_init);
25336 +#else /* CONFIG_PREEMPT_RT_BASE */
25337 +void run_posix_cpu_timers(struct task_struct *tsk)
25339 + __run_posix_cpu_timers(tsk);
25341 +#endif /* CONFIG_PREEMPT_RT_BASE */
25344 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
25345 * The tsk->sighand->siglock must be held by the caller.
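On RT, run_posix_cpu_timers() only queues the task on a per-CPU singly linked list and wakes posixcputmr/N, which then does the expensive timer processing in thread context. The list is terminated by an entry pointing at itself rather than at NULL, so an empty ->posix_timer_list still means "not queued". A small user-space model of just that list discipline (plain C, invented names; the reference counting and interrupt disabling of the real code are omitted):

#include <stdio.h>

struct task {
	const char *name;
	struct task *posix_timer_list;	/* NULL means "not queued" */
};

static struct task *tasklist;		/* the per-CPU head in the real code */

static void queue_task(struct task *tsk)
{
	if (tsk->posix_timer_list)	/* already queued */
		return;
	/* the last element points at itself instead of at NULL */
	tsk->posix_timer_list = tasklist ? tasklist : tsk;
	tasklist = tsk;
}

static void process_tasks(void)
{
	struct task *tsk = tasklist;

	tasklist = NULL;
	while (tsk) {
		struct task *next = tsk->posix_timer_list;

		printf("run timers for %s\n", tsk->name);
		tsk->posix_timer_list = NULL;
		tsk = (next == tsk) ? NULL : next;	/* self-pointer ends the list */
	}
}

int main(void)
{
	struct task a = { "a", NULL }, b = { "b", NULL };

	queue_task(&a);
	queue_task(&b);
	queue_task(&a);		/* no-op: already on the list */
	process_tasks();	/* prints b, then a */
	return 0;
}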
25346 diff -Nur linux-4.4.46.orig/kernel/time/posix-timers.c linux-4.4.46/kernel/time/posix-timers.c
25347 --- linux-4.4.46.orig/kernel/time/posix-timers.c 2017-02-01 08:31:11.000000000 +0100
25348 +++ linux-4.4.46/kernel/time/posix-timers.c 2017-02-03 17:18:10.939619522 +0100
25349 @@ -506,6 +506,7 @@
25350 static struct pid *good_sigevent(sigevent_t * event)
25352 struct task_struct *rtn = current->group_leader;
25353 + int sig = event->sigev_signo;
25355 if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
25356 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
25357 @@ -514,7 +515,8 @@
25360 if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
25361 - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
25362 + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
25363 + sig_kernel_coredump(sig)))
25366 return task_pid(rtn);
25367 @@ -826,6 +828,20 @@
25372 + * Protected by RCU!
25374 +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
25376 +#ifdef CONFIG_PREEMPT_RT_FULL
25377 + if (kc->timer_set == common_timer_set)
25378 + hrtimer_wait_for_timer(&timr->it.real.timer);
25380 + /* FIXME: Whacky hack for posix-cpu-timers */
25381 + schedule_timeout(1);
25385 /* Set a POSIX.1b interval timer. */
25386 /* timr->it_lock is taken. */
25388 @@ -903,6 +919,7 @@
25393 kc = clockid_to_kclock(timr->it_clock);
25394 if (WARN_ON_ONCE(!kc || !kc->timer_set))
25396 @@ -911,9 +928,12 @@
25398 unlock_timer(timr, flag);
25399 if (error == TIMER_RETRY) {
25400 + timer_wait_for_callback(kc, timr);
25401 rtn = NULL; // We already got the old time...
25402 + rcu_read_unlock();
25405 + rcu_read_unlock();
25407 if (old_setting && !error &&
25408 copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
25409 @@ -951,10 +971,15 @@
25414 if (timer_delete_hook(timer) == TIMER_RETRY) {
25415 unlock_timer(timer, flags);
25416 + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
25418 + rcu_read_unlock();
25421 + rcu_read_unlock();
25423 spin_lock(¤t->sighand->siglock);
25424 list_del(&timer->list);
25425 @@ -980,8 +1005,18 @@
25427 spin_lock_irqsave(&timer->it_lock, flags);
25429 + /* On RT we can race with a deletion */
25430 + if (!timer->it_signal) {
25431 + unlock_timer(timer, flags);
25435 if (timer_delete_hook(timer) == TIMER_RETRY) {
25437 unlock_timer(timer, flags);
25438 + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
25440 + rcu_read_unlock();
25443 list_del(&timer->list);
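The posix-timers changes all follow one pattern: when timer_settime()/timer_delete() race with a running callback, the kclock hook returns TIMER_RETRY; rather than looping with the timer lock held (which on RT could spin forever against a preempted callback thread), the lock is dropped, timer_wait_for_callback() sleeps until the callback finishes, and the operation is retried. A condensed sketch of that control flow - not a literal copy of the hunks, with the RCU protection and error handling omitted:

/* Condensed shape of the retry path; not a literal copy of the hunks above. */
retry:
	timr = lock_timer(timer_id, &flags);
	error = kc->timer_set(timr, flags, &new_spec, rtn);
	unlock_timer(timr, flags);
	if (error == TIMER_RETRY) {
		timer_wait_for_callback(kc, timr);	/* sleep, don't spin */
		rtn = NULL;
		goto retry;
	}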
25444 diff -Nur linux-4.4.46.orig/kernel/time/tick-broadcast-hrtimer.c linux-4.4.46/kernel/time/tick-broadcast-hrtimer.c
25445 --- linux-4.4.46.orig/kernel/time/tick-broadcast-hrtimer.c 2017-02-01 08:31:11.000000000 +0100
25446 +++ linux-4.4.46/kernel/time/tick-broadcast-hrtimer.c 2017-02-03 17:18:10.939619522 +0100
25447 @@ -106,5 +106,6 @@
25449 hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
25450 bctimer.function = bc_handler;
25451 + bctimer.irqsafe = true;
25452 clockevents_register_device(&ce_broadcast_hrtimer);
25454 diff -Nur linux-4.4.46.orig/kernel/time/tick-common.c linux-4.4.46/kernel/time/tick-common.c
25455 --- linux-4.4.46.orig/kernel/time/tick-common.c 2017-02-01 08:31:11.000000000 +0100
25456 +++ linux-4.4.46/kernel/time/tick-common.c 2017-02-03 17:18:10.939619522 +0100
25457 @@ -79,13 +79,15 @@
25458 static void tick_periodic(int cpu)
25460 if (tick_do_timer_cpu == cpu) {
25461 - write_seqlock(&jiffies_lock);
25462 + raw_spin_lock(&jiffies_lock);
25463 + write_seqcount_begin(&jiffies_seq);
25465 /* Keep track of the next tick event */
25466 tick_next_period = ktime_add(tick_next_period, tick_period);
25469 - write_sequnlock(&jiffies_lock);
25470 + write_seqcount_end(&jiffies_seq);
25471 + raw_spin_unlock(&jiffies_lock);
25472 update_wall_time();
25475 @@ -157,9 +159,9 @@
25479 - seq = read_seqbegin(&jiffies_lock);
25480 + seq = read_seqcount_begin(&jiffies_seq);
25481 next = tick_next_period;
25482 - } while (read_seqretry(&jiffies_lock, seq));
25483 + } while (read_seqcount_retry(&jiffies_seq, seq));
25485 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
25487 diff -Nur linux-4.4.46.orig/kernel/time/tick-sched.c linux-4.4.46/kernel/time/tick-sched.c
25488 --- linux-4.4.46.orig/kernel/time/tick-sched.c 2017-02-01 08:31:11.000000000 +0100
25489 +++ linux-4.4.46/kernel/time/tick-sched.c 2017-02-03 17:18:10.939619522 +0100
25493 /* Reevaluate with jiffies_lock held */
25494 - write_seqlock(&jiffies_lock);
25495 + raw_spin_lock(&jiffies_lock);
25496 + write_seqcount_begin(&jiffies_seq);
25498 delta = ktime_sub(now, last_jiffies_update);
25499 if (delta.tv64 >= tick_period.tv64) {
25500 @@ -85,10 +86,12 @@
25501 /* Keep the tick_next_period variable up to date */
25502 tick_next_period = ktime_add(last_jiffies_update, tick_period);
25504 - write_sequnlock(&jiffies_lock);
25505 + write_seqcount_end(&jiffies_seq);
25506 + raw_spin_unlock(&jiffies_lock);
25509 - write_sequnlock(&jiffies_lock);
25510 + write_seqcount_end(&jiffies_seq);
25511 + raw_spin_unlock(&jiffies_lock);
25512 update_wall_time();
25515 @@ -99,12 +102,14 @@
25519 - write_seqlock(&jiffies_lock);
25520 + raw_spin_lock(&jiffies_lock);
25521 + write_seqcount_begin(&jiffies_seq);
25522 /* Did we start the jiffies update yet ? */
25523 if (last_jiffies_update.tv64 == 0)
25524 last_jiffies_update = tick_next_period;
25525 period = last_jiffies_update;
25526 - write_sequnlock(&jiffies_lock);
25527 + write_seqcount_end(&jiffies_seq);
25528 + raw_spin_unlock(&jiffies_lock);
25532 @@ -176,6 +181,11 @@
25536 + if (!arch_irq_work_has_interrupt()) {
25537 + trace_tick_stop(0, "missing irq work interrupt\n");
25541 /* sched_clock_tick() needs us? */
25542 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
25544 @@ -204,6 +214,7 @@
25546 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
25547 .func = nohz_full_kick_work_func,
25548 + .flags = IRQ_WORK_HARD_IRQ,
25552 @@ -578,10 +589,10 @@
25554 /* Read jiffies and the time when jiffies were updated last */
25556 - seq = read_seqbegin(&jiffies_lock);
25557 + seq = read_seqcount_begin(&jiffies_seq);
25558 basemono = last_jiffies_update.tv64;
25559 basejiff = jiffies;
25560 - } while (read_seqretry(&jiffies_lock, seq));
25561 + } while (read_seqcount_retry(&jiffies_seq, seq));
25562 ts->last_jiffies = basejiff;
25564 if (rcu_needs_cpu(basemono, &next_rcu) ||
25565 @@ -753,14 +764,7 @@
25568 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
25569 - static int ratelimit;
25571 - if (ratelimit < 10 &&
25572 - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
25573 - pr_warn("NOHZ: local_softirq_pending %02x\n",
25574 - (unsigned int) local_softirq_pending());
25577 + softirq_check_pending_idle();
25581 @@ -1100,6 +1104,7 @@
25582 * Emulate tick processing via per-CPU hrtimers:
25584 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
25585 + ts->sched_timer.irqsafe = 1;
25586 ts->sched_timer.function = tick_sched_timer;
25588 /* Get the next period (per cpu) */
25589 diff -Nur linux-4.4.46.orig/kernel/time/timekeeping.c linux-4.4.46/kernel/time/timekeeping.c
25590 --- linux-4.4.46.orig/kernel/time/timekeeping.c 2017-02-01 08:31:11.000000000 +0100
25591 +++ linux-4.4.46/kernel/time/timekeeping.c 2017-02-03 17:18:10.939619522 +0100
25592 @@ -2070,8 +2070,10 @@
25594 void xtime_update(unsigned long ticks)
25596 - write_seqlock(&jiffies_lock);
25597 + raw_spin_lock(&jiffies_lock);
25598 + write_seqcount_begin(&jiffies_seq);
25600 - write_sequnlock(&jiffies_lock);
25601 + write_seqcount_end(&jiffies_seq);
25602 + raw_spin_unlock(&jiffies_lock);
25603 update_wall_time();
25605 diff -Nur linux-4.4.46.orig/kernel/time/timekeeping.h linux-4.4.46/kernel/time/timekeeping.h
25606 --- linux-4.4.46.orig/kernel/time/timekeeping.h 2017-02-01 08:31:11.000000000 +0100
25607 +++ linux-4.4.46/kernel/time/timekeeping.h 2017-02-03 17:18:10.939619522 +0100
25609 extern void do_timer(unsigned long ticks);
25610 extern void update_wall_time(void);
25612 -extern seqlock_t jiffies_lock;
25613 +extern raw_spinlock_t jiffies_lock;
25614 +extern seqcount_t jiffies_seq;
25616 #define CS_NAME_LEN 32
25618 diff -Nur linux-4.4.46.orig/kernel/time/timer.c linux-4.4.46/kernel/time/timer.c
25619 --- linux-4.4.46.orig/kernel/time/timer.c 2017-02-01 08:31:11.000000000 +0100
25620 +++ linux-4.4.46/kernel/time/timer.c 2017-02-03 17:18:10.939619522 +0100
25624 struct timer_list *running_timer;
25625 +#ifdef CONFIG_PREEMPT_RT_FULL
25626 + wait_queue_head_t wait_for_running_timer;
25628 unsigned long timer_jiffies;
25629 unsigned long next_timer;
25630 unsigned long active_timers;
25631 @@ -777,6 +780,39 @@
25635 +#ifdef CONFIG_PREEMPT_RT_FULL
25636 +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
25637 + struct tvec_base *old,
25638 + struct tvec_base *new)
25641 + * We cannot do the below because we might be preempted and
25642 + * then the preempter would see NULL and loop forever.
25644 + if (spin_trylock(&new->lock)) {
25645 + WRITE_ONCE(timer->flags,
25646 + (timer->flags & ~TIMER_BASEMASK) | new->cpu);
25647 + spin_unlock(&old->lock);
25654 +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
25655 + struct tvec_base *old,
25656 + struct tvec_base *new)
25658 + /* See the comment in lock_timer_base() */
25659 + timer->flags |= TIMER_MIGRATING;
25661 + spin_unlock(&old->lock);
25662 + spin_lock(&new->lock);
25663 + WRITE_ONCE(timer->flags,
25664 + (timer->flags & ~TIMER_BASEMASK) | new->cpu);
25670 __mod_timer(struct timer_list *timer, unsigned long expires,
25671 @@ -807,16 +843,8 @@
25672 * handler yet has not finished. This also guarantees that
25673 * the timer is serialized wrt itself.
25675 - if (likely(base->running_timer != timer)) {
25676 - /* See the comment in lock_timer_base() */
25677 - timer->flags |= TIMER_MIGRATING;
25679 - spin_unlock(&base->lock);
25681 - spin_lock(&base->lock);
25682 - WRITE_ONCE(timer->flags,
25683 - (timer->flags & ~TIMER_BASEMASK) | base->cpu);
25685 + if (likely(base->running_timer != timer))
25686 + base = switch_timer_base(timer, base, new_base);
25689 timer->expires = expires;
25690 @@ -1006,6 +1034,33 @@
25692 EXPORT_SYMBOL_GPL(add_timer_on);
25694 +#ifdef CONFIG_PREEMPT_RT_FULL
25696 + * Wait for a running timer
25698 +static void wait_for_running_timer(struct timer_list *timer)
25700 + struct tvec_base *base;
25701 + u32 tf = timer->flags;
25703 + if (tf & TIMER_MIGRATING)
25706 + base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
25707 + wait_event(base->wait_for_running_timer,
25708 + base->running_timer != timer);
25711 +# define wakeup_timer_waiters(b) wake_up_all(&(b)->wait_for_running_timer)
25713 +static inline void wait_for_running_timer(struct timer_list *timer)
25718 +# define wakeup_timer_waiters(b) do { } while (0)
25722 * del_timer - deactivate a timer.
25723 * @timer: the timer to be deactivated
25724 @@ -1063,7 +1118,7 @@
25726 EXPORT_SYMBOL(try_to_del_timer_sync);
25729 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
25731 * del_timer_sync - deactivate a timer and wait for the handler to finish.
25732 * @timer: the timer to be deactivated
25733 @@ -1123,7 +1178,7 @@
25734 int ret = try_to_del_timer_sync(timer);
25738 + wait_for_running_timer(timer);
25741 EXPORT_SYMBOL(del_timer_sync);
25742 @@ -1248,16 +1303,18 @@
25744 spin_unlock(&base->lock);
25745 call_timer_fn(timer, fn, data);
25746 + base->running_timer = NULL;
25747 spin_lock(&base->lock);
25749 spin_unlock_irq(&base->lock);
25750 call_timer_fn(timer, fn, data);
25751 + base->running_timer = NULL;
25752 spin_lock_irq(&base->lock);
25756 - base->running_timer = NULL;
25757 spin_unlock_irq(&base->lock);
25758 + wakeup_timer_waiters(base);
25761 #ifdef CONFIG_NO_HZ_COMMON
25762 @@ -1390,6 +1447,14 @@
25763 if (cpu_is_offline(smp_processor_id()))
25766 +#ifdef CONFIG_PREEMPT_RT_FULL
25768 + * On PREEMPT_RT we cannot sleep here. As a result we can't take
25769 + * the base lock to check when the next timer is pending and so
25770 + * we assume the next jiffy.
25772 + return basem + TICK_NSEC;
25774 spin_lock(&base->lock);
25775 if (base->active_timers) {
25776 if (time_before_eq(base->next_timer, base->timer_jiffies))
25777 @@ -1416,13 +1481,13 @@
25779 /* Note: this timer irq context must be accounted for as well. */
25780 account_process_tick(p, user_tick);
25781 + scheduler_tick();
25782 run_local_timers();
25783 rcu_check_callbacks(user_tick);
25784 -#ifdef CONFIG_IRQ_WORK
25785 +#if defined(CONFIG_IRQ_WORK)
25789 - scheduler_tick();
25790 run_posix_cpu_timers(p);
25793 @@ -1433,6 +1498,8 @@
25795 struct tvec_base *base = this_cpu_ptr(&tvec_bases);
25797 + irq_work_tick_soft();
25799 if (time_after_eq(jiffies, base->timer_jiffies))
25800 __run_timers(base);
25802 @@ -1589,7 +1656,7 @@
25804 BUG_ON(cpu_online(cpu));
25805 old_base = per_cpu_ptr(&tvec_bases, cpu);
25806 - new_base = get_cpu_ptr(&tvec_bases);
25807 + new_base = get_local_ptr(&tvec_bases);
25809 * The caller is globally serialized and nobody else
25810 * takes two locks at once, deadlock is not possible.
25811 @@ -1613,7 +1680,7 @@
25813 spin_unlock(&old_base->lock);
25814 spin_unlock_irq(&new_base->lock);
25815 - put_cpu_ptr(&tvec_bases);
25816 + put_local_ptr(&tvec_bases);
25819 static int timer_cpu_notify(struct notifier_block *self,
25820 @@ -1645,6 +1712,9 @@
25823 spin_lock_init(&base->lock);
25824 +#ifdef CONFIG_PREEMPT_RT_FULL
25825 + init_waitqueue_head(&base->wait_for_running_timer);
25828 base->timer_jiffies = jiffies;
25829 base->next_timer = base->timer_jiffies;
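The timer.c hunk above replaces the busy-wait in del_timer_sync() with a per-base wait queue on PREEMPT_RT: the thread running the expired callbacks clears running_timer and wakes any waiters, while del_timer_sync() sleeps until its timer is no longer the one executing. Below is a minimal userspace analogue of that handshake, a sketch only; the names softirq_thread and del_timer_sync_rt are illustrative and do not exist in the kernel.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t base_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_for_running_timer = PTHREAD_COND_INITIALIZER;
static void *running_timer;                /* which callback is currently executing */

static void *softirq_thread(void *timer)   /* stands in for the callback-running side */
{
	pthread_mutex_lock(&base_lock);
	running_timer = timer;
	pthread_mutex_unlock(&base_lock);

	usleep(1000);                      /* the timer callback body */

	pthread_mutex_lock(&base_lock);
	running_timer = NULL;              /* mirrors base->running_timer = NULL */
	pthread_cond_broadcast(&wait_for_running_timer);  /* like wakeup_timer_waiters() */
	pthread_mutex_unlock(&base_lock);
	return NULL;
}

static void del_timer_sync_rt(void *timer) /* the RT wait-for-running-timer path */
{
	pthread_mutex_lock(&base_lock);
	while (running_timer == timer)     /* like wait_event(base->wait_for_running_timer, ...) */
		pthread_cond_wait(&wait_for_running_timer, &base_lock);
	pthread_mutex_unlock(&base_lock);
}

int main(void)
{
	int my_timer;                      /* any unique address serves as the "timer" */
	pthread_t tid;

	pthread_create(&tid, NULL, softirq_thread, &my_timer);
	usleep(100);                       /* let the "callback" start running */
	del_timer_sync_rt(&my_timer);      /* sleeps instead of spinning */
	pthread_join(tid, NULL);
	puts("timer callback finished, deletion is safe");
	return 0;
}

Build with -pthread; the only point is that the waiter blocks on a wait queue rather than spinning, which is what makes the deletion path safe when the callback side can be preempted.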
25830 diff -Nur linux-4.4.46.orig/kernel/trace/Kconfig linux-4.4.46/kernel/trace/Kconfig
25831 --- linux-4.4.46.orig/kernel/trace/Kconfig 2017-02-01 08:31:11.000000000 +0100
25832 +++ linux-4.4.46/kernel/trace/Kconfig 2017-02-03 17:18:10.939619522 +0100
25833 @@ -187,6 +187,24 @@
25834 enabled. This option and the preempt-off timing option can be
25835 used together or separately.)
25837 +config INTERRUPT_OFF_HIST
25838 + bool "Interrupts-off Latency Histogram"
25839 + depends on IRQSOFF_TRACER
25841 + This option generates continuously updated histograms (one per cpu)
25842 + of the duration of time periods with interrupts disabled. The
25843 + histograms are disabled by default. To enable them, write a non-zero
25846 + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
25848 + If PREEMPT_OFF_HIST is also selected, additional histograms (one
25849 + per cpu) are generated that accumulate the duration of time periods
25850 + when both interrupts and preemption are disabled. The histogram data
25851 + will be located in the debug file system at
25853 + /sys/kernel/debug/tracing/latency_hist/irqsoff
25855 config PREEMPT_TRACER
25856 bool "Preemption-off Latency Tracer"
25858 @@ -211,6 +229,24 @@
25859 enabled. This option and the irqs-off timing option can be
25860 used together or separately.)
25862 +config PREEMPT_OFF_HIST
25863 + bool "Preemption-off Latency Histogram"
25864 + depends on PREEMPT_TRACER
25866 + This option generates continuously updated histograms (one per cpu)
25867 + of the duration of time periods with preemption disabled. The
25868 + histograms are disabled by default. To enable them, write a non-zero
25871 + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
25873 + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
25874 + per cpu) are generated that accumulate the duration of time periods
25875 + when both interrupts and preemption are disabled. The histogram data
25876 + will be located in the debug file system at
25878 + /sys/kernel/debug/tracing/latency_hist/preemptoff
25880 config SCHED_TRACER
25881 bool "Scheduling Latency Tracer"
25882 select GENERIC_TRACER
25883 @@ -221,6 +257,74 @@
25884 This tracer tracks the latency of the highest priority task
25885 to be scheduled in, starting from the point it has woken up.
25887 +config WAKEUP_LATENCY_HIST
25888 + bool "Scheduling Latency Histogram"
25889 + depends on SCHED_TRACER
25891 + This option generates continuously updated histograms (one per cpu)
25892 + of the scheduling latency of the highest priority task.
25893 + The histograms are disabled by default. To enable them, write a
25894 + non-zero number to
25896 + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
25898 + Two different algorithms are used, one to determine the latency of
25899 + processes that exclusively use the highest priority of the system and
25900 + another one to determine the latency of processes that share the
25901 + highest system priority with other processes. The former is used to
25902 + improve hardware and system software, the latter to optimize the
25903 + priority design of a given system. The histogram data will be
25904 + located in the debug file system at
25906 + /sys/kernel/debug/tracing/latency_hist/wakeup
25910 + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
25912 + If both Scheduling Latency Histogram and Missed Timer Offsets
25913 + Histogram are selected, additional histogram data will be collected
25914 + that contain, in addition to the wakeup latency, the timer latency, in
25915 + case the wakeup was triggered by an expired timer. These histograms
25916 + are available in the
25918 + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
25920 + directory. They reflect the apparent interrupt and scheduling latency
25921 + and are best suited to determine the worst-case latency of a given
25922 + system. To enable these histograms, write a non-zero number to
25924 + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
25926 +config MISSED_TIMER_OFFSETS_HIST
25927 + depends on HIGH_RES_TIMERS
25928 + select GENERIC_TRACER
25929 + bool "Missed Timer Offsets Histogram"
25931 + Generate a histogram of missed timer offsets in microseconds. The
25932 + histograms are disabled by default. To enable them, write a non-zero
25935 + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
25937 + The histogram data will be located in the debug file system at
25939 + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
25941 + If both Scheduling Latency Histogram and Missed Timer Offsets
25942 + Histogram are selected, additional histogram data will be collected
25943 + that contain, in addition to the wakeup latency, the timer latency, in
25944 + case the wakeup was triggered by an expired timer. These histograms
25945 + are available in the
25947 + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
25949 + directory. They reflect the apparent interrupt and scheduling latency
25950 + and are best suited to determine the worst-case latency of a given
25951 + system. To enable these histograms, write a non-zero number to
25953 + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
25955 config ENABLE_DEFAULT_TRACERS
25956 bool "Trace process context switches and events"
25957 depends on !GENERIC_TRACER
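The histogram options above all follow the same pattern: write a non-zero value to a file under /sys/kernel/debug/tracing/latency_hist/enable/ to start collecting, then read the per-CPU results from the matching directory. A minimal userspace sketch of that workflow follows, assuming debugfs is mounted at /sys/kernel/debug, the relevant *_HIST options are built in, and per-CPU files are named CPU0, CPU1, ... as created by latency_hist.c below.

#include <stdio.h>

int main(void)
{
	/* Enable the combined preempt/irqs-off histograms. */
	FILE *en = fopen("/sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff", "w");
	if (!en) {
		perror("enable/preemptirqsoff");
		return 1;
	}
	fputs("1\n", en);
	fclose(en);

	/* ... run the workload of interest, then dump one per-CPU histogram. */
	FILE *h = fopen("/sys/kernel/debug/tracing/latency_hist/irqsoff/CPU0", "r");
	if (!h) {
		perror("irqsoff/CPU0");
		return 1;
	}
	char line[256];
	while (fgets(line, sizeof(line), h))
		fputs(line, stdout);   /* "#Minimum latency: ..." header, then usecs/count pairs */
	fclose(h);
	return 0;
}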
25958 diff -Nur linux-4.4.46.orig/kernel/trace/latency_hist.c linux-4.4.46/kernel/trace/latency_hist.c
25959 --- linux-4.4.46.orig/kernel/trace/latency_hist.c 1970-01-01 01:00:00.000000000 +0100
25960 +++ linux-4.4.46/kernel/trace/latency_hist.c 2017-02-03 17:18:10.939619522 +0100
25963 + * kernel/trace/latency_hist.c
25965 + * Add support for histograms of preemption-off latency and
25966 + * interrupt-off latency and wakeup latency; it depends on
25967 + * Real-Time Preemption Support.
25969 + * Copyright (C) 2005 MontaVista Software, Inc.
25970 + * Yi Yang <yyang@ch.mvista.com>
25972 + * Converted to work with the new latency tracer.
25973 + * Copyright (C) 2008 Red Hat, Inc.
25974 + * Steven Rostedt <srostedt@redhat.com>
25977 +#include <linux/module.h>
25978 +#include <linux/debugfs.h>
25979 +#include <linux/seq_file.h>
25980 +#include <linux/percpu.h>
25981 +#include <linux/kallsyms.h>
25982 +#include <linux/uaccess.h>
25983 +#include <linux/sched.h>
25984 +#include <linux/sched/rt.h>
25985 +#include <linux/slab.h>
25986 +#include <linux/atomic.h>
25987 +#include <asm/div64.h>
25989 +#include "trace.h"
25990 +#include <trace/events/sched.h>
25992 +#define NSECS_PER_USECS 1000L
25994 +#define CREATE_TRACE_POINTS
25995 +#include <trace/events/hist.h>
25998 + IRQSOFF_LATENCY = 0,
25999 + PREEMPTOFF_LATENCY,
26000 + PREEMPTIRQSOFF_LATENCY,
26002 + WAKEUP_LATENCY_SHAREDPRIO,
26003 + MISSED_TIMER_OFFSETS,
26004 + TIMERANDWAKEUP_LATENCY,
26005 + MAX_LATENCY_TYPE,
26008 +#define MAX_ENTRY_NUM 10240
26010 +struct hist_data {
26011 + atomic_t hist_mode; /* 0 => don't log, 1 => log */
26012 + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
26015 + unsigned long long below_hist_bound_samples;
26016 + unsigned long long above_hist_bound_samples;
26017 + long long accumulate_lat;
26018 + unsigned long long total_samples;
26019 + unsigned long long hist_array[MAX_ENTRY_NUM];
26022 +struct enable_data {
26023 + int latency_type;
26027 +static char *latency_hist_dir_root = "latency_hist";
26029 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26030 +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
26031 +static char *irqsoff_hist_dir = "irqsoff";
26032 +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
26033 +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
26036 +#ifdef CONFIG_PREEMPT_OFF_HIST
26037 +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
26038 +static char *preemptoff_hist_dir = "preemptoff";
26039 +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
26040 +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
26043 +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
26044 +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
26045 +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
26046 +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
26047 +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
26050 +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
26051 +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
26052 +static struct enable_data preemptirqsoff_enabled_data = {
26053 + .latency_type = PREEMPTIRQSOFF_LATENCY,
26058 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26059 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26060 +struct maxlatproc_data {
26061 + char comm[FIELD_SIZEOF(struct task_struct, comm)];
26062 + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
26066 + int current_prio;
26068 + long timeroffset;
26069 + cycle_t timestamp;
26073 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26074 +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
26075 +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
26076 +static char *wakeup_latency_hist_dir = "wakeup";
26077 +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
26078 +static notrace void probe_wakeup_latency_hist_start(void *v,
26079 + struct task_struct *p);
26080 +static notrace void probe_wakeup_latency_hist_stop(void *v,
26081 + bool preempt, struct task_struct *prev, struct task_struct *next);
26082 +static notrace void probe_sched_migrate_task(void *,
26083 + struct task_struct *task, int cpu);
26084 +static struct enable_data wakeup_latency_enabled_data = {
26085 + .latency_type = WAKEUP_LATENCY,
26088 +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
26089 +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
26090 +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
26091 +static DEFINE_PER_CPU(int, wakeup_sharedprio);
26092 +static unsigned long wakeup_pid;
26095 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26096 +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
26097 +static char *missed_timer_offsets_dir = "missed_timer_offsets";
26098 +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
26099 + long long offset, struct task_struct *curr, struct task_struct *task);
26100 +static struct enable_data missed_timer_offsets_enabled_data = {
26101 + .latency_type = MISSED_TIMER_OFFSETS,
26104 +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
26105 +static unsigned long missed_timer_offsets_pid;
26108 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26109 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26110 +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
26111 +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
26112 +static struct enable_data timerandwakeup_enabled_data = {
26113 + .latency_type = TIMERANDWAKEUP_LATENCY,
26116 +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
26119 +void notrace latency_hist(int latency_type, int cpu, long latency,
26120 + long timeroffset, cycle_t stop,
26121 + struct task_struct *p)
26123 + struct hist_data *my_hist;
26124 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26125 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26126 + struct maxlatproc_data *mp = NULL;
26129 + if (!cpu_possible(cpu) || latency_type < 0 ||
26130 + latency_type >= MAX_LATENCY_TYPE)
26133 + switch (latency_type) {
26134 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26135 + case IRQSOFF_LATENCY:
26136 + my_hist = &per_cpu(irqsoff_hist, cpu);
26139 +#ifdef CONFIG_PREEMPT_OFF_HIST
26140 + case PREEMPTOFF_LATENCY:
26141 + my_hist = &per_cpu(preemptoff_hist, cpu);
26144 +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
26145 + case PREEMPTIRQSOFF_LATENCY:
26146 + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
26149 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26150 + case WAKEUP_LATENCY:
26151 + my_hist = &per_cpu(wakeup_latency_hist, cpu);
26152 + mp = &per_cpu(wakeup_maxlatproc, cpu);
26154 + case WAKEUP_LATENCY_SHAREDPRIO:
26155 + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
26156 + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
26159 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26160 + case MISSED_TIMER_OFFSETS:
26161 + my_hist = &per_cpu(missed_timer_offsets, cpu);
26162 + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
26165 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26166 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26167 + case TIMERANDWAKEUP_LATENCY:
26168 + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
26169 + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
26177 + latency += my_hist->offset;
26179 + if (atomic_read(&my_hist->hist_mode) == 0)
26182 + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
26184 + my_hist->below_hist_bound_samples++;
26186 + my_hist->above_hist_bound_samples++;
26188 + my_hist->hist_array[latency]++;
26190 + if (unlikely(latency > my_hist->max_lat ||
26191 + my_hist->min_lat == LONG_MAX)) {
26192 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26193 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26194 + if (latency_type == WAKEUP_LATENCY ||
26195 + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
26196 + latency_type == MISSED_TIMER_OFFSETS ||
26197 + latency_type == TIMERANDWAKEUP_LATENCY) {
26198 + strncpy(mp->comm, p->comm, sizeof(mp->comm));
26199 + strncpy(mp->current_comm, current->comm,
26200 + sizeof(mp->current_comm));
26201 + mp->pid = task_pid_nr(p);
26202 + mp->current_pid = task_pid_nr(current);
26203 + mp->prio = p->prio;
26204 + mp->current_prio = current->prio;
26205 + mp->latency = latency;
26206 + mp->timeroffset = timeroffset;
26207 + mp->timestamp = stop;
26210 + my_hist->max_lat = latency;
26212 + if (unlikely(latency < my_hist->min_lat))
26213 + my_hist->min_lat = latency;
26214 + my_hist->total_samples++;
26215 + my_hist->accumulate_lat += latency;
26218 +static void *l_start(struct seq_file *m, loff_t *pos)
26220 + loff_t *index_ptr = NULL;
26221 + loff_t index = *pos;
26222 + struct hist_data *my_hist = m->private;
26224 + if (index == 0) {
26225 + char minstr[32], avgstr[32], maxstr[32];
26227 + atomic_dec(&my_hist->hist_mode);
26229 + if (likely(my_hist->total_samples)) {
26230 + long avg = (long) div64_s64(my_hist->accumulate_lat,
26231 + my_hist->total_samples);
26232 + snprintf(minstr, sizeof(minstr), "%ld",
26233 + my_hist->min_lat - my_hist->offset);
26234 + snprintf(avgstr, sizeof(avgstr), "%ld",
26235 + avg - my_hist->offset);
26236 + snprintf(maxstr, sizeof(maxstr), "%ld",
26237 + my_hist->max_lat - my_hist->offset);
26239 + strcpy(minstr, "<undef>");
26240 + strcpy(avgstr, minstr);
26241 + strcpy(maxstr, minstr);
26244 + seq_printf(m, "#Minimum latency: %s microseconds\n"
26245 + "#Average latency: %s microseconds\n"
26246 + "#Maximum latency: %s microseconds\n"
26247 + "#Total samples: %llu\n"
26248 + "#There are %llu samples lower than %ld"
26249 + " microseconds.\n"
26250 + "#There are %llu samples greater or equal"
26251 + " than %ld microseconds.\n"
26252 + "#usecs\t%16s\n",
26253 + minstr, avgstr, maxstr,
26254 + my_hist->total_samples,
26255 + my_hist->below_hist_bound_samples,
26256 + -my_hist->offset,
26257 + my_hist->above_hist_bound_samples,
26258 + MAX_ENTRY_NUM - my_hist->offset,
26261 + if (index < MAX_ENTRY_NUM) {
26262 + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
26264 + *index_ptr = index;
26267 + return index_ptr;
26270 +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
26272 + loff_t *index_ptr = p;
26273 + struct hist_data *my_hist = m->private;
26275 + if (++*pos >= MAX_ENTRY_NUM) {
26276 + atomic_inc(&my_hist->hist_mode);
26279 + *index_ptr = *pos;
26280 + return index_ptr;
26283 +static void l_stop(struct seq_file *m, void *p)
26288 +static int l_show(struct seq_file *m, void *p)
26290 + int index = *(loff_t *) p;
26291 + struct hist_data *my_hist = m->private;
26293 + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
26294 + my_hist->hist_array[index]);
26298 +static const struct seq_operations latency_hist_seq_op = {
26299 + .start = l_start,
26305 +static int latency_hist_open(struct inode *inode, struct file *file)
26309 + ret = seq_open(file, &latency_hist_seq_op);
26311 + struct seq_file *seq = file->private_data;
26312 + seq->private = inode->i_private;
26317 +static const struct file_operations latency_hist_fops = {
26318 + .open = latency_hist_open,
26319 + .read = seq_read,
26320 + .llseek = seq_lseek,
26321 + .release = seq_release,
26324 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26325 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26326 +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
26328 + mp->comm[0] = mp->current_comm[0] = '\0';
26329 + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
26330 + mp->latency = mp->timeroffset = -1;
26331 + mp->timestamp = 0;
26335 +static void hist_reset(struct hist_data *hist)
26337 + atomic_dec(&hist->hist_mode);
26339 + memset(hist->hist_array, 0, sizeof(hist->hist_array));
26340 + hist->below_hist_bound_samples = 0ULL;
26341 + hist->above_hist_bound_samples = 0ULL;
26342 + hist->min_lat = LONG_MAX;
26343 + hist->max_lat = LONG_MIN;
26344 + hist->total_samples = 0ULL;
26345 + hist->accumulate_lat = 0LL;
26347 + atomic_inc(&hist->hist_mode);
26351 +latency_hist_reset(struct file *file, const char __user *a,
26352 + size_t size, loff_t *off)
26355 + struct hist_data *hist = NULL;
26356 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26357 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26358 + struct maxlatproc_data *mp = NULL;
26360 + off_t latency_type = (off_t) file->private_data;
26362 + for_each_online_cpu(cpu) {
26364 + switch (latency_type) {
26365 +#ifdef CONFIG_PREEMPT_OFF_HIST
26366 + case PREEMPTOFF_LATENCY:
26367 + hist = &per_cpu(preemptoff_hist, cpu);
26370 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26371 + case IRQSOFF_LATENCY:
26372 + hist = &per_cpu(irqsoff_hist, cpu);
26375 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26376 + case PREEMPTIRQSOFF_LATENCY:
26377 + hist = &per_cpu(preemptirqsoff_hist, cpu);
26380 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26381 + case WAKEUP_LATENCY:
26382 + hist = &per_cpu(wakeup_latency_hist, cpu);
26383 + mp = &per_cpu(wakeup_maxlatproc, cpu);
26385 + case WAKEUP_LATENCY_SHAREDPRIO:
26386 + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
26387 + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
26390 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26391 + case MISSED_TIMER_OFFSETS:
26392 + hist = &per_cpu(missed_timer_offsets, cpu);
26393 + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
26396 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26397 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26398 + case TIMERANDWAKEUP_LATENCY:
26399 + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
26400 + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
26405 + hist_reset(hist);
26406 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26407 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26408 + if (latency_type == WAKEUP_LATENCY ||
26409 + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
26410 + latency_type == MISSED_TIMER_OFFSETS ||
26411 + latency_type == TIMERANDWAKEUP_LATENCY)
26412 + clear_maxlatprocdata(mp);
26419 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26420 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26422 +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
26426 + unsigned long *this_pid = file->private_data;
26428 + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
26429 + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
26432 +static ssize_t do_pid(struct file *file, const char __user *ubuf,
26433 + size_t cnt, loff_t *ppos)
26436 + unsigned long pid;
26437 + unsigned long *this_pid = file->private_data;
26439 + if (cnt >= sizeof(buf))
26442 + if (copy_from_user(&buf, ubuf, cnt))
26447 + if (kstrtoul(buf, 10, &pid))
26456 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26457 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26459 +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
26462 + struct maxlatproc_data *mp = file->private_data;
26463 + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
26464 + unsigned long long t;
26465 + unsigned long usecs, secs;
26468 + if (mp->pid == -1 || mp->current_pid == -1) {
26469 + buf = "(none)\n";
26470 + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
26474 + buf = kmalloc(strmaxlen, GFP_KERNEL);
26478 + t = ns2usecs(mp->timestamp);
26479 + usecs = do_div(t, USEC_PER_SEC);
26480 + secs = (unsigned long) t;
26481 + r = snprintf(buf, strmaxlen,
26482 + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
26483 + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
26484 + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
26486 + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
26493 +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
26496 + struct enable_data *ed = file->private_data;
26499 + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
26500 + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
26504 +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
26508 + struct enable_data *ed = file->private_data;
26510 + if (cnt >= sizeof(buf))
26513 + if (copy_from_user(&buf, ubuf, cnt))
26518 + if (kstrtoul(buf, 10, &enable))
26521 + if ((enable && ed->enabled) || (!enable && !ed->enabled))
26527 + switch (ed->latency_type) {
26528 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
26529 + case PREEMPTIRQSOFF_LATENCY:
26530 + ret = register_trace_preemptirqsoff_hist(
26531 + probe_preemptirqsoff_hist, NULL);
26533 + pr_info("wakeup trace: Couldn't assign "
26534 + "probe_preemptirqsoff_hist "
26535 + "to trace_preemptirqsoff_hist\n");
26540 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26541 + case WAKEUP_LATENCY:
26542 + ret = register_trace_sched_wakeup(
26543 + probe_wakeup_latency_hist_start, NULL);
26545 + pr_info("wakeup trace: Couldn't assign "
26546 + "probe_wakeup_latency_hist_start "
26547 + "to trace_sched_wakeup\n");
26550 + ret = register_trace_sched_wakeup_new(
26551 + probe_wakeup_latency_hist_start, NULL);
26553 + pr_info("wakeup trace: Couldn't assign "
26554 + "probe_wakeup_latency_hist_start "
26555 + "to trace_sched_wakeup_new\n");
26556 + unregister_trace_sched_wakeup(
26557 + probe_wakeup_latency_hist_start, NULL);
26560 + ret = register_trace_sched_switch(
26561 + probe_wakeup_latency_hist_stop, NULL);
26563 + pr_info("wakeup trace: Couldn't assign "
26564 + "probe_wakeup_latency_hist_stop "
26565 + "to trace_sched_switch\n");
26566 + unregister_trace_sched_wakeup(
26567 + probe_wakeup_latency_hist_start, NULL);
26568 + unregister_trace_sched_wakeup_new(
26569 + probe_wakeup_latency_hist_start, NULL);
26572 + ret = register_trace_sched_migrate_task(
26573 + probe_sched_migrate_task, NULL);
26575 + pr_info("wakeup trace: Couldn't assign "
26576 + "probe_sched_migrate_task "
26577 + "to trace_sched_migrate_task\n");
26578 + unregister_trace_sched_wakeup(
26579 + probe_wakeup_latency_hist_start, NULL);
26580 + unregister_trace_sched_wakeup_new(
26581 + probe_wakeup_latency_hist_start, NULL);
26582 + unregister_trace_sched_switch(
26583 + probe_wakeup_latency_hist_stop, NULL);
26588 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26589 + case MISSED_TIMER_OFFSETS:
26590 + ret = register_trace_hrtimer_interrupt(
26591 + probe_hrtimer_interrupt, NULL);
26593 + pr_info("wakeup trace: Couldn't assign "
26594 + "probe_hrtimer_interrupt "
26595 + "to trace_hrtimer_interrupt\n");
26600 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26601 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26602 + case TIMERANDWAKEUP_LATENCY:
26603 + if (!wakeup_latency_enabled_data.enabled ||
26604 + !missed_timer_offsets_enabled_data.enabled)
26612 + switch (ed->latency_type) {
26613 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
26614 + case PREEMPTIRQSOFF_LATENCY:
26618 + unregister_trace_preemptirqsoff_hist(
26619 + probe_preemptirqsoff_hist, NULL);
26620 + for_each_online_cpu(cpu) {
26621 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26622 + per_cpu(hist_irqsoff_counting,
26625 +#ifdef CONFIG_PREEMPT_OFF_HIST
26626 + per_cpu(hist_preemptoff_counting,
26629 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26630 + per_cpu(hist_preemptirqsoff_counting,
26637 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26638 + case WAKEUP_LATENCY:
26642 + unregister_trace_sched_wakeup(
26643 + probe_wakeup_latency_hist_start, NULL);
26644 + unregister_trace_sched_wakeup_new(
26645 + probe_wakeup_latency_hist_start, NULL);
26646 + unregister_trace_sched_switch(
26647 + probe_wakeup_latency_hist_stop, NULL);
26648 + unregister_trace_sched_migrate_task(
26649 + probe_sched_migrate_task, NULL);
26651 + for_each_online_cpu(cpu) {
26652 + per_cpu(wakeup_task, cpu) = NULL;
26653 + per_cpu(wakeup_sharedprio, cpu) = 0;
26656 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26657 + timerandwakeup_enabled_data.enabled = 0;
26661 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26662 + case MISSED_TIMER_OFFSETS:
26663 + unregister_trace_hrtimer_interrupt(
26664 + probe_hrtimer_interrupt, NULL);
26665 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26666 + timerandwakeup_enabled_data.enabled = 0;
26674 + ed->enabled = enable;
26678 +static const struct file_operations latency_hist_reset_fops = {
26679 + .open = tracing_open_generic,
26680 + .write = latency_hist_reset,
26683 +static const struct file_operations enable_fops = {
26684 + .open = tracing_open_generic,
26685 + .read = show_enable,
26686 + .write = do_enable,
26689 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26690 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26691 +static const struct file_operations pid_fops = {
26692 + .open = tracing_open_generic,
26693 + .read = show_pid,
26697 +static const struct file_operations maxlatproc_fops = {
26698 + .open = tracing_open_generic,
26699 + .read = show_maxlatproc,
26703 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
26704 +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
26707 + int cpu = raw_smp_processor_id();
26708 + int time_set = 0;
26711 + cycle_t uninitialized_var(start);
26713 + if (!preempt_count() && !irqs_disabled())
26716 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26717 + if ((reason == IRQS_OFF || reason == TRACE_START) &&
26718 + !per_cpu(hist_irqsoff_counting, cpu)) {
26719 + per_cpu(hist_irqsoff_counting, cpu) = 1;
26720 + start = ftrace_now(cpu);
26722 + per_cpu(hist_irqsoff_start, cpu) = start;
26726 +#ifdef CONFIG_PREEMPT_OFF_HIST
26727 + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
26728 + !per_cpu(hist_preemptoff_counting, cpu)) {
26729 + per_cpu(hist_preemptoff_counting, cpu) = 1;
26730 + if (!(time_set++))
26731 + start = ftrace_now(cpu);
26732 + per_cpu(hist_preemptoff_start, cpu) = start;
26736 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26737 + if (per_cpu(hist_irqsoff_counting, cpu) &&
26738 + per_cpu(hist_preemptoff_counting, cpu) &&
26739 + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
26740 + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
26742 + start = ftrace_now(cpu);
26743 + per_cpu(hist_preemptirqsoff_start, cpu) = start;
26747 + cycle_t uninitialized_var(stop);
26749 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26750 + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
26751 + per_cpu(hist_irqsoff_counting, cpu)) {
26752 + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
26754 + stop = ftrace_now(cpu);
26757 + long latency = ((long) (stop - start)) /
26760 + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
26763 + per_cpu(hist_irqsoff_counting, cpu) = 0;
26767 +#ifdef CONFIG_PREEMPT_OFF_HIST
26768 + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
26769 + per_cpu(hist_preemptoff_counting, cpu)) {
26770 + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
26772 + if (!(time_set++))
26773 + stop = ftrace_now(cpu);
26775 + long latency = ((long) (stop - start)) /
26778 + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
26781 + per_cpu(hist_preemptoff_counting, cpu) = 0;
26785 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26786 + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
26787 + !per_cpu(hist_preemptoff_counting, cpu)) &&
26788 + per_cpu(hist_preemptirqsoff_counting, cpu)) {
26789 + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
26792 + stop = ftrace_now(cpu);
26794 + long latency = ((long) (stop - start)) /
26797 + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
26798 + latency, 0, stop, NULL);
26800 + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
26807 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26808 +static DEFINE_RAW_SPINLOCK(wakeup_lock);
26809 +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
26812 + int old_cpu = task_cpu(task);
26814 + if (cpu != old_cpu) {
26815 + unsigned long flags;
26816 + struct task_struct *cpu_wakeup_task;
26818 + raw_spin_lock_irqsave(&wakeup_lock, flags);
26820 + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
26821 + if (task == cpu_wakeup_task) {
26822 + put_task_struct(cpu_wakeup_task);
26823 + per_cpu(wakeup_task, old_cpu) = NULL;
26824 + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
26825 + get_task_struct(cpu_wakeup_task);
26828 + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
26832 +static notrace void probe_wakeup_latency_hist_start(void *v,
26833 + struct task_struct *p)
26835 + unsigned long flags;
26836 + struct task_struct *curr = current;
26837 + int cpu = task_cpu(p);
26838 + struct task_struct *cpu_wakeup_task;
26840 + raw_spin_lock_irqsave(&wakeup_lock, flags);
26842 + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
26844 + if (wakeup_pid) {
26845 + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
26846 + p->prio == curr->prio)
26847 + per_cpu(wakeup_sharedprio, cpu) = 1;
26848 + if (likely(wakeup_pid != task_pid_nr(p)))
26851 + if (likely(!rt_task(p)) ||
26852 + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
26853 + p->prio > curr->prio)
26855 + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
26856 + p->prio == curr->prio)
26857 + per_cpu(wakeup_sharedprio, cpu) = 1;
26860 + if (cpu_wakeup_task)
26861 + put_task_struct(cpu_wakeup_task);
26862 + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
26863 + get_task_struct(cpu_wakeup_task);
26864 + cpu_wakeup_task->preempt_timestamp_hist =
26865 + ftrace_now(raw_smp_processor_id());
26867 + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
26870 +static notrace void probe_wakeup_latency_hist_stop(void *v,
26871 + bool preempt, struct task_struct *prev, struct task_struct *next)
26873 + unsigned long flags;
26874 + int cpu = task_cpu(next);
26877 + struct task_struct *cpu_wakeup_task;
26879 + raw_spin_lock_irqsave(&wakeup_lock, flags);
26881 + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
26883 + if (cpu_wakeup_task == NULL)
26886 + /* Already running? */
26887 + if (unlikely(current == cpu_wakeup_task))
26890 + if (next != cpu_wakeup_task) {
26891 + if (next->prio < cpu_wakeup_task->prio)
26894 + if (next->prio == cpu_wakeup_task->prio)
26895 + per_cpu(wakeup_sharedprio, cpu) = 1;
26900 + if (current->prio == cpu_wakeup_task->prio)
26901 + per_cpu(wakeup_sharedprio, cpu) = 1;
26904 + * The task we are waiting for is about to be switched to.
26905 + * Calculate latency and store it in histogram.
26907 + stop = ftrace_now(raw_smp_processor_id());
26909 + latency = ((long) (stop - next->preempt_timestamp_hist)) /
26912 + if (per_cpu(wakeup_sharedprio, cpu)) {
26913 + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
26915 + per_cpu(wakeup_sharedprio, cpu) = 0;
26917 + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
26918 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26919 + if (timerandwakeup_enabled_data.enabled) {
26920 + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
26921 + next->timer_offset + latency, next->timer_offset,
26928 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26929 + next->timer_offset = 0;
26931 + put_task_struct(cpu_wakeup_task);
26932 + per_cpu(wakeup_task, cpu) = NULL;
26934 + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
26938 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26939 +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
26940 + long long latency_ns, struct task_struct *curr,
26941 + struct task_struct *task)
26943 + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
26944 + (task->prio < curr->prio ||
26945 + (task->prio == curr->prio &&
26946 + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
26950 + if (missed_timer_offsets_pid) {
26951 + if (likely(missed_timer_offsets_pid !=
26952 + task_pid_nr(task)))
26956 + now = ftrace_now(cpu);
26957 + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
26958 + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
26960 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26961 + task->timer_offset = latency;
26967 +static __init int latency_hist_init(void)
26969 + struct dentry *latency_hist_root = NULL;
26970 + struct dentry *dentry;
26971 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26972 + struct dentry *dentry_sharedprio;
26974 + struct dentry *entry;
26975 + struct dentry *enable_root;
26977 + struct hist_data *my_hist;
26979 + char *cpufmt = "CPU%d";
26980 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26981 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26982 + char *cpufmt_maxlatproc = "max_latency-CPU%d";
26983 + struct maxlatproc_data *mp = NULL;
26986 + dentry = tracing_init_dentry();
26987 + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
26988 + enable_root = debugfs_create_dir("enable", latency_hist_root);
26990 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26991 + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
26992 + for_each_possible_cpu(i) {
26993 + sprintf(name, cpufmt, i);
26994 + entry = debugfs_create_file(name, 0444, dentry,
26995 + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
26996 + my_hist = &per_cpu(irqsoff_hist, i);
26997 + atomic_set(&my_hist->hist_mode, 1);
26998 + my_hist->min_lat = LONG_MAX;
27000 + entry = debugfs_create_file("reset", 0644, dentry,
27001 + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
27004 +#ifdef CONFIG_PREEMPT_OFF_HIST
27005 + dentry = debugfs_create_dir(preemptoff_hist_dir,
27006 + latency_hist_root);
27007 + for_each_possible_cpu(i) {
27008 + sprintf(name, cpufmt, i);
27009 + entry = debugfs_create_file(name, 0444, dentry,
27010 + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
27011 + my_hist = &per_cpu(preemptoff_hist, i);
27012 + atomic_set(&my_hist->hist_mode, 1);
27013 + my_hist->min_lat = LONG_MAX;
27015 + entry = debugfs_create_file("reset", 0644, dentry,
27016 + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
27019 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
27020 + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
27021 + latency_hist_root);
27022 + for_each_possible_cpu(i) {
27023 + sprintf(name, cpufmt, i);
27024 + entry = debugfs_create_file(name, 0444, dentry,
27025 + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
27026 + my_hist = &per_cpu(preemptirqsoff_hist, i);
27027 + atomic_set(&my_hist->hist_mode, 1);
27028 + my_hist->min_lat = LONG_MAX;
27030 + entry = debugfs_create_file("reset", 0644, dentry,
27031 + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
27034 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
27035 + entry = debugfs_create_file("preemptirqsoff", 0644,
27036 + enable_root, (void *)&preemptirqsoff_enabled_data,
27040 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
27041 + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
27042 + latency_hist_root);
27043 + dentry_sharedprio = debugfs_create_dir(
27044 + wakeup_latency_hist_dir_sharedprio, dentry);
27045 + for_each_possible_cpu(i) {
27046 + sprintf(name, cpufmt, i);
27048 + entry = debugfs_create_file(name, 0444, dentry,
27049 + &per_cpu(wakeup_latency_hist, i),
27050 + &latency_hist_fops);
27051 + my_hist = &per_cpu(wakeup_latency_hist, i);
27052 + atomic_set(&my_hist->hist_mode, 1);
27053 + my_hist->min_lat = LONG_MAX;
27055 + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
27056 + &per_cpu(wakeup_latency_hist_sharedprio, i),
27057 + &latency_hist_fops);
27058 + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
27059 + atomic_set(&my_hist->hist_mode, 1);
27060 + my_hist->min_lat = LONG_MAX;
27062 + sprintf(name, cpufmt_maxlatproc, i);
27064 + mp = &per_cpu(wakeup_maxlatproc, i);
27065 + entry = debugfs_create_file(name, 0444, dentry, mp,
27066 + &maxlatproc_fops);
27067 + clear_maxlatprocdata(mp);
27069 + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
27070 + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
27071 + &maxlatproc_fops);
27072 + clear_maxlatprocdata(mp);
27074 + entry = debugfs_create_file("pid", 0644, dentry,
27075 + (void *)&wakeup_pid, &pid_fops);
27076 + entry = debugfs_create_file("reset", 0644, dentry,
27077 + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
27078 + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
27079 + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
27080 + entry = debugfs_create_file("wakeup", 0644,
27081 + enable_root, (void *)&wakeup_latency_enabled_data,
27085 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
27086 + dentry = debugfs_create_dir(missed_timer_offsets_dir,
27087 + latency_hist_root);
27088 + for_each_possible_cpu(i) {
27089 + sprintf(name, cpufmt, i);
27090 + entry = debugfs_create_file(name, 0444, dentry,
27091 + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
27092 + my_hist = &per_cpu(missed_timer_offsets, i);
27093 + atomic_set(&my_hist->hist_mode, 1);
27094 + my_hist->min_lat = LONG_MAX;
27096 + sprintf(name, cpufmt_maxlatproc, i);
27097 + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
27098 + entry = debugfs_create_file(name, 0444, dentry, mp,
27099 + &maxlatproc_fops);
27100 + clear_maxlatprocdata(mp);
27102 + entry = debugfs_create_file("pid", 0644, dentry,
27103 + (void *)&missed_timer_offsets_pid, &pid_fops);
27104 + entry = debugfs_create_file("reset", 0644, dentry,
27105 + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
27106 + entry = debugfs_create_file("missed_timer_offsets", 0644,
27107 + enable_root, (void *)&missed_timer_offsets_enabled_data,
27111 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
27112 + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
27113 + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
27114 + latency_hist_root);
27115 + for_each_possible_cpu(i) {
27116 + sprintf(name, cpufmt, i);
27117 + entry = debugfs_create_file(name, 0444, dentry,
27118 + &per_cpu(timerandwakeup_latency_hist, i),
27119 + &latency_hist_fops);
27120 + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
27121 + atomic_set(&my_hist->hist_mode, 1);
27122 + my_hist->min_lat = LONG_MAX;
27124 + sprintf(name, cpufmt_maxlatproc, i);
27125 + mp = &per_cpu(timerandwakeup_maxlatproc, i);
27126 + entry = debugfs_create_file(name, 0444, dentry, mp,
27127 + &maxlatproc_fops);
27128 + clear_maxlatprocdata(mp);
27130 + entry = debugfs_create_file("reset", 0644, dentry,
27131 + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
27132 + entry = debugfs_create_file("timerandwakeup", 0644,
27133 + enable_root, (void *)&timerandwakeup_enabled_data,
27139 +device_initcall(latency_hist_init);
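For reference, latency_hist() above bins each sample after shifting it by the histogram's offset: in-range samples land in hist_array[], out-of-range samples are counted in the below/above bound counters, and the seq_file dump prints index - offset per bucket. The following stand-alone restatement covers just that arithmetic; the struct layout, account() helper and main() are illustrative, not kernel code.

#include <stdio.h>

#define MAX_ENTRY_NUM 10240

struct hist {
	long offset;                          /* MAX_ENTRY_NUM/2 gives a bipolar scale */
	unsigned long long below, above;      /* samples outside [0, MAX_ENTRY_NUM) */
	unsigned long long buckets[MAX_ENTRY_NUM];
};

static void account(struct hist *h, long latency_us)
{
	long idx = latency_us + h->offset;    /* shift before clipping, as in latency_hist() */

	if (idx < 0)
		h->below++;
	else if (idx >= MAX_ENTRY_NUM)
		h->above++;
	else
		h->buckets[idx]++;
}

int main(void)
{
	static struct hist h = { .offset = 0 };

	account(&h, 3);
	account(&h, 3);
	account(&h, 12000);                   /* lands in the "above bound" counter */

	for (long i = 0; i < MAX_ENTRY_NUM; i++)
		if (h.buckets[i])
			printf("%6ld\t%16llu\n", i - h.offset, h.buckets[i]);
	printf("above bound: %llu\n", h.above);
	return 0;
}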
27140 diff -Nur linux-4.4.46.orig/kernel/trace/Makefile linux-4.4.46/kernel/trace/Makefile
27141 --- linux-4.4.46.orig/kernel/trace/Makefile 2017-02-01 08:31:11.000000000 +0100
27142 +++ linux-4.4.46/kernel/trace/Makefile 2017-02-03 17:18:10.939619522 +0100
27144 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
27145 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
27146 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
27147 +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
27148 +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
27149 +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
27150 +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
27151 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
27152 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
27153 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
27154 diff -Nur linux-4.4.46.orig/kernel/trace/trace.c linux-4.4.46/kernel/trace/trace.c
27155 --- linux-4.4.46.orig/kernel/trace/trace.c 2017-02-01 08:31:11.000000000 +0100
27156 +++ linux-4.4.46/kernel/trace/trace.c 2017-02-03 17:18:10.943619676 +0100
27157 @@ -1652,6 +1652,7 @@
27158 struct task_struct *tsk = current;
27160 entry->preempt_count = pc & 0xff;
27161 + entry->preempt_lazy_count = preempt_lazy_count();
27162 entry->pid = (tsk) ? tsk->pid : 0;
27164 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
27165 @@ -1661,8 +1662,11 @@
27167 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
27168 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
27169 - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
27170 + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
27171 + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
27172 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
27174 + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
27176 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
27178 @@ -2555,14 +2559,17 @@
27180 static void print_lat_help_header(struct seq_file *m)
27182 - seq_puts(m, "# _------=> CPU# \n"
27183 - "# / _-----=> irqs-off \n"
27184 - "# | / _----=> need-resched \n"
27185 - "# || / _---=> hardirq/softirq \n"
27186 - "# ||| / _--=> preempt-depth \n"
27187 - "# |||| / delay \n"
27188 - "# cmd pid ||||| time | caller \n"
27189 - "# \\ / ||||| \\ | / \n");
27190 + seq_puts(m, "# _--------=> CPU# \n"
27191 + "# / _-------=> irqs-off \n"
27192 + "# | / _------=> need-resched \n"
27193 + "# || / _-----=> need-resched_lazy \n"
27194 + "# ||| / _----=> hardirq/softirq \n"
27195 + "# |||| / _---=> preempt-depth \n"
27196 + "# ||||| / _--=> preempt-lazy-depth\n"
27197 + "# |||||| / _-=> migrate-disable \n"
27198 + "# ||||||| / delay \n"
27199 + "# cmd pid |||||||| time | caller \n"
27200 + "# \\ / |||||||| \\ | / \n");
27203 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
27204 @@ -2588,11 +2595,14 @@
27205 print_event_info(buf, m);
27206 seq_puts(m, "# _-----=> irqs-off\n"
27207 "# / _----=> need-resched\n"
27208 - "# | / _---=> hardirq/softirq\n"
27209 - "# || / _--=> preempt-depth\n"
27210 - "# ||| / delay\n"
27211 - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
27212 - "# | | | |||| | |\n");
27213 + "# |/ _-----=> need-resched_lazy\n"
27214 + "# || / _---=> hardirq/softirq\n"
27215 + "# ||| / _--=> preempt-depth\n"
27216 + "# |||| / _-=> preempt-lazy-depth\n"
27217 + "# ||||| / _-=> migrate-disable \n"
27218 + "# |||||| / delay\n"
27219 + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n"
27220 + "# | | | ||||||| | |\n");
27224 diff -Nur linux-4.4.46.orig/kernel/trace/trace_events.c linux-4.4.46/kernel/trace/trace_events.c
27225 --- linux-4.4.46.orig/kernel/trace/trace_events.c 2017-02-01 08:31:11.000000000 +0100
27226 +++ linux-4.4.46/kernel/trace/trace_events.c 2017-02-03 17:18:10.943619676 +0100
27227 @@ -188,6 +188,8 @@
27228 __common_field(unsigned char, flags);
27229 __common_field(unsigned char, preempt_count);
27230 __common_field(int, pid);
27231 + __common_field(unsigned short, migrate_disable);
27232 + __common_field(unsigned short, padding);
27236 @@ -244,6 +246,14 @@
27238 local_save_flags(fbuffer->flags);
27239 fbuffer->pc = preempt_count();
27241 + * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
27242 + * preemption (adding one to the preempt_count). Since we are
27243 + * interested in the preempt_count at the time the tracepoint was
27244 + * hit, we need to subtract one to offset the increment.
27246 + if (IS_ENABLED(CONFIG_PREEMPT))
27248 fbuffer->trace_file = trace_file;
27251 diff -Nur linux-4.4.46.orig/kernel/trace/trace.h linux-4.4.46/kernel/trace/trace.h
27252 --- linux-4.4.46.orig/kernel/trace/trace.h 2017-02-01 08:31:11.000000000 +0100
27253 +++ linux-4.4.46/kernel/trace/trace.h 2017-02-03 17:18:10.943619676 +0100
27254 @@ -117,6 +117,7 @@
27255 * NEED_RESCHED - reschedule is requested
27256 * HARDIRQ - inside an interrupt handler
27257 * SOFTIRQ - inside a softirq handler
27258 + * NEED_RESCHED_LAZY - lazy reschedule is requested
27260 enum trace_flag_type {
27261 TRACE_FLAG_IRQS_OFF = 0x01,
27262 @@ -125,6 +126,7 @@
27263 TRACE_FLAG_HARDIRQ = 0x08,
27264 TRACE_FLAG_SOFTIRQ = 0x10,
27265 TRACE_FLAG_PREEMPT_RESCHED = 0x20,
27266 + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40,
27269 #define TRACE_BUF_SIZE 1024
27270 diff -Nur linux-4.4.46.orig/kernel/trace/trace_irqsoff.c linux-4.4.46/kernel/trace/trace_irqsoff.c
27271 --- linux-4.4.46.orig/kernel/trace/trace_irqsoff.c 2017-02-01 08:31:11.000000000 +0100
27272 +++ linux-4.4.46/kernel/trace/trace_irqsoff.c 2017-02-03 17:18:10.943619676 +0100
27274 #include <linux/uaccess.h>
27275 #include <linux/module.h>
27276 #include <linux/ftrace.h>
27277 +#include <trace/events/hist.h>
27281 @@ -424,11 +425,13 @@
27283 if (preempt_trace() || irq_trace())
27284 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27285 + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
27287 EXPORT_SYMBOL_GPL(start_critical_timings);
27289 void stop_critical_timings(void)
27291 + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
27292 if (preempt_trace() || irq_trace())
27293 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27295 @@ -438,6 +441,7 @@
27296 #ifdef CONFIG_PROVE_LOCKING
27297 void time_hardirqs_on(unsigned long a0, unsigned long a1)
27299 + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
27300 if (!preempt_trace() && irq_trace())
27301 stop_critical_timing(a0, a1);
27303 @@ -446,6 +450,7 @@
27305 if (!preempt_trace() && irq_trace())
27306 start_critical_timing(a0, a1);
27307 + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
27310 #else /* !CONFIG_PROVE_LOCKING */
27311 @@ -471,6 +476,7 @@
27313 void trace_hardirqs_on(void)
27315 + trace_preemptirqsoff_hist(IRQS_ON, 0);
27316 if (!preempt_trace() && irq_trace())
27317 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27319 @@ -480,11 +486,13 @@
27321 if (!preempt_trace() && irq_trace())
27322 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27323 + trace_preemptirqsoff_hist(IRQS_OFF, 1);
27325 EXPORT_SYMBOL(trace_hardirqs_off);
27327 __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
27329 + trace_preemptirqsoff_hist(IRQS_ON, 0);
27330 if (!preempt_trace() && irq_trace())
27331 stop_critical_timing(CALLER_ADDR0, caller_addr);
27333 @@ -494,6 +502,7 @@
27335 if (!preempt_trace() && irq_trace())
27336 start_critical_timing(CALLER_ADDR0, caller_addr);
27337 + trace_preemptirqsoff_hist(IRQS_OFF, 1);
27339 EXPORT_SYMBOL(trace_hardirqs_off_caller);
27341 @@ -503,12 +512,14 @@
27342 #ifdef CONFIG_PREEMPT_TRACER
27343 void trace_preempt_on(unsigned long a0, unsigned long a1)
27345 + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
27346 if (preempt_trace() && !irq_trace())
27347 stop_critical_timing(a0, a1);
27350 void trace_preempt_off(unsigned long a0, unsigned long a1)
27352 + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
27353 if (preempt_trace() && !irq_trace())
27354 start_critical_timing(a0, a1);
27356 diff -Nur linux-4.4.46.orig/kernel/trace/trace_output.c linux-4.4.46/kernel/trace/trace_output.c
27357 --- linux-4.4.46.orig/kernel/trace/trace_output.c 2017-02-01 08:31:11.000000000 +0100
27358 +++ linux-4.4.46/kernel/trace/trace_output.c 2017-02-03 17:18:10.943619676 +0100
27359 @@ -386,6 +386,7 @@
27363 + char need_resched_lazy;
27367 @@ -413,6 +414,8 @@
27368 need_resched = '.';
27371 + need_resched_lazy =
27372 + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
27375 (hardirq && softirq) ? 'H' :
27376 @@ -420,14 +423,25 @@
27380 - trace_seq_printf(s, "%c%c%c",
27381 - irqs_off, need_resched, hardsoft_irq);
27382 + trace_seq_printf(s, "%c%c%c%c",
27383 + irqs_off, need_resched, need_resched_lazy,
27386 if (entry->preempt_count)
27387 trace_seq_printf(s, "%x", entry->preempt_count);
27389 trace_seq_putc(s, '.');
27391 + if (entry->preempt_lazy_count)
27392 + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
27394 + trace_seq_putc(s, '.');
27396 + if (entry->migrate_disable)
27397 + trace_seq_printf(s, "%x", entry->migrate_disable);
27399 + trace_seq_putc(s, '.');
27401 return !trace_seq_has_overflowed(s);
27404 diff -Nur linux-4.4.46.orig/kernel/user.c linux-4.4.46/kernel/user.c
27405 --- linux-4.4.46.orig/kernel/user.c 2017-02-01 08:31:11.000000000 +0100
27406 +++ linux-4.4.46/kernel/user.c 2017-02-03 17:18:10.943619676 +0100
27407 @@ -161,11 +161,11 @@
27411 - local_irq_save(flags);
27412 + local_irq_save_nort(flags);
27413 if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
27414 free_user(up, flags);
27416 - local_irq_restore(flags);
27417 + local_irq_restore_nort(flags);
27420 struct user_struct *alloc_uid(kuid_t uid)
27421 diff -Nur linux-4.4.46.orig/kernel/watchdog.c linux-4.4.46/kernel/watchdog.c
27422 --- linux-4.4.46.orig/kernel/watchdog.c 2017-02-01 08:31:11.000000000 +0100
27423 +++ linux-4.4.46/kernel/watchdog.c 2017-02-03 17:18:10.943619676 +0100
27424 @@ -299,6 +299,8 @@
27426 #ifdef CONFIG_HARDLOCKUP_DETECTOR
27428 +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
27430 static struct perf_event_attr wd_hw_attr = {
27431 .type = PERF_TYPE_HARDWARE,
27432 .config = PERF_COUNT_HW_CPU_CYCLES,
27433 @@ -332,6 +334,13 @@
27434 /* only print hardlockups once */
27435 if (__this_cpu_read(hard_watchdog_warn) == true)
27438 + * If early-printk is enabled then make sure we do not
27439 + * lock up in printk() and kill console logging:
27443 + raw_spin_lock(&watchdog_output_lock);
27445 pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
27447 @@ -349,8 +358,9 @@
27448 !test_and_set_bit(0, &hardlockup_allcpu_dumped))
27449 trigger_allbutself_cpu_backtrace();
27451 + raw_spin_unlock(&watchdog_output_lock);
27452 if (hardlockup_panic)
27453 - panic("Hard LOCKUP");
27454 + nmi_panic(regs, "Hard LOCKUP");
27456 __this_cpu_write(hard_watchdog_warn, true);
27458 @@ -496,6 +506,7 @@
27459 /* kick off the timer for the hardlockup detector */
27460 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
27461 hrtimer->function = watchdog_timer_fn;
27462 + hrtimer->irqsafe = 1;
27464 /* Enable the perf event */
27465 watchdog_nmi_enable(cpu);
27466 diff -Nur linux-4.4.46.orig/kernel/workqueue.c linux-4.4.46/kernel/workqueue.c
27467 --- linux-4.4.46.orig/kernel/workqueue.c 2017-02-01 08:31:11.000000000 +0100
27468 +++ linux-4.4.46/kernel/workqueue.c 2017-02-03 17:18:10.943619676 +0100
27470 #include <linux/nodemask.h>
27471 #include <linux/moduleparam.h>
27472 #include <linux/uaccess.h>
27473 +#include <linux/locallock.h>
27474 +#include <linux/delay.h>
27476 #include "workqueue_internal.h"
27478 @@ -121,11 +123,16 @@
27479 * cpu or grabbing pool->lock is enough for read access. If
27480 * POOL_DISASSOCIATED is set, it's identical to L.
27482 + * On RT we need the extra protection via rt_lock_idle_list() for
27483 + * the list manipulations against read access from
27484 + * wq_worker_sleeping(). All other places are nicely serialized via
27487 * A: pool->attach_mutex protected.
27489 * PL: wq_pool_mutex protected.
27491 - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
27492 + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
27494 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
27496 @@ -134,7 +141,7 @@
27498 * WQ: wq->mutex protected.
27500 - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
27501 + * WR: wq->mutex protected for writes. RCU protected for reads.
27503 * MD: wq_mayday_lock protected.
27505 @@ -183,7 +190,7 @@
27506 atomic_t nr_running ____cacheline_aligned_in_smp;
27509 - * Destruction of pool is sched-RCU protected to allow dereferences
27510 + * Destruction of pool is RCU protected to allow dereferences
27511 * from get_work_pool().
27513 struct rcu_head rcu;
27514 @@ -212,7 +219,7 @@
27516 * Release of unbound pwq is punted to system_wq. See put_pwq()
27517 * and pwq_unbound_release_workfn() for details. pool_workqueue
27518 - * itself is also sched-RCU protected so that the first pwq can be
27519 + * itself is also RCU protected so that the first pwq can be
27520 * determined without grabbing wq->mutex.
27522 struct work_struct unbound_release_work;
27523 @@ -331,6 +338,8 @@
27524 struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
27525 EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
27527 +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
27529 static int worker_thread(void *__worker);
27530 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
27532 @@ -338,20 +347,20 @@
27533 #include <trace/events/workqueue.h>
27535 #define assert_rcu_or_pool_mutex() \
27536 - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
27537 + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
27538 !lockdep_is_held(&wq_pool_mutex), \
27539 - "sched RCU or wq_pool_mutex should be held")
27540 + "RCU or wq_pool_mutex should be held")
27542 #define assert_rcu_or_wq_mutex(wq) \
27543 - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
27544 + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
27545 !lockdep_is_held(&wq->mutex), \
27546 - "sched RCU or wq->mutex should be held")
27547 + "RCU or wq->mutex should be held")
27549 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
27550 - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
27551 + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
27552 !lockdep_is_held(&wq->mutex) && \
27553 !lockdep_is_held(&wq_pool_mutex), \
27554 - "sched RCU, wq->mutex or wq_pool_mutex should be held")
27555 + "RCU, wq->mutex or wq_pool_mutex should be held")
27557 #define for_each_cpu_worker_pool(pool, cpu) \
27558 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
27559 @@ -363,7 +372,7 @@
27560 * @pool: iteration cursor
27561 * @pi: integer used for iteration
27563 - * This must be called either with wq_pool_mutex held or sched RCU read
27564 + * This must be called either with wq_pool_mutex held or RCU read
27565 * locked. If the pool needs to be used beyond the locking in effect, the
27566 * caller is responsible for guaranteeing that the pool stays online.
27568 @@ -395,7 +404,7 @@
27569 * @pwq: iteration cursor
27570 * @wq: the target workqueue
27572 - * This must be called either with wq->mutex held or sched RCU read locked.
27573 + * This must be called either with wq->mutex held or RCU read locked.
27574 * If the pwq needs to be used beyond the locking in effect, the caller is
27575 * responsible for guaranteeing that the pwq stays online.
27577 @@ -407,6 +416,31 @@
27578 if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
27581 +#ifdef CONFIG_PREEMPT_RT_BASE
27582 +static inline void rt_lock_idle_list(struct worker_pool *pool)
27584 + preempt_disable();
27586 +static inline void rt_unlock_idle_list(struct worker_pool *pool)
27588 + preempt_enable();
27590 +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
27591 +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
27593 +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
27594 +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
27595 +static inline void sched_lock_idle_list(struct worker_pool *pool)
27597 + spin_lock_irq(&pool->lock);
27599 +static inline void sched_unlock_idle_list(struct worker_pool *pool)
27601 + spin_unlock_irq(&pool->lock);
27606 #ifdef CONFIG_DEBUG_OBJECTS_WORK
27608 static struct debug_obj_descr work_debug_descr;
27609 @@ -557,7 +591,7 @@
27610 * @wq: the target workqueue
27611 * @node: the node ID
27613 - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
27614 + * This must be called with any of wq_pool_mutex, wq->mutex or RCU
27616 * If the pwq needs to be used beyond the locking in effect, the caller is
27617 * responsible for guaranteeing that the pwq stays online.
27618 @@ -701,8 +735,8 @@
27619 * @work: the work item of interest
27621 * Pools are created and destroyed under wq_pool_mutex, and allows read
27622 - * access under sched-RCU read lock. As such, this function should be
27623 - * called under wq_pool_mutex or with preemption disabled.
27624 + * access under RCU read lock. As such, this function should be
27625 + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
27627 * All fields of the returned pool are accessible as long as the above
27628 * mentioned locking is in effect. If the returned pool needs to be used
27629 @@ -839,51 +873,44 @@
27631 static void wake_up_worker(struct worker_pool *pool)
27633 - struct worker *worker = first_idle_worker(pool);
27634 + struct worker *worker;
27636 + rt_lock_idle_list(pool);
27638 + worker = first_idle_worker(pool);
27640 if (likely(worker))
27641 wake_up_process(worker->task);
27643 + rt_unlock_idle_list(pool);
27647 - * wq_worker_waking_up - a worker is waking up
27648 - * @task: task waking up
27649 - * @cpu: CPU @task is waking up to
27650 + * wq_worker_running - a worker is running again
27651 + * @task: task returning from sleep
27653 - * This function is called during try_to_wake_up() when a worker is
27657 - * spin_lock_irq(rq->lock)
27658 + * This function is called when a worker returns from schedule()
27660 -void wq_worker_waking_up(struct task_struct *task, int cpu)
27661 +void wq_worker_running(struct task_struct *task)
27663 struct worker *worker = kthread_data(task);
27665 - if (!(worker->flags & WORKER_NOT_RUNNING)) {
27666 - WARN_ON_ONCE(worker->pool->cpu != cpu);
27667 + if (!worker->sleeping)
27669 + if (!(worker->flags & WORKER_NOT_RUNNING))
27670 atomic_inc(&worker->pool->nr_running);
27672 + worker->sleeping = 0;
27676 * wq_worker_sleeping - a worker is going to sleep
27677 * @task: task going to sleep
27678 - * @cpu: CPU in question, must be the current CPU number
27680 - * This function is called during schedule() when a busy worker is
27681 - * going to sleep. Worker on the same cpu can be woken up by
27682 - * returning pointer to its task.
27685 - * spin_lock_irq(rq->lock)
27688 - * Worker task on @cpu to wake up, %NULL if none.
27689 + * This function is called from schedule() when a busy worker is
27690 + * going to sleep.
27692 -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
27693 +void wq_worker_sleeping(struct task_struct *task)
27695 - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
27696 + struct worker *worker = kthread_data(task);
27697 struct worker_pool *pool;
27700 @@ -892,29 +919,26 @@
27701 * checking NOT_RUNNING.
27703 if (worker->flags & WORKER_NOT_RUNNING)
27707 pool = worker->pool;
27709 - /* this can only happen on the local cpu */
27710 - if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
27712 + if (WARN_ON_ONCE(worker->sleeping))
27715 + worker->sleeping = 1;
27718 * The counterpart of the following dec_and_test, implied mb,
27719 * worklist not empty test sequence is in insert_work().
27720 * Please read comment there.
27722 - * NOT_RUNNING is clear. This means that we're bound to and
27723 - * running on the local cpu w/ rq lock held and preemption
27724 - * disabled, which in turn means that none else could be
27725 - * manipulating idle_list, so dereferencing idle_list without pool
27728 if (atomic_dec_and_test(&pool->nr_running) &&
27729 - !list_empty(&pool->worklist))
27730 - to_wakeup = first_idle_worker(pool);
27731 - return to_wakeup ? to_wakeup->task : NULL;
27732 + !list_empty(&pool->worklist)) {
27733 + sched_lock_idle_list(pool);
27734 + wake_up_worker(pool);
27735 + sched_unlock_idle_list(pool);
27740 @@ -1108,12 +1132,12 @@
27744 - * As both pwqs and pools are sched-RCU protected, the
27745 + * As both pwqs and pools are RCU protected, the
27746 * following lock operations are safe.
27748 - spin_lock_irq(&pwq->pool->lock);
27749 + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
27751 - spin_unlock_irq(&pwq->pool->lock);
27752 + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
27756 @@ -1215,7 +1239,7 @@
27757 struct worker_pool *pool;
27758 struct pool_workqueue *pwq;
27760 - local_irq_save(*flags);
27761 + local_lock_irqsave(pendingb_lock, *flags);
27763 /* try to steal the timer if it exists */
27765 @@ -1234,6 +1258,7 @@
27766 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
27771 * The queueing is in progress, or it is already queued. Try to
27772 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
27773 @@ -1272,14 +1297,16 @@
27774 set_work_pool_and_keep_pending(work, pool->id);
27776 spin_unlock(&pool->lock);
27777 + rcu_read_unlock();
27780 spin_unlock(&pool->lock);
27782 - local_irq_restore(*flags);
27783 + rcu_read_unlock();
27784 + local_unlock_irqrestore(pendingb_lock, *flags);
27785 if (work_is_canceling(work))
27792 @@ -1348,7 +1375,7 @@
27793 * queued or lose PENDING. Grabbing PENDING and queueing should
27794 * happen with IRQ disabled.
27796 - WARN_ON_ONCE(!irqs_disabled());
27797 + WARN_ON_ONCE_NONRT(!irqs_disabled());
27799 debug_work_activate(work);
27801 @@ -1356,6 +1383,8 @@
27802 if (unlikely(wq->flags & __WQ_DRAINING) &&
27803 WARN_ON_ONCE(!is_chained_work(wq)))
27808 if (req_cpu == WORK_CPU_UNBOUND)
27809 cpu = raw_smp_processor_id();
27810 @@ -1412,10 +1441,8 @@
27811 /* pwq determined, queue */
27812 trace_workqueue_queue_work(req_cpu, pwq, work);
27814 - if (WARN_ON(!list_empty(&work->entry))) {
27815 - spin_unlock(&pwq->pool->lock);
27818 + if (WARN_ON(!list_empty(&work->entry)))
27821 pwq->nr_in_flight[pwq->work_color]++;
27822 work_flags = work_color_to_flags(pwq->work_color);
27823 @@ -1431,7 +1458,9 @@
27825 insert_work(pwq, work, worklist, work_flags);
27828 spin_unlock(&pwq->pool->lock);
27829 + rcu_read_unlock();
27833 @@ -1451,14 +1480,14 @@
27835 unsigned long flags;
27837 - local_irq_save(flags);
27838 + local_lock_irqsave(pendingb_lock,flags);
27840 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
27841 __queue_work(cpu, wq, work);
27845 - local_irq_restore(flags);
27846 + local_unlock_irqrestore(pendingb_lock, flags);
27849 EXPORT_SYMBOL(queue_work_on);
27850 @@ -1525,14 +1554,14 @@
27851 unsigned long flags;
27853 /* read the comment in __queue_work() */
27854 - local_irq_save(flags);
27855 + local_lock_irqsave(pendingb_lock, flags);
27857 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
27858 __queue_delayed_work(cpu, wq, dwork, delay);
27862 - local_irq_restore(flags);
27863 + local_unlock_irqrestore(pendingb_lock, flags);
27866 EXPORT_SYMBOL(queue_delayed_work_on);
27867 @@ -1567,7 +1596,7 @@
27869 if (likely(ret >= 0)) {
27870 __queue_delayed_work(cpu, wq, dwork, delay);
27871 - local_irq_restore(flags);
27872 + local_unlock_irqrestore(pendingb_lock, flags);
27875 /* -ENOENT from try_to_grab_pending() becomes %true */
27876 @@ -1600,7 +1629,9 @@
27877 worker->last_active = jiffies;
27879 /* idle_list is LIFO */
27880 + rt_lock_idle_list(pool);
27881 list_add(&worker->entry, &pool->idle_list);
27882 + rt_unlock_idle_list(pool);
27884 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
27885 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
27886 @@ -1633,7 +1664,9 @@
27888 worker_clr_flags(worker, WORKER_IDLE);
27890 + rt_lock_idle_list(pool);
27891 list_del_init(&worker->entry);
27892 + rt_unlock_idle_list(pool);
27895 static struct worker *alloc_worker(int node)
27896 @@ -1799,7 +1832,9 @@
27897 pool->nr_workers--;
27900 + rt_lock_idle_list(pool);
27901 list_del_init(&worker->entry);
27902 + rt_unlock_idle_list(pool);
27903 worker->flags |= WORKER_DIE;
27904 wake_up_process(worker->task);
27906 @@ -2716,14 +2751,14 @@
27910 - local_irq_disable();
27912 pool = get_work_pool(work);
27914 - local_irq_enable();
27915 + rcu_read_unlock();
27919 - spin_lock(&pool->lock);
27920 + spin_lock_irq(&pool->lock);
27921 /* see the comment in try_to_grab_pending() with the same code */
27922 pwq = get_work_pwq(work);
27924 @@ -2750,10 +2785,11 @@
27926 lock_map_acquire_read(&pwq->wq->lockdep_map);
27927 lock_map_release(&pwq->wq->lockdep_map);
27929 + rcu_read_unlock();
27932 spin_unlock_irq(&pool->lock);
27933 + rcu_read_unlock();
27937 @@ -2840,7 +2876,7 @@
27939 /* tell other tasks trying to grab @work to back off */
27940 mark_work_canceling(work);
27941 - local_irq_restore(flags);
27942 + local_unlock_irqrestore(pendingb_lock, flags);
27945 clear_work_data(work);
27946 @@ -2895,10 +2931,10 @@
27948 bool flush_delayed_work(struct delayed_work *dwork)
27950 - local_irq_disable();
27951 + local_lock_irq(pendingb_lock);
27952 if (del_timer_sync(&dwork->timer))
27953 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
27954 - local_irq_enable();
27955 + local_unlock_irq(pendingb_lock);
27956 return flush_work(&dwork->work);
27958 EXPORT_SYMBOL(flush_delayed_work);
27959 @@ -2933,7 +2969,7 @@
27961 set_work_pool_and_clear_pending(&dwork->work,
27962 get_work_pool_id(&dwork->work));
27963 - local_irq_restore(flags);
27964 + local_unlock_irqrestore(pendingb_lock, flags);
27967 EXPORT_SYMBOL(cancel_delayed_work);
27968 @@ -3161,7 +3197,7 @@
27969 * put_unbound_pool - put a worker_pool
27970 * @pool: worker_pool to put
27972 - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
27973 + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
27974 * safe manner. get_unbound_pool() calls this function on its failure path
27975 * and this function should be able to release pools which went through,
27976 * successfully or not, init_worker_pool().
27977 @@ -3215,8 +3251,8 @@
27978 del_timer_sync(&pool->idle_timer);
27979 del_timer_sync(&pool->mayday_timer);
27981 - /* sched-RCU protected to allow dereferences from get_work_pool() */
27982 - call_rcu_sched(&pool->rcu, rcu_free_pool);
27983 + /* RCU protected to allow dereferences from get_work_pool() */
27984 + call_rcu(&pool->rcu, rcu_free_pool);
27988 @@ -3323,14 +3359,14 @@
27989 put_unbound_pool(pool);
27990 mutex_unlock(&wq_pool_mutex);
27992 - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
27993 + call_rcu(&pwq->rcu, rcu_free_pwq);
27996 * If we're the last pwq going away, @wq is already dead and no one
27997 * is gonna access it anymore. Schedule RCU free.
28000 - call_rcu_sched(&wq->rcu, rcu_free_wq);
28001 + call_rcu(&wq->rcu, rcu_free_wq);
28005 @@ -3983,7 +4019,7 @@
28006 * The base ref is never dropped on per-cpu pwqs. Directly
28007 * schedule RCU free.
28009 - call_rcu_sched(&wq->rcu, rcu_free_wq);
28010 + call_rcu(&wq->rcu, rcu_free_wq);
28013 * We're the sole accessor of @wq at this point. Directly
28014 @@ -4076,7 +4112,8 @@
28015 struct pool_workqueue *pwq;
28018 - rcu_read_lock_sched();
28020 + preempt_disable();
28022 if (cpu == WORK_CPU_UNBOUND)
28023 cpu = smp_processor_id();
28024 @@ -4087,7 +4124,8 @@
28025 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
28027 ret = !list_empty(&pwq->delayed_works);
28028 - rcu_read_unlock_sched();
28029 + preempt_enable();
28030 + rcu_read_unlock();
28034 @@ -4113,15 +4151,15 @@
28035 if (work_pending(work))
28036 ret |= WORK_BUSY_PENDING;
28038 - local_irq_save(flags);
28040 pool = get_work_pool(work);
28042 - spin_lock(&pool->lock);
28043 + spin_lock_irqsave(&pool->lock, flags);
28044 if (find_worker_executing_work(pool, work))
28045 ret |= WORK_BUSY_RUNNING;
28046 - spin_unlock(&pool->lock);
28047 + spin_unlock_irqrestore(&pool->lock, flags);
28049 - local_irq_restore(flags);
28050 + rcu_read_unlock();
28054 @@ -4310,7 +4348,7 @@
28055 unsigned long flags;
28058 - rcu_read_lock_sched();
28061 pr_info("Showing busy workqueues and worker pools:\n");
28063 @@ -4361,7 +4399,7 @@
28064 spin_unlock_irqrestore(&pool->lock, flags);
28067 - rcu_read_unlock_sched();
28068 + rcu_read_unlock();
28072 @@ -4722,16 +4760,16 @@
28073 * nr_active is monotonically decreasing. It's safe
28074 * to peek without lock.
28076 - rcu_read_lock_sched();
28078 for_each_pwq(pwq, wq) {
28079 WARN_ON_ONCE(pwq->nr_active < 0);
28080 if (pwq->nr_active) {
28082 - rcu_read_unlock_sched();
28083 + rcu_read_unlock();
28087 - rcu_read_unlock_sched();
28088 + rcu_read_unlock();
28091 mutex_unlock(&wq_pool_mutex);
28092 @@ -4921,7 +4959,8 @@
28093 const char *delim = "";
28094 int node, written = 0;
28096 - rcu_read_lock_sched();
28097 + get_online_cpus();
28099 for_each_node(node) {
28100 written += scnprintf(buf + written, PAGE_SIZE - written,
28101 "%s%d:%d", delim, node,
28102 @@ -4929,7 +4968,8 @@
28105 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
28106 - rcu_read_unlock_sched();
28107 + rcu_read_unlock();
28108 + put_online_cpus();
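
The workqueue hunks above do two independent things: the sched-RCU protection of pools and pwqs becomes ordinary RCU (rcu_read_lock()/call_rcu() instead of preempt-disabled sections and call_rcu_sched()), and the bare local_irq_save() regions guarding the PENDING bit are routed through the new pendingb_lock local lock. A minimal sketch of that local-lock idiom follows, assuming the RT tree's <linux/locallock.h>; the per-CPU counter is only a stand-in for the real pending-bit bookkeeping. On a non-RT build the macros collapse back to local_irq_save()/local_irq_restore(); on PREEMPT_RT they take a per-CPU sleeping lock so the section stays preemptible.

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(example_pending_lock);
static DEFINE_PER_CPU(unsigned long, example_pending);

static void example_mark_pending(void)
{
        unsigned long flags;

        /* IRQs off on mainline, per-CPU sleeping lock on PREEMPT_RT */
        local_lock_irqsave(example_pending_lock, flags);
        __this_cpu_inc(example_pending);
        local_unlock_irqrestore(example_pending_lock, flags);
}
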
28112 diff -Nur linux-4.4.46.orig/kernel/workqueue_internal.h linux-4.4.46/kernel/workqueue_internal.h
28113 --- linux-4.4.46.orig/kernel/workqueue_internal.h 2017-02-01 08:31:11.000000000 +0100
28114 +++ linux-4.4.46/kernel/workqueue_internal.h 2017-02-03 17:18:10.943619676 +0100
28116 unsigned long last_active; /* L: last active timestamp */
28117 unsigned int flags; /* X: flags */
28118 int id; /* I: worker id */
28119 + int sleeping; /* None */
28122 * Opaque string set with work_set_desc(). Printed out with task
28124 * Scheduler hooks for concurrency managed workqueue. Only to be used from
28125 * sched/core.c and workqueue.c.
28127 -void wq_worker_waking_up(struct task_struct *task, int cpu);
28128 -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
28129 +void wq_worker_running(struct task_struct *task);
28130 +void wq_worker_sleeping(struct task_struct *task);
28132 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
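
The hook rework above drops the rq-lock-held wq_worker_waking_up()/wq_worker_sleeping() pair in favour of wq_worker_running()/wq_worker_sleeping(), tracked with the new worker->sleeping flag, so the workqueue side can take pool->lock itself rather than relying on the runqueue lock being held. The matching scheduler change lives in the kernel/sched/core.c hunks elsewhere in this patch; the sketch below only illustrates the expected call sites around schedule() and is not a literal copy of that hunk.

#include <linux/sched.h>
#include "../workqueue_internal.h"      /* as kernel/sched/core.c does */

static inline void sched_submit_work(struct task_struct *tsk)
{
        if (!tsk->state)
                return;
        /* a worker going to sleep may need a replacement to keep the pool busy */
        if (tsk->flags & PF_WQ_WORKER)
                wq_worker_sleeping(tsk);
}

static inline void sched_update_worker(struct task_struct *tsk)
{
        /* called once schedule() has returned */
        if (tsk->flags & PF_WQ_WORKER)
                wq_worker_running(tsk);
}
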
28133 diff -Nur linux-4.4.46.orig/lib/debugobjects.c linux-4.4.46/lib/debugobjects.c
28134 --- linux-4.4.46.orig/lib/debugobjects.c 2017-02-01 08:31:11.000000000 +0100
28135 +++ linux-4.4.46/lib/debugobjects.c 2017-02-03 17:18:10.943619676 +0100
28136 @@ -309,7 +309,10 @@
28137 struct debug_obj *obj;
28138 unsigned long flags;
28141 +#ifdef CONFIG_PREEMPT_RT_FULL
28142 + if (preempt_count() == 0 && !irqs_disabled())
28146 db = get_bucket((unsigned long) addr);
28148 diff -Nur linux-4.4.46.orig/lib/idr.c linux-4.4.46/lib/idr.c
28149 --- linux-4.4.46.orig/lib/idr.c 2017-02-01 08:31:11.000000000 +0100
28150 +++ linux-4.4.46/lib/idr.c 2017-02-03 17:18:10.943619676 +0100
28152 #include <linux/idr.h>
28153 #include <linux/spinlock.h>
28154 #include <linux/percpu.h>
28155 +#include <linux/locallock.h>
28157 #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
28158 #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
28160 static DEFINE_PER_CPU(int, idr_preload_cnt);
28161 static DEFINE_SPINLOCK(simple_ida_lock);
28163 +#ifdef CONFIG_PREEMPT_RT_FULL
28164 +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
28166 +static inline void idr_preload_lock(void)
28168 + local_lock(idr_lock);
28171 +static inline void idr_preload_unlock(void)
28173 + local_unlock(idr_lock);
28176 +void idr_preload_end(void)
28178 + idr_preload_unlock();
28180 +EXPORT_SYMBOL(idr_preload_end);
28182 +static inline void idr_preload_lock(void)
28184 + preempt_disable();
28187 +static inline void idr_preload_unlock(void)
28189 + preempt_enable();
28194 /* the maximum ID which can be allocated given idr->layers */
28195 static int idr_max(int layers)
28197 @@ -115,14 +147,14 @@
28198 * context. See idr_preload() for details.
28200 if (!in_interrupt()) {
28201 - preempt_disable();
28202 + idr_preload_lock();
28203 new = __this_cpu_read(idr_preload_head);
28205 __this_cpu_write(idr_preload_head, new->ary[0]);
28206 __this_cpu_dec(idr_preload_cnt);
28207 new->ary[0] = NULL;
28209 - preempt_enable();
28210 + idr_preload_unlock();
28214 @@ -366,7 +398,6 @@
28215 idr_mark_full(pa, id);
28220 * idr_preload - preload for idr_alloc()
28221 * @gfp_mask: allocation mask to use for preloading
28222 @@ -401,7 +432,7 @@
28223 WARN_ON_ONCE(in_interrupt());
28224 might_sleep_if(gfpflags_allow_blocking(gfp_mask));
28226 - preempt_disable();
28227 + idr_preload_lock();
28230 * idr_alloc() is likely to succeed w/o full idr_layer buffer and
28231 @@ -413,9 +444,9 @@
28232 while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
28233 struct idr_layer *new;
28235 - preempt_enable();
28236 + idr_preload_unlock();
28237 new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
28238 - preempt_disable();
28239 + idr_preload_lock();
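
With the change above, idr_preload() pins the per-CPU preload cache through idr_preload_lock(): a plain preempt_disable() on mainline, the idr_lock local lock on PREEMPT_RT, where idr_preload_end() becomes a real exported unlock. The caller-visible pattern is unchanged; a minimal sketch, with the idr instance and its spinlock being illustrative names:

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_idr_lock);

static int example_alloc_id(void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);                /* idr_preload_lock() above   */
        spin_lock(&example_idr_lock);
        id = idr_alloc(&example_idr, ptr, 0, 0, GFP_NOWAIT);
        spin_unlock(&example_idr_lock);
        idr_preload_end();                      /* idr_preload_unlock() above */

        return id;
}
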
28243 diff -Nur linux-4.4.46.orig/lib/Kconfig linux-4.4.46/lib/Kconfig
28244 --- linux-4.4.46.orig/lib/Kconfig 2017-02-01 08:31:11.000000000 +0100
28245 +++ linux-4.4.46/lib/Kconfig 2017-02-03 17:18:10.943619676 +0100
28246 @@ -397,6 +397,7 @@
28248 config CPUMASK_OFFSTACK
28249 bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
28250 + depends on !PREEMPT_RT_FULL
28252 Use dynamic allocation for cpumask_var_t, instead of putting
28253 them on the stack. This is a bit more expensive, but avoids
28254 diff -Nur linux-4.4.46.orig/lib/locking-selftest.c linux-4.4.46/lib/locking-selftest.c
28255 --- linux-4.4.46.orig/lib/locking-selftest.c 2017-02-01 08:31:11.000000000 +0100
28256 +++ linux-4.4.46/lib/locking-selftest.c 2017-02-03 17:18:10.947619830 +0100
28257 @@ -590,6 +590,8 @@
28258 #include "locking-selftest-spin-hardirq.h"
28259 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
28261 +#ifndef CONFIG_PREEMPT_RT_FULL
28263 #include "locking-selftest-rlock-hardirq.h"
28264 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
28266 @@ -605,9 +607,12 @@
28267 #include "locking-selftest-wlock-softirq.h"
28268 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
28275 +#ifndef CONFIG_PREEMPT_RT_FULL
28277 * Enabling hardirqs with a softirq-safe lock held:
28279 @@ -640,6 +645,8 @@
28286 * Enabling irqs with an irq-safe lock held:
28288 @@ -663,6 +670,8 @@
28289 #include "locking-selftest-spin-hardirq.h"
28290 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
28292 +#ifndef CONFIG_PREEMPT_RT_FULL
28294 #include "locking-selftest-rlock-hardirq.h"
28295 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
28297 @@ -678,6 +687,8 @@
28298 #include "locking-selftest-wlock-softirq.h"
28299 GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
28306 @@ -709,6 +720,8 @@
28307 #include "locking-selftest-spin-hardirq.h"
28308 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
28310 +#ifndef CONFIG_PREEMPT_RT_FULL
28312 #include "locking-selftest-rlock-hardirq.h"
28313 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
28315 @@ -724,6 +737,8 @@
28316 #include "locking-selftest-wlock-softirq.h"
28317 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
28324 @@ -757,6 +772,8 @@
28325 #include "locking-selftest-spin-hardirq.h"
28326 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
28328 +#ifndef CONFIG_PREEMPT_RT_FULL
28330 #include "locking-selftest-rlock-hardirq.h"
28331 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
28333 @@ -772,10 +789,14 @@
28334 #include "locking-selftest-wlock-softirq.h"
28335 GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
28343 +#ifndef CONFIG_PREEMPT_RT_FULL
28346 * read-lock / write-lock irq inversion.
28348 @@ -838,6 +859,10 @@
28354 +#ifndef CONFIG_PREEMPT_RT_FULL
28357 * read-lock / write-lock recursion that is actually safe.
28359 @@ -876,6 +901,8 @@
28366 * read-lock / write-lock recursion that is unsafe.
28368 @@ -1858,6 +1885,7 @@
28370 printk(" --------------------------------------------------------------------------\n");
28372 +#ifndef CONFIG_PREEMPT_RT_FULL
28374 * irq-context testcases:
28376 @@ -1870,6 +1898,28 @@
28378 DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
28379 // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
28381 + /* On -rt, we only do hardirq context test for raw spinlock */
28382 + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
28383 + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
28385 + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
28386 + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
28388 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
28389 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
28390 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
28391 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
28392 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
28393 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
28395 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
28396 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
28397 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
28398 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
28399 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
28400 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
28405 diff -Nur linux-4.4.46.orig/lib/percpu_ida.c linux-4.4.46/lib/percpu_ida.c
28406 --- linux-4.4.46.orig/lib/percpu_ida.c 2017-02-01 08:31:11.000000000 +0100
28407 +++ linux-4.4.46/lib/percpu_ida.c 2017-02-03 17:18:10.947619830 +0100
28409 #include <linux/string.h>
28410 #include <linux/spinlock.h>
28411 #include <linux/percpu_ida.h>
28412 +#include <linux/locallock.h>
28414 +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
28416 struct percpu_ida_cpu {
28418 @@ -148,13 +151,13 @@
28419 unsigned long flags;
28422 - local_irq_save(flags);
28423 + local_lock_irqsave(irq_off_lock, flags);
28424 tags = this_cpu_ptr(pool->tag_cpu);
28427 tag = alloc_local_tag(tags);
28428 if (likely(tag >= 0)) {
28429 - local_irq_restore(flags);
28430 + local_unlock_irqrestore(irq_off_lock, flags);
28434 @@ -173,6 +176,7 @@
28436 if (!tags->nr_free)
28437 alloc_global_tags(pool, tags);
28439 if (!tags->nr_free)
28440 steal_tags(pool, tags);
28442 @@ -184,7 +188,7 @@
28445 spin_unlock(&pool->lock);
28446 - local_irq_restore(flags);
28447 + local_unlock_irqrestore(irq_off_lock, flags);
28449 if (tag >= 0 || state == TASK_RUNNING)
28451 @@ -196,7 +200,7 @@
28455 - local_irq_save(flags);
28456 + local_lock_irqsave(irq_off_lock, flags);
28457 tags = this_cpu_ptr(pool->tag_cpu);
28459 if (state != TASK_RUNNING)
28460 @@ -221,7 +225,7 @@
28462 BUG_ON(tag >= pool->nr_tags);
28464 - local_irq_save(flags);
28465 + local_lock_irqsave(irq_off_lock, flags);
28466 tags = this_cpu_ptr(pool->tag_cpu);
28468 spin_lock(&tags->lock);
28469 @@ -253,7 +257,7 @@
28470 spin_unlock(&pool->lock);
28473 - local_irq_restore(flags);
28474 + local_unlock_irqrestore(irq_off_lock, flags);
28476 EXPORT_SYMBOL_GPL(percpu_ida_free);
28478 @@ -345,7 +349,7 @@
28479 struct percpu_ida_cpu *remote;
28480 unsigned cpu, i, err = 0;
28482 - local_irq_save(flags);
28483 + local_lock_irqsave(irq_off_lock, flags);
28484 for_each_possible_cpu(cpu) {
28485 remote = per_cpu_ptr(pool->tag_cpu, cpu);
28486 spin_lock(&remote->lock);
28487 @@ -367,7 +371,7 @@
28489 spin_unlock(&pool->lock);
28491 - local_irq_restore(flags);
28492 + local_unlock_irqrestore(irq_off_lock, flags);
28495 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
28496 diff -Nur linux-4.4.46.orig/lib/radix-tree.c linux-4.4.46/lib/radix-tree.c
28497 --- linux-4.4.46.orig/lib/radix-tree.c 2017-02-01 08:31:11.000000000 +0100
28498 +++ linux-4.4.46/lib/radix-tree.c 2017-02-03 17:18:10.947619830 +0100
28499 @@ -196,13 +196,14 @@
28500 * succeed in getting a node here (and never reach
28501 * kmem_cache_alloc)
28503 - rtp = this_cpu_ptr(&radix_tree_preloads);
28504 + rtp = &get_cpu_var(radix_tree_preloads);
28507 rtp->nodes = ret->private_data;
28508 ret->private_data = NULL;
28511 + put_cpu_var(radix_tree_preloads);
28513 * Update the allocation stack trace as this is more useful
28515 @@ -242,6 +243,7 @@
28516 call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
28519 +#ifndef CONFIG_PREEMPT_RT_FULL
28521 * Load up this CPU's radix_tree_node buffer with sufficient objects to
28522 * ensure that the addition of a single element in the tree cannot fail. On
28523 @@ -310,6 +312,7 @@
28526 EXPORT_SYMBOL(radix_tree_maybe_preload);
28530 * Return the maximum key which can be stored into a
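
Because the preempt-disabled preload path is compiled out on PREEMPT_RT_FULL (the #ifndef added above), radix_tree_node_alloc() can no longer assume its caller has preemption disabled, so it pins the CPU itself with get_cpu_var()/put_cpu_var() while it consumes the per-CPU pool. A minimal sketch of that idiom with a stand-in pool structure:

#include <linux/percpu.h>

struct example_pool {
        int nr;
        void *head;
};
static DEFINE_PER_CPU(struct example_pool, example_pools);

static void *example_take_preloaded(void)
{
        struct example_pool *pool;
        void *obj = NULL;

        pool = &get_cpu_var(example_pools);     /* disables preemption */
        if (pool->nr) {
                obj = pool->head;
                pool->head = NULL;
                pool->nr--;
        }
        put_cpu_var(example_pools);             /* re-enables preemption */

        return obj;
}
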
28531 diff -Nur linux-4.4.46.orig/lib/rbtree.c linux-4.4.46/lib/rbtree.c
28532 --- linux-4.4.46.orig/lib/rbtree.c 2017-02-01 08:31:11.000000000 +0100
28533 +++ linux-4.4.46/lib/rbtree.c 2017-02-03 17:18:10.947619830 +0100
28536 #include <linux/rbtree_augmented.h>
28537 #include <linux/export.h>
28538 +#include <linux/rcupdate.h>
28541 * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
28542 @@ -590,3 +591,13 @@
28543 return rb_left_deepest_node(root->rb_node);
28545 EXPORT_SYMBOL(rb_first_postorder);
28547 +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
28548 + struct rb_node **rb_link)
28550 + node->__rb_parent_color = (unsigned long)parent;
28551 + node->rb_left = node->rb_right = NULL;
28553 + rcu_assign_pointer(*rb_link, node);
28555 +EXPORT_SYMBOL(rb_link_node_rcu);
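
rb_link_node_rcu() mirrors rb_link_node() but publishes the new node with rcu_assign_pointer(), so a reader walking the tree under rcu_read_lock() cannot observe the node before its parent pointer and child slots are initialised. A sketch of an insert path using it; the node layout and key are illustrative, and fully lockless readers additionally need something like a seqcount/latch scheme to tolerate the rotations done by rb_insert_color().

#include <linux/rbtree.h>
#include <linux/rcupdate.h>

struct example_node {
        struct rb_node rb;
        unsigned long key;
};

static void example_insert(struct rb_root *root, struct example_node *new)
{
        struct rb_node **link = &root->rb_node, *parent = NULL;

        while (*link) {
                struct example_node *cur = rb_entry(*link, struct example_node, rb);

                parent = *link;
                if (new->key < cur->key)
                        link = &(*link)->rb_left;
                else
                        link = &(*link)->rb_right;
        }

        /* publish via rcu_assign_pointer() before rebalancing */
        rb_link_node_rcu(&new->rb, parent, link);
        rb_insert_color(&new->rb, root);
}
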
28556 diff -Nur linux-4.4.46.orig/lib/scatterlist.c linux-4.4.46/lib/scatterlist.c
28557 --- linux-4.4.46.orig/lib/scatterlist.c 2017-02-01 08:31:11.000000000 +0100
28558 +++ linux-4.4.46/lib/scatterlist.c 2017-02-03 17:18:10.947619830 +0100
28559 @@ -620,7 +620,7 @@
28560 flush_kernel_dcache_page(miter->page);
28562 if (miter->__flags & SG_MITER_ATOMIC) {
28563 - WARN_ON_ONCE(preemptible());
28564 + WARN_ON_ONCE(!pagefault_disabled());
28565 kunmap_atomic(miter->addr);
28567 kunmap(miter->page);
28568 @@ -664,7 +664,7 @@
28569 if (!sg_miter_skip(&miter, skip))
28572 - local_irq_save(flags);
28573 + local_irq_save_nort(flags);
28575 while (sg_miter_next(&miter) && offset < buflen) {
28577 @@ -681,7 +681,7 @@
28579 sg_miter_stop(&miter);
28581 - local_irq_restore(flags);
28582 + local_irq_restore_nort(flags);
28585 EXPORT_SYMBOL(sg_copy_buffer);
28586 diff -Nur linux-4.4.46.orig/lib/smp_processor_id.c linux-4.4.46/lib/smp_processor_id.c
28587 --- linux-4.4.46.orig/lib/smp_processor_id.c 2017-02-01 08:31:11.000000000 +0100
28588 +++ linux-4.4.46/lib/smp_processor_id.c 2017-02-03 17:18:10.947619830 +0100
28590 if (!printk_ratelimit())
28593 - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
28594 - what1, what2, preempt_count() - 1, current->comm, current->pid);
28595 + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
28596 + what1, what2, preempt_count() - 1, __migrate_disabled(current),
28597 + current->comm, current->pid);
28599 print_symbol("caller is %s\n", (long)__builtin_return_address(0));
28601 diff -Nur linux-4.4.46.orig/Makefile linux-4.4.46/Makefile
28602 --- linux-4.4.46.orig/Makefile 2017-02-01 08:31:11.000000000 +0100
28603 +++ linux-4.4.46/Makefile 2017-02-03 17:18:05.627414322 +0100
28604 @@ -785,6 +785,9 @@
28605 # Prohibit date/time macros, which would make the build non-deterministic
28606 KBUILD_CFLAGS += $(call cc-option,-Werror=date-time)
28608 +# enforce correct pointer usage
28609 +KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
28611 # use the deterministic mode of AR if available
28612 KBUILD_ARFLAGS := $(call ar-option,D)
28614 diff -Nur linux-4.4.46.orig/mm/backing-dev.c linux-4.4.46/mm/backing-dev.c
28615 --- linux-4.4.46.orig/mm/backing-dev.c 2017-02-01 08:31:11.000000000 +0100
28616 +++ linux-4.4.46/mm/backing-dev.c 2017-02-03 17:18:10.947619830 +0100
28617 @@ -457,9 +457,9 @@
28619 unsigned long flags;
28621 - local_irq_save(flags);
28622 + local_irq_save_nort(flags);
28623 if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
28624 - local_irq_restore(flags);
28625 + local_irq_restore_nort(flags);
28629 diff -Nur linux-4.4.46.orig/mm/compaction.c linux-4.4.46/mm/compaction.c
28630 --- linux-4.4.46.orig/mm/compaction.c 2017-02-01 08:31:11.000000000 +0100
28631 +++ linux-4.4.46/mm/compaction.c 2017-02-03 17:18:10.947619830 +0100
28632 @@ -1430,10 +1430,12 @@
28633 cc->migrate_pfn & ~((1UL << cc->order) - 1);
28635 if (cc->last_migrated_pfn < current_block_start) {
28637 + cpu = get_cpu_light();
28638 + local_lock_irq(swapvec_lock);
28639 lru_add_drain_cpu(cpu);
28640 + local_unlock_irq(swapvec_lock);
28641 drain_local_pages(zone);
28644 /* No more flushing until we migrate again */
28645 cc->last_migrated_pfn = 0;
28647 diff -Nur linux-4.4.46.orig/mm/filemap.c linux-4.4.46/mm/filemap.c
28648 --- linux-4.4.46.orig/mm/filemap.c 2017-02-01 08:31:11.000000000 +0100
28649 +++ linux-4.4.46/mm/filemap.c 2017-02-03 17:18:10.947619830 +0100
28650 @@ -144,9 +144,12 @@
28651 * node->private_list is protected by
28652 * mapping->tree_lock.
28654 - if (!list_empty(&node->private_list))
28655 - list_lru_del(&workingset_shadow_nodes,
28656 + if (!list_empty(&node->private_list)) {
28657 + local_lock(workingset_shadow_lock);
28658 + list_lru_del(&__workingset_shadow_nodes,
28659 &node->private_list);
28660 + local_unlock(workingset_shadow_lock);
28665 @@ -218,7 +221,9 @@
28666 if (!workingset_node_pages(node) &&
28667 list_empty(&node->private_list)) {
28668 node->private_data = mapping;
28669 - list_lru_add(&workingset_shadow_nodes, &node->private_list);
28670 + local_lock(workingset_shadow_lock);
28671 + list_lru_add(&__workingset_shadow_nodes, &node->private_list);
28672 + local_unlock(workingset_shadow_lock);
28676 diff -Nur linux-4.4.46.orig/mm/highmem.c linux-4.4.46/mm/highmem.c
28677 --- linux-4.4.46.orig/mm/highmem.c 2017-02-01 08:31:11.000000000 +0100
28678 +++ linux-4.4.46/mm/highmem.c 2017-02-03 17:18:10.947619830 +0100
28679 @@ -29,10 +29,11 @@
28680 #include <linux/kgdb.h>
28681 #include <asm/tlbflush.h>
28684 +#ifndef CONFIG_PREEMPT_RT_FULL
28685 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
28686 DEFINE_PER_CPU(int, __kmap_atomic_idx);
28691 * Virtual_count is not a pure "count".
28692 @@ -107,8 +108,9 @@
28693 unsigned long totalhigh_pages __read_mostly;
28694 EXPORT_SYMBOL(totalhigh_pages);
28697 +#ifndef CONFIG_PREEMPT_RT_FULL
28698 EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
28701 unsigned int nr_free_highpages (void)
28703 diff -Nur linux-4.4.46.orig/mm/Kconfig linux-4.4.46/mm/Kconfig
28704 --- linux-4.4.46.orig/mm/Kconfig 2017-02-01 08:31:11.000000000 +0100
28705 +++ linux-4.4.46/mm/Kconfig 2017-02-03 17:18:10.947619830 +0100
28706 @@ -392,7 +392,7 @@
28708 config TRANSPARENT_HUGEPAGE
28709 bool "Transparent Hugepage Support"
28710 - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
28711 + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
28714 Transparent Hugepages allows the kernel to use huge pages and
28715 diff -Nur linux-4.4.46.orig/mm/memcontrol.c linux-4.4.46/mm/memcontrol.c
28716 --- linux-4.4.46.orig/mm/memcontrol.c 2017-02-01 08:31:11.000000000 +0100
28717 +++ linux-4.4.46/mm/memcontrol.c 2017-02-03 17:18:10.947619830 +0100
28719 #include <net/sock.h>
28720 #include <net/ip.h>
28721 #include <net/tcp_memcontrol.h>
28722 +#include <linux/locallock.h>
28726 #include <asm/uaccess.h>
28728 #define do_swap_account 0
28731 +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
28732 static const char * const mem_cgroup_stat_names[] = {
28735 @@ -1922,14 +1925,17 @@
28737 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
28739 - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
28740 + struct memcg_stock_pcp *stock;
28741 + int cpu = get_cpu_light();
28743 + stock = &per_cpu(memcg_stock, cpu);
28745 if (stock->cached != memcg) { /* reset if necessary */
28746 drain_stock(stock);
28747 stock->cached = memcg;
28749 stock->nr_pages += nr_pages;
28750 - put_cpu_var(memcg_stock);
28755 @@ -1945,7 +1951,7 @@
28757 /* Notify other cpus that system-wide "drain" is running */
28759 - curcpu = get_cpu();
28760 + curcpu = get_cpu_light();
28761 for_each_online_cpu(cpu) {
28762 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
28763 struct mem_cgroup *memcg;
28764 @@ -1962,7 +1968,7 @@
28765 schedule_work_on(cpu, &stock->work);
28771 mutex_unlock(&percpu_charge_mutex);
28773 @@ -4709,12 +4715,12 @@
28777 - local_irq_disable();
28778 + local_lock_irq(event_lock);
28779 mem_cgroup_charge_statistics(to, page, nr_pages);
28780 memcg_check_events(to, page);
28781 mem_cgroup_charge_statistics(from, page, -nr_pages);
28782 memcg_check_events(from, page);
28783 - local_irq_enable();
28784 + local_unlock_irq(event_lock);
28788 @@ -5504,10 +5510,10 @@
28789 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
28792 - local_irq_disable();
28793 + local_lock_irq(event_lock);
28794 mem_cgroup_charge_statistics(memcg, page, nr_pages);
28795 memcg_check_events(memcg, page);
28796 - local_irq_enable();
28797 + local_unlock_irq(event_lock);
28799 if (do_swap_account && PageSwapCache(page)) {
28800 swp_entry_t entry = { .val = page_private(page) };
28801 @@ -5563,14 +5569,14 @@
28802 memcg_oom_recover(memcg);
28805 - local_irq_save(flags);
28806 + local_lock_irqsave(event_lock, flags);
28807 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
28808 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
28809 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
28810 __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
28811 __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
28812 memcg_check_events(memcg, dummy_page);
28813 - local_irq_restore(flags);
28814 + local_unlock_irqrestore(event_lock, flags);
28816 if (!mem_cgroup_is_root(memcg))
28817 css_put_many(&memcg->css, nr_pages);
28818 @@ -5762,6 +5768,7 @@
28820 struct mem_cgroup *memcg, *swap_memcg;
28821 unsigned short oldid;
28822 + unsigned long flags;
28824 VM_BUG_ON_PAGE(PageLRU(page), page);
28825 VM_BUG_ON_PAGE(page_count(page), page);
28826 @@ -5802,12 +5809,16 @@
28827 * important here to have the interrupts disabled because it is the
28828 * only synchronisation we have for updating the per-CPU variables.
28830 + local_lock_irqsave(event_lock, flags);
28831 +#ifndef CONFIG_PREEMPT_RT_BASE
28832 VM_BUG_ON(!irqs_disabled());
28834 mem_cgroup_charge_statistics(memcg, page, -1);
28835 memcg_check_events(memcg, page);
28837 if (!mem_cgroup_is_root(memcg))
28838 css_put(&memcg->css);
28839 + local_unlock_irqrestore(event_lock, flags);
28843 diff -Nur linux-4.4.46.orig/mm/mmu_context.c linux-4.4.46/mm/mmu_context.c
28844 --- linux-4.4.46.orig/mm/mmu_context.c 2017-02-01 08:31:11.000000000 +0100
28845 +++ linux-4.4.46/mm/mmu_context.c 2017-02-03 17:18:10.947619830 +0100
28847 struct task_struct *tsk = current;
28850 + preempt_disable_rt();
28851 active_mm = tsk->active_mm;
28852 if (active_mm != mm) {
28853 atomic_inc(&mm->mm_count);
28857 switch_mm(active_mm, mm, tsk);
28858 + preempt_enable_rt();
28860 #ifdef finish_arch_post_lock_switch
28861 finish_arch_post_lock_switch();
28862 diff -Nur linux-4.4.46.orig/mm/page_alloc.c linux-4.4.46/mm/page_alloc.c
28863 --- linux-4.4.46.orig/mm/page_alloc.c 2017-02-01 08:31:11.000000000 +0100
28864 +++ linux-4.4.46/mm/page_alloc.c 2017-02-03 17:18:10.951619984 +0100
28866 #include <linux/page_ext.h>
28867 #include <linux/hugetlb.h>
28868 #include <linux/sched/rt.h>
28869 +#include <linux/locallock.h>
28870 #include <linux/page_owner.h>
28871 #include <linux/kthread.h>
28873 @@ -264,6 +265,18 @@
28874 EXPORT_SYMBOL(nr_online_nodes);
28877 +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
28879 +#ifdef CONFIG_PREEMPT_RT_BASE
28880 +# define cpu_lock_irqsave(cpu, flags) \
28881 + local_lock_irqsave_on(pa_lock, flags, cpu)
28882 +# define cpu_unlock_irqrestore(cpu, flags) \
28883 + local_unlock_irqrestore_on(pa_lock, flags, cpu)
28885 +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
28886 +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
28889 int page_group_by_mobility_disabled __read_mostly;
28891 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
28892 @@ -786,7 +799,7 @@
28896 - * Frees a number of pages from the PCP lists
28897 + * Frees a number of pages which have been collected from the pcp lists.
28898 * Assumes all pages on list are in same zone, and of same order.
28899 * count is the number of pages to free.
28901 @@ -797,18 +810,53 @@
28902 * pinned" detection logic.
28904 static void free_pcppages_bulk(struct zone *zone, int count,
28905 - struct per_cpu_pages *pcp)
28906 + struct list_head *list)
28908 - int migratetype = 0;
28909 - int batch_free = 0;
28910 int to_free = count;
28911 unsigned long nr_scanned;
28912 + unsigned long flags;
28914 + spin_lock_irqsave(&zone->lock, flags);
28916 - spin_lock(&zone->lock);
28917 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
28919 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
28921 + while (!list_empty(list)) {
28922 + struct page *page = list_first_entry(list, struct page, lru);
28923 + int mt; /* migratetype of the to-be-freed page */
28925 + /* must delete as __free_one_page list manipulates */
28926 + list_del(&page->lru);
28928 + mt = get_pcppage_migratetype(page);
28929 + /* MIGRATE_ISOLATE page should not go to pcplists */
28930 + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
28931 + /* Pageblock could have been isolated meanwhile */
28932 + if (unlikely(has_isolate_pageblock(zone)))
28933 + mt = get_pageblock_migratetype(page);
28935 + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
28936 + trace_mm_page_pcpu_drain(page, 0, mt);
28939 + WARN_ON(to_free != 0);
28940 + spin_unlock_irqrestore(&zone->lock, flags);
28944 + * Moves a number of pages from the PCP lists to free list which
28945 + * is freed outside of the locked region.
28947 + * Assumes all pages on list are in same zone, and of same order.
28948 + * count is the number of pages to free.
28950 +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
28951 + struct list_head *dst)
28953 + int migratetype = 0;
28954 + int batch_free = 0;
28958 struct list_head *list;
28959 @@ -824,7 +872,7 @@
28961 if (++migratetype == MIGRATE_PCPTYPES)
28963 - list = &pcp->lists[migratetype];
28964 + list = &src->lists[migratetype];
28965 } while (list_empty(list));
28967 /* This is the only non-empty list. Free them all. */
28968 @@ -832,24 +880,12 @@
28969 batch_free = to_free;
28972 - int mt; /* migratetype of the to-be-freed page */
28974 - page = list_entry(list->prev, struct page, lru);
28975 - /* must delete as __free_one_page list manipulates */
28976 + page = list_last_entry(list, struct page, lru);
28977 list_del(&page->lru);
28979 - mt = get_pcppage_migratetype(page);
28980 - /* MIGRATE_ISOLATE page should not go to pcplists */
28981 - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
28982 - /* Pageblock could have been isolated meanwhile */
28983 - if (unlikely(has_isolate_pageblock(zone)))
28984 - mt = get_pageblock_migratetype(page);
28986 - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
28987 - trace_mm_page_pcpu_drain(page, 0, mt);
28988 + list_add(&page->lru, dst);
28989 } while (--to_free && --batch_free && !list_empty(list));
28991 - spin_unlock(&zone->lock);
28994 static void free_one_page(struct zone *zone,
28995 @@ -858,7 +894,9 @@
28998 unsigned long nr_scanned;
28999 - spin_lock(&zone->lock);
29000 + unsigned long flags;
29002 + spin_lock_irqsave(&zone->lock, flags);
29003 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
29005 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
29006 @@ -868,7 +906,7 @@
29007 migratetype = get_pfnblock_migratetype(page, pfn);
29009 __free_one_page(page, pfn, zone, order, migratetype);
29010 - spin_unlock(&zone->lock);
29011 + spin_unlock_irqrestore(&zone->lock, flags);
29014 static int free_tail_pages_check(struct page *head_page, struct page *page)
29015 @@ -1019,10 +1057,10 @@
29018 migratetype = get_pfnblock_migratetype(page, pfn);
29019 - local_irq_save(flags);
29020 + local_lock_irqsave(pa_lock, flags);
29021 __count_vm_events(PGFREE, 1 << order);
29022 free_one_page(page_zone(page), page, pfn, order, migratetype);
29023 - local_irq_restore(flags);
29024 + local_unlock_irqrestore(pa_lock, flags);
29027 static void __init __free_pages_boot_core(struct page *page,
29028 @@ -1879,16 +1917,18 @@
29029 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
29031 unsigned long flags;
29033 int to_drain, batch;
29035 - local_irq_save(flags);
29036 + local_lock_irqsave(pa_lock, flags);
29037 batch = READ_ONCE(pcp->batch);
29038 to_drain = min(pcp->count, batch);
29039 if (to_drain > 0) {
29040 - free_pcppages_bulk(zone, to_drain, pcp);
29041 + isolate_pcp_pages(to_drain, pcp, &dst);
29042 pcp->count -= to_drain;
29044 - local_irq_restore(flags);
29045 + local_unlock_irqrestore(pa_lock, flags);
29046 + free_pcppages_bulk(zone, to_drain, &dst);
29050 @@ -1904,16 +1944,21 @@
29051 unsigned long flags;
29052 struct per_cpu_pageset *pset;
29053 struct per_cpu_pages *pcp;
29057 - local_irq_save(flags);
29058 + cpu_lock_irqsave(cpu, flags);
29059 pset = per_cpu_ptr(zone->pageset, cpu);
29062 - if (pcp->count) {
29063 - free_pcppages_bulk(zone, pcp->count, pcp);
29064 + count = pcp->count;
29066 + isolate_pcp_pages(count, pcp, &dst);
29069 - local_irq_restore(flags);
29070 + cpu_unlock_irqrestore(cpu, flags);
29072 + free_pcppages_bulk(zone, count, &dst);
29076 @@ -1999,8 +2044,17 @@
29078 cpumask_clear_cpu(cpu, &cpus_with_pcps);
29080 +#ifndef CONFIG_PREEMPT_RT_BASE
29081 on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
29084 + for_each_cpu(cpu, &cpus_with_pcps) {
29086 + drain_pages_zone(cpu, zone);
29088 + drain_pages(cpu);
29093 #ifdef CONFIG_HIBERNATION
29094 @@ -2056,7 +2110,7 @@
29096 migratetype = get_pfnblock_migratetype(page, pfn);
29097 set_pcppage_migratetype(page, migratetype);
29098 - local_irq_save(flags);
29099 + local_lock_irqsave(pa_lock, flags);
29100 __count_vm_event(PGFREE);
29103 @@ -2082,12 +2136,17 @@
29105 if (pcp->count >= pcp->high) {
29106 unsigned long batch = READ_ONCE(pcp->batch);
29107 - free_pcppages_bulk(zone, batch, pcp);
29110 + isolate_pcp_pages(batch, pcp, &dst);
29111 pcp->count -= batch;
29112 + local_unlock_irqrestore(pa_lock, flags);
29113 + free_pcppages_bulk(zone, batch, &dst);
29118 - local_irq_restore(flags);
29119 + local_unlock_irqrestore(pa_lock, flags);
29123 @@ -2222,7 +2281,7 @@
29124 struct per_cpu_pages *pcp;
29125 struct list_head *list;
29127 - local_irq_save(flags);
29128 + local_lock_irqsave(pa_lock, flags);
29129 pcp = &this_cpu_ptr(zone->pageset)->pcp;
29130 list = &pcp->lists[migratetype];
29131 if (list_empty(list)) {
29132 @@ -2254,7 +2313,7 @@
29134 WARN_ON_ONCE(order > 1);
29136 - spin_lock_irqsave(&zone->lock, flags);
29137 + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
29140 if (alloc_flags & ALLOC_HARDER) {
29141 @@ -2264,11 +2323,13 @@
29144 page = __rmqueue(zone, order, migratetype, gfp_flags);
29145 - spin_unlock(&zone->lock);
29148 + spin_unlock(&zone->lock);
29151 __mod_zone_freepage_state(zone, -(1 << order),
29152 get_pcppage_migratetype(page));
29153 + spin_unlock(&zone->lock);
29156 __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
29157 @@ -2278,13 +2339,13 @@
29159 __count_zone_vm_events(PGALLOC, zone, 1 << order);
29160 zone_statistics(preferred_zone, zone, gfp_flags);
29161 - local_irq_restore(flags);
29162 + local_unlock_irqrestore(pa_lock, flags);
29164 VM_BUG_ON_PAGE(bad_range(zone, page), page);
29168 - local_irq_restore(flags);
29169 + local_unlock_irqrestore(pa_lock, flags);
29173 @@ -5953,6 +6014,7 @@
29174 void __init page_alloc_init(void)
29176 hotcpu_notifier(page_alloc_cpu_notify, 0);
29177 + local_irq_lock_init(pa_lock);
29181 @@ -6847,7 +6909,7 @@
29182 struct per_cpu_pageset *pset;
29184 /* avoid races with drain_pages() */
29185 - local_irq_save(flags);
29186 + local_lock_irqsave(pa_lock, flags);
29187 if (zone->pageset != &boot_pageset) {
29188 for_each_online_cpu(cpu) {
29189 pset = per_cpu_ptr(zone->pageset, cpu);
29190 @@ -6856,7 +6918,7 @@
29191 free_percpu(zone->pageset);
29192 zone->pageset = &boot_pageset;
29194 - local_irq_restore(flags);
29195 + local_unlock_irqrestore(pa_lock, flags);
29198 #ifdef CONFIG_MEMORY_HOTREMOVE
29199 diff -Nur linux-4.4.46.orig/mm/slab.h linux-4.4.46/mm/slab.h
29200 --- linux-4.4.46.orig/mm/slab.h 2017-02-01 08:31:11.000000000 +0100
29201 +++ linux-4.4.46/mm/slab.h 2017-02-03 17:18:10.951619984 +0100
29202 @@ -324,7 +324,11 @@
29203 * The slab lists for all objects.
29205 struct kmem_cache_node {
29206 +#ifdef CONFIG_SLUB
29207 + raw_spinlock_t list_lock;
29209 spinlock_t list_lock;
29213 struct list_head slabs_partial; /* partial list first, better asm code */
29214 diff -Nur linux-4.4.46.orig/mm/slub.c linux-4.4.46/mm/slub.c
29215 --- linux-4.4.46.orig/mm/slub.c 2017-02-01 08:31:11.000000000 +0100
29216 +++ linux-4.4.46/mm/slub.c 2017-02-03 17:18:10.951619984 +0100
29217 @@ -1075,7 +1075,7 @@
29218 void *object = head;
29221 - spin_lock_irqsave(&n->list_lock, *flags);
29222 + raw_spin_lock_irqsave(&n->list_lock, *flags);
29225 if (!check_slab(s, page))
29226 @@ -1136,7 +1136,7 @@
29230 - spin_unlock_irqrestore(&n->list_lock, *flags);
29231 + raw_spin_unlock_irqrestore(&n->list_lock, *flags);
29232 slab_fix(s, "Object at 0x%p not freed", object);
29235 @@ -1263,6 +1263,12 @@
29237 #endif /* CONFIG_SLUB_DEBUG */
29239 +struct slub_free_list {
29240 + raw_spinlock_t lock;
29241 + struct list_head list;
29243 +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
29246 * Hooks for other subsystems that check memory allocations. In a typical
29247 * production configuration these hooks all should produce no code at all.
29248 @@ -1399,10 +1405,17 @@
29252 + bool enableirqs = false;
29254 flags &= gfp_allowed_mask;
29256 if (gfpflags_allow_blocking(flags))
29257 + enableirqs = true;
29258 +#ifdef CONFIG_PREEMPT_RT_FULL
29259 + if (system_state == SYSTEM_RUNNING)
29260 + enableirqs = true;
29263 local_irq_enable();
29265 flags |= s->allocflags;
29266 @@ -1473,7 +1486,7 @@
29270 - if (gfpflags_allow_blocking(flags))
29272 local_irq_disable();
29275 @@ -1529,6 +1542,16 @@
29276 __free_kmem_pages(page, order);
29279 +static void free_delayed(struct list_head *h)
29281 + while(!list_empty(h)) {
29282 + struct page *page = list_first_entry(h, struct page, lru);
29284 + list_del(&page->lru);
29285 + __free_slab(page->slab_cache, page);
29289 #define need_reserve_slab_rcu \
29290 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
29292 @@ -1560,6 +1583,12 @@
29295 call_rcu(head, rcu_free_slab);
29296 + } else if (irqs_disabled()) {
29297 + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
29299 + raw_spin_lock(&f->lock);
29300 + list_add(&page->lru, &f->list);
29301 + raw_spin_unlock(&f->lock);
29303 __free_slab(s, page);
29305 @@ -1673,7 +1702,7 @@
29306 if (!n || !n->nr_partial)
29309 - spin_lock(&n->list_lock);
29310 + raw_spin_lock(&n->list_lock);
29311 list_for_each_entry_safe(page, page2, &n->partial, lru) {
29314 @@ -1698,7 +1727,7 @@
29318 - spin_unlock(&n->list_lock);
29319 + raw_spin_unlock(&n->list_lock);
29323 @@ -1944,7 +1973,7 @@
29324 * that acquire_slab() will see a slab page that
29327 - spin_lock(&n->list_lock);
29328 + raw_spin_lock(&n->list_lock);
29332 @@ -1955,7 +1984,7 @@
29333 * slabs from diagnostic functions will not see
29334 * any frozen slabs.
29336 - spin_lock(&n->list_lock);
29337 + raw_spin_lock(&n->list_lock);
29341 @@ -1990,7 +2019,7 @@
29345 - spin_unlock(&n->list_lock);
29346 + raw_spin_unlock(&n->list_lock);
29349 stat(s, DEACTIVATE_EMPTY);
29350 @@ -2022,10 +2051,10 @@
29351 n2 = get_node(s, page_to_nid(page));
29354 - spin_unlock(&n->list_lock);
29355 + raw_spin_unlock(&n->list_lock);
29358 - spin_lock(&n->list_lock);
29359 + raw_spin_lock(&n->list_lock);
29363 @@ -2054,7 +2083,7 @@
29367 - spin_unlock(&n->list_lock);
29368 + raw_spin_unlock(&n->list_lock);
29370 while (discard_page) {
29371 page = discard_page;
29372 @@ -2093,14 +2122,21 @@
29373 pobjects = oldpage->pobjects;
29374 pages = oldpage->pages;
29375 if (drain && pobjects > s->cpu_partial) {
29376 + struct slub_free_list *f;
29377 unsigned long flags;
29378 + LIST_HEAD(tofree);
29380 * partial array is full. Move the existing
29381 * set to the per node partial list.
29383 local_irq_save(flags);
29384 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
29385 + f = this_cpu_ptr(&slub_free_list);
29386 + raw_spin_lock(&f->lock);
29387 + list_splice_init(&f->list, &tofree);
29388 + raw_spin_unlock(&f->lock);
29389 local_irq_restore(flags);
29390 + free_delayed(&tofree);
29394 @@ -2172,7 +2208,22 @@
29396 static void flush_all(struct kmem_cache *s)
29398 + LIST_HEAD(tofree);
29401 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
29402 + for_each_online_cpu(cpu) {
29403 + struct slub_free_list *f;
29405 + if (!has_cpu_slab(cpu, s))
29408 + f = &per_cpu(slub_free_list, cpu);
29409 + raw_spin_lock_irq(&f->lock);
29410 + list_splice_init(&f->list, &tofree);
29411 + raw_spin_unlock_irq(&f->lock);
29412 + free_delayed(&tofree);
29417 @@ -2208,10 +2259,10 @@
29418 unsigned long x = 0;
29421 - spin_lock_irqsave(&n->list_lock, flags);
29422 + raw_spin_lock_irqsave(&n->list_lock, flags);
29423 list_for_each_entry(page, &n->partial, lru)
29424 x += get_count(page);
29425 - spin_unlock_irqrestore(&n->list_lock, flags);
29426 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29429 #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
29430 @@ -2349,8 +2400,10 @@
29431 * already disabled (which is the case for bulk allocation).
29433 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
29434 - unsigned long addr, struct kmem_cache_cpu *c)
29435 + unsigned long addr, struct kmem_cache_cpu *c,
29436 + struct list_head *to_free)
29438 + struct slub_free_list *f;
29442 @@ -2410,6 +2463,13 @@
29443 VM_BUG_ON(!c->page->frozen);
29444 c->freelist = get_freepointer(s, freelist);
29445 c->tid = next_tid(c->tid);
29448 + f = this_cpu_ptr(&slub_free_list);
29449 + raw_spin_lock(&f->lock);
29450 + list_splice_init(&f->list, to_free);
29451 + raw_spin_unlock(&f->lock);
29456 @@ -2441,7 +2501,7 @@
29457 deactivate_slab(s, page, get_freepointer(s, freelist));
29459 c->freelist = NULL;
29465 @@ -2453,6 +2513,7 @@
29468 unsigned long flags;
29469 + LIST_HEAD(tofree);
29471 local_irq_save(flags);
29472 #ifdef CONFIG_PREEMPT
29473 @@ -2464,8 +2525,9 @@
29474 c = this_cpu_ptr(s->cpu_slab);
29477 - p = ___slab_alloc(s, gfpflags, node, addr, c);
29478 + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
29479 local_irq_restore(flags);
29480 + free_delayed(&tofree);
29484 @@ -2652,7 +2714,7 @@
29488 - spin_unlock_irqrestore(&n->list_lock, flags);
29489 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29492 prior = page->freelist;
29493 @@ -2684,7 +2746,7 @@
29494 * Otherwise the list_lock will synchronize with
29495 * other processors updating the list of slabs.
29497 - spin_lock_irqsave(&n->list_lock, flags);
29498 + raw_spin_lock_irqsave(&n->list_lock, flags);
29502 @@ -2726,7 +2788,7 @@
29503 add_partial(n, page, DEACTIVATE_TO_TAIL);
29504 stat(s, FREE_ADD_PARTIAL);
29506 - spin_unlock_irqrestore(&n->list_lock, flags);
29507 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29511 @@ -2741,7 +2803,7 @@
29512 remove_full(s, n, page);
29515 - spin_unlock_irqrestore(&n->list_lock, flags);
29516 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29517 stat(s, FREE_SLAB);
29518 discard_slab(s, page);
29520 @@ -2913,6 +2975,7 @@
29523 struct kmem_cache_cpu *c;
29524 + LIST_HEAD(to_free);
29527 /* memcg and kmem_cache debug support */
29528 @@ -2936,7 +2999,7 @@
29529 * of re-populating per CPU c->freelist
29531 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
29533 + _RET_IP_, c, &to_free);
29534 if (unlikely(!p[i]))
29537 @@ -2948,6 +3011,7 @@
29539 c->tid = next_tid(c->tid);
29540 local_irq_enable();
29541 + free_delayed(&to_free);
29543 /* Clear memory outside IRQ disabled fastpath loop */
29544 if (unlikely(flags & __GFP_ZERO)) {
29545 @@ -3095,7 +3159,7 @@
29546 init_kmem_cache_node(struct kmem_cache_node *n)
29549 - spin_lock_init(&n->list_lock);
29550 + raw_spin_lock_init(&n->list_lock);
29551 INIT_LIST_HEAD(&n->partial);
29552 #ifdef CONFIG_SLUB_DEBUG
29553 atomic_long_set(&n->nr_slabs, 0);
29554 @@ -3677,7 +3741,7 @@
29555 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
29556 INIT_LIST_HEAD(promote + i);
29558 - spin_lock_irqsave(&n->list_lock, flags);
29559 + raw_spin_lock_irqsave(&n->list_lock, flags);
29562 * Build lists of slabs to discard or promote.
29563 @@ -3708,7 +3772,7 @@
29564 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
29565 list_splice(promote + i, &n->partial);
29567 - spin_unlock_irqrestore(&n->list_lock, flags);
29568 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29570 /* Release empty slabs */
29571 list_for_each_entry_safe(page, t, &discard, lru)
29572 @@ -3884,6 +3948,12 @@
29574 static __initdata struct kmem_cache boot_kmem_cache,
29575 boot_kmem_cache_node;
29578 + for_each_possible_cpu(cpu) {
29579 + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
29580 + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
29583 if (debug_guardpage_minorder())
29584 slub_max_order = 0;
29585 @@ -4127,7 +4197,7 @@
29587 unsigned long flags;
29589 - spin_lock_irqsave(&n->list_lock, flags);
29590 + raw_spin_lock_irqsave(&n->list_lock, flags);
29592 list_for_each_entry(page, &n->partial, lru) {
29593 validate_slab_slab(s, page, map);
29594 @@ -4149,7 +4219,7 @@
29595 s->name, count, atomic_long_read(&n->nr_slabs));
29598 - spin_unlock_irqrestore(&n->list_lock, flags);
29599 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29603 @@ -4337,12 +4407,12 @@
29604 if (!atomic_long_read(&n->nr_slabs))
29607 - spin_lock_irqsave(&n->list_lock, flags);
29608 + raw_spin_lock_irqsave(&n->list_lock, flags);
29609 list_for_each_entry(page, &n->partial, lru)
29610 process_slab(&t, s, page, alloc, map);
29611 list_for_each_entry(page, &n->full, lru)
29612 process_slab(&t, s, page, alloc, map);
29613 - spin_unlock_irqrestore(&n->list_lock, flags);
29614 + raw_spin_unlock_irqrestore(&n->list_lock, flags);
29617 for (i = 0; i < t.count; i++) {
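
Two themes run through the SLUB hunks: kmem_cache_node::list_lock becomes a raw_spinlock_t (it nests inside regions that must stay non-preemptible even on RT), and slabs that become free while interrupts are disabled are parked on the per-CPU slub_free_list and only handed back to the page allocator via free_delayed() once a context with interrupts enabled is reached, since that path may take sleeping locks on RT. A sketch of the park/flush halves with simplified types; per-CPU lock and list initialisation (done at boot in kmem_cache_init() above) is omitted.

#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

struct deferred_frees {
        raw_spinlock_t lock;
        struct list_head list;
};
static DEFINE_PER_CPU(struct deferred_frees, deferred_frees);

/* called with IRQs off: just park the entry, do not free it yet */
static void example_defer_free(struct list_head *entry)
{
        struct deferred_frees *f = this_cpu_ptr(&deferred_frees);

        raw_spin_lock(&f->lock);
        list_add(entry, &f->list);
        raw_spin_unlock(&f->lock);
}

/* called later, with IRQs on: drain this CPU's parked entries */
static void example_flush_deferred(void)
{
        struct deferred_frees *f;
        LIST_HEAD(to_free);

        f = get_cpu_ptr(&deferred_frees);
        raw_spin_lock_irq(&f->lock);
        list_splice_init(&f->list, &to_free);
        raw_spin_unlock_irq(&f->lock);
        put_cpu_ptr(&deferred_frees);

        /* the real free (which may sleep on RT) happens here */
}
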
29618 diff -Nur linux-4.4.46.orig/mm/swap.c linux-4.4.46/mm/swap.c
29619 --- linux-4.4.46.orig/mm/swap.c 2017-02-01 08:31:11.000000000 +0100
29620 +++ linux-4.4.46/mm/swap.c 2017-02-03 17:18:10.951619984 +0100
29622 #include <linux/memcontrol.h>
29623 #include <linux/gfp.h>
29624 #include <linux/uio.h>
29625 +#include <linux/locallock.h>
29626 #include <linux/hugetlb.h>
29627 #include <linux/page_idle.h>
29630 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
29631 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
29633 +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
29634 +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
29637 * This path almost never happens for VM activity - pages are normally
29638 * freed via pagevecs. But it gets used by networking.
29639 @@ -481,11 +485,11 @@
29640 unsigned long flags;
29642 page_cache_get(page);
29643 - local_irq_save(flags);
29644 + local_lock_irqsave(rotate_lock, flags);
29645 pvec = this_cpu_ptr(&lru_rotate_pvecs);
29646 if (!pagevec_add(pvec, page))
29647 pagevec_move_tail(pvec);
29648 - local_irq_restore(flags);
29649 + local_unlock_irqrestore(rotate_lock, flags);
29653 @@ -536,12 +540,13 @@
29654 void activate_page(struct page *page)
29656 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
29657 - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
29658 + struct pagevec *pvec = &get_locked_var(swapvec_lock,
29659 + activate_page_pvecs);
29661 page_cache_get(page);
29662 if (!pagevec_add(pvec, page))
29663 pagevec_lru_move_fn(pvec, __activate_page, NULL);
29664 - put_cpu_var(activate_page_pvecs);
29665 + put_locked_var(swapvec_lock, activate_page_pvecs);
29669 @@ -567,7 +572,7 @@
29671 static void __lru_cache_activate_page(struct page *page)
29673 - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
29674 + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
29678 @@ -589,7 +594,7 @@
29682 - put_cpu_var(lru_add_pvec);
29683 + put_locked_var(swapvec_lock, lru_add_pvec);
29687 @@ -630,13 +635,13 @@
29689 static void __lru_cache_add(struct page *page)
29691 - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
29692 + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
29694 page_cache_get(page);
29695 if (!pagevec_space(pvec))
29696 __pagevec_lru_add(pvec);
29697 pagevec_add(pvec, page);
29698 - put_cpu_var(lru_add_pvec);
29699 + put_locked_var(swapvec_lock, lru_add_pvec);
29703 @@ -816,9 +821,15 @@
29704 unsigned long flags;
29706 /* No harm done if a racing interrupt already did this */
29707 - local_irq_save(flags);
29708 +#ifdef CONFIG_PREEMPT_RT_BASE
29709 + local_lock_irqsave_on(rotate_lock, flags, cpu);
29710 + pagevec_move_tail(pvec);
29711 + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
29712 +#else
29713 + local_lock_irqsave(rotate_lock, flags);
29714 pagevec_move_tail(pvec);
29715 - local_irq_restore(flags);
29716 + local_unlock_irqrestore(rotate_lock, flags);
29717 +#endif
29718 }
29720 pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
29721 @@ -846,26 +857,47 @@
29724 if (likely(get_page_unless_zero(page))) {
29725 - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
29726 + struct pagevec *pvec = &get_locked_var(swapvec_lock,
29727 + lru_deactivate_file_pvecs);
29729 if (!pagevec_add(pvec, page))
29730 pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
29731 - put_cpu_var(lru_deactivate_file_pvecs);
29732 + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
29736 void lru_add_drain(void)
29738 - lru_add_drain_cpu(get_cpu());
29739 - put_cpu();
29740 + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
29741 + local_unlock_cpu(swapvec_lock);
29745 +#ifdef CONFIG_PREEMPT_RT_BASE
29746 +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
29747 +{
29748 + local_lock_on(swapvec_lock, cpu);
29749 + lru_add_drain_cpu(cpu);
29750 + local_unlock_on(swapvec_lock, cpu);
29751 +}
29753 +#else
29755 static void lru_add_drain_per_cpu(struct work_struct *dummy)
29760 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
29761 +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
29762 +{
29763 + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
29765 + INIT_WORK(work, lru_add_drain_per_cpu);
29766 + schedule_work_on(cpu, work);
29767 + cpumask_set_cpu(cpu, has_work);
29768 +}
29769 +#endif
29771 void lru_add_drain_all(void)
29773 @@ -878,20 +910,17 @@
29774 cpumask_clear(&has_work);
29776 for_each_online_cpu(cpu) {
29777 - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
29779 if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
29780 pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
29781 pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
29782 - need_activate_page_drain(cpu)) {
29783 - INIT_WORK(work, lru_add_drain_per_cpu);
29784 - schedule_work_on(cpu, work);
29785 - cpumask_set_cpu(cpu, &has_work);
29787 + need_activate_page_drain(cpu))
29788 + remote_lru_add_drain(cpu, &has_work);
29791 +#ifndef CONFIG_PREEMPT_RT_BASE
29792 for_each_cpu(cpu, &has_work)
29793 flush_work(&per_cpu(lru_add_drain_work, cpu));
29794 +#endif
29797 mutex_unlock(&lock);
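All of the swap.c conversions follow one local-lock idiom: get_cpu_var()/put_cpu_var() pairs become get_locked_var()/put_locked_var() on a DEFINE_LOCAL_IRQ_LOCK, and open-coded local_irq_save()/restore() becomes local_lock_irqsave()/local_unlock_irqrestore(). A minimal sketch of the idiom with illustrative names (example_lock, example_pvec and example_add are not from swap.c):

	#include <linux/locallock.h>
	#include <linux/pagevec.h>

	static DEFINE_LOCAL_IRQ_LOCK(example_lock);
	static DEFINE_PER_CPU(struct pagevec, example_pvec);

	static void example_add(struct page *page)
	{
		/* !RT: behaves like get_cpu_var() and disables preemption;
		 * RT:  takes a per-CPU sleeping lock, so the section stays
		 *      preemptible but is still serialized per CPU. */
		struct pagevec *pvec = &get_locked_var(example_lock, example_pvec);

		if (!pagevec_add(pvec, page))
			__pagevec_lru_add(pvec);
		put_locked_var(example_lock, example_pvec);
	}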
29798 diff -Nur linux-4.4.46.orig/mm/truncate.c linux-4.4.46/mm/truncate.c
29799 --- linux-4.4.46.orig/mm/truncate.c 2017-02-01 08:31:11.000000000 +0100
29800 +++ linux-4.4.46/mm/truncate.c 2017-02-03 17:18:10.951619984 +0100
29802 * protected by mapping->tree_lock.
29804 if (!workingset_node_shadows(node) &&
29805 - !list_empty(&node->private_list))
29806 - list_lru_del(&workingset_shadow_nodes, &node->private_list);
29807 + !list_empty(&node->private_list)) {
29808 + local_lock(workingset_shadow_lock);
29809 + list_lru_del(&__workingset_shadow_nodes, &node->private_list);
29810 + local_unlock(workingset_shadow_lock);
29812 __radix_tree_delete_node(&mapping->page_tree, node);
29814 spin_unlock_irq(&mapping->tree_lock);
29815 diff -Nur linux-4.4.46.orig/mm/vmalloc.c linux-4.4.46/mm/vmalloc.c
29816 --- linux-4.4.46.orig/mm/vmalloc.c 2017-02-01 08:31:11.000000000 +0100
29817 +++ linux-4.4.46/mm/vmalloc.c 2017-02-03 17:18:10.951619984 +0100
29818 @@ -821,7 +821,7 @@
29819 struct vmap_block *vb;
29820 struct vmap_area *va;
29821 unsigned long vb_idx;
29822 - int node, err;
29823 + int node, err, cpu;
29826 node = numa_node_id();
29827 @@ -864,11 +864,12 @@
29829 radix_tree_preload_end();
29831 - vbq = &get_cpu_var(vmap_block_queue);
29832 + cpu = get_cpu_light();
29833 + vbq = this_cpu_ptr(&vmap_block_queue);
29834 spin_lock(&vbq->lock);
29835 list_add_tail_rcu(&vb->free_list, &vbq->free);
29836 spin_unlock(&vbq->lock);
29837 - put_cpu_var(vmap_block_queue);
29838 + put_cpu_light();
29842 @@ -937,6 +938,7 @@
29843 struct vmap_block *vb;
29844 void *vaddr = NULL;
29845 unsigned int order;
29846 + int cpu;
29848 BUG_ON(offset_in_page(size));
29849 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
29850 @@ -951,7 +953,8 @@
29851 order = get_order(size);
29854 - vbq = &get_cpu_var(vmap_block_queue);
29855 + cpu = get_cpu_light();
29856 + vbq = this_cpu_ptr(&vmap_block_queue);
29857 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
29858 unsigned long pages_off;
29860 @@ -974,7 +977,7 @@
29864 - put_cpu_var(vmap_block_queue);
29865 + put_cpu_light();
29868 /* Allocate new block if nothing was found */
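In vmalloc.c the pattern differs slightly: get_cpu_var(vmap_block_queue) is split into get_cpu_light() plus this_cpu_ptr(), and put_cpu_var() becomes put_cpu_light(). On RT, get_cpu_light() only disables migration, so the section stays preemptible while the per-CPU pointer remains stable; vbq->lock provides the real serialization. The resulting shape, condensed from the hunks above:

	cpu = get_cpu_light();		/* RT: migrate_disable(); !RT: same as get_cpu() */
	vbq = this_cpu_ptr(&vmap_block_queue);
	spin_lock(&vbq->lock);
	list_add_tail_rcu(&vb->free_list, &vbq->free);
	spin_unlock(&vbq->lock);
	put_cpu_light();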
29869 diff -Nur linux-4.4.46.orig/mm/vmstat.c linux-4.4.46/mm/vmstat.c
29870 --- linux-4.4.46.orig/mm/vmstat.c 2017-02-01 08:31:11.000000000 +0100
29871 +++ linux-4.4.46/mm/vmstat.c 2017-02-03 17:18:10.951619984 +0100
29872 @@ -226,6 +226,7 @@
29876 + preempt_disable_rt();
29877 x = delta + __this_cpu_read(*p);
29879 t = __this_cpu_read(pcp->stat_threshold);
29880 @@ -235,6 +236,7 @@
29883 __this_cpu_write(*p, x);
29884 + preempt_enable_rt();
29886 EXPORT_SYMBOL(__mod_zone_page_state);
29888 @@ -267,6 +269,7 @@
29889 s8 __percpu *p = pcp->vm_stat_diff + item;
29892 + preempt_disable_rt();
29893 v = __this_cpu_inc_return(*p);
29894 t = __this_cpu_read(pcp->stat_threshold);
29895 if (unlikely(v > t)) {
29896 @@ -275,6 +278,7 @@
29897 zone_page_state_add(v + overstep, zone, item);
29898 __this_cpu_write(*p, -overstep);
29900 + preempt_enable_rt();
29903 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
29904 @@ -289,6 +293,7 @@
29905 s8 __percpu *p = pcp->vm_stat_diff + item;
29908 + preempt_disable_rt();
29909 v = __this_cpu_dec_return(*p);
29910 t = __this_cpu_read(pcp->stat_threshold);
29911 if (unlikely(v < - t)) {
29912 @@ -297,6 +302,7 @@
29913 zone_page_state_add(v - overstep, zone, item);
29914 __this_cpu_write(*p, overstep);
29916 + preempt_enable_rt();
29919 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
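Every vmstat counter update gains a preempt_disable_rt()/preempt_enable_rt() bracket. Callers of these helpers are already expected to have preemption or interrupts disabled on mainline, so the _rt variants expand to nothing there; on RT they keep the read-modify-write of the per-CPU diff coherent. A condensed sketch of the pattern as applied to __mod_zone_page_state() (reconstructed around the context lines above, not the verbatim 4.4.46 body):

	void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				   long delta)
	{
		struct per_cpu_pageset __percpu *pcp = zone->pageset;
		s8 __percpu *p = pcp->vm_stat_diff + item;
		long x, t;

		preempt_disable_rt();		/* no-op unless PREEMPT_RT_BASE */
		x = delta + __this_cpu_read(*p);
		t = __this_cpu_read(pcp->stat_threshold);
		if (unlikely(x > t || x < -t)) {
			zone_page_state_add(x, zone, item);
			x = 0;
		}
		__this_cpu_write(*p, x);
		preempt_enable_rt();
	}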
29920 diff -Nur linux-4.4.46.orig/mm/workingset.c linux-4.4.46/mm/workingset.c
29921 --- linux-4.4.46.orig/mm/workingset.c 2017-02-01 08:31:11.000000000 +0100
29922 +++ linux-4.4.46/mm/workingset.c 2017-02-03 17:18:10.951619984 +0100
29923 @@ -264,7 +264,8 @@
29924 * point where they would still be useful.
29927 -struct list_lru workingset_shadow_nodes;
29928 +struct list_lru __workingset_shadow_nodes;
29929 +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
29931 static unsigned long count_shadow_nodes(struct shrinker *shrinker,
29932 struct shrink_control *sc)
29933 @@ -274,9 +275,9 @@
29934 unsigned long pages;
29936 /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
29937 - local_irq_disable();
29938 - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
29939 - local_irq_enable();
29940 + local_lock_irq(workingset_shadow_lock);
29941 + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
29942 + local_unlock_irq(workingset_shadow_lock);
29944 pages = node_present_pages(sc->nid);
29946 @@ -361,9 +362,9 @@
29947 spin_unlock(&mapping->tree_lock);
29948 ret = LRU_REMOVED_RETRY;
29950 - local_irq_enable();
29951 + local_unlock_irq(workingset_shadow_lock);
29953 - local_irq_disable();
29954 + local_lock_irq(workingset_shadow_lock);
29955 spin_lock(lru_lock);
29958 @@ -374,10 +375,10 @@
29961 /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
29962 - local_irq_disable();
29963 - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
29964 + local_lock_irq(workingset_shadow_lock);
29965 + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
29966 shadow_lru_isolate, NULL);
29967 - local_irq_enable();
29968 + local_unlock_irq(workingset_shadow_lock);
29972 @@ -398,7 +399,7 @@
29976 - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
29977 + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
29980 ret = register_shrinker(&workingset_shadow_shrinker);
29981 @@ -406,7 +407,7 @@
29985 - list_lru_destroy(&workingset_shadow_nodes);
29986 + list_lru_destroy(&__workingset_shadow_nodes);
29990 diff -Nur linux-4.4.46.orig/mm/zsmalloc.c linux-4.4.46/mm/zsmalloc.c
29991 --- linux-4.4.46.orig/mm/zsmalloc.c 2017-02-01 08:31:11.000000000 +0100
29992 +++ linux-4.4.46/mm/zsmalloc.c 2017-02-03 17:18:10.951619984 +0100
29994 #include <linux/debugfs.h>
29995 #include <linux/zsmalloc.h>
29996 #include <linux/zpool.h>
29997 +#include <linux/locallock.h>
30000 * This must be power of 2 and greater than of equal to sizeof(link_free).
30001 @@ -403,6 +404,7 @@
30003 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
30004 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
30005 +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
30007 static int is_first_page(struct page *page)
30009 @@ -1289,7 +1291,7 @@
30010 class = pool->size_class[class_idx];
30011 off = obj_idx_to_offset(page, obj_idx, class->size);
30013 - area = &get_cpu_var(zs_map_area);
30014 + area = &get_locked_var(zs_map_area_lock, zs_map_area);
30016 if (off + class->size <= PAGE_SIZE) {
30017 /* this object is contained entirely within a page */
30018 @@ -1342,7 +1344,7 @@
30020 __zs_unmap_object(area, pages, off, class->size);
30022 - put_cpu_var(zs_map_area);
30023 + put_locked_var(zs_map_area_lock, zs_map_area);
30026 EXPORT_SYMBOL_GPL(zs_unmap_object);
30027 diff -Nur linux-4.4.46.orig/net/core/dev.c linux-4.4.46/net/core/dev.c
30028 --- linux-4.4.46.orig/net/core/dev.c 2017-02-01 08:31:11.000000000 +0100
30029 +++ linux-4.4.46/net/core/dev.c 2017-02-03 17:18:10.955620139 +0100
30030 @@ -186,6 +186,7 @@
30031 static DEFINE_HASHTABLE(napi_hash, 8);
30033 static seqcount_t devnet_rename_seq;
30034 +static DEFINE_MUTEX(devnet_rename_mutex);
30036 static inline void dev_base_seq_inc(struct net *net)
30038 @@ -207,14 +208,14 @@
30039 static inline void rps_lock(struct softnet_data *sd)
30042 - spin_lock(&sd->input_pkt_queue.lock);
30043 + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
30047 static inline void rps_unlock(struct softnet_data *sd)
30050 - spin_unlock(&sd->input_pkt_queue.lock);
30051 + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
30055 @@ -884,7 +885,8 @@
30056 strcpy(name, dev->name);
30058 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
30060 + mutex_lock(&devnet_rename_mutex);
30061 + mutex_unlock(&devnet_rename_mutex);
30065 @@ -1153,20 +1155,17 @@
30066 if (dev->flags & IFF_UP)
30069 - write_seqcount_begin(&devnet_rename_seq);
30070 + mutex_lock(&devnet_rename_mutex);
30071 + __raw_write_seqcount_begin(&devnet_rename_seq);
30073 - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
30074 - write_seqcount_end(&devnet_rename_seq);
30077 + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
30080 memcpy(oldname, dev->name, IFNAMSIZ);
30082 err = dev_get_valid_name(net, dev, newname);
30084 - write_seqcount_end(&devnet_rename_seq);
30090 if (oldname[0] && !strchr(oldname, '%'))
30091 netdev_info(dev, "renamed from %s\n", oldname);
30092 @@ -1179,11 +1178,12 @@
30094 memcpy(dev->name, oldname, IFNAMSIZ);
30095 dev->name_assign_type = old_assign_type;
30096 - write_seqcount_end(&devnet_rename_seq);
30102 - write_seqcount_end(&devnet_rename_seq);
30103 + __raw_write_seqcount_end(&devnet_rename_seq);
30104 + mutex_unlock(&devnet_rename_mutex);
30106 netdev_adjacent_rename_links(dev, oldname);
30108 @@ -1204,7 +1204,8 @@
30109 /* err >= 0 after dev_alloc_name() or stores the first errno */
30112 - write_seqcount_begin(&devnet_rename_seq);
30113 + mutex_lock(&devnet_rename_mutex);
30114 + __raw_write_seqcount_begin(&devnet_rename_seq);
30115 memcpy(dev->name, oldname, IFNAMSIZ);
30116 memcpy(oldname, newname, IFNAMSIZ);
30117 dev->name_assign_type = old_assign_type;
30118 @@ -1217,6 +1218,11 @@
30124 + __raw_write_seqcount_end(&devnet_rename_seq);
30125 + mutex_unlock(&devnet_rename_mutex);
30130 @@ -2246,6 +2252,7 @@
30131 sd->output_queue_tailp = &q->next_sched;
30132 raise_softirq_irqoff(NET_TX_SOFTIRQ);
30133 local_irq_restore(flags);
30134 + preempt_check_resched_rt();
30137 void __netif_schedule(struct Qdisc *q)
30138 @@ -2327,6 +2334,7 @@
30139 __this_cpu_write(softnet_data.completion_queue, skb);
30140 raise_softirq_irqoff(NET_TX_SOFTIRQ);
30141 local_irq_restore(flags);
30142 + preempt_check_resched_rt();
30144 EXPORT_SYMBOL(__dev_kfree_skb_irq);
30146 @@ -2884,7 +2892,11 @@
30147 * This permits __QDISC___STATE_RUNNING owner to get the lock more
30148 * often and dequeue packets faster.
30150 +#ifdef CONFIG_PREEMPT_RT_FULL
30151 + contended = true;
30153 contended = qdisc_is_running(q);
30155 if (unlikely(contended))
30156 spin_lock(&q->busylock);
30158 @@ -2944,9 +2956,44 @@
30159 #define skb_update_prio(skb)
30162 +#ifdef CONFIG_PREEMPT_RT_FULL
30164 +static inline int xmit_rec_read(void)
30165 +{
30166 + return current->xmit_recursion;
30167 +}
30169 +static inline void xmit_rec_inc(void)
30170 +{
30171 + current->xmit_recursion++;
30172 +}
30174 +static inline void xmit_rec_dec(void)
30175 +{
30176 + current->xmit_recursion--;
30177 +}
30179 +#else
30181 DEFINE_PER_CPU(int, xmit_recursion);
30182 EXPORT_SYMBOL(xmit_recursion);
30184 +static inline int xmit_rec_read(void)
30185 +{
30186 + return __this_cpu_read(xmit_recursion);
30187 +}
30189 +static inline void xmit_rec_inc(void)
30190 +{
30191 + __this_cpu_inc(xmit_recursion);
30192 +}
30194 +static inline void xmit_rec_dec(void)
30195 +{
30196 + __this_cpu_dec(xmit_recursion);
30197 +}
30198 +#endif
30200 #define RECURSION_LIMIT 10
30203 @@ -3139,7 +3186,7 @@
30205 if (txq->xmit_lock_owner != cpu) {
30207 - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
30208 + if (xmit_rec_read() > RECURSION_LIMIT)
30209 goto recursion_alert;
30211 skb = validate_xmit_skb(skb, dev);
30212 @@ -3149,9 +3196,9 @@
30213 HARD_TX_LOCK(dev, txq, cpu);
30215 if (!netif_xmit_stopped(txq)) {
30216 - __this_cpu_inc(xmit_recursion);
30218 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
30219 - __this_cpu_dec(xmit_recursion);
30221 if (dev_xmit_complete(rc)) {
30222 HARD_TX_UNLOCK(dev, txq);
30224 @@ -3525,6 +3572,7 @@
30227 local_irq_restore(flags);
30228 + preempt_check_resched_rt();
30230 atomic_long_inc(&skb->dev->rx_dropped);
30232 @@ -3543,7 +3591,7 @@
30233 struct rps_dev_flow voidflow, *rflow = &voidflow;
30236 - preempt_disable();
30237 + migrate_disable();
30240 cpu = get_rps_cpu(skb->dev, skb, &rflow);
30241 @@ -3553,13 +3601,13 @@
30242 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
30245 - preempt_enable();
30246 + migrate_enable();
30250 unsigned int qtail;
30251 - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
30253 + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
30258 @@ -3593,16 +3641,44 @@
30260 trace_netif_rx_ni_entry(skb);
30262 - preempt_disable();
30263 + local_bh_disable();
30264 err = netif_rx_internal(skb);
30265 - if (local_softirq_pending())
30267 - preempt_enable();
30268 + local_bh_enable();
30272 EXPORT_SYMBOL(netif_rx_ni);
30274 +#ifdef CONFIG_PREEMPT_RT_FULL
30276 + * RT runs ksoftirqd as a real time thread and the root_lock is a
30277 + * "sleeping spinlock". If the trylock fails then we can go into an
30278 + * infinite loop when ksoftirqd preempted the task which actually
30279 + * holds the lock, because we requeue q and raise NET_TX softirq
30280 + * causing ksoftirqd to loop forever.
30282 + * It's safe to use spin_lock on RT here as softirqs run in thread
30283 + * context and cannot deadlock against the thread which is holding
30284 + * root_lock.
30286 + * On !RT the trylock might fail, but there we bail out from the
30287 + * softirq loop after 10 attempts which we can't do on RT. And the
30288 + * task holding root_lock cannot be preempted, so the only downside of
30289 + * that trylock is that we need 10 loops to decide that we should have
30290 + * given up in the first one :)
30292 +static inline int take_root_lock(spinlock_t *lock)
30293 +{
30294 + spin_lock(lock);
30295 + return 1;
30296 +}
30297 +#else
30298 +static inline int take_root_lock(spinlock_t *lock)
30299 +{
30300 + return spin_trylock(lock);
30301 +}
30302 +#endif
30304 static void net_tx_action(struct softirq_action *h)
30306 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
30307 @@ -3644,7 +3720,7 @@
30308 head = head->next_sched;
30310 root_lock = qdisc_lock(q);
30311 - if (spin_trylock(root_lock)) {
30312 + if (take_root_lock(root_lock)) {
30313 smp_mb__before_atomic();
30314 clear_bit(__QDISC_STATE_SCHED,
30316 @@ -4066,7 +4142,7 @@
30317 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
30318 if (skb->dev == dev) {
30319 __skb_unlink(skb, &sd->input_pkt_queue);
30321 + __skb_queue_tail(&sd->tofree_queue, skb);
30322 input_queue_head_incr(sd);
30325 @@ -4075,10 +4151,13 @@
30326 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
30327 if (skb->dev == dev) {
30328 __skb_unlink(skb, &sd->process_queue);
30330 + __skb_queue_tail(&sd->tofree_queue, skb);
30331 input_queue_head_incr(sd);
30335 + if (!skb_queue_empty(&sd->tofree_queue))
30336 + raise_softirq_irqoff(NET_RX_SOFTIRQ);
30339 static int napi_gro_complete(struct sk_buff *skb)
30340 @@ -4535,6 +4614,7 @@
30341 sd->rps_ipi_list = NULL;
30343 local_irq_enable();
30344 + preempt_check_resched_rt();
30346 /* Send pending IPI's to kick RPS processing on remote cpus. */
30348 @@ -4548,6 +4628,7 @@
30351 local_irq_enable();
30352 + preempt_check_resched_rt();
30355 static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
30356 @@ -4629,6 +4710,7 @@
30357 local_irq_save(flags);
30358 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
30359 local_irq_restore(flags);
30360 + preempt_check_resched_rt();
30362 EXPORT_SYMBOL(__napi_schedule);
30364 @@ -4905,7 +4987,7 @@
30365 list_splice_tail(&repoll, &list);
30366 list_splice(&list, &sd->poll_list);
30367 if (!list_empty(&sd->poll_list))
30368 - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
30369 + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
30371 net_rps_action_and_irq_enable(sd);
30373 @@ -7244,7 +7326,7 @@
30374 void synchronize_net(void)
30377 - if (rtnl_is_locked())
30378 + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
30379 synchronize_rcu_expedited();
30382 @@ -7485,16 +7567,20 @@
30384 raise_softirq_irqoff(NET_TX_SOFTIRQ);
30385 local_irq_enable();
30386 + preempt_check_resched_rt();
30388 /* Process offline CPU's input_pkt_queue */
30389 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
30391 input_queue_head_incr(oldsd);
30393 - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
30394 + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
30396 input_queue_head_incr(oldsd);
30398 + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
30404 @@ -7796,8 +7882,9 @@
30405 for_each_possible_cpu(i) {
30406 struct softnet_data *sd = &per_cpu(softnet_data, i);
30408 - skb_queue_head_init(&sd->input_pkt_queue);
30409 - skb_queue_head_init(&sd->process_queue);
30410 + skb_queue_head_init_raw(&sd->input_pkt_queue);
30411 + skb_queue_head_init_raw(&sd->process_queue);
30412 + skb_queue_head_init_raw(&sd->tofree_queue);
30413 INIT_LIST_HEAD(&sd->poll_list);
30414 sd->output_queue_tailp = &sd->output_queue;
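The dev_change_name() rework keeps devnet_rename_seq for lock-free readers but moves writer exclusion onto devnet_rename_mutex, so the sequence counter is updated with the __raw_write_seqcount_begin()/end() helpers while the mutex is held. The write side, error paths trimmed:

	mutex_lock(&devnet_rename_mutex);
	__raw_write_seqcount_begin(&devnet_rename_seq);

	/* rename dev->name, dev_get_valid_name(), sysfs/notifier updates */

	__raw_write_seqcount_end(&devnet_rename_seq);
	mutex_unlock(&devnet_rename_mutex);

A reader in netdev_get_name() that observes a retry now waits by taking and releasing the mutex, as the hunk above shows, instead of busy-waiting on the sequence count, which matters on RT where the writer may be preempted mid-update.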
30416 diff -Nur linux-4.4.46.orig/net/core/skbuff.c linux-4.4.46/net/core/skbuff.c
30417 --- linux-4.4.46.orig/net/core/skbuff.c 2017-02-01 08:31:11.000000000 +0100
30418 +++ linux-4.4.46/net/core/skbuff.c 2017-02-03 17:18:10.955620139 +0100
30420 #include <linux/errqueue.h>
30421 #include <linux/prefetch.h>
30422 #include <linux/if_vlan.h>
30423 +#include <linux/locallock.h>
30425 #include <net/protocol.h>
30426 #include <net/dst.h>
30427 @@ -351,6 +352,8 @@
30429 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
30430 static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
30431 +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
30432 +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
30434 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
30436 @@ -358,10 +361,10 @@
30437 unsigned long flags;
30440 - local_irq_save(flags);
30441 + local_lock_irqsave(netdev_alloc_lock, flags);
30442 nc = this_cpu_ptr(&netdev_alloc_cache);
30443 data = __alloc_page_frag(nc, fragsz, gfp_mask);
30444 - local_irq_restore(flags);
30445 + local_unlock_irqrestore(netdev_alloc_lock, flags);
30449 @@ -380,9 +383,13 @@
30451 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
30453 - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
30454 + struct page_frag_cache *nc;
30455 + void *data;
30457 - return __alloc_page_frag(nc, fragsz, gfp_mask);
30458 + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30459 + data = __alloc_page_frag(nc, fragsz, gfp_mask);
30460 + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30461 + return data;
30462 }
30464 void *napi_alloc_frag(unsigned int fragsz)
30465 @@ -429,13 +436,13 @@
30466 if (sk_memalloc_socks())
30467 gfp_mask |= __GFP_MEMALLOC;
30469 - local_irq_save(flags);
30470 + local_lock_irqsave(netdev_alloc_lock, flags);
30472 nc = this_cpu_ptr(&netdev_alloc_cache);
30473 data = __alloc_page_frag(nc, len, gfp_mask);
30474 pfmemalloc = nc->pfmemalloc;
30476 - local_irq_restore(flags);
30477 + local_unlock_irqrestore(netdev_alloc_lock, flags);
30479 if (unlikely(!data))
30481 @@ -476,9 +483,10 @@
30482 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
30485 - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
30486 + struct page_frag_cache *nc;
30487 struct sk_buff *skb;
30488 void *data;
30489 + bool pfmemalloc;
30491 len += NET_SKB_PAD + NET_IP_ALIGN;
30493 @@ -496,7 +504,11 @@
30494 if (sk_memalloc_socks())
30495 gfp_mask |= __GFP_MEMALLOC;
30497 + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30498 data = __alloc_page_frag(nc, len, gfp_mask);
30499 + pfmemalloc = nc->pfmemalloc;
30500 + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30502 if (unlikely(!data))
30505 @@ -507,7 +519,7 @@
30508 /* use OR instead of assignment to avoid clearing of bits in mask */
30509 - if (nc->pfmemalloc)
30510 + if (pfmemalloc)
30511 skb->pfmemalloc = 1;
30512 skb->head_frag = 1;
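Note the ordering in __napi_alloc_skb(): nc->pfmemalloc is copied into a local while the napi_alloc_cache is still held, because after put_locked_var() another task on the same CPU may refill the cache on RT. Sketch of the relevant lines:

	nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
	data = __alloc_page_frag(nc, len, gfp_mask);
	pfmemalloc = nc->pfmemalloc;	/* snapshot before the local lock is dropped */
	put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);

	/* ... build the skb from data ... */
	if (pfmemalloc)
		skb->pfmemalloc = 1;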
30514 diff -Nur linux-4.4.46.orig/net/core/sock.c linux-4.4.46/net/core/sock.c
30515 --- linux-4.4.46.orig/net/core/sock.c 2017-02-01 08:31:11.000000000 +0100
30516 +++ linux-4.4.46/net/core/sock.c 2017-02-03 17:18:10.955620139 +0100
30517 @@ -2436,12 +2436,11 @@
30518 if (sk->sk_lock.owned)
30520 sk->sk_lock.owned = 1;
30521 - spin_unlock(&sk->sk_lock.slock);
30522 + spin_unlock_bh(&sk->sk_lock.slock);
30524 * The sk_lock has mutex_lock() semantics here:
30526 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
30527 - local_bh_enable();
30529 EXPORT_SYMBOL(lock_sock_nested);
30531 diff -Nur linux-4.4.46.orig/net/ipv4/icmp.c linux-4.4.46/net/ipv4/icmp.c
30532 --- linux-4.4.46.orig/net/ipv4/icmp.c 2017-02-01 08:31:11.000000000 +0100
30533 +++ linux-4.4.46/net/ipv4/icmp.c 2017-02-03 17:18:10.955620139 +0100
30535 #include <linux/jiffies.h>
30536 #include <linux/kernel.h>
30537 #include <linux/fcntl.h>
30538 +#include <linux/sysrq.h>
30539 #include <linux/socket.h>
30540 #include <linux/in.h>
30541 #include <linux/inet.h>
30543 #include <linux/string.h>
30544 #include <linux/netfilter_ipv4.h>
30545 #include <linux/slab.h>
30546 +#include <linux/locallock.h>
30547 #include <net/snmp.h>
30548 #include <net/ip.h>
30549 #include <net/route.h>
30550 @@ -204,6 +206,8 @@
30552 * On SMP we have one ICMP socket per-cpu.
30554 +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
30556 static struct sock *icmp_sk(struct net *net)
30558 return *this_cpu_ptr(net->ipv4.icmp_sk);
30559 @@ -215,12 +219,14 @@
30561 local_bh_disable();
30563 + local_lock(icmp_sk_lock);
30566 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
30567 /* This can happen if the output path signals a
30568 * dst_link_failure() for an outgoing ICMP packet.
30570 + local_unlock(icmp_sk_lock);
30574 @@ -230,6 +236,7 @@
30575 static inline void icmp_xmit_unlock(struct sock *sk)
30577 spin_unlock_bh(&sk->sk_lock.slock);
30578 + local_unlock(icmp_sk_lock);
30581 int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
30582 @@ -358,6 +365,7 @@
30584 struct sk_buff *skb;
30586 + local_lock(icmp_sk_lock);
30587 sk = icmp_sk(dev_net((*rt)->dst.dev));
30588 if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
30589 icmp_param->data_len+icmp_param->head_len,
30590 @@ -380,6 +388,7 @@
30591 skb->ip_summed = CHECKSUM_NONE;
30592 ip_push_pending_frames(sk, fl4);
30594 + local_unlock(icmp_sk_lock);
30598 @@ -891,6 +900,30 @@
30602 + * 32bit and 64bit have different timestamp length, so we check for
30603 + * the cookie at offset 20 and verify it is repeated at offset 50
30605 +#define CO_POS0 20
30606 +#define CO_POS1 50
30607 +#define CO_SIZE sizeof(int)
30608 +#define ICMP_SYSRQ_SIZE 57
30611 + * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
30612 + * pattern and if it matches send the next byte as a trigger to sysrq.
30614 +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
30616 + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
30617 + char *p = skb->data;
30619 + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
30620 + !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
30621 + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
30622 + handle_sysrq(p[CO_POS0 + CO_SIZE]);
30626 * Handle ICMP_ECHO ("ping") requests.
30628 * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
30629 @@ -917,6 +950,11 @@
30630 icmp_param.data_len = skb->len;
30631 icmp_param.head_len = sizeof(struct icmphdr);
30632 icmp_reply(&icmp_param, skb);
30634 + if (skb->len == ICMP_SYSRQ_SIZE &&
30635 + net->ipv4.sysctl_icmp_echo_sysrq) {
30636 + icmp_check_sysrq(net, skb);
30637 + }
30638 }
30639 /* should there be an ICMP stat for ignored echos? */
30641 diff -Nur linux-4.4.46.orig/net/ipv4/sysctl_net_ipv4.c linux-4.4.46/net/ipv4/sysctl_net_ipv4.c
30642 --- linux-4.4.46.orig/net/ipv4/sysctl_net_ipv4.c 2017-02-01 08:31:11.000000000 +0100
30643 +++ linux-4.4.46/net/ipv4/sysctl_net_ipv4.c 2017-02-03 17:18:10.955620139 +0100
30644 @@ -818,6 +818,13 @@
30645 .proc_handler = proc_dointvec
30646 },
30647 + {
30648 + .procname = "icmp_echo_sysrq",
30649 + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
30650 + .maxlen = sizeof(int),
30651 + .mode = 0644,
30652 + .proc_handler = proc_dointvec
30653 + },
30654 {
30655 .procname = "icmp_ignore_bogus_error_responses",
30656 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
30657 .maxlen = sizeof(int),
30658 diff -Nur linux-4.4.46.orig/net/ipv4/tcp_ipv4.c linux-4.4.46/net/ipv4/tcp_ipv4.c
30659 --- linux-4.4.46.orig/net/ipv4/tcp_ipv4.c 2017-02-01 08:31:11.000000000 +0100
30660 +++ linux-4.4.46/net/ipv4/tcp_ipv4.c 2017-02-03 17:18:10.955620139 +0100
30662 #include <linux/init.h>
30663 #include <linux/times.h>
30664 #include <linux/slab.h>
30665 +#include <linux/locallock.h>
30667 #include <net/net_namespace.h>
30668 #include <net/icmp.h>
30669 @@ -566,6 +567,7 @@
30671 EXPORT_SYMBOL(tcp_v4_send_check);
30673 +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
30675 * This routine will send an RST to the other tcp.
30677 @@ -687,10 +689,13 @@
30678 arg.bound_dev_if = sk->sk_bound_dev_if;
30680 arg.tos = ip_hdr(skb)->tos;
30682 + local_lock(tcp_sk_lock);
30683 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
30684 skb, &TCP_SKB_CB(skb)->header.h4.opt,
30685 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
30686 &arg, arg.iov[0].iov_len);
30687 + local_unlock(tcp_sk_lock);
30689 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
30690 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
30691 @@ -772,10 +777,12 @@
30693 arg.bound_dev_if = oif;
30695 + local_lock(tcp_sk_lock);
30696 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
30697 skb, &TCP_SKB_CB(skb)->header.h4.opt,
30698 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
30699 &arg, arg.iov[0].iov_len);
30700 + local_unlock(tcp_sk_lock);
30702 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
30704 diff -Nur linux-4.4.46.orig/net/mac80211/rx.c linux-4.4.46/net/mac80211/rx.c
30705 --- linux-4.4.46.orig/net/mac80211/rx.c 2017-02-01 08:31:11.000000000 +0100
30706 +++ linux-4.4.46/net/mac80211/rx.c 2017-02-03 17:18:10.955620139 +0100
30707 @@ -3580,7 +3580,7 @@
30708 struct ieee80211_supported_band *sband;
30709 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
30711 - WARN_ON_ONCE(softirq_count() == 0);
30712 + WARN_ON_ONCE_NONRT(softirq_count() == 0);
30714 if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
30716 diff -Nur linux-4.4.46.orig/net/netfilter/core.c linux-4.4.46/net/netfilter/core.c
30717 --- linux-4.4.46.orig/net/netfilter/core.c 2017-02-01 08:31:11.000000000 +0100
30718 +++ linux-4.4.46/net/netfilter/core.c 2017-02-03 17:18:10.955620139 +0100
30719 @@ -22,11 +22,17 @@
30720 #include <linux/proc_fs.h>
30721 #include <linux/mutex.h>
30722 #include <linux/slab.h>
30723 +#include <linux/locallock.h>
30724 #include <net/net_namespace.h>
30725 #include <net/sock.h>
30727 #include "nf_internals.h"
30729 +#ifdef CONFIG_PREEMPT_RT_BASE
30730 +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
30731 +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
30734 static DEFINE_MUTEX(afinfo_mutex);
30736 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
30737 diff -Nur linux-4.4.46.orig/net/packet/af_packet.c linux-4.4.46/net/packet/af_packet.c
30738 --- linux-4.4.46.orig/net/packet/af_packet.c 2017-02-01 08:31:11.000000000 +0100
30739 +++ linux-4.4.46/net/packet/af_packet.c 2017-02-03 17:18:10.959620294 +0100
30741 #include <linux/if_packet.h>
30742 #include <linux/wireless.h>
30743 #include <linux/kernel.h>
30744 +#include <linux/delay.h>
30745 #include <linux/kmod.h>
30746 #include <linux/slab.h>
30747 #include <linux/vmalloc.h>
30748 @@ -694,7 +695,7 @@
30749 if (BLOCK_NUM_PKTS(pbd)) {
30750 while (atomic_read(&pkc->blk_fill_in_prog)) {
30751 /* Waiting for skb_copy_bits to finish... */
30752 - cpu_relax();
30753 + cpu_chill();
30757 @@ -956,7 +957,7 @@
30758 if (!(status & TP_STATUS_BLK_TMO)) {
30759 while (atomic_read(&pkc->blk_fill_in_prog)) {
30760 /* Waiting for skb_copy_bits to finish... */
30761 - cpu_relax();
30762 + cpu_chill();
30763 }
30764 }
30765 prb_close_block(pkc, pbd, po, status);
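Both busy-wait loops in this file (and the rds loop below) now spin on cpu_chill() instead of cpu_relax(); the new linux/delay.h include is what brings in its declaration. Roughly, the series defines it along these lines (a sketch of the intent, not the exact definition):

	#ifdef CONFIG_PREEMPT_RT_FULL
	extern void cpu_chill(void);	/* short sleep so the preempted lock holder can run */
	#else
	# define cpu_chill()	cpu_relax()
	#endif

Without this, a high-priority task waiting for skb_copy_bits() to finish could spin forever against the preempted writer on RT.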
30766 diff -Nur linux-4.4.46.orig/net/rds/ib_rdma.c linux-4.4.46/net/rds/ib_rdma.c
30767 --- linux-4.4.46.orig/net/rds/ib_rdma.c 2017-02-01 08:31:11.000000000 +0100
30768 +++ linux-4.4.46/net/rds/ib_rdma.c 2017-02-03 17:18:10.959620294 +0100
30770 #include <linux/slab.h>
30771 #include <linux/rculist.h>
30772 #include <linux/llist.h>
30773 +#include <linux/delay.h>
30777 @@ -313,7 +314,7 @@
30778 for_each_online_cpu(cpu) {
30779 flag = &per_cpu(clean_list_grace, cpu);
30780 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
30781 - cpu_relax();
30782 + cpu_chill();
30786 diff -Nur linux-4.4.46.orig/net/sched/sch_generic.c linux-4.4.46/net/sched/sch_generic.c
30787 --- linux-4.4.46.orig/net/sched/sch_generic.c 2017-02-01 08:31:11.000000000 +0100
30788 +++ linux-4.4.46/net/sched/sch_generic.c 2017-02-03 17:18:10.959620294 +0100
30789 @@ -893,7 +893,7 @@
30790 /* Wait for outstanding qdisc_run calls. */
30791 list_for_each_entry(dev, head, close_list)
30792 while (some_qdisc_is_busy(dev))
30797 void dev_deactivate(struct net_device *dev)
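The dev_deactivate_many() hunk drops yield() while some_qdisc_is_busy(): a SCHED_FIFO caller that yields would spin without ever letting the softirq thread make progress on RT. Assuming the elided replacement is a short sleep, as in the standalone -rt patch for this function, the wait reads as:

	/* Wait for outstanding qdisc_run calls. */
	list_for_each_entry(dev, head, close_list)
		while (some_qdisc_is_busy(dev))
			msleep(1);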
30798 diff -Nur linux-4.4.46.orig/net/sunrpc/svc_xprt.c linux-4.4.46/net/sunrpc/svc_xprt.c
30799 --- linux-4.4.46.orig/net/sunrpc/svc_xprt.c 2017-02-01 08:31:11.000000000 +0100
30800 +++ linux-4.4.46/net/sunrpc/svc_xprt.c 2017-02-03 17:18:10.959620294 +0100
30801 @@ -340,7 +340,7 @@
30806 + cpu = get_cpu_light();
30807 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
30809 atomic_long_inc(&pool->sp_stats.packets);
30810 @@ -376,7 +376,7 @@
30812 atomic_long_inc(&pool->sp_stats.threads_woken);
30813 wake_up_process(rqstp->rq_task);
30819 @@ -397,7 +397,7 @@
30826 trace_svc_xprt_do_enqueue(xprt, rqstp);
30828 diff -Nur linux-4.4.46.orig/scripts/mkcompile_h linux-4.4.46/scripts/mkcompile_h
30829 --- linux-4.4.46.orig/scripts/mkcompile_h 2017-02-01 08:31:11.000000000 +0100
30830 +++ linux-4.4.46/scripts/mkcompile_h 2017-02-03 17:18:10.959620294 +0100
30839 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
30843 if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
30844 if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
30845 +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
30846 UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
30848 # Truncate to maximum length
30849 diff -Nur linux-4.4.46.orig/sound/core/pcm_native.c linux-4.4.46/sound/core/pcm_native.c
30850 --- linux-4.4.46.orig/sound/core/pcm_native.c 2017-02-01 08:31:11.000000000 +0100
30851 +++ linux-4.4.46/sound/core/pcm_native.c 2017-02-03 17:18:10.959620294 +0100
30852 @@ -135,7 +135,7 @@
30853 void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
30855 if (!substream->pcm->nonatomic)
30856 - local_irq_disable();
30857 + local_irq_disable_nort();
30858 snd_pcm_stream_lock(substream);
30860 EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
30861 @@ -150,7 +150,7 @@
30863 snd_pcm_stream_unlock(substream);
30864 if (!substream->pcm->nonatomic)
30865 - local_irq_enable();
30866 + local_irq_enable_nort();
30868 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
30870 @@ -158,7 +158,7 @@
30872 unsigned long flags = 0;
30873 if (!substream->pcm->nonatomic)
30874 - local_irq_save(flags);
30875 + local_irq_save_nort(flags);
30876 snd_pcm_stream_lock(substream);
30879 @@ -176,7 +176,7 @@
30881 snd_pcm_stream_unlock(substream);
30882 if (!substream->pcm->nonatomic)
30883 - local_irq_restore(flags);
30884 + local_irq_restore_nort(flags);
30886 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
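The _nort variants used in pcm_native.c are defined elsewhere in the series: on PREEMPT_RT_FULL they do (almost) nothing, because the stream lock is a sleeping lock there and must not be taken with interrupts hard-disabled, while on mainline they fall back to the plain local_irq_* operations. Roughly (a sketch of the intent, not the exact definitions):

	#ifdef CONFIG_PREEMPT_RT_FULL
	# define local_irq_disable_nort()	do { } while (0)
	# define local_irq_enable_nort()	do { } while (0)
	# define local_irq_save_nort(flags)	do { (flags) = 0; } while (0)
	# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
	#else
	# define local_irq_disable_nort()	local_irq_disable()
	# define local_irq_enable_nort()	local_irq_enable()
	# define local_irq_save_nort(flags)	local_irq_save(flags)
	# define local_irq_restore_nort(flags)	local_irq_restore(flags)
	#endif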
30888 diff -Nur linux-4.4.46.orig/virt/kvm/async_pf.c linux-4.4.46/virt/kvm/async_pf.c
30889 --- linux-4.4.46.orig/virt/kvm/async_pf.c 2017-02-01 08:31:11.000000000 +0100
30890 +++ linux-4.4.46/virt/kvm/async_pf.c 2017-02-03 17:18:10.959620294 +0100
30892 * This memory barrier pairs with prepare_to_wait's set_current_state()
30895 - if (waitqueue_active(&vcpu->wq))
30896 - wake_up_interruptible(&vcpu->wq);
30897 + if (swait_active(&vcpu->wq))
30898 + swake_up(&vcpu->wq);
30901 kvm_put_kvm(vcpu->kvm);
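Both KVM files switch vcpu->wq from a regular waitqueue to a simple waitqueue (swait), whose wakeup path is O(1) and runs under a raw lock, making it usable from the RT-atomic contexts kvm_vcpu_kick() can be called from. The wakeup before and after, condensed from the hunks:

	/* before */
	if (waitqueue_active(&vcpu->wq))
		wake_up_interruptible(&vcpu->wq);

	/* after */
	if (swait_active(&vcpu->wq))
		swake_up(&vcpu->wq);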
30902 diff -Nur linux-4.4.46.orig/virt/kvm/kvm_main.c linux-4.4.46/virt/kvm/kvm_main.c
30903 --- linux-4.4.46.orig/virt/kvm/kvm_main.c 2017-02-01 08:31:11.000000000 +0100
30904 +++ linux-4.4.46/virt/kvm/kvm_main.c 2017-02-03 17:18:10.959620294 +0100
30905 @@ -228,8 +228,7 @@
30907 vcpu->vcpu_id = id;
30909 - vcpu->halt_poll_ns = 0;
30910 - init_waitqueue_head(&vcpu->wq);
30911 + init_swait_queue_head(&vcpu->wq);
30912 kvm_async_pf_vcpu_init(vcpu);
30914 vcpu->pre_pcpu = -1;
30915 @@ -2005,7 +2004,7 @@
30916 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
30918 ktime_t start, cur;
30919 - DEFINE_WAIT(wait);
30920 + DECLARE_SWAITQUEUE(wait);
30921 bool waited = false;
30924 @@ -2030,7 +2029,7 @@
30925 kvm_arch_vcpu_blocking(vcpu);
30928 - prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
30929 + prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
30931 if (kvm_vcpu_check_block(vcpu) < 0)
30933 @@ -2039,7 +2038,7 @@
30937 - finish_wait(&vcpu->wq, &wait);
30938 + finish_swait(&vcpu->wq, &wait);
30941 kvm_arch_vcpu_unblocking(vcpu);
30942 @@ -2071,11 +2070,11 @@
30945 int cpu = vcpu->cpu;
30946 - wait_queue_head_t *wqp;
30947 + struct swait_queue_head *wqp;
30949 wqp = kvm_arch_vcpu_wq(vcpu);
30950 - if (waitqueue_active(wqp)) {
30951 - wake_up_interruptible(wqp);
30952 + if (swait_active(wqp)) {
30954 ++vcpu->stat.halt_wakeup;
30957 @@ -2176,7 +2175,7 @@
30961 - if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
30962 + if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
30964 if (!kvm_vcpu_eligible_for_directed_yield(vcpu))