openadk: target/linux/patches/4.4.48/patch-realtime (PREEMPT_RT patch for Linux 4.4)
1 diff -Nur linux-4.4.46.orig/arch/arm/include/asm/switch_to.h linux-4.4.46/arch/arm/include/asm/switch_to.h
2 --- linux-4.4.46.orig/arch/arm/include/asm/switch_to.h  2017-02-01 08:31:11.000000000 +0100
3 +++ linux-4.4.46/arch/arm/include/asm/switch_to.h       2017-02-03 17:18:05.627414322 +0100
4 @@ -3,6 +3,13 @@
5  
6  #include <linux/thread_info.h>
7  
8 +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
9 +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
10 +#else
11 +static inline void
12 +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
13 +#endif
15  /*
16   * For v7 SMP cores running a preemptible kernel we may be pre-empted
17   * during a TLB maintenance operation, so execute an inner-shareable dsb
18 @@ -25,6 +32,7 @@
19  #define switch_to(prev,next,last)                                      \
20  do {                                                                   \
21         __complete_pending_tlbi();                                      \
22 +       switch_kmaps(prev, next);                                       \
23         last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));        \
24  } while (0)
26 diff -Nur linux-4.4.46.orig/arch/arm/include/asm/thread_info.h linux-4.4.46/arch/arm/include/asm/thread_info.h
27 --- linux-4.4.46.orig/arch/arm/include/asm/thread_info.h        2017-02-01 08:31:11.000000000 +0100
28 +++ linux-4.4.46/arch/arm/include/asm/thread_info.h     2017-02-03 17:18:05.627414322 +0100
29 @@ -49,6 +49,7 @@
30  struct thread_info {
31         unsigned long           flags;          /* low level flags */
32         int                     preempt_count;  /* 0 => preemptable, <0 => bug */
33 +       int                     preempt_lazy_count; /* 0 => preemptable, <0 => bug */
34         mm_segment_t            addr_limit;     /* address limit */
35         struct task_struct      *task;          /* main task structure */
36         __u32                   cpu;            /* cpu */
37 @@ -142,7 +143,8 @@
38  #define TIF_SYSCALL_TRACE      4       /* syscall trace active */
39  #define TIF_SYSCALL_AUDIT      5       /* syscall auditing active */
40  #define TIF_SYSCALL_TRACEPOINT 6       /* syscall tracepoint instrumentation */
41 -#define TIF_SECCOMP            7       /* seccomp syscall filtering active */
42 +#define TIF_SECCOMP            8       /* seccomp syscall filtering active */
43 +#define TIF_NEED_RESCHED_LAZY  7
45  #define TIF_NOHZ               12      /* in adaptive nohz mode */
46  #define TIF_USING_IWMMXT       17
47 @@ -152,6 +154,7 @@
48  #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
49  #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
50  #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
51 +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
52  #define _TIF_UPROBE            (1 << TIF_UPROBE)
53  #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
54  #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
55 @@ -167,7 +170,8 @@
56   * Change these and you break ASM code in entry-common.S
57   */
58  #define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
59 -                                _TIF_NOTIFY_RESUME | _TIF_UPROBE)
60 +                                _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
61 +                                _TIF_NEED_RESCHED_LAZY)
63  #endif /* __KERNEL__ */
64  #endif /* __ASM_ARM_THREAD_INFO_H */
65 diff -Nur linux-4.4.46.orig/arch/arm/Kconfig linux-4.4.46/arch/arm/Kconfig
66 --- linux-4.4.46.orig/arch/arm/Kconfig  2017-02-01 08:31:11.000000000 +0100
67 +++ linux-4.4.46/arch/arm/Kconfig       2017-02-03 17:18:05.627414322 +0100
68 @@ -33,7 +33,7 @@
69         select HARDIRQS_SW_RESEND
70         select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
71         select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
72 -       select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
73 +       select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !PREEMPT_RT_BASE
74         select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
75         select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
76         select HAVE_ARCH_TRACEHOOK
77 @@ -68,6 +68,7 @@
78         select HAVE_PERF_EVENTS
79         select HAVE_PERF_REGS
80         select HAVE_PERF_USER_STACK_DUMP
81 +       select HAVE_PREEMPT_LAZY
82         select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
83         select HAVE_REGS_AND_STACK_ACCESS_API
84         select HAVE_SYSCALL_TRACEPOINTS
85 diff -Nur linux-4.4.46.orig/arch/arm/kernel/asm-offsets.c linux-4.4.46/arch/arm/kernel/asm-offsets.c
86 --- linux-4.4.46.orig/arch/arm/kernel/asm-offsets.c     2017-02-01 08:31:11.000000000 +0100
87 +++ linux-4.4.46/arch/arm/kernel/asm-offsets.c  2017-02-03 17:18:05.627414322 +0100
88 @@ -65,6 +65,7 @@
89    BLANK();
90    DEFINE(TI_FLAGS,             offsetof(struct thread_info, flags));
91    DEFINE(TI_PREEMPT,           offsetof(struct thread_info, preempt_count));
92 +  DEFINE(TI_PREEMPT_LAZY,      offsetof(struct thread_info, preempt_lazy_count));
93    DEFINE(TI_ADDR_LIMIT,                offsetof(struct thread_info, addr_limit));
94    DEFINE(TI_TASK,              offsetof(struct thread_info, task));
95    DEFINE(TI_CPU,               offsetof(struct thread_info, cpu));
96 diff -Nur linux-4.4.46.orig/arch/arm/kernel/entry-armv.S linux-4.4.46/arch/arm/kernel/entry-armv.S
97 --- linux-4.4.46.orig/arch/arm/kernel/entry-armv.S      2017-02-01 08:31:11.000000000 +0100
98 +++ linux-4.4.46/arch/arm/kernel/entry-armv.S   2017-02-03 17:18:05.627414322 +0100
99 @@ -215,11 +215,18 @@
100  #ifdef CONFIG_PREEMPT
101         get_thread_info tsk
102         ldr     r8, [tsk, #TI_PREEMPT]          @ get preempt count
103 -       ldr     r0, [tsk, #TI_FLAGS]            @ get flags
104         teq     r8, #0                          @ if preempt count != 0
105 +       bne     1f                              @ return from exception
106 +       ldr     r0, [tsk, #TI_FLAGS]            @ get flags
107 +       tst     r0, #_TIF_NEED_RESCHED          @ if NEED_RESCHED is set
108 +       blne    svc_preempt                     @ preempt!
110 +       ldr     r8, [tsk, #TI_PREEMPT_LAZY]     @ get preempt lazy count
111 +       teq     r8, #0                          @ if preempt lazy count != 0
112         movne   r0, #0                          @ force flags to 0
113 -       tst     r0, #_TIF_NEED_RESCHED
114 +       tst     r0, #_TIF_NEED_RESCHED_LAZY
115         blne    svc_preempt
117  #endif
119         svc_exit r5, irq = 1                    @ return from exception
120 @@ -234,8 +241,14 @@
121  1:     bl      preempt_schedule_irq            @ irq en/disable is done inside
122         ldr     r0, [tsk, #TI_FLAGS]            @ get new tasks TI_FLAGS
123         tst     r0, #_TIF_NEED_RESCHED
124 +       bne     1b
125 +       tst     r0, #_TIF_NEED_RESCHED_LAZY
126         reteq   r8                              @ go again
127 -       b       1b
128 +       ldr     r0, [tsk, #TI_PREEMPT_LAZY]     @ get preempt lazy count
129 +       teq     r0, #0                          @ if preempt lazy count != 0
130 +       beq     1b
131 +       ret     r8                              @ go again
133  #endif
135  __und_fault:
136 diff -Nur linux-4.4.46.orig/arch/arm/kernel/entry-common.S linux-4.4.46/arch/arm/kernel/entry-common.S
137 --- linux-4.4.46.orig/arch/arm/kernel/entry-common.S    2017-02-01 08:31:11.000000000 +0100
138 +++ linux-4.4.46/arch/arm/kernel/entry-common.S 2017-02-03 17:18:05.627414322 +0100
139 @@ -36,7 +36,9 @@
140   UNWIND(.cantunwind    )
141         disable_irq_notrace                     @ disable interrupts
142         ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
143 -       tst     r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
144 +       tst     r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
145 +       bne     fast_work_pending
146 +       tst     r1, #_TIF_SECCOMP
147         bne     fast_work_pending
149         /* perform architecture specific actions before user return */
150 @@ -62,8 +64,11 @@
151         str     r0, [sp, #S_R0 + S_OFF]!        @ save returned r0
152         disable_irq_notrace                     @ disable interrupts
153         ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
154 -       tst     r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
155 +       tst     r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
156 +       bne     do_slower_path
157 +       tst     r1, #_TIF_SECCOMP
158         beq     no_work_pending
159 +do_slower_path:
160   UNWIND(.fnend         )
161  ENDPROC(ret_fast_syscall)
163 diff -Nur linux-4.4.46.orig/arch/arm/kernel/process.c linux-4.4.46/arch/arm/kernel/process.c
164 --- linux-4.4.46.orig/arch/arm/kernel/process.c 2017-02-01 08:31:11.000000000 +0100
165 +++ linux-4.4.46/arch/arm/kernel/process.c      2017-02-03 17:18:05.627414322 +0100
166 @@ -319,6 +319,30 @@
169  #ifdef CONFIG_MMU
171 + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock.  If the lock is not
172 + * initialized by pgtable_page_ctor() then a coredump of the vector page will
173 + * fail.
174 + */
175 +static int __init vectors_user_mapping_init_page(void)
177 +       struct page *page;
178 +       unsigned long addr = 0xffff0000;
179 +       pgd_t *pgd;
180 +       pud_t *pud;
181 +       pmd_t *pmd;
183 +       pgd = pgd_offset_k(addr);
184 +       pud = pud_offset(pgd, addr);
185 +       pmd = pmd_offset(pud, addr);
186 +       page = pmd_page(*(pmd));
188 +       pgtable_page_ctor(page);
190 +       return 0;
192 +late_initcall(vectors_user_mapping_init_page);
194  #ifdef CONFIG_KUSER_HELPERS
195  /*
196   * The vectors page is always readable from user space for the
197 diff -Nur linux-4.4.46.orig/arch/arm/kernel/signal.c linux-4.4.46/arch/arm/kernel/signal.c
198 --- linux-4.4.46.orig/arch/arm/kernel/signal.c  2017-02-01 08:31:11.000000000 +0100
199 +++ linux-4.4.46/arch/arm/kernel/signal.c       2017-02-03 17:18:05.627414322 +0100
200 @@ -572,7 +572,8 @@
201          */
202         trace_hardirqs_off();
203         do {
204 -               if (likely(thread_flags & _TIF_NEED_RESCHED)) {
205 +               if (likely(thread_flags & (_TIF_NEED_RESCHED |
206 +                                          _TIF_NEED_RESCHED_LAZY))) {
207                         schedule();
208                 } else {
209                         if (unlikely(!user_mode(regs)))
210 diff -Nur linux-4.4.46.orig/arch/arm/kernel/smp.c linux-4.4.46/arch/arm/kernel/smp.c
211 --- linux-4.4.46.orig/arch/arm/kernel/smp.c     2017-02-01 08:31:11.000000000 +0100
212 +++ linux-4.4.46/arch/arm/kernel/smp.c  2017-02-03 17:18:05.627414322 +0100
213 @@ -230,8 +230,6 @@
214         flush_cache_louis();
215         local_flush_tlb_all();
217 -       clear_tasks_mm_cpumask(cpu);
219         return 0;
222 @@ -247,6 +245,9 @@
223                 pr_err("CPU%u: cpu didn't die\n", cpu);
224                 return;
225         }
227 +       clear_tasks_mm_cpumask(cpu);
229         pr_notice("CPU%u: shutdown\n", cpu);
231         /*
232 diff -Nur linux-4.4.46.orig/arch/arm/kernel/unwind.c linux-4.4.46/arch/arm/kernel/unwind.c
233 --- linux-4.4.46.orig/arch/arm/kernel/unwind.c  2017-02-01 08:31:11.000000000 +0100
234 +++ linux-4.4.46/arch/arm/kernel/unwind.c       2017-02-03 17:18:05.627414322 +0100
235 @@ -93,7 +93,7 @@
236  static const struct unwind_idx *__origin_unwind_idx;
237  extern const struct unwind_idx __stop_unwind_idx[];
239 -static DEFINE_SPINLOCK(unwind_lock);
240 +static DEFINE_RAW_SPINLOCK(unwind_lock);
241  static LIST_HEAD(unwind_tables);
243  /* Convert a prel31 symbol to an absolute address */
244 @@ -201,7 +201,7 @@
245                 /* module unwind tables */
246                 struct unwind_table *table;
248 -               spin_lock_irqsave(&unwind_lock, flags);
249 +               raw_spin_lock_irqsave(&unwind_lock, flags);
250                 list_for_each_entry(table, &unwind_tables, list) {
251                         if (addr >= table->begin_addr &&
252                             addr < table->end_addr) {
253 @@ -213,7 +213,7 @@
254                                 break;
255                         }
256                 }
257 -               spin_unlock_irqrestore(&unwind_lock, flags);
258 +               raw_spin_unlock_irqrestore(&unwind_lock, flags);
259         }
261         pr_debug("%s: idx = %p\n", __func__, idx);
262 @@ -529,9 +529,9 @@
263         tab->begin_addr = text_addr;
264         tab->end_addr = text_addr + text_size;
266 -       spin_lock_irqsave(&unwind_lock, flags);
267 +       raw_spin_lock_irqsave(&unwind_lock, flags);
268         list_add_tail(&tab->list, &unwind_tables);
269 -       spin_unlock_irqrestore(&unwind_lock, flags);
270 +       raw_spin_unlock_irqrestore(&unwind_lock, flags);
272         return tab;
274 @@ -543,9 +543,9 @@
275         if (!tab)
276                 return;
278 -       spin_lock_irqsave(&unwind_lock, flags);
279 +       raw_spin_lock_irqsave(&unwind_lock, flags);
280         list_del(&tab->list);
281 -       spin_unlock_irqrestore(&unwind_lock, flags);
282 +       raw_spin_unlock_irqrestore(&unwind_lock, flags);
284         kfree(tab);
286 diff -Nur linux-4.4.46.orig/arch/arm/kvm/arm.c linux-4.4.46/arch/arm/kvm/arm.c
287 --- linux-4.4.46.orig/arch/arm/kvm/arm.c        2017-02-01 08:31:11.000000000 +0100
288 +++ linux-4.4.46/arch/arm/kvm/arm.c     2017-02-03 17:18:05.627414322 +0100
289 @@ -496,18 +496,18 @@
290         struct kvm_vcpu *vcpu;
292         kvm_for_each_vcpu(i, vcpu, kvm) {
293 -               wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
294 +               struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
296                 vcpu->arch.pause = false;
297 -               wake_up_interruptible(wq);
298 +               swake_up(wq);
299         }
302  static void vcpu_sleep(struct kvm_vcpu *vcpu)
304 -       wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
305 +       struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
307 -       wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
308 +       swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
309                                        (!vcpu->arch.pause)));
312 @@ -566,7 +566,7 @@
313                  * involves poking the GIC, which must be done in a
314                  * non-preemptible context.
315                  */
316 -               preempt_disable();
317 +               migrate_disable();
318                 kvm_timer_flush_hwstate(vcpu);
319                 kvm_vgic_flush_hwstate(vcpu);
321 @@ -585,7 +585,7 @@
322                         local_irq_enable();
323                         kvm_timer_sync_hwstate(vcpu);
324                         kvm_vgic_sync_hwstate(vcpu);
325 -                       preempt_enable();
326 +                       migrate_enable();
327                         continue;
328                 }
330 @@ -639,7 +639,7 @@
332                 kvm_vgic_sync_hwstate(vcpu);
334 -               preempt_enable();
335 +               migrate_enable();
337                 ret = handle_exit(vcpu, run, ret);
338         }
339 diff -Nur linux-4.4.46.orig/arch/arm/kvm/psci.c linux-4.4.46/arch/arm/kvm/psci.c
340 --- linux-4.4.46.orig/arch/arm/kvm/psci.c       2017-02-01 08:31:11.000000000 +0100
341 +++ linux-4.4.46/arch/arm/kvm/psci.c    2017-02-03 17:18:05.627414322 +0100
342 @@ -70,7 +70,7 @@
344         struct kvm *kvm = source_vcpu->kvm;
345         struct kvm_vcpu *vcpu = NULL;
346 -       wait_queue_head_t *wq;
347 +       struct swait_queue_head *wq;
348         unsigned long cpu_id;
349         unsigned long context_id;
350         phys_addr_t target_pc;
351 @@ -119,7 +119,7 @@
352         smp_mb();               /* Make sure the above is visible */
354         wq = kvm_arch_vcpu_wq(vcpu);
355 -       wake_up_interruptible(wq);
356 +       swake_up(wq);
358         return PSCI_RET_SUCCESS;
360 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/at91rm9200.c linux-4.4.46/arch/arm/mach-at91/at91rm9200.c
361 --- linux-4.4.46.orig/arch/arm/mach-at91/at91rm9200.c   2017-02-01 08:31:11.000000000 +0100
362 +++ linux-4.4.46/arch/arm/mach-at91/at91rm9200.c        2017-02-03 17:18:05.627414322 +0100
363 @@ -12,7 +12,6 @@
364  #include <linux/of_platform.h>
366  #include <asm/mach/arch.h>
367 -#include <asm/system_misc.h>
369  #include "generic.h"
370  #include "soc.h"
371 @@ -33,7 +32,6 @@
373         of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
375 -       arm_pm_idle = at91rm9200_idle;
376         at91rm9200_pm_init();
379 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/at91sam9.c linux-4.4.46/arch/arm/mach-at91/at91sam9.c
380 --- linux-4.4.46.orig/arch/arm/mach-at91/at91sam9.c     2017-02-01 08:31:11.000000000 +0100
381 +++ linux-4.4.46/arch/arm/mach-at91/at91sam9.c  2017-02-03 17:18:05.627414322 +0100
382 @@ -62,8 +62,6 @@
383                 soc_dev = soc_device_to_device(soc);
385         of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
387 -       arm_pm_idle = at91sam9_idle;
390  static void __init at91sam9_dt_device_init(void)
391 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/generic.h linux-4.4.46/arch/arm/mach-at91/generic.h
392 --- linux-4.4.46.orig/arch/arm/mach-at91/generic.h      2017-02-01 08:31:11.000000000 +0100
393 +++ linux-4.4.46/arch/arm/mach-at91/generic.h   2017-02-03 17:18:05.627414322 +0100
394 @@ -11,27 +11,18 @@
395  #ifndef _AT91_GENERIC_H
396  #define _AT91_GENERIC_H
398 -#include <linux/of.h>
399 -#include <linux/reboot.h>
401 - /* Map io */
402 -extern void __init at91_map_io(void);
403 -extern void __init at91_alt_map_io(void);
405 -/* idle */
406 -extern void at91rm9200_idle(void);
407 -extern void at91sam9_idle(void);
409  #ifdef CONFIG_PM
410  extern void __init at91rm9200_pm_init(void);
411  extern void __init at91sam9260_pm_init(void);
412  extern void __init at91sam9g45_pm_init(void);
413  extern void __init at91sam9x5_pm_init(void);
414 +extern void __init sama5_pm_init(void);
415  #else
416  static inline void __init at91rm9200_pm_init(void) { }
417  static inline void __init at91sam9260_pm_init(void) { }
418  static inline void __init at91sam9g45_pm_init(void) { }
419  static inline void __init at91sam9x5_pm_init(void) { }
420 +static inline void __init sama5_pm_init(void) { }
421  #endif
423  #endif /* _AT91_GENERIC_H */
424 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/Kconfig linux-4.4.46/arch/arm/mach-at91/Kconfig
425 --- linux-4.4.46.orig/arch/arm/mach-at91/Kconfig        2017-02-01 08:31:11.000000000 +0100
426 +++ linux-4.4.46/arch/arm/mach-at91/Kconfig     2017-02-03 17:18:05.627414322 +0100
427 @@ -99,6 +99,7 @@
428  config COMMON_CLK_AT91
429         bool
430         select COMMON_CLK
431 +       select MFD_SYSCON
433  config HAVE_AT91_SMD
434         bool
435 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/pm.c linux-4.4.46/arch/arm/mach-at91/pm.c
436 --- linux-4.4.46.orig/arch/arm/mach-at91/pm.c   2017-02-01 08:31:11.000000000 +0100
437 +++ linux-4.4.46/arch/arm/mach-at91/pm.c        2017-02-03 17:18:05.627414322 +0100
438 @@ -31,10 +31,13 @@
439  #include <asm/mach/irq.h>
440  #include <asm/fncpy.h>
441  #include <asm/cacheflush.h>
442 +#include <asm/system_misc.h>
444  #include "generic.h"
445  #include "pm.h"
447 +static void __iomem *pmc;
449  /*
450   * FIXME: this is needed to communicate between the pinctrl driver and
451   * the PM implementation in the machine. Possibly part of the PM
452 @@ -87,7 +90,7 @@
453         unsigned long scsr;
454         int i;
456 -       scsr = at91_pmc_read(AT91_PMC_SCSR);
457 +       scsr = readl(pmc + AT91_PMC_SCSR);
459         /* USB must not be using PLLB */
460         if ((scsr & at91_pm_data.uhp_udp_mask) != 0) {
461 @@ -101,8 +104,7 @@
463                 if ((scsr & (AT91_PMC_PCK0 << i)) == 0)
464                         continue;
466 -               css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
467 +               css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
468                 if (css != AT91_PMC_CSS_SLOW) {
469                         pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
470                         return 0;
471 @@ -145,8 +147,8 @@
472         flush_cache_all();
473         outer_disable();
475 -       at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0],
476 -                               at91_ramc_base[1], pm_data);
477 +       at91_suspend_sram_fn(pmc, at91_ramc_base[0],
478 +                            at91_ramc_base[1], pm_data);
480         outer_resume();
482 @@ -353,6 +355,21 @@
483         at91_pm_set_standby(standby);
486 +void at91rm9200_idle(void)
488 +       /*
489 +        * Disable the processor clock.  The processor will be automatically
490 +        * re-enabled by an interrupt or by a reset.
491 +        */
492 +       writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
495 +void at91sam9_idle(void)
497 +       writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
498 +       cpu_do_idle();
501  static void __init at91_pm_sram_init(void)
503         struct gen_pool *sram_pool;
504 @@ -399,13 +416,36 @@
505                         &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz);
508 -static void __init at91_pm_init(void)
509 +static const struct of_device_id atmel_pmc_ids[] __initconst = {
510 +       { .compatible = "atmel,at91rm9200-pmc"  },
511 +       { .compatible = "atmel,at91sam9260-pmc" },
512 +       { .compatible = "atmel,at91sam9g45-pmc" },
513 +       { .compatible = "atmel,at91sam9n12-pmc" },
514 +       { .compatible = "atmel,at91sam9x5-pmc" },
515 +       { .compatible = "atmel,sama5d3-pmc" },
516 +       { .compatible = "atmel,sama5d2-pmc" },
517 +       { /* sentinel */ },
520 +static void __init at91_pm_init(void (*pm_idle)(void))
522 -       at91_pm_sram_init();
523 +       struct device_node *pmc_np;
525         if (at91_cpuidle_device.dev.platform_data)
526                 platform_device_register(&at91_cpuidle_device);
528 +       pmc_np = of_find_matching_node(NULL, atmel_pmc_ids);
529 +       pmc = of_iomap(pmc_np, 0);
530 +       if (!pmc) {
531 +               pr_err("AT91: PM not supported, PMC not found\n");
532 +               return;
533 +       }
535 +       if (pm_idle)
536 +               arm_pm_idle = pm_idle;
538 +       at91_pm_sram_init();
540         if (at91_suspend_sram_fn)
541                 suspend_set_ops(&at91_pm_ops);
542         else
543 @@ -424,7 +464,7 @@
544         at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP;
545         at91_pm_data.memctrl = AT91_MEMCTRL_MC;
547 -       at91_pm_init();
548 +       at91_pm_init(at91rm9200_idle);
551  void __init at91sam9260_pm_init(void)
552 @@ -432,7 +472,7 @@
553         at91_dt_ramc();
554         at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC;
555         at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
556 -       return at91_pm_init();
557 +       at91_pm_init(at91sam9_idle);
560  void __init at91sam9g45_pm_init(void)
561 @@ -440,7 +480,7 @@
562         at91_dt_ramc();
563         at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP;
564         at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
565 -       return at91_pm_init();
566 +       at91_pm_init(at91sam9_idle);
569  void __init at91sam9x5_pm_init(void)
570 @@ -448,5 +488,13 @@
571         at91_dt_ramc();
572         at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
573         at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
574 -       return at91_pm_init();
575 +       at91_pm_init(at91sam9_idle);
578 +void __init sama5_pm_init(void)
580 +       at91_dt_ramc();
581 +       at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
582 +       at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
583 +       at91_pm_init(NULL);
585 diff -Nur linux-4.4.46.orig/arch/arm/mach-at91/sama5.c linux-4.4.46/arch/arm/mach-at91/sama5.c
586 --- linux-4.4.46.orig/arch/arm/mach-at91/sama5.c        2017-02-01 08:31:11.000000000 +0100
587 +++ linux-4.4.46/arch/arm/mach-at91/sama5.c     2017-02-03 17:18:05.627414322 +0100
588 @@ -51,7 +51,7 @@
589                 soc_dev = soc_device_to_device(soc);
591         of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
592 -       at91sam9x5_pm_init();
593 +       sama5_pm_init();
596  static const char *const sama5_dt_board_compat[] __initconst = {
597 diff -Nur linux-4.4.46.orig/arch/arm/mach-exynos/platsmp.c linux-4.4.46/arch/arm/mach-exynos/platsmp.c
598 --- linux-4.4.46.orig/arch/arm/mach-exynos/platsmp.c    2017-02-01 08:31:11.000000000 +0100
599 +++ linux-4.4.46/arch/arm/mach-exynos/platsmp.c 2017-02-03 17:18:05.627414322 +0100
600 @@ -230,7 +230,7 @@
601         return (void __iomem *)(S5P_VA_SCU);
604 -static DEFINE_SPINLOCK(boot_lock);
605 +static DEFINE_RAW_SPINLOCK(boot_lock);
607  static void exynos_secondary_init(unsigned int cpu)
609 @@ -243,8 +243,8 @@
610         /*
611          * Synchronise with the boot thread.
612          */
613 -       spin_lock(&boot_lock);
614 -       spin_unlock(&boot_lock);
615 +       raw_spin_lock(&boot_lock);
616 +       raw_spin_unlock(&boot_lock);
619  int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
620 @@ -308,7 +308,7 @@
621          * Set synchronisation state between this boot processor
622          * and the secondary one
623          */
624 -       spin_lock(&boot_lock);
625 +       raw_spin_lock(&boot_lock);
627         /*
628          * The secondary processor is waiting to be released from
629 @@ -335,7 +335,7 @@
631                 if (timeout == 0) {
632                         printk(KERN_ERR "cpu1 power enable failed");
633 -                       spin_unlock(&boot_lock);
634 +                       raw_spin_unlock(&boot_lock);
635                         return -ETIMEDOUT;
636                 }
637         }
638 @@ -381,7 +381,7 @@
639          * calibrations, then wait for it to finish
640          */
641  fail:
642 -       spin_unlock(&boot_lock);
643 +       raw_spin_unlock(&boot_lock);
645         return pen_release != -1 ? ret : 0;
647 diff -Nur linux-4.4.46.orig/arch/arm/mach-hisi/platmcpm.c linux-4.4.46/arch/arm/mach-hisi/platmcpm.c
648 --- linux-4.4.46.orig/arch/arm/mach-hisi/platmcpm.c     2017-02-01 08:31:11.000000000 +0100
649 +++ linux-4.4.46/arch/arm/mach-hisi/platmcpm.c  2017-02-03 17:18:05.627414322 +0100
650 @@ -61,7 +61,7 @@
652  static void __iomem *sysctrl, *fabric;
653  static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
654 -static DEFINE_SPINLOCK(boot_lock);
655 +static DEFINE_RAW_SPINLOCK(boot_lock);
656  static u32 fabric_phys_addr;
657  /*
658   * [0]: bootwrapper physical address
659 @@ -113,7 +113,7 @@
660         if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
661                 return -EINVAL;
663 -       spin_lock_irq(&boot_lock);
664 +       raw_spin_lock_irq(&boot_lock);
666         if (hip04_cpu_table[cluster][cpu])
667                 goto out;
668 @@ -147,7 +147,7 @@
670  out:
671         hip04_cpu_table[cluster][cpu]++;
672 -       spin_unlock_irq(&boot_lock);
673 +       raw_spin_unlock_irq(&boot_lock);
675         return 0;
677 @@ -162,11 +162,11 @@
678         cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
679         cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
681 -       spin_lock(&boot_lock);
682 +       raw_spin_lock(&boot_lock);
683         hip04_cpu_table[cluster][cpu]--;
684         if (hip04_cpu_table[cluster][cpu] == 1) {
685                 /* A power_up request went ahead of us. */
686 -               spin_unlock(&boot_lock);
687 +               raw_spin_unlock(&boot_lock);
688                 return;
689         } else if (hip04_cpu_table[cluster][cpu] > 1) {
690                 pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
691 @@ -174,7 +174,7 @@
692         }
694         last_man = hip04_cluster_is_down(cluster);
695 -       spin_unlock(&boot_lock);
696 +       raw_spin_unlock(&boot_lock);
697         if (last_man) {
698                 /* Since it's Cortex A15, disable L2 prefetching. */
699                 asm volatile(
700 @@ -203,7 +203,7 @@
701                cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
703         count = TIMEOUT_MSEC / POLL_MSEC;
704 -       spin_lock_irq(&boot_lock);
705 +       raw_spin_lock_irq(&boot_lock);
706         for (tries = 0; tries < count; tries++) {
707                 if (hip04_cpu_table[cluster][cpu])
708                         goto err;
709 @@ -211,10 +211,10 @@
710                 data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
711                 if (data & CORE_WFI_STATUS(cpu))
712                         break;
713 -               spin_unlock_irq(&boot_lock);
714 +               raw_spin_unlock_irq(&boot_lock);
715                 /* Wait for clean L2 when the whole cluster is down. */
716                 msleep(POLL_MSEC);
717 -               spin_lock_irq(&boot_lock);
718 +               raw_spin_lock_irq(&boot_lock);
719         }
720         if (tries >= count)
721                 goto err;
722 @@ -231,10 +231,10 @@
723                 goto err;
724         if (hip04_cluster_is_down(cluster))
725                 hip04_set_snoop_filter(cluster, 0);
726 -       spin_unlock_irq(&boot_lock);
727 +       raw_spin_unlock_irq(&boot_lock);
728         return 1;
729  err:
730 -       spin_unlock_irq(&boot_lock);
731 +       raw_spin_unlock_irq(&boot_lock);
732         return 0;
734  #endif
735 diff -Nur linux-4.4.46.orig/arch/arm/mach-imx/Kconfig linux-4.4.46/arch/arm/mach-imx/Kconfig
736 --- linux-4.4.46.orig/arch/arm/mach-imx/Kconfig 2017-02-01 08:31:11.000000000 +0100
737 +++ linux-4.4.46/arch/arm/mach-imx/Kconfig      2017-02-03 17:18:05.627414322 +0100
738 @@ -524,7 +524,7 @@
739         bool "i.MX6 Quad/DualLite support"
740         select ARM_ERRATA_764369 if SMP
741         select HAVE_ARM_SCU if SMP
742 -       select HAVE_ARM_TWD if SMP
743 +       select HAVE_ARM_TWD
744         select PCI_DOMAINS if PCI
745         select PINCTRL_IMX6Q
746         select SOC_IMX6
747 diff -Nur linux-4.4.46.orig/arch/arm/mach-omap2/omap-smp.c linux-4.4.46/arch/arm/mach-omap2/omap-smp.c
748 --- linux-4.4.46.orig/arch/arm/mach-omap2/omap-smp.c    2017-02-01 08:31:11.000000000 +0100
749 +++ linux-4.4.46/arch/arm/mach-omap2/omap-smp.c 2017-02-03 17:18:05.631414475 +0100
750 @@ -43,7 +43,7 @@
751  /* SCU base address */
752  static void __iomem *scu_base;
754 -static DEFINE_SPINLOCK(boot_lock);
755 +static DEFINE_RAW_SPINLOCK(boot_lock);
757  void __iomem *omap4_get_scu_base(void)
759 @@ -74,8 +74,8 @@
760         /*
761          * Synchronise with the boot thread.
762          */
763 -       spin_lock(&boot_lock);
764 -       spin_unlock(&boot_lock);
765 +       raw_spin_lock(&boot_lock);
766 +       raw_spin_unlock(&boot_lock);
769  static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
770 @@ -89,7 +89,7 @@
771          * Set synchronisation state between this boot processor
772          * and the secondary one
773          */
774 -       spin_lock(&boot_lock);
775 +       raw_spin_lock(&boot_lock);
777         /*
778          * Update the AuxCoreBoot0 with boot state for secondary core.
779 @@ -166,7 +166,7 @@
780          * Now the secondary core is starting up let it run its
781          * calibrations, then wait for it to finish
782          */
783 -       spin_unlock(&boot_lock);
784 +       raw_spin_unlock(&boot_lock);
786         return 0;
788 diff -Nur linux-4.4.46.orig/arch/arm/mach-prima2/platsmp.c linux-4.4.46/arch/arm/mach-prima2/platsmp.c
789 --- linux-4.4.46.orig/arch/arm/mach-prima2/platsmp.c    2017-02-01 08:31:11.000000000 +0100
790 +++ linux-4.4.46/arch/arm/mach-prima2/platsmp.c 2017-02-03 17:18:05.631414475 +0100
791 @@ -22,7 +22,7 @@
793  static void __iomem *clk_base;
795 -static DEFINE_SPINLOCK(boot_lock);
796 +static DEFINE_RAW_SPINLOCK(boot_lock);
798  static void sirfsoc_secondary_init(unsigned int cpu)
800 @@ -36,8 +36,8 @@
801         /*
802          * Synchronise with the boot thread.
803          */
804 -       spin_lock(&boot_lock);
805 -       spin_unlock(&boot_lock);
806 +       raw_spin_lock(&boot_lock);
807 +       raw_spin_unlock(&boot_lock);
810  static const struct of_device_id clk_ids[]  = {
811 @@ -75,7 +75,7 @@
812         /* make sure write buffer is drained */
813         mb();
815 -       spin_lock(&boot_lock);
816 +       raw_spin_lock(&boot_lock);
818         /*
819          * The secondary processor is waiting to be released from
820 @@ -107,7 +107,7 @@
821          * now the secondary core is starting up let it run its
822          * calibrations, then wait for it to finish
823          */
824 -       spin_unlock(&boot_lock);
825 +       raw_spin_unlock(&boot_lock);
827         return pen_release != -1 ? -ENOSYS : 0;
829 diff -Nur linux-4.4.46.orig/arch/arm/mach-qcom/platsmp.c linux-4.4.46/arch/arm/mach-qcom/platsmp.c
830 --- linux-4.4.46.orig/arch/arm/mach-qcom/platsmp.c      2017-02-01 08:31:11.000000000 +0100
831 +++ linux-4.4.46/arch/arm/mach-qcom/platsmp.c   2017-02-03 17:18:05.631414475 +0100
832 @@ -46,7 +46,7 @@
834  extern void secondary_startup_arm(void);
836 -static DEFINE_SPINLOCK(boot_lock);
837 +static DEFINE_RAW_SPINLOCK(boot_lock);
839  #ifdef CONFIG_HOTPLUG_CPU
840  static void qcom_cpu_die(unsigned int cpu)
841 @@ -60,8 +60,8 @@
842         /*
843          * Synchronise with the boot thread.
844          */
845 -       spin_lock(&boot_lock);
846 -       spin_unlock(&boot_lock);
847 +       raw_spin_lock(&boot_lock);
848 +       raw_spin_unlock(&boot_lock);
851  static int scss_release_secondary(unsigned int cpu)
852 @@ -284,7 +284,7 @@
853          * set synchronisation state between this boot processor
854          * and the secondary one
855          */
856 -       spin_lock(&boot_lock);
857 +       raw_spin_lock(&boot_lock);
859         /*
860          * Send the secondary CPU a soft interrupt, thereby causing
861 @@ -297,7 +297,7 @@
862          * now the secondary core is starting up let it run its
863          * calibrations, then wait for it to finish
864          */
865 -       spin_unlock(&boot_lock);
866 +       raw_spin_unlock(&boot_lock);
868         return ret;
870 diff -Nur linux-4.4.46.orig/arch/arm/mach-spear/platsmp.c linux-4.4.46/arch/arm/mach-spear/platsmp.c
871 --- linux-4.4.46.orig/arch/arm/mach-spear/platsmp.c     2017-02-01 08:31:11.000000000 +0100
872 +++ linux-4.4.46/arch/arm/mach-spear/platsmp.c  2017-02-03 17:18:05.631414475 +0100
873 @@ -32,7 +32,7 @@
874         sync_cache_w(&pen_release);
877 -static DEFINE_SPINLOCK(boot_lock);
878 +static DEFINE_RAW_SPINLOCK(boot_lock);
880  static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
882 @@ -47,8 +47,8 @@
883         /*
884          * Synchronise with the boot thread.
885          */
886 -       spin_lock(&boot_lock);
887 -       spin_unlock(&boot_lock);
888 +       raw_spin_lock(&boot_lock);
889 +       raw_spin_unlock(&boot_lock);
892  static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
893 @@ -59,7 +59,7 @@
894          * set synchronisation state between this boot processor
895          * and the secondary one
896          */
897 -       spin_lock(&boot_lock);
898 +       raw_spin_lock(&boot_lock);
900         /*
901          * The secondary processor is waiting to be released from
902 @@ -84,7 +84,7 @@
903          * now the secondary core is starting up let it run its
904          * calibrations, then wait for it to finish
905          */
906 -       spin_unlock(&boot_lock);
907 +       raw_spin_unlock(&boot_lock);
909         return pen_release != -1 ? -ENOSYS : 0;
911 diff -Nur linux-4.4.46.orig/arch/arm/mach-sti/platsmp.c linux-4.4.46/arch/arm/mach-sti/platsmp.c
912 --- linux-4.4.46.orig/arch/arm/mach-sti/platsmp.c       2017-02-01 08:31:11.000000000 +0100
913 +++ linux-4.4.46/arch/arm/mach-sti/platsmp.c    2017-02-03 17:18:05.631414475 +0100
914 @@ -35,7 +35,7 @@
915         sync_cache_w(&pen_release);
918 -static DEFINE_SPINLOCK(boot_lock);
919 +static DEFINE_RAW_SPINLOCK(boot_lock);
921  static void sti_secondary_init(unsigned int cpu)
923 @@ -48,8 +48,8 @@
924         /*
925          * Synchronise with the boot thread.
926          */
927 -       spin_lock(&boot_lock);
928 -       spin_unlock(&boot_lock);
929 +       raw_spin_lock(&boot_lock);
930 +       raw_spin_unlock(&boot_lock);
933  static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
934 @@ -60,7 +60,7 @@
935          * set synchronisation state between this boot processor
936          * and the secondary one
937          */
938 -       spin_lock(&boot_lock);
939 +       raw_spin_lock(&boot_lock);
941         /*
942          * The secondary processor is waiting to be released from
943 @@ -91,7 +91,7 @@
944          * now the secondary core is starting up let it run its
945          * calibrations, then wait for it to finish
946          */
947 -       spin_unlock(&boot_lock);
948 +       raw_spin_unlock(&boot_lock);
950         return pen_release != -1 ? -ENOSYS : 0;
952 diff -Nur linux-4.4.46.orig/arch/arm/mm/fault.c linux-4.4.46/arch/arm/mm/fault.c
953 --- linux-4.4.46.orig/arch/arm/mm/fault.c       2017-02-01 08:31:11.000000000 +0100
954 +++ linux-4.4.46/arch/arm/mm/fault.c    2017-02-03 17:18:05.631414475 +0100
955 @@ -430,6 +430,9 @@
956         if (addr < TASK_SIZE)
957                 return do_page_fault(addr, fsr, regs);
959 +       if (interrupts_enabled(regs))
960 +               local_irq_enable();
962         if (user_mode(regs))
963                 goto bad_area;
965 @@ -497,6 +500,9 @@
966  static int
967  do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
969 +       if (interrupts_enabled(regs))
970 +               local_irq_enable();
972         do_bad_area(addr, fsr, regs);
973         return 0;
975 diff -Nur linux-4.4.46.orig/arch/arm/mm/highmem.c linux-4.4.46/arch/arm/mm/highmem.c
976 --- linux-4.4.46.orig/arch/arm/mm/highmem.c     2017-02-01 08:31:11.000000000 +0100
977 +++ linux-4.4.46/arch/arm/mm/highmem.c  2017-02-03 17:18:05.631414475 +0100
978 @@ -34,6 +34,11 @@
979         return *ptep;
982 +static unsigned int fixmap_idx(int type)
984 +       return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
987  void *kmap(struct page *page)
989         might_sleep();
990 @@ -54,12 +59,13 @@
992  void *kmap_atomic(struct page *page)
994 +       pte_t pte = mk_pte(page, kmap_prot);
995         unsigned int idx;
996         unsigned long vaddr;
997         void *kmap;
998         int type;
1000 -       preempt_disable();
1001 +       preempt_disable_nort();
1002         pagefault_disable();
1003         if (!PageHighMem(page))
1004                 return page_address(page);
1005 @@ -79,7 +85,7 @@
1007         type = kmap_atomic_idx_push();
1009 -       idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
1010 +       idx = fixmap_idx(type);
1011         vaddr = __fix_to_virt(idx);
1012  #ifdef CONFIG_DEBUG_HIGHMEM
1013         /*
1014 @@ -93,7 +99,10 @@
1015          * in place, so the contained TLB flush ensures the TLB is updated
1016          * with the new mapping.
1017          */
1018 -       set_fixmap_pte(idx, mk_pte(page, kmap_prot));
1019 +#ifdef CONFIG_PREEMPT_RT_FULL
1020 +       current->kmap_pte[type] = pte;
1021 +#endif
1022 +       set_fixmap_pte(idx, pte);
1024         return (void *)vaddr;
1026 @@ -106,44 +115,75 @@
1028         if (kvaddr >= (void *)FIXADDR_START) {
1029                 type = kmap_atomic_idx();
1030 -               idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
1031 +               idx = fixmap_idx(type);
1033                 if (cache_is_vivt())
1034                         __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
1035 +#ifdef CONFIG_PREEMPT_RT_FULL
1036 +               current->kmap_pte[type] = __pte(0);
1037 +#endif
1038  #ifdef CONFIG_DEBUG_HIGHMEM
1039                 BUG_ON(vaddr != __fix_to_virt(idx));
1040 -               set_fixmap_pte(idx, __pte(0));
1041  #else
1042                 (void) idx;  /* to kill a warning */
1043  #endif
1044 +               set_fixmap_pte(idx, __pte(0));
1045                 kmap_atomic_idx_pop();
1046         } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
1047                 /* this address was obtained through kmap_high_get() */
1048                 kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
1049         }
1050         pagefault_enable();
1051 -       preempt_enable();
1052 +       preempt_enable_nort();
1054  EXPORT_SYMBOL(__kunmap_atomic);
1056  void *kmap_atomic_pfn(unsigned long pfn)
1058 +       pte_t pte = pfn_pte(pfn, kmap_prot);
1059         unsigned long vaddr;
1060         int idx, type;
1061         struct page *page = pfn_to_page(pfn);
1063 -       preempt_disable();
1064 +       preempt_disable_nort();
1065         pagefault_disable();
1066         if (!PageHighMem(page))
1067                 return page_address(page);
1069         type = kmap_atomic_idx_push();
1070 -       idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
1071 +       idx = fixmap_idx(type);
1072         vaddr = __fix_to_virt(idx);
1073  #ifdef CONFIG_DEBUG_HIGHMEM
1074         BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
1075  #endif
1076 -       set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
1077 +#ifdef CONFIG_PREEMPT_RT_FULL
1078 +       current->kmap_pte[type] = pte;
1079 +#endif
1080 +       set_fixmap_pte(idx, pte);
1082         return (void *)vaddr;
1084 +#if defined CONFIG_PREEMPT_RT_FULL
1085 +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
1087 +       int i;
1089 +       /*
1090 +        * Clear @prev's kmap_atomic mappings
1091 +        */
1092 +       for (i = 0; i < prev_p->kmap_idx; i++) {
1093 +               int idx = fixmap_idx(i);
1095 +               set_fixmap_pte(idx, __pte(0));
1096 +       }
1097 +       /*
1098 +        * Restore @next_p's kmap_atomic mappings
1099 +        */
1100 +       for (i = 0; i < next_p->kmap_idx; i++) {
1101 +               int idx = fixmap_idx(i);
1103 +               if (!pte_none(next_p->kmap_pte[i]))
1104 +                       set_fixmap_pte(idx, next_p->kmap_pte[i]);
1105 +       }
1107 +#endif
1108 diff -Nur linux-4.4.46.orig/arch/arm/plat-versatile/platsmp.c linux-4.4.46/arch/arm/plat-versatile/platsmp.c
1109 --- linux-4.4.46.orig/arch/arm/plat-versatile/platsmp.c 2017-02-01 08:31:11.000000000 +0100
1110 +++ linux-4.4.46/arch/arm/plat-versatile/platsmp.c      2017-02-03 17:18:05.631414475 +0100
1111 @@ -30,7 +30,7 @@
1112         sync_cache_w(&pen_release);
1115 -static DEFINE_SPINLOCK(boot_lock);
1116 +static DEFINE_RAW_SPINLOCK(boot_lock);
1118  void versatile_secondary_init(unsigned int cpu)
1120 @@ -43,8 +43,8 @@
1121         /*
1122          * Synchronise with the boot thread.
1123          */
1124 -       spin_lock(&boot_lock);
1125 -       spin_unlock(&boot_lock);
1126 +       raw_spin_lock(&boot_lock);
1127 +       raw_spin_unlock(&boot_lock);
1130  int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
1131 @@ -55,7 +55,7 @@
1132          * Set synchronisation state between this boot processor
1133          * and the secondary one
1134          */
1135 -       spin_lock(&boot_lock);
1136 +       raw_spin_lock(&boot_lock);
1138         /*
1139          * This is really belt and braces; we hold unintended secondary
1140 @@ -85,7 +85,7 @@
1141          * now the secondary core is starting up let it run its
1142          * calibrations, then wait for it to finish
1143          */
1144 -       spin_unlock(&boot_lock);
1145 +       raw_spin_unlock(&boot_lock);
1147         return pen_release != -1 ? -ENOSYS : 0;
1149 diff -Nur linux-4.4.46.orig/arch/arm64/include/asm/thread_info.h linux-4.4.46/arch/arm64/include/asm/thread_info.h
1150 --- linux-4.4.46.orig/arch/arm64/include/asm/thread_info.h      2017-02-01 08:31:11.000000000 +0100
1151 +++ linux-4.4.46/arch/arm64/include/asm/thread_info.h   2017-02-03 17:18:05.631414475 +0100
1152 @@ -49,6 +49,7 @@
1153         mm_segment_t            addr_limit;     /* address limit */
1154         struct task_struct      *task;          /* main task structure */
1155         int                     preempt_count;  /* 0 => preemptable, <0 => bug */
1156 +       int                     preempt_lazy_count; /* 0 => preemptable, <0 => bug */
1157         int                     cpu;            /* cpu */
1158  };
1160 @@ -103,6 +104,7 @@
1161  #define TIF_NEED_RESCHED       1
1162  #define TIF_NOTIFY_RESUME      2       /* callback before returning to user */
1163  #define TIF_FOREIGN_FPSTATE    3       /* CPU's FP state is not current's */
1164 +#define TIF_NEED_RESCHED_LAZY  4
1165  #define TIF_NOHZ               7
1166  #define TIF_SYSCALL_TRACE      8
1167  #define TIF_SYSCALL_AUDIT      9
1168 @@ -118,6 +120,7 @@
1169  #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
1170  #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
1171  #define _TIF_FOREIGN_FPSTATE   (1 << TIF_FOREIGN_FPSTATE)
1172 +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
1173  #define _TIF_NOHZ              (1 << TIF_NOHZ)
1174  #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
1175  #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
1176 @@ -126,7 +129,8 @@
1177  #define _TIF_32BIT             (1 << TIF_32BIT)
1179  #define _TIF_WORK_MASK         (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
1180 -                                _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
1181 +                                _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
1182 +                                _TIF_NEED_RESCHED_LAZY)
1184  #define _TIF_SYSCALL_WORK      (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
1185                                  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
1186 diff -Nur linux-4.4.46.orig/arch/arm64/Kconfig linux-4.4.46/arch/arm64/Kconfig
1187 --- linux-4.4.46.orig/arch/arm64/Kconfig        2017-02-01 08:31:11.000000000 +0100
1188 +++ linux-4.4.46/arch/arm64/Kconfig     2017-02-03 17:18:05.631414475 +0100
1189 @@ -76,6 +76,7 @@
1190         select HAVE_PERF_REGS
1191         select HAVE_PERF_USER_STACK_DUMP
1192         select HAVE_RCU_TABLE_FREE
1193 +       select HAVE_PREEMPT_LAZY
1194         select HAVE_SYSCALL_TRACEPOINTS
1195         select IOMMU_DMA if IOMMU_SUPPORT
1196         select IRQ_DOMAIN
1197 @@ -582,7 +583,7 @@
1199  config XEN
1200         bool "Xen guest support on ARM64"
1201 -       depends on ARM64 && OF
1202 +       depends on ARM64 && OF && !PREEMPT_RT_FULL
1203         select SWIOTLB_XEN
1204         help
1205           Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
1206 diff -Nur linux-4.4.46.orig/arch/arm64/kernel/asm-offsets.c linux-4.4.46/arch/arm64/kernel/asm-offsets.c
1207 --- linux-4.4.46.orig/arch/arm64/kernel/asm-offsets.c   2017-02-01 08:31:11.000000000 +0100
1208 +++ linux-4.4.46/arch/arm64/kernel/asm-offsets.c        2017-02-03 17:18:05.639414785 +0100
1209 @@ -35,6 +35,7 @@
1210    BLANK();
1211    DEFINE(TI_FLAGS,             offsetof(struct thread_info, flags));
1212    DEFINE(TI_PREEMPT,           offsetof(struct thread_info, preempt_count));
1213 +  DEFINE(TI_PREEMPT_LAZY,      offsetof(struct thread_info, preempt_lazy_count));
1214    DEFINE(TI_ADDR_LIMIT,                offsetof(struct thread_info, addr_limit));
1215    DEFINE(TI_TASK,              offsetof(struct thread_info, task));
1216    DEFINE(TI_CPU,               offsetof(struct thread_info, cpu));
1217 diff -Nur linux-4.4.46.orig/arch/arm64/kernel/entry.S linux-4.4.46/arch/arm64/kernel/entry.S
1218 --- linux-4.4.46.orig/arch/arm64/kernel/entry.S 2017-02-01 08:31:11.000000000 +0100
1219 +++ linux-4.4.46/arch/arm64/kernel/entry.S      2017-02-03 17:18:05.639414785 +0100
1220 @@ -376,11 +376,16 @@
1221  #ifdef CONFIG_PREEMPT
1222         get_thread_info tsk
1223         ldr     w24, [tsk, #TI_PREEMPT]         // get preempt count
1224 -       cbnz    w24, 1f                         // preempt count != 0
1225 +       cbnz    w24, 2f                         // preempt count != 0
1226         ldr     x0, [tsk, #TI_FLAGS]            // get flags
1227 -       tbz     x0, #TIF_NEED_RESCHED, 1f       // needs rescheduling?
1228 -       bl      el1_preempt
1229 +       tbnz    x0, #TIF_NEED_RESCHED, 1f       // needs rescheduling?
1231 +       ldr     w24, [tsk, #TI_PREEMPT_LAZY]    // get preempt lazy count
1232 +       cbnz    w24, 2f                         // preempt lazy count != 0
1233 +       tbz     x0, #TIF_NEED_RESCHED_LAZY, 2f  // needs rescheduling?
1234  1:
1235 +       bl      el1_preempt
1237  #endif
1238  #ifdef CONFIG_TRACE_IRQFLAGS
1239         bl      trace_hardirqs_on
1240 @@ -394,6 +399,7 @@
1241  1:     bl      preempt_schedule_irq            // irq en/disable is done inside
1242         ldr     x0, [tsk, #TI_FLAGS]            // get new tasks TI_FLAGS
1243         tbnz    x0, #TIF_NEED_RESCHED, 1b       // needs rescheduling?
1244 +       tbnz    x0, #TIF_NEED_RESCHED_LAZY, 1b  // needs rescheduling?
1245         ret     x24
1246  #endif
1248 @@ -638,6 +644,7 @@
1249   */
1250  work_pending:
1251         tbnz    x1, #TIF_NEED_RESCHED, work_resched
1252 +       tbnz    x1, #TIF_NEED_RESCHED_LAZY, work_resched
1253         /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
1254         ldr     x2, [sp, #S_PSTATE]
1255         mov     x0, sp                          // 'regs'
1256 diff -Nur linux-4.4.46.orig/arch/Kconfig linux-4.4.46/arch/Kconfig
1257 --- linux-4.4.46.orig/arch/Kconfig      2017-02-01 08:31:11.000000000 +0100
1258 +++ linux-4.4.46/arch/Kconfig   2017-02-03 17:18:05.627414322 +0100
1259 @@ -9,6 +9,7 @@
1260         tristate "OProfile system profiling"
1261         depends on PROFILING
1262         depends on HAVE_OPROFILE
1263 +       depends on !PREEMPT_RT_FULL
1264         select RING_BUFFER
1265         select RING_BUFFER_ALLOW_SWAP
1266         help
1267 @@ -52,6 +53,7 @@
1268  config JUMP_LABEL
1269         bool "Optimize very unlikely/likely branches"
1270         depends on HAVE_ARCH_JUMP_LABEL
1271 +       depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
1272         help
1273           This option enables a transparent branch optimization that
1274          makes certain almost-always-true or almost-always-false branch
1275 diff -Nur linux-4.4.46.orig/arch/mips/Kconfig linux-4.4.46/arch/mips/Kconfig
1276 --- linux-4.4.46.orig/arch/mips/Kconfig 2017-02-01 08:31:11.000000000 +0100
1277 +++ linux-4.4.46/arch/mips/Kconfig      2017-02-03 17:18:05.639414785 +0100
1278 @@ -2410,7 +2410,7 @@
1280  config HIGHMEM
1281         bool "High Memory Support"
1282 -       depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
1283 +       depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
1285  config CPU_SUPPORTS_HIGHMEM
1286         bool
1287 diff -Nur linux-4.4.46.orig/arch/mips/kvm/mips.c linux-4.4.46/arch/mips/kvm/mips.c
1288 --- linux-4.4.46.orig/arch/mips/kvm/mips.c      2017-02-01 08:31:11.000000000 +0100
1289 +++ linux-4.4.46/arch/mips/kvm/mips.c   2017-02-03 17:18:05.639414785 +0100
1290 @@ -454,8 +454,8 @@
1292         dvcpu->arch.wait = 0;
1294 -       if (waitqueue_active(&dvcpu->wq))
1295 -               wake_up_interruptible(&dvcpu->wq);
1296 +       if (swait_active(&dvcpu->wq))
1297 +               swake_up(&dvcpu->wq);
1299         return 0;
1301 @@ -1183,8 +1183,8 @@
1302         kvm_mips_callbacks->queue_timer_int(vcpu);
1304         vcpu->arch.wait = 0;
1305 -       if (waitqueue_active(&vcpu->wq))
1306 -               wake_up_interruptible(&vcpu->wq);
1307 +       if (swait_active(&vcpu->wq))
1308 +               swake_up(&vcpu->wq);
1311  /* low level hrtimer wake routine */
1312 diff -Nur linux-4.4.46.orig/arch/powerpc/include/asm/kvm_host.h linux-4.4.46/arch/powerpc/include/asm/kvm_host.h
1313 --- linux-4.4.46.orig/arch/powerpc/include/asm/kvm_host.h       2017-02-01 08:31:11.000000000 +0100
1314 +++ linux-4.4.46/arch/powerpc/include/asm/kvm_host.h    2017-02-03 17:18:05.639414785 +0100
1315 @@ -286,7 +286,7 @@
1316         struct list_head runnable_threads;
1317         struct list_head preempt_list;
1318         spinlock_t lock;
1319 -       wait_queue_head_t wq;
1320 +       struct swait_queue_head wq;
1321         spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
1322         u64 stolen_tb;
1323         u64 preempt_tb;
1324 @@ -627,7 +627,7 @@
1325         u8 prodded;
1326         u32 last_inst;
1328 -       wait_queue_head_t *wqp;
1329 +       struct swait_queue_head *wqp;
1330         struct kvmppc_vcore *vcore;
1331         int ret;
1332         int trap;
1333 diff -Nur linux-4.4.46.orig/arch/powerpc/include/asm/thread_info.h linux-4.4.46/arch/powerpc/include/asm/thread_info.h
1334 --- linux-4.4.46.orig/arch/powerpc/include/asm/thread_info.h    2017-02-01 08:31:11.000000000 +0100
1335 +++ linux-4.4.46/arch/powerpc/include/asm/thread_info.h 2017-02-03 17:18:05.639414785 +0100
1336 @@ -42,6 +42,8 @@
1337         int             cpu;                    /* cpu we're on */
1338         int             preempt_count;          /* 0 => preemptable,
1339                                                    <0 => BUG */
1340 +       int             preempt_lazy_count;      /* 0 => preemptable,
1341 +                                                  <0 => BUG */
1342         unsigned long   local_flags;            /* private flags for thread */
1344         /* low level flags - has atomic operations done on it */
1345 @@ -82,8 +84,7 @@
1346  #define TIF_SYSCALL_TRACE      0       /* syscall trace active */
1347  #define TIF_SIGPENDING         1       /* signal pending */
1348  #define TIF_NEED_RESCHED       2       /* rescheduling necessary */
1349 -#define TIF_POLLING_NRFLAG     3       /* true if poll_idle() is polling
1350 -                                          TIF_NEED_RESCHED */
1351 +#define TIF_NEED_RESCHED_LAZY  3       /* lazy rescheduling necessary */
1352  #define TIF_32BIT              4       /* 32 bit binary */
1353  #define TIF_RESTORE_TM         5       /* need to restore TM FP/VEC/VSX */
1354  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
1355 @@ -101,6 +102,8 @@
1356  #if defined(CONFIG_PPC64)
1357  #define TIF_ELF2ABI            18      /* function descriptors must die! */
1358  #endif
1359 +#define TIF_POLLING_NRFLAG     19      /* true if poll_idle() is polling
1360 +                                          TIF_NEED_RESCHED */
1362  /* as above, but as bit values */
1363  #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
1364 @@ -119,14 +122,16 @@
1365  #define _TIF_SYSCALL_TRACEPOINT        (1<<TIF_SYSCALL_TRACEPOINT)
1366  #define _TIF_EMULATE_STACK_STORE       (1<<TIF_EMULATE_STACK_STORE)
1367  #define _TIF_NOHZ              (1<<TIF_NOHZ)
1368 +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
1369  #define _TIF_SYSCALL_DOTRACE   (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
1370                                  _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
1371                                  _TIF_NOHZ)
1373  #define _TIF_USER_WORK_MASK    (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
1374                                  _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
1375 -                                _TIF_RESTORE_TM)
1376 +                                _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
1377  #define _TIF_PERSYSCALL_MASK   (_TIF_RESTOREALL|_TIF_NOERROR)
1378 +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
1380  /* Bits in local_flags */
1381  /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
1382 diff -Nur linux-4.4.46.orig/arch/powerpc/Kconfig linux-4.4.46/arch/powerpc/Kconfig
1383 --- linux-4.4.46.orig/arch/powerpc/Kconfig      2017-02-01 08:31:11.000000000 +0100
1384 +++ linux-4.4.46/arch/powerpc/Kconfig   2017-02-03 17:18:05.639414785 +0100
1385 @@ -60,10 +60,11 @@
1387  config RWSEM_GENERIC_SPINLOCK
1388         bool
1389 +       default y if PREEMPT_RT_FULL
1391  config RWSEM_XCHGADD_ALGORITHM
1392         bool
1393 -       default y
1394 +       default y if !PREEMPT_RT_FULL
1396  config GENERIC_LOCKBREAK
1397         bool
1398 @@ -141,6 +142,7 @@
1399         select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
1400         select GENERIC_STRNCPY_FROM_USER
1401         select GENERIC_STRNLEN_USER
1402 +       select HAVE_PREEMPT_LAZY
1403         select HAVE_MOD_ARCH_SPECIFIC
1404         select MODULES_USE_ELF_RELA
1405         select CLONE_BACKWARDS
1406 @@ -319,7 +321,7 @@
1408  config HIGHMEM
1409         bool "High memory support"
1410 -       depends on PPC32
1411 +       depends on PPC32 && !PREEMPT_RT_FULL
1413  source kernel/Kconfig.hz
1414  source kernel/Kconfig.preempt
1415 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/asm-offsets.c linux-4.4.46/arch/powerpc/kernel/asm-offsets.c
1416 --- linux-4.4.46.orig/arch/powerpc/kernel/asm-offsets.c 2017-02-01 08:31:11.000000000 +0100
1417 +++ linux-4.4.46/arch/powerpc/kernel/asm-offsets.c      2017-02-03 17:18:05.643414939 +0100
1418 @@ -160,6 +160,7 @@
1419         DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
1420         DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
1421         DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
1422 +       DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
1423         DEFINE(TI_TASK, offsetof(struct thread_info, task));
1424         DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
1426 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/entry_32.S linux-4.4.46/arch/powerpc/kernel/entry_32.S
1427 --- linux-4.4.46.orig/arch/powerpc/kernel/entry_32.S    2017-02-01 08:31:11.000000000 +0100
1428 +++ linux-4.4.46/arch/powerpc/kernel/entry_32.S 2017-02-03 17:18:05.643414939 +0100
1429 @@ -818,7 +818,14 @@
1430         cmpwi   0,r0,0          /* if non-zero, just restore regs and return */
1431         bne     restore
1432         andi.   r8,r8,_TIF_NEED_RESCHED
1433 +       bne+    1f
1434 +       lwz     r0,TI_PREEMPT_LAZY(r9)
1435 +       cmpwi   0,r0,0          /* if non-zero, just restore regs and return */
1436 +       bne     restore
1437 +       lwz     r0,TI_FLAGS(r9)
1438 +       andi.   r0,r0,_TIF_NEED_RESCHED_LAZY
1439         beq+    restore
1441         lwz     r3,_MSR(r1)
1442         andi.   r0,r3,MSR_EE    /* interrupts off? */
1443         beq     restore         /* don't schedule if so */
1444 @@ -829,11 +836,11 @@
1445          */
1446         bl      trace_hardirqs_off
1447  #endif
1448 -1:     bl      preempt_schedule_irq
1449 +2:     bl      preempt_schedule_irq
1450         CURRENT_THREAD_INFO(r9, r1)
1451         lwz     r3,TI_FLAGS(r9)
1452 -       andi.   r0,r3,_TIF_NEED_RESCHED
1453 -       bne-    1b
1454 +       andi.   r0,r3,_TIF_NEED_RESCHED_MASK
1455 +       bne-    2b
1456  #ifdef CONFIG_TRACE_IRQFLAGS
1457         /* And now, to properly rebalance the above, we tell lockdep they
1458          * are being turned back on, which will happen when we return
1459 @@ -1154,7 +1161,7 @@
1460  #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
1462  do_work:                       /* r10 contains MSR_KERNEL here */
1463 -       andi.   r0,r9,_TIF_NEED_RESCHED
1464 +       andi.   r0,r9,_TIF_NEED_RESCHED_MASK
1465         beq     do_user_signal
1467  do_resched:                    /* r10 contains MSR_KERNEL here */
1468 @@ -1175,7 +1182,7 @@
1469         MTMSRD(r10)             /* disable interrupts */
1470         CURRENT_THREAD_INFO(r9, r1)
1471         lwz     r9,TI_FLAGS(r9)
1472 -       andi.   r0,r9,_TIF_NEED_RESCHED
1473 +       andi.   r0,r9,_TIF_NEED_RESCHED_MASK
1474         bne-    do_resched
1475         andi.   r0,r9,_TIF_USER_WORK_MASK
1476         beq     restore_user
1477 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/entry_64.S linux-4.4.46/arch/powerpc/kernel/entry_64.S
1478 --- linux-4.4.46.orig/arch/powerpc/kernel/entry_64.S    2017-02-01 08:31:11.000000000 +0100
1479 +++ linux-4.4.46/arch/powerpc/kernel/entry_64.S 2017-02-03 17:18:05.643414939 +0100
1480 @@ -683,7 +683,7 @@
1481  #else
1482         beq     restore
1483  #endif
1484 -1:     andi.   r0,r4,_TIF_NEED_RESCHED
1485 +1:     andi.   r0,r4,_TIF_NEED_RESCHED_MASK
1486         beq     2f
1487         bl      restore_interrupts
1488         SCHEDULE_USER
1489 @@ -745,10 +745,18 @@
1491  #ifdef CONFIG_PREEMPT
1492         /* Check if we need to preempt */
1493 +       lwz     r8,TI_PREEMPT(r9)
1494 +       cmpwi   0,r8,0          /* if non-zero, just restore regs and return */
1495 +       bne     restore
1496         andi.   r0,r4,_TIF_NEED_RESCHED
1497 +       bne+    check_count
1499 +       andi.   r0,r4,_TIF_NEED_RESCHED_LAZY
1500         beq+    restore
1501 +       lwz     r8,TI_PREEMPT_LAZY(r9)
1503         /* Check that preempt_count() == 0 and interrupts are enabled */
1504 -       lwz     r8,TI_PREEMPT(r9)
1505 +check_count:
1506         cmpwi   cr1,r8,0
1507         ld      r0,SOFTE(r1)
1508         cmpdi   r0,0
1509 @@ -765,7 +773,7 @@
1510         /* Re-test flags and eventually loop */
1511         CURRENT_THREAD_INFO(r9, r1)
1512         ld      r4,TI_FLAGS(r9)
1513 -       andi.   r0,r4,_TIF_NEED_RESCHED
1514 +       andi.   r0,r4,_TIF_NEED_RESCHED_MASK
1515         bne     1b
1517         /*
1518 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/irq.c linux-4.4.46/arch/powerpc/kernel/irq.c
1519 --- linux-4.4.46.orig/arch/powerpc/kernel/irq.c 2017-02-01 08:31:11.000000000 +0100
1520 +++ linux-4.4.46/arch/powerpc/kernel/irq.c      2017-02-03 17:18:05.643414939 +0100
1521 @@ -614,6 +614,7 @@
1522         }
1525 +#ifndef CONFIG_PREEMPT_RT_FULL
1526  void do_softirq_own_stack(void)
1528         struct thread_info *curtp, *irqtp;
1529 @@ -631,6 +632,7 @@
1530         if (irqtp->flags)
1531                 set_bits(irqtp->flags, &curtp->flags);
1533 +#endif
1535  irq_hw_number_t virq_to_hw(unsigned int virq)
1537 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/misc_32.S linux-4.4.46/arch/powerpc/kernel/misc_32.S
1538 --- linux-4.4.46.orig/arch/powerpc/kernel/misc_32.S     2017-02-01 08:31:11.000000000 +0100
1539 +++ linux-4.4.46/arch/powerpc/kernel/misc_32.S  2017-02-03 17:18:05.643414939 +0100
1540 @@ -40,6 +40,7 @@
1541   * We store the saved ksp_limit in the unused part
1542   * of the STACK_FRAME_OVERHEAD
1543   */
1544 +#ifndef CONFIG_PREEMPT_RT_FULL
1545  _GLOBAL(call_do_softirq)
1546         mflr    r0
1547         stw     r0,4(r1)
1548 @@ -56,6 +57,7 @@
1549         stw     r10,THREAD+KSP_LIMIT(r2)
1550         mtlr    r0
1551         blr
1552 +#endif
1554  /*
1555   * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
1556 diff -Nur linux-4.4.46.orig/arch/powerpc/kernel/misc_64.S linux-4.4.46/arch/powerpc/kernel/misc_64.S
1557 --- linux-4.4.46.orig/arch/powerpc/kernel/misc_64.S     2017-02-01 08:31:11.000000000 +0100
1558 +++ linux-4.4.46/arch/powerpc/kernel/misc_64.S  2017-02-03 17:18:05.643414939 +0100
1559 @@ -30,6 +30,7 @@
1561         .text
1563 +#ifndef CONFIG_PREEMPT_RT_FULL
1564  _GLOBAL(call_do_softirq)
1565         mflr    r0
1566         std     r0,16(r1)
1567 @@ -40,6 +41,7 @@
1568         ld      r0,16(r1)
1569         mtlr    r0
1570         blr
1571 +#endif
1573  _GLOBAL(call_do_irq)
1574         mflr    r0
1575 diff -Nur linux-4.4.46.orig/arch/powerpc/kvm/book3s_hv.c linux-4.4.46/arch/powerpc/kvm/book3s_hv.c
1576 --- linux-4.4.46.orig/arch/powerpc/kvm/book3s_hv.c      2017-02-01 08:31:11.000000000 +0100
1577 +++ linux-4.4.46/arch/powerpc/kvm/book3s_hv.c   2017-02-03 17:18:05.643414939 +0100
1578 @@ -114,11 +114,11 @@
1579  static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
1581         int cpu;
1582 -       wait_queue_head_t *wqp;
1583 +       struct swait_queue_head *wqp;
1585         wqp = kvm_arch_vcpu_wq(vcpu);
1586 -       if (waitqueue_active(wqp)) {
1587 -               wake_up_interruptible(wqp);
1588 +       if (swait_active(wqp)) {
1589 +               swake_up(wqp);
1590                 ++vcpu->stat.halt_wakeup;
1591         }
1593 @@ -707,8 +707,8 @@
1594                 tvcpu->arch.prodded = 1;
1595                 smp_mb();
1596                 if (vcpu->arch.ceded) {
1597 -                       if (waitqueue_active(&vcpu->wq)) {
1598 -                               wake_up_interruptible(&vcpu->wq);
1599 +                       if (swait_active(&vcpu->wq)) {
1600 +                               swake_up(&vcpu->wq);
1601                                 vcpu->stat.halt_wakeup++;
1602                         }
1603                 }
1604 @@ -1453,7 +1453,7 @@
1605         INIT_LIST_HEAD(&vcore->runnable_threads);
1606         spin_lock_init(&vcore->lock);
1607         spin_lock_init(&vcore->stoltb_lock);
1608 -       init_waitqueue_head(&vcore->wq);
1609 +       init_swait_queue_head(&vcore->wq);
1610         vcore->preempt_tb = TB_NIL;
1611         vcore->lpcr = kvm->arch.lpcr;
1612         vcore->first_vcpuid = core * threads_per_subcore;
1613 @@ -2525,10 +2525,9 @@
1615         struct kvm_vcpu *vcpu;
1616         int do_sleep = 1;
1617 +       DECLARE_SWAITQUEUE(wait);
1619 -       DEFINE_WAIT(wait);
1621 -       prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
1622 +       prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
1624         /*
1625          * Check one last time for pending exceptions and ceded state after
1626 @@ -2542,7 +2541,7 @@
1627         }
1629         if (!do_sleep) {
1630 -               finish_wait(&vc->wq, &wait);
1631 +               finish_swait(&vc->wq, &wait);
1632                 return;
1633         }
1635 @@ -2550,7 +2549,7 @@
1636         trace_kvmppc_vcore_blocked(vc, 0);
1637         spin_unlock(&vc->lock);
1638         schedule();
1639 -       finish_wait(&vc->wq, &wait);
1640 +       finish_swait(&vc->wq, &wait);
1641         spin_lock(&vc->lock);
1642         vc->vcore_state = VCORE_INACTIVE;
1643         trace_kvmppc_vcore_blocked(vc, 1);
1644 @@ -2606,7 +2605,7 @@
1645                         kvmppc_start_thread(vcpu, vc);
1646                         trace_kvm_guest_enter(vcpu);
1647                 } else if (vc->vcore_state == VCORE_SLEEPING) {
1648 -                       wake_up(&vc->wq);
1649 +                       swake_up(&vc->wq);
1650                 }
1652         }
1653 diff -Nur linux-4.4.46.orig/arch/powerpc/kvm/Kconfig linux-4.4.46/arch/powerpc/kvm/Kconfig
1654 --- linux-4.4.46.orig/arch/powerpc/kvm/Kconfig  2017-02-01 08:31:11.000000000 +0100
1655 +++ linux-4.4.46/arch/powerpc/kvm/Kconfig       2017-02-03 17:18:05.643414939 +0100
1656 @@ -172,6 +172,7 @@
1657  config KVM_MPIC
1658         bool "KVM in-kernel MPIC emulation"
1659         depends on KVM && E500
1660 +       depends on !PREEMPT_RT_FULL
1661         select HAVE_KVM_IRQCHIP
1662         select HAVE_KVM_IRQFD
1663         select HAVE_KVM_IRQ_ROUTING
1664 diff -Nur linux-4.4.46.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.4.46/arch/powerpc/platforms/ps3/device-init.c
1665 --- linux-4.4.46.orig/arch/powerpc/platforms/ps3/device-init.c  2017-02-01 08:31:11.000000000 +0100
1666 +++ linux-4.4.46/arch/powerpc/platforms/ps3/device-init.c       2017-02-03 17:18:05.643414939 +0100
1667 @@ -752,7 +752,7 @@
1668         }
1669         pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
1671 -       res = wait_event_interruptible(dev->done.wait,
1672 +       res = swait_event_interruptible(dev->done.wait,
1673                                        dev->done.done || kthread_should_stop());
1674         if (kthread_should_stop())
1675                 res = -EINTR;
1676 diff -Nur linux-4.4.46.orig/arch/s390/include/asm/kvm_host.h linux-4.4.46/arch/s390/include/asm/kvm_host.h
1677 --- linux-4.4.46.orig/arch/s390/include/asm/kvm_host.h  2017-02-01 08:31:11.000000000 +0100
1678 +++ linux-4.4.46/arch/s390/include/asm/kvm_host.h       2017-02-03 17:18:05.643414939 +0100
1679 @@ -427,7 +427,7 @@
1680  struct kvm_s390_local_interrupt {
1681         spinlock_t lock;
1682         struct kvm_s390_float_interrupt *float_int;
1683 -       wait_queue_head_t *wq;
1684 +       struct swait_queue_head *wq;
1685         atomic_t *cpuflags;
1686         DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
1687         struct kvm_s390_irq_payload irq;
1688 diff -Nur linux-4.4.46.orig/arch/s390/kvm/interrupt.c linux-4.4.46/arch/s390/kvm/interrupt.c
1689 --- linux-4.4.46.orig/arch/s390/kvm/interrupt.c 2017-02-01 08:31:11.000000000 +0100
1690 +++ linux-4.4.46/arch/s390/kvm/interrupt.c      2017-02-03 17:18:05.643414939 +0100
1691 @@ -868,13 +868,13 @@
1693  void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
1695 -       if (waitqueue_active(&vcpu->wq)) {
1696 +       if (swait_active(&vcpu->wq)) {
1697                 /*
1698                  * The vcpu gave up the cpu voluntarily, mark it as a good
1699                  * yield-candidate.
1700                  */
1701                 vcpu->preempted = true;
1702 -               wake_up_interruptible(&vcpu->wq);
1703 +               swake_up(&vcpu->wq);
1704                 vcpu->stat.halt_wakeup++;
1705         }
1707 diff -Nur linux-4.4.46.orig/arch/sh/kernel/irq.c linux-4.4.46/arch/sh/kernel/irq.c
1708 --- linux-4.4.46.orig/arch/sh/kernel/irq.c      2017-02-01 08:31:11.000000000 +0100
1709 +++ linux-4.4.46/arch/sh/kernel/irq.c   2017-02-03 17:18:05.643414939 +0100
1710 @@ -147,6 +147,7 @@
1711         hardirq_ctx[cpu] = NULL;
1714 +#ifndef CONFIG_PREEMPT_RT_FULL
1715  void do_softirq_own_stack(void)
1717         struct thread_info *curctx;
1718 @@ -174,6 +175,7 @@
1719                   "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
1720         );
1722 +#endif
1723  #else
1724  static inline void handle_one_irq(unsigned int irq)
1726 diff -Nur linux-4.4.46.orig/arch/sparc/Kconfig linux-4.4.46/arch/sparc/Kconfig
1727 --- linux-4.4.46.orig/arch/sparc/Kconfig        2017-02-01 08:31:11.000000000 +0100
1728 +++ linux-4.4.46/arch/sparc/Kconfig     2017-02-03 17:18:05.643414939 +0100
1729 @@ -189,12 +189,10 @@
1730  source kernel/Kconfig.hz
1732  config RWSEM_GENERIC_SPINLOCK
1733 -       bool
1734 -       default y if SPARC32
1735 +       def_bool PREEMPT_RT_FULL
1737  config RWSEM_XCHGADD_ALGORITHM
1738 -       bool
1739 -       default y if SPARC64
1740 +       def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
1742  config GENERIC_HWEIGHT
1743         bool
1744 diff -Nur linux-4.4.46.orig/arch/sparc/kernel/irq_64.c linux-4.4.46/arch/sparc/kernel/irq_64.c
1745 --- linux-4.4.46.orig/arch/sparc/kernel/irq_64.c        2017-02-01 08:31:11.000000000 +0100
1746 +++ linux-4.4.46/arch/sparc/kernel/irq_64.c     2017-02-03 17:18:05.643414939 +0100
1747 @@ -854,6 +854,7 @@
1748         set_irq_regs(old_regs);
1751 +#ifndef CONFIG_PREEMPT_RT_FULL
1752  void do_softirq_own_stack(void)
1754         void *orig_sp, *sp = softirq_stack[smp_processor_id()];
1755 @@ -868,6 +869,7 @@
1756         __asm__ __volatile__("mov %0, %%sp"
1757                              : : "r" (orig_sp));
1759 +#endif
1761  #ifdef CONFIG_HOTPLUG_CPU
1762  void fixup_irqs(void)
1763 diff -Nur linux-4.4.46.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.4.46/arch/x86/crypto/aesni-intel_glue.c
1764 --- linux-4.4.46.orig/arch/x86/crypto/aesni-intel_glue.c        2017-02-01 08:31:11.000000000 +0100
1765 +++ linux-4.4.46/arch/x86/crypto/aesni-intel_glue.c     2017-02-03 17:18:05.643414939 +0100
1766 @@ -383,14 +383,14 @@
1767         err = blkcipher_walk_virt(desc, &walk);
1768         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1770 -       kernel_fpu_begin();
1771         while ((nbytes = walk.nbytes)) {
1772 +               kernel_fpu_begin();
1773                 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1774 -                             nbytes & AES_BLOCK_MASK);
1775 +                               nbytes & AES_BLOCK_MASK);
1776 +               kernel_fpu_end();
1777                 nbytes &= AES_BLOCK_SIZE - 1;
1778                 err = blkcipher_walk_done(desc, &walk, nbytes);
1779         }
1780 -       kernel_fpu_end();
1782         return err;
1784 @@ -407,14 +407,14 @@
1785         err = blkcipher_walk_virt(desc, &walk);
1786         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1788 -       kernel_fpu_begin();
1789         while ((nbytes = walk.nbytes)) {
1790 +               kernel_fpu_begin();
1791                 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1792                               nbytes & AES_BLOCK_MASK);
1793 +               kernel_fpu_end();
1794                 nbytes &= AES_BLOCK_SIZE - 1;
1795                 err = blkcipher_walk_done(desc, &walk, nbytes);
1796         }
1797 -       kernel_fpu_end();
1799         return err;
1801 @@ -431,14 +431,14 @@
1802         err = blkcipher_walk_virt(desc, &walk);
1803         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1805 -       kernel_fpu_begin();
1806         while ((nbytes = walk.nbytes)) {
1807 +               kernel_fpu_begin();
1808                 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1809                               nbytes & AES_BLOCK_MASK, walk.iv);
1810 +               kernel_fpu_end();
1811                 nbytes &= AES_BLOCK_SIZE - 1;
1812                 err = blkcipher_walk_done(desc, &walk, nbytes);
1813         }
1814 -       kernel_fpu_end();
1816         return err;
1818 @@ -455,14 +455,14 @@
1819         err = blkcipher_walk_virt(desc, &walk);
1820         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1822 -       kernel_fpu_begin();
1823         while ((nbytes = walk.nbytes)) {
1824 +               kernel_fpu_begin();
1825                 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1826                               nbytes & AES_BLOCK_MASK, walk.iv);
1827 +               kernel_fpu_end();
1828                 nbytes &= AES_BLOCK_SIZE - 1;
1829                 err = blkcipher_walk_done(desc, &walk, nbytes);
1830         }
1831 -       kernel_fpu_end();
1833         return err;
1835 @@ -514,18 +514,20 @@
1836         err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
1837         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1839 -       kernel_fpu_begin();
1840         while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
1841 +               kernel_fpu_begin();
1842                 aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
1843                                       nbytes & AES_BLOCK_MASK, walk.iv);
1844 +               kernel_fpu_end();
1845                 nbytes &= AES_BLOCK_SIZE - 1;
1846                 err = blkcipher_walk_done(desc, &walk, nbytes);
1847         }
1848         if (walk.nbytes) {
1849 +               kernel_fpu_begin();
1850                 ctr_crypt_final(ctx, &walk);
1851 +               kernel_fpu_end();
1852                 err = blkcipher_walk_done(desc, &walk, 0);
1853         }
1854 -       kernel_fpu_end();
1856         return err;
1858 diff -Nur linux-4.4.46.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.4.46/arch/x86/crypto/cast5_avx_glue.c
1859 --- linux-4.4.46.orig/arch/x86/crypto/cast5_avx_glue.c  2017-02-01 08:31:11.000000000 +0100
1860 +++ linux-4.4.46/arch/x86/crypto/cast5_avx_glue.c       2017-02-03 17:18:05.647415094 +0100
1861 @@ -59,7 +59,7 @@
1862  static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
1863                      bool enc)
1865 -       bool fpu_enabled = false;
1866 +       bool fpu_enabled;
1867         struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1868         const unsigned int bsize = CAST5_BLOCK_SIZE;
1869         unsigned int nbytes;
1870 @@ -75,7 +75,7 @@
1871                 u8 *wsrc = walk->src.virt.addr;
1872                 u8 *wdst = walk->dst.virt.addr;
1874 -               fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
1875 +               fpu_enabled = cast5_fpu_begin(false, nbytes);
1877                 /* Process multi-block batch */
1878                 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
1879 @@ -103,10 +103,9 @@
1880                 } while (nbytes >= bsize);
1882  done:
1883 +               cast5_fpu_end(fpu_enabled);
1884                 err = blkcipher_walk_done(desc, walk, nbytes);
1885         }
1887 -       cast5_fpu_end(fpu_enabled);
1888         return err;
1891 @@ -227,7 +226,7 @@
1892  static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1893                        struct scatterlist *src, unsigned int nbytes)
1895 -       bool fpu_enabled = false;
1896 +       bool fpu_enabled;
1897         struct blkcipher_walk walk;
1898         int err;
1900 @@ -236,12 +235,11 @@
1901         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1903         while ((nbytes = walk.nbytes)) {
1904 -               fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
1905 +               fpu_enabled = cast5_fpu_begin(false, nbytes);
1906                 nbytes = __cbc_decrypt(desc, &walk);
1907 +               cast5_fpu_end(fpu_enabled);
1908                 err = blkcipher_walk_done(desc, &walk, nbytes);
1909         }
1911 -       cast5_fpu_end(fpu_enabled);
1912         return err;
1915 @@ -311,7 +309,7 @@
1916  static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1917                      struct scatterlist *src, unsigned int nbytes)
1919 -       bool fpu_enabled = false;
1920 +       bool fpu_enabled;
1921         struct blkcipher_walk walk;
1922         int err;
1924 @@ -320,13 +318,12 @@
1925         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
1927         while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
1928 -               fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
1929 +               fpu_enabled = cast5_fpu_begin(false, nbytes);
1930                 nbytes = __ctr_crypt(desc, &walk);
1931 +               cast5_fpu_end(fpu_enabled);
1932                 err = blkcipher_walk_done(desc, &walk, nbytes);
1933         }
1935 -       cast5_fpu_end(fpu_enabled);
1937         if (walk.nbytes) {
1938                 ctr_crypt_final(desc, &walk);
1939                 err = blkcipher_walk_done(desc, &walk, 0);
1940 diff -Nur linux-4.4.46.orig/arch/x86/crypto/glue_helper.c linux-4.4.46/arch/x86/crypto/glue_helper.c
1941 --- linux-4.4.46.orig/arch/x86/crypto/glue_helper.c     2017-02-01 08:31:11.000000000 +0100
1942 +++ linux-4.4.46/arch/x86/crypto/glue_helper.c  2017-02-03 17:18:05.647415094 +0100
1943 @@ -39,7 +39,7 @@
1944         void *ctx = crypto_blkcipher_ctx(desc->tfm);
1945         const unsigned int bsize = 128 / 8;
1946         unsigned int nbytes, i, func_bytes;
1947 -       bool fpu_enabled = false;
1948 +       bool fpu_enabled;
1949         int err;
1951         err = blkcipher_walk_virt(desc, walk);
1952 @@ -49,7 +49,7 @@
1953                 u8 *wdst = walk->dst.virt.addr;
1955                 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
1956 -                                            desc, fpu_enabled, nbytes);
1957 +                                            desc, false, nbytes);
1959                 for (i = 0; i < gctx->num_funcs; i++) {
1960                         func_bytes = bsize * gctx->funcs[i].num_blocks;
1961 @@ -71,10 +71,10 @@
1962                 }
1964  done:
1965 +               glue_fpu_end(fpu_enabled);
1966                 err = blkcipher_walk_done(desc, walk, nbytes);
1967         }
1969 -       glue_fpu_end(fpu_enabled);
1970         return err;
1973 @@ -194,7 +194,7 @@
1974                             struct scatterlist *src, unsigned int nbytes)
1976         const unsigned int bsize = 128 / 8;
1977 -       bool fpu_enabled = false;
1978 +       bool fpu_enabled;
1979         struct blkcipher_walk walk;
1980         int err;
1982 @@ -203,12 +203,12 @@
1984         while ((nbytes = walk.nbytes)) {
1985                 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
1986 -                                            desc, fpu_enabled, nbytes);
1987 +                                            desc, false, nbytes);
1988                 nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
1989 +               glue_fpu_end(fpu_enabled);
1990                 err = blkcipher_walk_done(desc, &walk, nbytes);
1991         }
1993 -       glue_fpu_end(fpu_enabled);
1994         return err;
1996  EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
1997 @@ -277,7 +277,7 @@
1998                           struct scatterlist *src, unsigned int nbytes)
2000         const unsigned int bsize = 128 / 8;
2001 -       bool fpu_enabled = false;
2002 +       bool fpu_enabled;
2003         struct blkcipher_walk walk;
2004         int err;
2006 @@ -286,13 +286,12 @@
2008         while ((nbytes = walk.nbytes) >= bsize) {
2009                 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
2010 -                                            desc, fpu_enabled, nbytes);
2011 +                                            desc, false, nbytes);
2012                 nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
2013 +               glue_fpu_end(fpu_enabled);
2014                 err = blkcipher_walk_done(desc, &walk, nbytes);
2015         }
2017 -       glue_fpu_end(fpu_enabled);
2019         if (walk.nbytes) {
2020                 glue_ctr_crypt_final_128bit(
2021                         gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
2022 @@ -347,7 +346,7 @@
2023                           void *tweak_ctx, void *crypt_ctx)
2025         const unsigned int bsize = 128 / 8;
2026 -       bool fpu_enabled = false;
2027 +       bool fpu_enabled;
2028         struct blkcipher_walk walk;
2029         int err;
2031 @@ -360,21 +359,21 @@
2033         /* set minimum length to bsize, for tweak_fn */
2034         fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
2035 -                                    desc, fpu_enabled,
2036 +                                    desc, false,
2037                                      nbytes < bsize ? bsize : nbytes);
2039         /* calculate first value of T */
2040         tweak_fn(tweak_ctx, walk.iv, walk.iv);
2041 +       glue_fpu_end(fpu_enabled);
2043         while (nbytes) {
2044 +               fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
2045 +                               desc, false, nbytes);
2046                 nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
2048 +               glue_fpu_end(fpu_enabled);
2049                 err = blkcipher_walk_done(desc, &walk, nbytes);
2050                 nbytes = walk.nbytes;
2051         }
2053 -       glue_fpu_end(fpu_enabled);
2055         return err;
2057  EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
2058 diff -Nur linux-4.4.46.orig/arch/x86/entry/common.c linux-4.4.46/arch/x86/entry/common.c
2059 --- linux-4.4.46.orig/arch/x86/entry/common.c   2017-02-01 08:31:11.000000000 +0100
2060 +++ linux-4.4.46/arch/x86/entry/common.c        2017-02-03 17:18:05.647415094 +0100
2061 @@ -220,7 +220,7 @@
2063  #define EXIT_TO_USERMODE_LOOP_FLAGS                            \
2064         (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |   \
2065 -        _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
2066 +        _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
2068  static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
2070 @@ -236,9 +236,16 @@
2071                 /* We have work to do. */
2072                 local_irq_enable();
2074 -               if (cached_flags & _TIF_NEED_RESCHED)
2075 +               if (cached_flags & _TIF_NEED_RESCHED_MASK)
2076                         schedule();
2078 +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
2079 +               if (unlikely(current->forced_info.si_signo)) {
2080 +                       struct task_struct *t = current;
2081 +                       force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
2082 +                       t->forced_info.si_signo = 0;
2083 +               }
2084 +#endif
2085                 if (cached_flags & _TIF_UPROBE)
2086                         uprobe_notify_resume(regs);
2088 diff -Nur linux-4.4.46.orig/arch/x86/entry/entry_32.S linux-4.4.46/arch/x86/entry/entry_32.S
2089 --- linux-4.4.46.orig/arch/x86/entry/entry_32.S 2017-02-01 08:31:11.000000000 +0100
2090 +++ linux-4.4.46/arch/x86/entry/entry_32.S      2017-02-03 17:18:05.647415094 +0100
2091 @@ -278,8 +278,24 @@
2092  ENTRY(resume_kernel)
2093         DISABLE_INTERRUPTS(CLBR_ANY)
2094  need_resched:
2095 +       # preempt count == 0 + NEED_RS set?
2096         cmpl    $0, PER_CPU_VAR(__preempt_count)
2097 +#ifndef CONFIG_PREEMPT_LAZY
2098         jnz     restore_all
2099 +#else
2100 +       jz test_int_off
2102 +       # at least preempt count == 0 ?
2103 +       cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
2104 +       jne restore_all
2106 +       cmpl $0,TI_preempt_lazy_count(%ebp)     # non-zero preempt_lazy_count ?
2107 +       jnz restore_all
2109 +       testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
2110 +       jz restore_all
2111 +test_int_off:
2112 +#endif
2113         testl   $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
2114         jz      restore_all
2115         call    preempt_schedule_irq
2116 diff -Nur linux-4.4.46.orig/arch/x86/entry/entry_64.S linux-4.4.46/arch/x86/entry/entry_64.S
2117 --- linux-4.4.46.orig/arch/x86/entry/entry_64.S 2017-02-01 08:31:11.000000000 +0100
2118 +++ linux-4.4.46/arch/x86/entry/entry_64.S      2017-02-03 17:18:05.647415094 +0100
2119 @@ -579,7 +579,23 @@
2120         bt      $9, EFLAGS(%rsp)                /* were interrupts off? */
2121         jnc     1f
2122  0:     cmpl    $0, PER_CPU_VAR(__preempt_count)
2123 +#ifndef CONFIG_PREEMPT_LAZY
2124         jnz     1f
2125 +#else
2126 +       jz      do_preempt_schedule_irq
2128 +       # at least preempt count == 0 ?
2129 +       cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
2130 +       jnz     1f
2132 +       GET_THREAD_INFO(%rcx)
2133 +       cmpl    $0, TI_preempt_lazy_count(%rcx)
2134 +       jnz     1f
2136 +       bt      $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
2137 +       jnc     1f
2138 +do_preempt_schedule_irq:
2139 +#endif
2140         call    preempt_schedule_irq
2141         jmp     0b
2142  1:
2143 @@ -867,6 +883,7 @@
2144         jmp     2b
2145         .previous
2147 +#ifndef CONFIG_PREEMPT_RT_FULL
2148  /* Call softirq on interrupt stack. Interrupts are off. */
2149  ENTRY(do_softirq_own_stack)
2150         pushq   %rbp
2151 @@ -879,6 +896,7 @@
2152         decl    PER_CPU_VAR(irq_count)
2153         ret
2154  END(do_softirq_own_stack)
2155 +#endif
2157  #ifdef CONFIG_XEN
2158  idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
2159 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/preempt.h linux-4.4.46/arch/x86/include/asm/preempt.h
2160 --- linux-4.4.46.orig/arch/x86/include/asm/preempt.h    2017-02-01 08:31:11.000000000 +0100
2161 +++ linux-4.4.46/arch/x86/include/asm/preempt.h 2017-02-03 17:18:05.647415094 +0100
2162 @@ -79,17 +79,46 @@
2163   * a decrement which hits zero means we have no preempt_count and should
2164   * reschedule.
2165   */
2166 -static __always_inline bool __preempt_count_dec_and_test(void)
2167 +static __always_inline bool ____preempt_count_dec_and_test(void)
2169         GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
2172 +static __always_inline bool __preempt_count_dec_and_test(void)
2174 +       if (____preempt_count_dec_and_test())
2175 +               return true;
2176 +#ifdef CONFIG_PREEMPT_LAZY
2177 +       if (current_thread_info()->preempt_lazy_count)
2178 +               return false;
2179 +       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
2180 +#else
2181 +       return false;
2182 +#endif
2185  /*
2186   * Returns true when we need to resched and can (barring IRQ state).
2187   */
2188  static __always_inline bool should_resched(int preempt_offset)
2190 +#ifdef CONFIG_PREEMPT_LAZY
2191 +       u32 tmp;
2193 +       tmp = raw_cpu_read_4(__preempt_count);
2194 +       if (tmp == preempt_offset)
2195 +               return true;
2197 +       /* preempt count == 0 ? */
2198 +       tmp &= ~PREEMPT_NEED_RESCHED;
2199 +       if (tmp)
2200 +               return false;
2201 +       if (current_thread_info()->preempt_lazy_count)
2202 +               return false;
2203 +       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
2204 +#else
2205         return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
2206 +#endif
2209  #ifdef CONFIG_PREEMPT
2210 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/signal.h linux-4.4.46/arch/x86/include/asm/signal.h
2211 --- linux-4.4.46.orig/arch/x86/include/asm/signal.h     2017-02-01 08:31:11.000000000 +0100
2212 +++ linux-4.4.46/arch/x86/include/asm/signal.h  2017-02-03 17:18:05.647415094 +0100
2213 @@ -23,6 +23,19 @@
2214         unsigned long sig[_NSIG_WORDS];
2215  } sigset_t;
2218 + * Because some traps use the IST stack, we must keep preemption
2219 + * disabled while calling do_trap(), but do_trap() may call
2220 + * force_sig_info() which will grab the signal spin_locks for the
2221 + * task, which in PREEMPT_RT_FULL are mutexes.  By defining
2222 + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
2223 + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
2224 + * trap.
2225 + */
2226 +#if defined(CONFIG_PREEMPT_RT_FULL)
2227 +#define ARCH_RT_DELAYS_SIGNAL_SEND
2228 +#endif
2230  #ifndef CONFIG_COMPAT
2231  typedef sigset_t compat_sigset_t;
2232  #endif
2233 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/stackprotector.h linux-4.4.46/arch/x86/include/asm/stackprotector.h
2234 --- linux-4.4.46.orig/arch/x86/include/asm/stackprotector.h     2017-02-01 08:31:11.000000000 +0100
2235 +++ linux-4.4.46/arch/x86/include/asm/stackprotector.h  2017-02-03 17:18:05.647415094 +0100
2236 @@ -59,7 +59,7 @@
2237   */
2238  static __always_inline void boot_init_stack_canary(void)
2240 -       u64 canary;
2241 +       u64 uninitialized_var(canary);
2242         u64 tsc;
2244  #ifdef CONFIG_X86_64
2245 @@ -70,8 +70,15 @@
2246          * of randomness. The TSC only matters for very early init,
2247          * there it already has some randomness on most systems. Later
2248          * on during the bootup the random pool has true entropy too.
2249 +        *
2250 +        * For preempt-rt we need to weaken the randomness a bit, as
2251 +        * we can't call into the random generator from atomic context
2252 +        * due to locking constraints. We just leave canary
2253 +        * uninitialized and use the TSC based randomness on top of it.
2254          */
2255 +#ifndef CONFIG_PREEMPT_RT_FULL
2256         get_random_bytes(&canary, sizeof(canary));
2257 +#endif
2258         tsc = rdtsc();
2259         canary += tsc + (tsc << 32UL);
2261 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/thread_info.h linux-4.4.46/arch/x86/include/asm/thread_info.h
2262 --- linux-4.4.46.orig/arch/x86/include/asm/thread_info.h        2017-02-01 08:31:11.000000000 +0100
2263 +++ linux-4.4.46/arch/x86/include/asm/thread_info.h     2017-02-03 17:18:05.647415094 +0100
2264 @@ -58,6 +58,8 @@
2265         __u32                   status;         /* thread synchronous flags */
2266         __u32                   cpu;            /* current CPU */
2267         mm_segment_t            addr_limit;
2268 +       int                     preempt_lazy_count;     /* 0 => lazy preemptable
2269 +                                                         <0 => BUG */
2270         unsigned int            sig_on_uaccess_error:1;
2271         unsigned int            uaccess_err:1;  /* uaccess failed */
2272  };
2273 @@ -95,6 +97,7 @@
2274  #define TIF_SYSCALL_EMU                6       /* syscall emulation active */
2275  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
2276  #define TIF_SECCOMP            8       /* secure computing */
2277 +#define TIF_NEED_RESCHED_LAZY  9       /* lazy rescheduling necessary */
2278  #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
2279  #define TIF_UPROBE             12      /* breakpointed or singlestepping */
2280  #define TIF_NOTSC              16      /* TSC is not accessible in userland */
2281 @@ -119,6 +122,7 @@
2282  #define _TIF_SYSCALL_EMU       (1 << TIF_SYSCALL_EMU)
2283  #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
2284  #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
2285 +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
2286  #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
2287  #define _TIF_UPROBE            (1 << TIF_UPROBE)
2288  #define _TIF_NOTSC             (1 << TIF_NOTSC)
2289 @@ -152,6 +156,8 @@
2290  #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
2291  #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
2293 +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
2295  #define STACK_WARN             (THREAD_SIZE/8)
2297  /*
2298 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.4.46/arch/x86/include/asm/uv/uv_bau.h
2299 --- linux-4.4.46.orig/arch/x86/include/asm/uv/uv_bau.h  2017-02-01 08:31:11.000000000 +0100
2300 +++ linux-4.4.46/arch/x86/include/asm/uv/uv_bau.h       2017-02-03 17:18:05.647415094 +0100
2301 @@ -615,9 +615,9 @@
2302         cycles_t                send_message;
2303         cycles_t                period_end;
2304         cycles_t                period_time;
2305 -       spinlock_t              uvhub_lock;
2306 -       spinlock_t              queue_lock;
2307 -       spinlock_t              disable_lock;
2308 +       raw_spinlock_t          uvhub_lock;
2309 +       raw_spinlock_t          queue_lock;
2310 +       raw_spinlock_t          disable_lock;
2311         /* tunables */
2312         int                     max_concurr;
2313         int                     max_concurr_const;
2314 @@ -776,15 +776,15 @@
2315   * to be lowered below the current 'v'.  atomic_add_unless can only stop
2316   * on equal.
2317   */
2318 -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
2319 +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
2321 -       spin_lock(lock);
2322 +       raw_spin_lock(lock);
2323         if (atomic_read(v) >= u) {
2324 -               spin_unlock(lock);
2325 +               raw_spin_unlock(lock);
2326                 return 0;
2327         }
2328         atomic_inc(v);
2329 -       spin_unlock(lock);
2330 +       raw_spin_unlock(lock);
2331         return 1;
2334 diff -Nur linux-4.4.46.orig/arch/x86/include/asm/uv/uv_hub.h linux-4.4.46/arch/x86/include/asm/uv/uv_hub.h
2335 --- linux-4.4.46.orig/arch/x86/include/asm/uv/uv_hub.h  2017-02-01 08:31:11.000000000 +0100
2336 +++ linux-4.4.46/arch/x86/include/asm/uv/uv_hub.h       2017-02-03 17:18:05.647415094 +0100
2337 @@ -492,7 +492,7 @@
2338         unsigned short  nr_online_cpus;
2339         unsigned short  pnode;
2340         short           memory_nid;
2341 -       spinlock_t      nmi_lock;       /* obsolete, see uv_hub_nmi */
2342 +       raw_spinlock_t  nmi_lock;       /* obsolete, see uv_hub_nmi */
2343         unsigned long   nmi_count;      /* obsolete, see uv_hub_nmi */
2344  };
2345  extern struct uv_blade_info *uv_blade_info;
2346 diff -Nur linux-4.4.46.orig/arch/x86/Kconfig linux-4.4.46/arch/x86/Kconfig
2347 --- linux-4.4.46.orig/arch/x86/Kconfig  2017-02-01 08:31:11.000000000 +0100
2348 +++ linux-4.4.46/arch/x86/Kconfig       2017-02-03 17:18:05.643414939 +0100
2349 @@ -17,6 +17,7 @@
2350  ### Arch settings
2351  config X86
2352         def_bool y
2353 +       select HAVE_PREEMPT_LAZY
2354         select ACPI_LEGACY_TABLES_LOOKUP        if ACPI
2355         select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
2356         select ANON_INODES
2357 @@ -212,8 +213,11 @@
2358         def_bool y
2359         depends on ISA_DMA_API
2361 +config RWSEM_GENERIC_SPINLOCK
2362 +       def_bool PREEMPT_RT_FULL
2364  config RWSEM_XCHGADD_ALGORITHM
2365 -       def_bool y
2366 +       def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
2368  config GENERIC_CALIBRATE_DELAY
2369         def_bool y
2370 @@ -848,7 +852,7 @@
2371  config MAXSMP
2372         bool "Enable Maximum number of SMP Processors and NUMA Nodes"
2373         depends on X86_64 && SMP && DEBUG_KERNEL
2374 -       select CPUMASK_OFFSTACK
2375 +       select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
2376         ---help---
2377           Enable maximum number of CPUS and NUMA Nodes for this architecture.
2378           If unsure, say N.
2379 diff -Nur linux-4.4.46.orig/arch/x86/kernel/acpi/boot.c linux-4.4.46/arch/x86/kernel/acpi/boot.c
2380 --- linux-4.4.46.orig/arch/x86/kernel/acpi/boot.c       2017-02-01 08:31:11.000000000 +0100
2381 +++ linux-4.4.46/arch/x86/kernel/acpi/boot.c    2017-02-03 17:18:05.655415404 +0100
2382 @@ -87,7 +87,9 @@
2383   *             ->ioapic_mutex
2384   *                     ->ioapic_lock
2385   */
2386 +#ifdef CONFIG_X86_IO_APIC
2387  static DEFINE_MUTEX(acpi_ioapic_lock);
2388 +#endif
2390  /* --------------------------------------------------------------------------
2391                                Boot-time Configuration
2392 diff -Nur linux-4.4.46.orig/arch/x86/kernel/apic/io_apic.c linux-4.4.46/arch/x86/kernel/apic/io_apic.c
2393 --- linux-4.4.46.orig/arch/x86/kernel/apic/io_apic.c    2017-02-01 08:31:11.000000000 +0100
2394 +++ linux-4.4.46/arch/x86/kernel/apic/io_apic.c 2017-02-03 17:18:05.655415404 +0100
2395 @@ -1711,7 +1711,8 @@
2396  static inline bool ioapic_irqd_mask(struct irq_data *data)
2398         /* If we are moving the irq we need to mask it */
2399 -       if (unlikely(irqd_is_setaffinity_pending(data))) {
2400 +       if (unlikely(irqd_is_setaffinity_pending(data) &&
2401 +                    !irqd_irq_inprogress(data))) {
2402                 mask_ioapic_irq(data);
2403                 return true;
2404         }
2405 diff -Nur linux-4.4.46.orig/arch/x86/kernel/apic/x2apic_uv_x.c linux-4.4.46/arch/x86/kernel/apic/x2apic_uv_x.c
2406 --- linux-4.4.46.orig/arch/x86/kernel/apic/x2apic_uv_x.c        2017-02-01 08:31:11.000000000 +0100
2407 +++ linux-4.4.46/arch/x86/kernel/apic/x2apic_uv_x.c     2017-02-03 17:18:05.655415404 +0100
2408 @@ -947,7 +947,7 @@
2409                         uv_blade_info[blade].pnode = pnode;
2410                         uv_blade_info[blade].nr_possible_cpus = 0;
2411                         uv_blade_info[blade].nr_online_cpus = 0;
2412 -                       spin_lock_init(&uv_blade_info[blade].nmi_lock);
2413 +                       raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
2414                         min_pnode = min(pnode, min_pnode);
2415                         max_pnode = max(pnode, max_pnode);
2416                         blade++;
2417 diff -Nur linux-4.4.46.orig/arch/x86/kernel/asm-offsets.c linux-4.4.46/arch/x86/kernel/asm-offsets.c
2418 --- linux-4.4.46.orig/arch/x86/kernel/asm-offsets.c     2017-02-01 08:31:11.000000000 +0100
2419 +++ linux-4.4.46/arch/x86/kernel/asm-offsets.c  2017-02-03 17:18:05.655415404 +0100
2420 @@ -32,6 +32,7 @@
2421         OFFSET(TI_flags, thread_info, flags);
2422         OFFSET(TI_status, thread_info, status);
2423         OFFSET(TI_addr_limit, thread_info, addr_limit);
2424 +       OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
2426         BLANK();
2427         OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
2428 @@ -89,4 +90,5 @@
2430         BLANK();
2431         DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
2432 +       DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
2434 diff -Nur linux-4.4.46.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-4.4.46/arch/x86/kernel/cpu/mcheck/mce.c
2435 --- linux-4.4.46.orig/arch/x86/kernel/cpu/mcheck/mce.c  2017-02-01 08:31:11.000000000 +0100
2436 +++ linux-4.4.46/arch/x86/kernel/cpu/mcheck/mce.c       2017-02-03 17:18:05.659415557 +0100
2437 @@ -41,6 +41,8 @@
2438  #include <linux/debugfs.h>
2439  #include <linux/irq_work.h>
2440  #include <linux/export.h>
2441 +#include <linux/jiffies.h>
2442 +#include <linux/swork.h>
2444  #include <asm/processor.h>
2445  #include <asm/traps.h>
2446 @@ -1236,7 +1238,7 @@
2447  static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
2449  static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
2450 -static DEFINE_PER_CPU(struct timer_list, mce_timer);
2451 +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
2453  static unsigned long mce_adjust_timer_default(unsigned long interval)
2455 @@ -1245,32 +1247,18 @@
2457  static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
2459 -static void __restart_timer(struct timer_list *t, unsigned long interval)
2460 +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
2462 -       unsigned long when = jiffies + interval;
2463 -       unsigned long flags;
2465 -       local_irq_save(flags);
2467 -       if (timer_pending(t)) {
2468 -               if (time_before(when, t->expires))
2469 -                       mod_timer_pinned(t, when);
2470 -       } else {
2471 -               t->expires = round_jiffies(when);
2472 -               add_timer_on(t, smp_processor_id());
2473 -       }
2475 -       local_irq_restore(flags);
2476 +       if (!interval)
2477 +               return HRTIMER_NORESTART;
2478 +       hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
2479 +       return HRTIMER_RESTART;
2482 -static void mce_timer_fn(unsigned long data)
2483 +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
2485 -       struct timer_list *t = this_cpu_ptr(&mce_timer);
2486 -       int cpu = smp_processor_id();
2487         unsigned long iv;
2489 -       WARN_ON(cpu != data);
2491         iv = __this_cpu_read(mce_next_interval);
2493         if (mce_available(this_cpu_ptr(&cpu_info))) {
2494 @@ -1293,7 +1281,7 @@
2496  done:
2497         __this_cpu_write(mce_next_interval, iv);
2498 -       __restart_timer(t, iv);
2499 +       return __restart_timer(timer, iv);
2502  /*
2503 @@ -1301,7 +1289,7 @@
2504   */
2505  void mce_timer_kick(unsigned long interval)
2507 -       struct timer_list *t = this_cpu_ptr(&mce_timer);
2508 +       struct hrtimer *t = this_cpu_ptr(&mce_timer);
2509         unsigned long iv = __this_cpu_read(mce_next_interval);
2511         __restart_timer(t, interval);
2512 @@ -1316,7 +1304,7 @@
2513         int cpu;
2515         for_each_online_cpu(cpu)
2516 -               del_timer_sync(&per_cpu(mce_timer, cpu));
2517 +               hrtimer_cancel(&per_cpu(mce_timer, cpu));
2520  static void mce_do_trigger(struct work_struct *work)
2521 @@ -1326,6 +1314,56 @@
2523  static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
2525 +static void __mce_notify_work(struct swork_event *event)
2527 +       /* Not more than two messages every minute */
2528 +       static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
2530 +       /* wake processes polling /dev/mcelog */
2531 +       wake_up_interruptible(&mce_chrdev_wait);
2533 +       /*
2534 +        * There is no risk of missing notifications because
2535 +        * work_pending is always cleared before the function is
2536 +        * executed.
2537 +        */
2538 +       if (mce_helper[0] && !work_pending(&mce_trigger_work))
2539 +               schedule_work(&mce_trigger_work);
2541 +       if (__ratelimit(&ratelimit))
2542 +               pr_info(HW_ERR "Machine check events logged\n");
2545 +#ifdef CONFIG_PREEMPT_RT_FULL
2546 +static bool notify_work_ready __read_mostly;
2547 +static struct swork_event notify_work;
2549 +static int mce_notify_work_init(void)
2551 +       int err;
2553 +       err = swork_get();
2554 +       if (err)
2555 +               return err;
2557 +       INIT_SWORK(&notify_work, __mce_notify_work);
2558 +       notify_work_ready = true;
2559 +       return 0;
2562 +static void mce_notify_work(void)
2564 +       if (notify_work_ready)
2565 +               swork_queue(&notify_work);
2567 +#else
2568 +static void mce_notify_work(void)
2570 +       __mce_notify_work(NULL);
2572 +static inline int mce_notify_work_init(void) { return 0; }
2573 +#endif
2575  /*
2576   * Notify the user(s) about new machine check events.
2577   * Can be called from interrupt context, but not from machine check/NMI
2578 @@ -1333,19 +1371,8 @@
2579   */
2580  int mce_notify_irq(void)
2582 -       /* Not more than two messages every minute */
2583 -       static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
2585         if (test_and_clear_bit(0, &mce_need_notify)) {
2586 -               /* wake processes polling /dev/mcelog */
2587 -               wake_up_interruptible(&mce_chrdev_wait);
2589 -               if (mce_helper[0])
2590 -                       schedule_work(&mce_trigger_work);
2592 -               if (__ratelimit(&ratelimit))
2593 -                       pr_info(HW_ERR "Machine check events logged\n");
2595 +               mce_notify_work();
2596                 return 1;
2597         }
2598         return 0;
2599 @@ -1639,7 +1666,7 @@
2600         }
2603 -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
2604 +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
2606         unsigned long iv = check_interval * HZ;
2608 @@ -1648,16 +1675,17 @@
2610         per_cpu(mce_next_interval, cpu) = iv;
2612 -       t->expires = round_jiffies(jiffies + iv);
2613 -       add_timer_on(t, cpu);
2614 +       hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
2615 +                       0, HRTIMER_MODE_REL_PINNED);
2618  static void __mcheck_cpu_init_timer(void)
2620 -       struct timer_list *t = this_cpu_ptr(&mce_timer);
2621 +       struct hrtimer *t = this_cpu_ptr(&mce_timer);
2622         unsigned int cpu = smp_processor_id();
2624 -       setup_timer(t, mce_timer_fn, cpu);
2625 +       hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2626 +       t->function = mce_timer_fn;
2627         mce_start_timer(cpu, t);
2630 @@ -2376,6 +2404,8 @@
2631         if (!mce_available(raw_cpu_ptr(&cpu_info)))
2632                 return;
2634 +       hrtimer_cancel(this_cpu_ptr(&mce_timer));
2636         if (!(action & CPU_TASKS_FROZEN))
2637                 cmci_clear();
2639 @@ -2398,6 +2428,7 @@
2640                 if (b->init)
2641                         wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
2642         }
2643 +       __mcheck_cpu_init_timer();
2646  /* Get notified when a cpu comes on/off. Be hotplug friendly. */
2647 @@ -2405,7 +2436,6 @@
2648  mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2650         unsigned int cpu = (unsigned long)hcpu;
2651 -       struct timer_list *t = &per_cpu(mce_timer, cpu);
2653         switch (action & ~CPU_TASKS_FROZEN) {
2654         case CPU_ONLINE:
2655 @@ -2425,11 +2455,9 @@
2656                 break;
2657         case CPU_DOWN_PREPARE:
2658                 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
2659 -               del_timer_sync(t);
2660                 break;
2661         case CPU_DOWN_FAILED:
2662                 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2663 -               mce_start_timer(cpu, t);
2664                 break;
2665         }
2667 @@ -2468,6 +2496,10 @@
2668                 goto err_out;
2669         }
2671 +       err = mce_notify_work_init();
2672 +       if (err)
2673 +               goto err_out;
2675         if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
2676                 err = -ENOMEM;
2677                 goto err_out;
2678 diff -Nur linux-4.4.46.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c linux-4.4.46/arch/x86/kernel/cpu/perf_event_intel_rapl.c
2679 --- linux-4.4.46.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c       2017-02-01 08:31:11.000000000 +0100
2680 +++ linux-4.4.46/arch/x86/kernel/cpu/perf_event_intel_rapl.c    2017-02-03 17:18:05.663415712 +0100
2681 @@ -117,7 +117,7 @@
2682  };
2684  struct rapl_pmu {
2685 -       spinlock_t       lock;
2686 +       raw_spinlock_t   lock;
2687         int              n_active; /* number of active events */
2688         struct list_head active_list;
2689         struct pmu       *pmu; /* pointer to rapl_pmu_class */
2690 @@ -220,13 +220,13 @@
2691         if (!pmu->n_active)
2692                 return HRTIMER_NORESTART;
2694 -       spin_lock_irqsave(&pmu->lock, flags);
2695 +       raw_spin_lock_irqsave(&pmu->lock, flags);
2697         list_for_each_entry(event, &pmu->active_list, active_entry) {
2698                 rapl_event_update(event);
2699         }
2701 -       spin_unlock_irqrestore(&pmu->lock, flags);
2702 +       raw_spin_unlock_irqrestore(&pmu->lock, flags);
2704         hrtimer_forward_now(hrtimer, pmu->timer_interval);
2706 @@ -263,9 +263,9 @@
2707         struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
2708         unsigned long flags;
2710 -       spin_lock_irqsave(&pmu->lock, flags);
2711 +       raw_spin_lock_irqsave(&pmu->lock, flags);
2712         __rapl_pmu_event_start(pmu, event);
2713 -       spin_unlock_irqrestore(&pmu->lock, flags);
2714 +       raw_spin_unlock_irqrestore(&pmu->lock, flags);
2717  static void rapl_pmu_event_stop(struct perf_event *event, int mode)
2718 @@ -274,7 +274,7 @@
2719         struct hw_perf_event *hwc = &event->hw;
2720         unsigned long flags;
2722 -       spin_lock_irqsave(&pmu->lock, flags);
2723 +       raw_spin_lock_irqsave(&pmu->lock, flags);
2725         /* mark event as deactivated and stopped */
2726         if (!(hwc->state & PERF_HES_STOPPED)) {
2727 @@ -299,7 +299,7 @@
2728                 hwc->state |= PERF_HES_UPTODATE;
2729         }
2731 -       spin_unlock_irqrestore(&pmu->lock, flags);
2732 +       raw_spin_unlock_irqrestore(&pmu->lock, flags);
2735  static int rapl_pmu_event_add(struct perf_event *event, int mode)
2736 @@ -308,14 +308,14 @@
2737         struct hw_perf_event *hwc = &event->hw;
2738         unsigned long flags;
2740 -       spin_lock_irqsave(&pmu->lock, flags);
2741 +       raw_spin_lock_irqsave(&pmu->lock, flags);
2743         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
2745         if (mode & PERF_EF_START)
2746                 __rapl_pmu_event_start(pmu, event);
2748 -       spin_unlock_irqrestore(&pmu->lock, flags);
2749 +       raw_spin_unlock_irqrestore(&pmu->lock, flags);
2751         return 0;
2753 @@ -603,7 +603,7 @@
2754         pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
2755         if (!pmu)
2756                 return -1;
2757 -       spin_lock_init(&pmu->lock);
2758 +       raw_spin_lock_init(&pmu->lock);
2760         INIT_LIST_HEAD(&pmu->active_list);
2762 diff -Nur linux-4.4.46.orig/arch/x86/kernel/dumpstack_32.c linux-4.4.46/arch/x86/kernel/dumpstack_32.c
2763 --- linux-4.4.46.orig/arch/x86/kernel/dumpstack_32.c    2017-02-01 08:31:11.000000000 +0100
2764 +++ linux-4.4.46/arch/x86/kernel/dumpstack_32.c 2017-02-03 17:18:05.663415712 +0100
2765 @@ -42,7 +42,7 @@
2766                 unsigned long *stack, unsigned long bp,
2767                 const struct stacktrace_ops *ops, void *data)
2769 -       const unsigned cpu = get_cpu();
2770 +       const unsigned cpu = get_cpu_light();
2771         int graph = 0;
2772         u32 *prev_esp;
2774 @@ -86,7 +86,7 @@
2775                         break;
2776                 touch_nmi_watchdog();
2777         }
2778 -       put_cpu();
2779 +       put_cpu_light();
2781  EXPORT_SYMBOL(dump_trace);
2783 diff -Nur linux-4.4.46.orig/arch/x86/kernel/dumpstack_64.c linux-4.4.46/arch/x86/kernel/dumpstack_64.c
2784 --- linux-4.4.46.orig/arch/x86/kernel/dumpstack_64.c    2017-02-01 08:31:11.000000000 +0100
2785 +++ linux-4.4.46/arch/x86/kernel/dumpstack_64.c 2017-02-03 17:18:05.663415712 +0100
2786 @@ -152,7 +152,7 @@
2787                 unsigned long *stack, unsigned long bp,
2788                 const struct stacktrace_ops *ops, void *data)
2790 -       const unsigned cpu = get_cpu();
2791 +       const unsigned cpu = get_cpu_light();
2792         struct thread_info *tinfo;
2793         unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
2794         unsigned long dummy;
2795 @@ -241,7 +241,7 @@
2796          * This handles the process stack:
2797          */
2798         bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
2799 -       put_cpu();
2800 +       put_cpu_light();
2802  EXPORT_SYMBOL(dump_trace);
2804 @@ -255,7 +255,7 @@
2805         int cpu;
2806         int i;
2808 -       preempt_disable();
2809 +       migrate_disable();
2810         cpu = smp_processor_id();
2812         irq_stack_end   = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
2813 @@ -291,7 +291,7 @@
2814                         pr_cont(" %016lx", *stack++);
2815                 touch_nmi_watchdog();
2816         }
2817 -       preempt_enable();
2818 +       migrate_enable();
2820         pr_cont("\n");
2821         show_trace_log_lvl(task, regs, sp, bp, log_lvl);
2822 diff -Nur linux-4.4.46.orig/arch/x86/kernel/irq_32.c linux-4.4.46/arch/x86/kernel/irq_32.c
2823 --- linux-4.4.46.orig/arch/x86/kernel/irq_32.c  2017-02-01 08:31:11.000000000 +0100
2824 +++ linux-4.4.46/arch/x86/kernel/irq_32.c       2017-02-03 17:18:05.663415712 +0100
2825 @@ -128,6 +128,7 @@
2826                cpu, per_cpu(hardirq_stack, cpu),  per_cpu(softirq_stack, cpu));
2829 +#ifndef CONFIG_PREEMPT_RT_FULL
2830  void do_softirq_own_stack(void)
2832         struct thread_info *curstk;
2833 @@ -146,6 +147,7 @@
2835         call_on_stack(__do_softirq, isp);
2837 +#endif
2839  bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
2841 diff -Nur linux-4.4.46.orig/arch/x86/kernel/kvm.c linux-4.4.46/arch/x86/kernel/kvm.c
2842 --- linux-4.4.46.orig/arch/x86/kernel/kvm.c     2017-02-01 08:31:11.000000000 +0100
2843 +++ linux-4.4.46/arch/x86/kernel/kvm.c  2017-02-03 17:18:05.663415712 +0100
2844 @@ -36,6 +36,7 @@
2845  #include <linux/kprobes.h>
2846  #include <linux/debugfs.h>
2847  #include <linux/nmi.h>
2848 +#include <linux/swait.h>
2849  #include <asm/timer.h>
2850  #include <asm/cpu.h>
2851  #include <asm/traps.h>
2852 @@ -91,14 +92,14 @@
2854  struct kvm_task_sleep_node {
2855         struct hlist_node link;
2856 -       wait_queue_head_t wq;
2857 +       struct swait_queue_head wq;
2858         u32 token;
2859         int cpu;
2860         bool halted;
2861  };
2863  static struct kvm_task_sleep_head {
2864 -       spinlock_t lock;
2865 +       raw_spinlock_t lock;
2866         struct hlist_head list;
2867  } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
2869 @@ -122,17 +123,17 @@
2870         u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
2871         struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
2872         struct kvm_task_sleep_node n, *e;
2873 -       DEFINE_WAIT(wait);
2874 +       DECLARE_SWAITQUEUE(wait);
2876         rcu_irq_enter();
2878 -       spin_lock(&b->lock);
2879 +       raw_spin_lock(&b->lock);
2880         e = _find_apf_task(b, token);
2881         if (e) {
2882                 /* dummy entry exist -> wake up was delivered ahead of PF */
2883                 hlist_del(&e->link);
2884                 kfree(e);
2885 -               spin_unlock(&b->lock);
2886 +               raw_spin_unlock(&b->lock);
2888                 rcu_irq_exit();
2889                 return;
2890 @@ -141,13 +142,13 @@
2891         n.token = token;
2892         n.cpu = smp_processor_id();
2893         n.halted = is_idle_task(current) || preempt_count() > 1;
2894 -       init_waitqueue_head(&n.wq);
2895 +       init_swait_queue_head(&n.wq);
2896         hlist_add_head(&n.link, &b->list);
2897 -       spin_unlock(&b->lock);
2898 +       raw_spin_unlock(&b->lock);
2900         for (;;) {
2901                 if (!n.halted)
2902 -                       prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
2903 +                       prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
2904                 if (hlist_unhashed(&n.link))
2905                         break;
2907 @@ -166,7 +167,7 @@
2908                 }
2909         }
2910         if (!n.halted)
2911 -               finish_wait(&n.wq, &wait);
2912 +               finish_swait(&n.wq, &wait);
2914         rcu_irq_exit();
2915         return;
2916 @@ -178,8 +179,8 @@
2917         hlist_del_init(&n->link);
2918         if (n->halted)
2919                 smp_send_reschedule(n->cpu);
2920 -       else if (waitqueue_active(&n->wq))
2921 -               wake_up(&n->wq);
2922 +       else if (swait_active(&n->wq))
2923 +               swake_up(&n->wq);
2926  static void apf_task_wake_all(void)
2927 @@ -189,14 +190,14 @@
2928         for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
2929                 struct hlist_node *p, *next;
2930                 struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
2931 -               spin_lock(&b->lock);
2932 +               raw_spin_lock(&b->lock);
2933                 hlist_for_each_safe(p, next, &b->list) {
2934                         struct kvm_task_sleep_node *n =
2935                                 hlist_entry(p, typeof(*n), link);
2936                         if (n->cpu == smp_processor_id())
2937                                 apf_task_wake_one(n);
2938                 }
2939 -               spin_unlock(&b->lock);
2940 +               raw_spin_unlock(&b->lock);
2941         }
2944 @@ -212,7 +213,7 @@
2945         }
2947  again:
2948 -       spin_lock(&b->lock);
2949 +       raw_spin_lock(&b->lock);
2950         n = _find_apf_task(b, token);
2951         if (!n) {
2952                 /*
2953 @@ -225,17 +226,17 @@
2954                          * Allocation failed! Busy wait while other cpu
2955                          * handles async PF.
2956                          */
2957 -                       spin_unlock(&b->lock);
2958 +                       raw_spin_unlock(&b->lock);
2959                         cpu_relax();
2960                         goto again;
2961                 }
2962                 n->token = token;
2963                 n->cpu = smp_processor_id();
2964 -               init_waitqueue_head(&n->wq);
2965 +               init_swait_queue_head(&n->wq);
2966                 hlist_add_head(&n->link, &b->list);
2967         } else
2968                 apf_task_wake_one(n);
2969 -       spin_unlock(&b->lock);
2970 +       raw_spin_unlock(&b->lock);
2971         return;
2973  EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
2974 @@ -486,7 +487,7 @@
2975         paravirt_ops_setup();
2976         register_reboot_notifier(&kvm_pv_reboot_nb);
2977         for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
2978 -               spin_lock_init(&async_pf_sleepers[i].lock);
2979 +               raw_spin_lock_init(&async_pf_sleepers[i].lock);
2980         if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
2981                 x86_init.irqs.trap_init = kvm_apf_trap_init;
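
The kvm.c conversion above replaces a regular wait queue with the "simple waitqueue" (swait) API, whose wakeup path is short enough to run under a raw lock and is therefore safe from contexts that must not sleep on PREEMPT_RT_FULL. A minimal sketch of the waiter/waker pair, assuming <linux/swait.h> and a hypothetical condition flag (synchronization of the flag itself is omitted for brevity):

    static struct swait_queue_head my_wq;
    static bool my_condition;

    static void my_init(void)
    {
            init_swait_queue_head(&my_wq);
    }

    static void my_waiter(void)
    {
            DECLARE_SWAITQUEUE(wait);

            for (;;) {
                    prepare_to_swait(&my_wq, &wait, TASK_UNINTERRUPTIBLE);
                    if (my_condition)
                            break;
                    schedule();
            }
            finish_swait(&my_wq, &wait);
    }

    static void my_waker(void)
    {
            my_condition = true;
            if (swait_active(&my_wq))
                    swake_up(&my_wq);       /* wakes a single waiter */
    }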
2983 diff -Nur linux-4.4.46.orig/arch/x86/kernel/nmi.c linux-4.4.46/arch/x86/kernel/nmi.c
2984 --- linux-4.4.46.orig/arch/x86/kernel/nmi.c     2017-02-01 08:31:11.000000000 +0100
2985 +++ linux-4.4.46/arch/x86/kernel/nmi.c  2017-02-03 17:18:05.663415712 +0100
2986 @@ -231,7 +231,7 @@
2987  #endif
2989         if (panic_on_unrecovered_nmi)
2990 -               panic("NMI: Not continuing");
2991 +               nmi_panic(regs, "NMI: Not continuing");
2993         pr_emerg("Dazed and confused, but trying to continue\n");
2995 @@ -255,8 +255,16 @@
2996                  reason, smp_processor_id());
2997         show_regs(regs);
2999 -       if (panic_on_io_nmi)
3000 -               panic("NMI IOCK error: Not continuing");
3001 +       if (panic_on_io_nmi) {
3002 +               nmi_panic(regs, "NMI IOCK error: Not continuing");
3004 +               /*
3005 +                * If we end up here, it means we have received an NMI while
3006 +                * processing panic(). Simply return without delaying and
3007 +                * re-enabling NMIs.
3008 +                */
3009 +               return;
3010 +       }
3012         /* Re-enable the IOCK line, wait for a few seconds */
3013         reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
3014 @@ -297,7 +305,7 @@
3016         pr_emerg("Do you have a strange power saving mode enabled?\n");
3017         if (unknown_nmi_panic || panic_on_unrecovered_nmi)
3018 -               panic("NMI: Not continuing");
3019 +               nmi_panic(regs, "NMI: Not continuing");
3021         pr_emerg("Dazed and confused, but trying to continue\n");
3023 diff -Nur linux-4.4.46.orig/arch/x86/kernel/process_32.c linux-4.4.46/arch/x86/kernel/process_32.c
3024 --- linux-4.4.46.orig/arch/x86/kernel/process_32.c      2017-02-01 08:31:11.000000000 +0100
3025 +++ linux-4.4.46/arch/x86/kernel/process_32.c   2017-02-03 17:18:05.663415712 +0100
3026 @@ -35,6 +35,7 @@
3027  #include <linux/uaccess.h>
3028  #include <linux/io.h>
3029  #include <linux/kdebug.h>
3030 +#include <linux/highmem.h>
3032  #include <asm/pgtable.h>
3033  #include <asm/ldt.h>
3034 @@ -210,6 +211,35 @@
3036  EXPORT_SYMBOL_GPL(start_thread);
3038 +#ifdef CONFIG_PREEMPT_RT_FULL
3039 +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
3041 +       int i;
3043 +       /*
3044 +        * Clear @prev's kmap_atomic mappings
3045 +        */
3046 +       for (i = 0; i < prev_p->kmap_idx; i++) {
3047 +               int idx = i + KM_TYPE_NR * smp_processor_id();
3048 +               pte_t *ptep = kmap_pte - idx;
3050 +               kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
3051 +       }
3052 +       /*
3053 +        * Restore @next_p's kmap_atomic mappings
3054 +        */
3055 +       for (i = 0; i < next_p->kmap_idx; i++) {
3056 +               int idx = i + KM_TYPE_NR * smp_processor_id();
3058 +               if (!pte_none(next_p->kmap_pte[i]))
3059 +                       set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
3060 +       }
3062 +#else
3063 +static inline void
3064 +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
3065 +#endif
3068  /*
3069   *     switch_to(x,y) should switch tasks from x to y.
3070 @@ -286,6 +316,8 @@
3071                      task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
3072                 __switch_to_xtra(prev_p, next_p, tss);
3074 +       switch_kmaps(prev_p, next_p);
3076         /*
3077          * Leave lazy mode, flushing any hypercalls made here.
3078          * This must be done before restoring TLS segments so
3079 diff -Nur linux-4.4.46.orig/arch/x86/kernel/reboot.c linux-4.4.46/arch/x86/kernel/reboot.c
3080 --- linux-4.4.46.orig/arch/x86/kernel/reboot.c  2017-02-01 08:31:11.000000000 +0100
3081 +++ linux-4.4.46/arch/x86/kernel/reboot.c       2017-02-03 17:18:05.663415712 +0100
3082 @@ -726,6 +726,7 @@
3083  static nmi_shootdown_cb shootdown_callback;
3085  static atomic_t waiting_for_crash_ipi;
3086 +static int crash_ipi_issued;
3088  static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
3090 @@ -788,6 +789,9 @@
3092         smp_send_nmi_allbutself();
3094 +       /* Kick CPUs looping in NMI context. */
3095 +       WRITE_ONCE(crash_ipi_issued, 1);
3097         msecs = 1000; /* Wait at most a second for the other cpus to stop */
3098         while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
3099                 mdelay(1);
3100 @@ -796,6 +800,22 @@
3102         /* Leave the nmi callback set */
3105 +/* Override the weak function in kernel/panic.c */
3106 +void nmi_panic_self_stop(struct pt_regs *regs)
3108 +       while (1) {
3109 +               /*
3110 +                * Wait for the crash dumping IPI to be issued, and then
3111 +                * call its callback directly.
3112 +                */
3113 +               if (READ_ONCE(crash_ipi_issued))
3114 +                       crash_nmi_callback(0, regs); /* Don't return */
3116 +               cpu_relax();
3117 +       }
3120  #else /* !CONFIG_SMP */
3121  void nmi_shootdown_cpus(nmi_shootdown_cb callback)
3123 diff -Nur linux-4.4.46.orig/arch/x86/kvm/lapic.c linux-4.4.46/arch/x86/kvm/lapic.c
3124 --- linux-4.4.46.orig/arch/x86/kvm/lapic.c      2017-02-01 08:31:11.000000000 +0100
3125 +++ linux-4.4.46/arch/x86/kvm/lapic.c   2017-02-03 17:18:05.663415712 +0100
3126 @@ -1195,7 +1195,7 @@
3127  static void apic_timer_expired(struct kvm_lapic *apic)
3129         struct kvm_vcpu *vcpu = apic->vcpu;
3130 -       wait_queue_head_t *q = &vcpu->wq;
3131 +       struct swait_queue_head *q = &vcpu->wq;
3132         struct kvm_timer *ktimer = &apic->lapic_timer;
3134         if (atomic_read(&apic->lapic_timer.pending))
3135 @@ -1204,8 +1204,8 @@
3136         atomic_inc(&apic->lapic_timer.pending);
3137         kvm_set_pending_timer(vcpu);
3139 -       if (waitqueue_active(q))
3140 -               wake_up_interruptible(q);
3141 +       if (swait_active(q))
3142 +               swake_up(q);
3144         if (apic_lvtt_tscdeadline(apic))
3145                 ktimer->expired_tscdeadline = ktimer->tscdeadline;
3146 @@ -1801,6 +1801,7 @@
3147         hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
3148                      HRTIMER_MODE_ABS);
3149         apic->lapic_timer.timer.function = apic_timer_fn;
3150 +       apic->lapic_timer.timer.irqsafe = 1;
3152         /*
3153          * APIC is created enabled. This will prevent kvm_lapic_set_base from
3154 diff -Nur linux-4.4.46.orig/arch/x86/kvm/x86.c linux-4.4.46/arch/x86/kvm/x86.c
3155 --- linux-4.4.46.orig/arch/x86/kvm/x86.c        2017-02-01 08:31:11.000000000 +0100
3156 +++ linux-4.4.46/arch/x86/kvm/x86.c     2017-02-03 17:18:05.663415712 +0100
3157 @@ -5809,6 +5809,13 @@
3158                 goto out;
3159         }
3161 +#ifdef CONFIG_PREEMPT_RT_FULL
3162 +       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3163 +               printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
3164 +               return -EOPNOTSUPP;
3165 +       }
3166 +#endif
3168         r = kvm_mmu_module_init();
3169         if (r)
3170                 goto out_free_percpu;
3171 diff -Nur linux-4.4.46.orig/arch/x86/mm/highmem_32.c linux-4.4.46/arch/x86/mm/highmem_32.c
3172 --- linux-4.4.46.orig/arch/x86/mm/highmem_32.c  2017-02-01 08:31:11.000000000 +0100
3173 +++ linux-4.4.46/arch/x86/mm/highmem_32.c       2017-02-03 17:18:05.663415712 +0100
3174 @@ -32,10 +32,11 @@
3175   */
3176  void *kmap_atomic_prot(struct page *page, pgprot_t prot)
3178 +       pte_t pte = mk_pte(page, prot);
3179         unsigned long vaddr;
3180         int idx, type;
3182 -       preempt_disable();
3183 +       preempt_disable_nort();
3184         pagefault_disable();
3186         if (!PageHighMem(page))
3187 @@ -45,7 +46,10 @@
3188         idx = type + KM_TYPE_NR*smp_processor_id();
3189         vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3190         BUG_ON(!pte_none(*(kmap_pte-idx)));
3191 -       set_pte(kmap_pte-idx, mk_pte(page, prot));
3192 +#ifdef CONFIG_PREEMPT_RT_FULL
3193 +       current->kmap_pte[type] = pte;
3194 +#endif
3195 +       set_pte(kmap_pte-idx, pte);
3196         arch_flush_lazy_mmu_mode();
3198         return (void *)vaddr;
3199 @@ -88,6 +92,9 @@
3200                  * is a bad idea also, in case the page changes cacheability
3201                  * attributes or becomes a protected page in a hypervisor.
3202                  */
3203 +#ifdef CONFIG_PREEMPT_RT_FULL
3204 +               current->kmap_pte[type] = __pte(0);
3205 +#endif
3206                 kpte_clear_flush(kmap_pte-idx, vaddr);
3207                 kmap_atomic_idx_pop();
3208                 arch_flush_lazy_mmu_mode();
3209 @@ -100,7 +107,7 @@
3210  #endif
3212         pagefault_enable();
3213 -       preempt_enable();
3214 +       preempt_enable_nort();
3216  EXPORT_SYMBOL(__kunmap_atomic);
3218 diff -Nur linux-4.4.46.orig/arch/x86/mm/iomap_32.c linux-4.4.46/arch/x86/mm/iomap_32.c
3219 --- linux-4.4.46.orig/arch/x86/mm/iomap_32.c    2017-02-01 08:31:11.000000000 +0100
3220 +++ linux-4.4.46/arch/x86/mm/iomap_32.c 2017-02-03 17:18:05.663415712 +0100
3221 @@ -56,6 +56,7 @@
3223  void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
3225 +       pte_t pte = pfn_pte(pfn, prot);
3226         unsigned long vaddr;
3227         int idx, type;
3229 @@ -65,7 +66,12 @@
3230         type = kmap_atomic_idx_push();
3231         idx = type + KM_TYPE_NR * smp_processor_id();
3232         vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3233 -       set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
3234 +       WARN_ON(!pte_none(*(kmap_pte - idx)));
3236 +#ifdef CONFIG_PREEMPT_RT_FULL
3237 +       current->kmap_pte[type] = pte;
3238 +#endif
3239 +       set_pte(kmap_pte - idx, pte);
3240         arch_flush_lazy_mmu_mode();
3242         return (void *)vaddr;
3243 @@ -113,6 +119,9 @@
3244                  * is a bad idea also, in case the page changes cacheability
3245                  * attributes or becomes a protected page in a hypervisor.
3246                  */
3247 +#ifdef CONFIG_PREEMPT_RT_FULL
3248 +               current->kmap_pte[type] = __pte(0);
3249 +#endif
3250                 kpte_clear_flush(kmap_pte-idx, vaddr);
3251                 kmap_atomic_idx_pop();
3252         }
3253 diff -Nur linux-4.4.46.orig/arch/x86/platform/uv/tlb_uv.c linux-4.4.46/arch/x86/platform/uv/tlb_uv.c
3254 --- linux-4.4.46.orig/arch/x86/platform/uv/tlb_uv.c     2017-02-01 08:31:11.000000000 +0100
3255 +++ linux-4.4.46/arch/x86/platform/uv/tlb_uv.c  2017-02-03 17:18:05.663415712 +0100
3256 @@ -714,9 +714,9 @@
3258                 quiesce_local_uvhub(hmaster);
3260 -               spin_lock(&hmaster->queue_lock);
3261 +               raw_spin_lock(&hmaster->queue_lock);
3262                 reset_with_ipi(&bau_desc->distribution, bcp);
3263 -               spin_unlock(&hmaster->queue_lock);
3264 +               raw_spin_unlock(&hmaster->queue_lock);
3266                 end_uvhub_quiesce(hmaster);
3268 @@ -736,9 +736,9 @@
3270                 quiesce_local_uvhub(hmaster);
3272 -               spin_lock(&hmaster->queue_lock);
3273 +               raw_spin_lock(&hmaster->queue_lock);
3274                 reset_with_ipi(&bau_desc->distribution, bcp);
3275 -               spin_unlock(&hmaster->queue_lock);
3276 +               raw_spin_unlock(&hmaster->queue_lock);
3278                 end_uvhub_quiesce(hmaster);
3280 @@ -759,7 +759,7 @@
3281         cycles_t tm1;
3283         hmaster = bcp->uvhub_master;
3284 -       spin_lock(&hmaster->disable_lock);
3285 +       raw_spin_lock(&hmaster->disable_lock);
3286         if (!bcp->baudisabled) {
3287                 stat->s_bau_disabled++;
3288                 tm1 = get_cycles();
3289 @@ -772,7 +772,7 @@
3290                         }
3291                 }
3292         }
3293 -       spin_unlock(&hmaster->disable_lock);
3294 +       raw_spin_unlock(&hmaster->disable_lock);
3297  static void count_max_concurr(int stat, struct bau_control *bcp,
3298 @@ -835,7 +835,7 @@
3299   */
3300  static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
3302 -       spinlock_t *lock = &hmaster->uvhub_lock;
3303 +       raw_spinlock_t *lock = &hmaster->uvhub_lock;
3304         atomic_t *v;
3306         v = &hmaster->active_descriptor_count;
3307 @@ -968,7 +968,7 @@
3308         struct bau_control *hmaster;
3310         hmaster = bcp->uvhub_master;
3311 -       spin_lock(&hmaster->disable_lock);
3312 +       raw_spin_lock(&hmaster->disable_lock);
3313         if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
3314                 stat->s_bau_reenabled++;
3315                 for_each_present_cpu(tcpu) {
3316 @@ -980,10 +980,10 @@
3317                                 tbcp->period_giveups = 0;
3318                         }
3319                 }
3320 -               spin_unlock(&hmaster->disable_lock);
3321 +               raw_spin_unlock(&hmaster->disable_lock);
3322                 return 0;
3323         }
3324 -       spin_unlock(&hmaster->disable_lock);
3325 +       raw_spin_unlock(&hmaster->disable_lock);
3326         return -1;
3329 @@ -1901,9 +1901,9 @@
3330                 bcp->cong_reps                  = congested_reps;
3331                 bcp->disabled_period =          sec_2_cycles(disabled_period);
3332                 bcp->giveup_limit =             giveup_limit;
3333 -               spin_lock_init(&bcp->queue_lock);
3334 -               spin_lock_init(&bcp->uvhub_lock);
3335 -               spin_lock_init(&bcp->disable_lock);
3336 +               raw_spin_lock_init(&bcp->queue_lock);
3337 +               raw_spin_lock_init(&bcp->uvhub_lock);
3338 +               raw_spin_lock_init(&bcp->disable_lock);
3339         }
3342 diff -Nur linux-4.4.46.orig/arch/x86/platform/uv/uv_time.c linux-4.4.46/arch/x86/platform/uv/uv_time.c
3343 --- linux-4.4.46.orig/arch/x86/platform/uv/uv_time.c    2017-02-01 08:31:11.000000000 +0100
3344 +++ linux-4.4.46/arch/x86/platform/uv/uv_time.c 2017-02-03 17:18:05.663415712 +0100
3345 @@ -57,7 +57,7 @@
3347  /* There is one of these allocated per node */
3348  struct uv_rtc_timer_head {
3349 -       spinlock_t      lock;
3350 +       raw_spinlock_t  lock;
3351         /* next cpu waiting for timer, local node relative: */
3352         int             next_cpu;
3353         /* number of cpus on this node: */
3354 @@ -177,7 +177,7 @@
3355                                 uv_rtc_deallocate_timers();
3356                                 return -ENOMEM;
3357                         }
3358 -                       spin_lock_init(&head->lock);
3359 +                       raw_spin_lock_init(&head->lock);
3360                         head->ncpus = uv_blade_nr_possible_cpus(bid);
3361                         head->next_cpu = -1;
3362                         blade_info[bid] = head;
3363 @@ -231,7 +231,7 @@
3364         unsigned long flags;
3365         int next_cpu;
3367 -       spin_lock_irqsave(&head->lock, flags);
3368 +       raw_spin_lock_irqsave(&head->lock, flags);
3370         next_cpu = head->next_cpu;
3371         *t = expires;
3372 @@ -243,12 +243,12 @@
3373                 if (uv_setup_intr(cpu, expires)) {
3374                         *t = ULLONG_MAX;
3375                         uv_rtc_find_next_timer(head, pnode);
3376 -                       spin_unlock_irqrestore(&head->lock, flags);
3377 +                       raw_spin_unlock_irqrestore(&head->lock, flags);
3378                         return -ETIME;
3379                 }
3380         }
3382 -       spin_unlock_irqrestore(&head->lock, flags);
3383 +       raw_spin_unlock_irqrestore(&head->lock, flags);
3384         return 0;
3387 @@ -267,7 +267,7 @@
3388         unsigned long flags;
3389         int rc = 0;
3391 -       spin_lock_irqsave(&head->lock, flags);
3392 +       raw_spin_lock_irqsave(&head->lock, flags);
3394         if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
3395                 rc = 1;
3396 @@ -279,7 +279,7 @@
3397                         uv_rtc_find_next_timer(head, pnode);
3398         }
3400 -       spin_unlock_irqrestore(&head->lock, flags);
3401 +       raw_spin_unlock_irqrestore(&head->lock, flags);
3403         return rc;
3405 @@ -299,13 +299,18 @@
3406  static cycle_t uv_read_rtc(struct clocksource *cs)
3408         unsigned long offset;
3409 +       cycle_t cycles;
3411 +       preempt_disable();
3412         if (uv_get_min_hub_revision_id() == 1)
3413                 offset = 0;
3414         else
3415                 offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
3417 -       return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
3418 +       cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
3419 +       preempt_enable();
3421 +       return cycles;
3424  /*
3425 diff -Nur linux-4.4.46.orig/block/blk-core.c linux-4.4.46/block/blk-core.c
3426 --- linux-4.4.46.orig/block/blk-core.c  2017-02-01 08:31:11.000000000 +0100
3427 +++ linux-4.4.46/block/blk-core.c       2017-02-03 17:18:05.667415866 +0100
3428 @@ -125,6 +125,9 @@
3430         INIT_LIST_HEAD(&rq->queuelist);
3431         INIT_LIST_HEAD(&rq->timeout_list);
3432 +#ifdef CONFIG_PREEMPT_RT_FULL
3433 +       INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
3434 +#endif
3435         rq->cpu = -1;
3436         rq->q = q;
3437         rq->__sector = (sector_t) -1;
3438 @@ -233,7 +236,7 @@
3439   **/
3440  void blk_start_queue(struct request_queue *q)
3442 -       WARN_ON(!irqs_disabled());
3443 +       WARN_ON_NONRT(!irqs_disabled());
3445         queue_flag_clear(QUEUE_FLAG_STOPPED, q);
3446         __blk_run_queue(q);
3447 @@ -659,7 +662,7 @@
3448                 if (!gfpflags_allow_blocking(gfp))
3449                         return -EBUSY;
3451 -               ret = wait_event_interruptible(q->mq_freeze_wq,
3452 +               ret = swait_event_interruptible(q->mq_freeze_wq,
3453                                 !atomic_read(&q->mq_freeze_depth) ||
3454                                 blk_queue_dying(q));
3455                 if (blk_queue_dying(q))
3456 @@ -679,7 +682,7 @@
3457         struct request_queue *q =
3458                 container_of(ref, struct request_queue, q_usage_counter);
3460 -       wake_up_all(&q->mq_freeze_wq);
3461 +       swake_up_all(&q->mq_freeze_wq);
3464  struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
3465 @@ -741,7 +744,7 @@
3466         q->bypass_depth = 1;
3467         __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
3469 -       init_waitqueue_head(&q->mq_freeze_wq);
3470 +       init_swait_queue_head(&q->mq_freeze_wq);
3472         /*
3473          * Init percpu_ref in atomic mode so that it's faster to shutdown.
3474 @@ -3200,7 +3203,7 @@
3475                 blk_run_queue_async(q);
3476         else
3477                 __blk_run_queue(q);
3478 -       spin_unlock(q->queue_lock);
3479 +       spin_unlock_irq(q->queue_lock);
3482  static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
3483 @@ -3248,7 +3251,6 @@
3484  void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3486         struct request_queue *q;
3487 -       unsigned long flags;
3488         struct request *rq;
3489         LIST_HEAD(list);
3490         unsigned int depth;
3491 @@ -3268,11 +3270,6 @@
3492         q = NULL;
3493         depth = 0;
3495 -       /*
3496 -        * Save and disable interrupts here, to avoid doing it for every
3497 -        * queue lock we have to take.
3498 -        */
3499 -       local_irq_save(flags);
3500         while (!list_empty(&list)) {
3501                 rq = list_entry_rq(list.next);
3502                 list_del_init(&rq->queuelist);
3503 @@ -3285,7 +3282,7 @@
3504                                 queue_unplugged(q, depth, from_schedule);
3505                         q = rq->q;
3506                         depth = 0;
3507 -                       spin_lock(q->queue_lock);
3508 +                       spin_lock_irq(q->queue_lock);
3509                 }
3511                 /*
3512 @@ -3312,8 +3309,6 @@
3513          */
3514         if (q)
3515                 queue_unplugged(q, depth, from_schedule);
3517 -       local_irq_restore(flags);
3520  void blk_finish_plug(struct blk_plug *plug)
3521 diff -Nur linux-4.4.46.orig/block/blk-ioc.c linux-4.4.46/block/blk-ioc.c
3522 --- linux-4.4.46.orig/block/blk-ioc.c   2017-02-01 08:31:11.000000000 +0100
3523 +++ linux-4.4.46/block/blk-ioc.c        2017-02-03 17:18:05.667415866 +0100
3524 @@ -7,6 +7,7 @@
3525  #include <linux/bio.h>
3526  #include <linux/blkdev.h>
3527  #include <linux/slab.h>
3528 +#include <linux/delay.h>
3530  #include "blk.h"
3532 @@ -109,7 +110,7 @@
3533                         spin_unlock(q->queue_lock);
3534                 } else {
3535                         spin_unlock_irqrestore(&ioc->lock, flags);
3536 -                       cpu_relax();
3537 +                       cpu_chill();
3538                         spin_lock_irqsave_nested(&ioc->lock, flags, 1);
3539                 }
3540         }
3541 @@ -187,7 +188,7 @@
3542                         spin_unlock(icq->q->queue_lock);
3543                 } else {
3544                         spin_unlock_irqrestore(&ioc->lock, flags);
3545 -                       cpu_relax();
3546 +                       cpu_chill();
3547                         goto retry;
3548                 }
3549         }
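
The blk-ioc.c hunks above trade cpu_relax() for cpu_chill() in their lock-retry loops (and add <linux/delay.h> for it). On PREEMPT_RT_FULL a tight cpu_relax() retry can livelock when the lock holder is a preempted task rather than code on another CPU, so the cpu_chill() helper provided elsewhere in this patch backs off by sleeping briefly on RT and falls back to cpu_relax() otherwise. A sketch of the retry idiom, with hypothetical locks:

    static void grab_both_locks(spinlock_t *outer, spinlock_t *inner)
    {
            unsigned long flags;

    retry:
            spin_lock_irqsave(outer, flags);
            if (!spin_trylock(inner)) {
                    spin_unlock_irqrestore(outer, flags);
                    /* Back off: brief sleep on RT, plain cpu_relax() otherwise. */
                    cpu_chill();
                    goto retry;
            }
            /* ... both locks held, do the work ... */
            spin_unlock(inner);
            spin_unlock_irqrestore(outer, flags);
    }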
3550 diff -Nur linux-4.4.46.orig/block/blk-iopoll.c linux-4.4.46/block/blk-iopoll.c
3551 --- linux-4.4.46.orig/block/blk-iopoll.c        2017-02-01 08:31:11.000000000 +0100
3552 +++ linux-4.4.46/block/blk-iopoll.c     2017-02-03 17:18:05.667415866 +0100
3553 @@ -35,6 +35,7 @@
3554         list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
3555         __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3556         local_irq_restore(flags);
3557 +       preempt_check_resched_rt();
3559  EXPORT_SYMBOL(blk_iopoll_sched);
3561 @@ -132,6 +133,7 @@
3562                 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3564         local_irq_enable();
3565 +       preempt_check_resched_rt();
3568  /**
3569 @@ -201,6 +203,7 @@
3570                                  this_cpu_ptr(&blk_cpu_iopoll));
3571                 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
3572                 local_irq_enable();
3573 +               preempt_check_resched_rt();
3574         }
3576         return NOTIFY_OK;
3577 diff -Nur linux-4.4.46.orig/block/blk-mq.c linux-4.4.46/block/blk-mq.c
3578 --- linux-4.4.46.orig/block/blk-mq.c    2017-02-01 08:31:11.000000000 +0100
3579 +++ linux-4.4.46/block/blk-mq.c 2017-02-03 17:18:05.667415866 +0100
3580 @@ -92,7 +92,7 @@
3582  static void blk_mq_freeze_queue_wait(struct request_queue *q)
3584 -       wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
3585 +       swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
3588  /*
3589 @@ -130,7 +130,7 @@
3590         WARN_ON_ONCE(freeze_depth < 0);
3591         if (!freeze_depth) {
3592                 percpu_ref_reinit(&q->q_usage_counter);
3593 -               wake_up_all(&q->mq_freeze_wq);
3594 +               swake_up_all(&q->mq_freeze_wq);
3595         }
3597  EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
3598 @@ -149,7 +149,7 @@
3599          * dying, we need to ensure that processes currently waiting on
3600          * the queue are notified as well.
3601          */
3602 -       wake_up_all(&q->mq_freeze_wq);
3603 +       swake_up_all(&q->mq_freeze_wq);
3606  bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
3607 @@ -196,6 +196,9 @@
3608         rq->resid_len = 0;
3609         rq->sense = NULL;
3611 +#ifdef CONFIG_PREEMPT_RT_FULL
3612 +       INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
3613 +#endif
3614         INIT_LIST_HEAD(&rq->timeout_list);
3615         rq->timeout = 0;
3617 @@ -325,6 +328,17 @@
3619  EXPORT_SYMBOL(blk_mq_end_request);
3621 +#ifdef CONFIG_PREEMPT_RT_FULL
3623 +void __blk_mq_complete_request_remote_work(struct work_struct *work)
3625 +       struct request *rq = container_of(work, struct request, work);
3627 +       rq->q->softirq_done_fn(rq);
3630 +#else
3632  static void __blk_mq_complete_request_remote(void *data)
3634         struct request *rq = data;
3635 @@ -332,6 +346,8 @@
3636         rq->q->softirq_done_fn(rq);
3639 +#endif
3641  static void blk_mq_ipi_complete_request(struct request *rq)
3643         struct blk_mq_ctx *ctx = rq->mq_ctx;
3644 @@ -343,19 +359,23 @@
3645                 return;
3646         }
3648 -       cpu = get_cpu();
3649 +       cpu = get_cpu_light();
3650         if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
3651                 shared = cpus_share_cache(cpu, ctx->cpu);
3653         if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
3654 +#ifdef CONFIG_PREEMPT_RT_FULL
3655 +               schedule_work_on(ctx->cpu, &rq->work);
3656 +#else
3657                 rq->csd.func = __blk_mq_complete_request_remote;
3658                 rq->csd.info = rq;
3659                 rq->csd.flags = 0;
3660                 smp_call_function_single_async(ctx->cpu, &rq->csd);
3661 +#endif
3662         } else {
3663                 rq->q->softirq_done_fn(rq);
3664         }
3665 -       put_cpu();
3666 +       put_cpu_light();
3669  static void __blk_mq_complete_request(struct request *rq)
3670 @@ -862,14 +882,14 @@
3671                 return;
3673         if (!async) {
3674 -               int cpu = get_cpu();
3675 +               int cpu = get_cpu_light();
3676                 if (cpumask_test_cpu(cpu, hctx->cpumask)) {
3677                         __blk_mq_run_hw_queue(hctx);
3678 -                       put_cpu();
3679 +                       put_cpu_light();
3680                         return;
3681                 }
3683 -               put_cpu();
3684 +               put_cpu_light();
3685         }
3687         kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
3688 @@ -1617,7 +1637,7 @@
3690         struct blk_mq_hw_ctx *hctx = data;
3692 -       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3693 +       if (action == CPU_POST_DEAD)
3694                 return blk_mq_hctx_cpu_offline(hctx, cpu);
3696         /*
3697 diff -Nur linux-4.4.46.orig/block/blk-mq-cpu.c linux-4.4.46/block/blk-mq-cpu.c
3698 --- linux-4.4.46.orig/block/blk-mq-cpu.c        2017-02-01 08:31:11.000000000 +0100
3699 +++ linux-4.4.46/block/blk-mq-cpu.c     2017-02-03 17:18:05.667415866 +0100
3700 @@ -16,7 +16,7 @@
3701  #include "blk-mq.h"
3703  static LIST_HEAD(blk_mq_cpu_notify_list);
3704 -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
3705 +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
3707  static int blk_mq_main_cpu_notify(struct notifier_block *self,
3708                                   unsigned long action, void *hcpu)
3709 @@ -25,7 +25,10 @@
3710         struct blk_mq_cpu_notifier *notify;
3711         int ret = NOTIFY_OK;
3713 -       raw_spin_lock(&blk_mq_cpu_notify_lock);
3714 +       if (action != CPU_POST_DEAD)
3715 +               return NOTIFY_OK;
3717 +       spin_lock(&blk_mq_cpu_notify_lock);
3719         list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
3720                 ret = notify->notify(notify->data, action, cpu);
3721 @@ -33,7 +36,7 @@
3722                         break;
3723         }
3725 -       raw_spin_unlock(&blk_mq_cpu_notify_lock);
3726 +       spin_unlock(&blk_mq_cpu_notify_lock);
3727         return ret;
3730 @@ -41,16 +44,16 @@
3732         BUG_ON(!notifier->notify);
3734 -       raw_spin_lock(&blk_mq_cpu_notify_lock);
3735 +       spin_lock(&blk_mq_cpu_notify_lock);
3736         list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
3737 -       raw_spin_unlock(&blk_mq_cpu_notify_lock);
3738 +       spin_unlock(&blk_mq_cpu_notify_lock);
3741  void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
3743 -       raw_spin_lock(&blk_mq_cpu_notify_lock);
3744 +       spin_lock(&blk_mq_cpu_notify_lock);
3745         list_del(&notifier->list);
3746 -       raw_spin_unlock(&blk_mq_cpu_notify_lock);
3747 +       spin_unlock(&blk_mq_cpu_notify_lock);
3750  void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
3751 diff -Nur linux-4.4.46.orig/block/blk-mq.h linux-4.4.46/block/blk-mq.h
3752 --- linux-4.4.46.orig/block/blk-mq.h    2017-02-01 08:31:11.000000000 +0100
3753 +++ linux-4.4.46/block/blk-mq.h 2017-02-03 17:18:05.667415866 +0100
3754 @@ -74,7 +74,10 @@
3755  static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
3756                                            unsigned int cpu)
3758 -       return per_cpu_ptr(q->queue_ctx, cpu);
3759 +       struct blk_mq_ctx *ctx;
3761 +       ctx = per_cpu_ptr(q->queue_ctx, cpu);
3762 +       return ctx;
3765  /*
3766 @@ -85,12 +88,12 @@
3767   */
3768  static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
3770 -       return __blk_mq_get_ctx(q, get_cpu());
3771 +       return __blk_mq_get_ctx(q, get_cpu_light());
3774  static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
3776 -       put_cpu();
3777 +       put_cpu_light();
3780  struct blk_mq_alloc_data {
3781 diff -Nur linux-4.4.46.orig/block/blk-softirq.c linux-4.4.46/block/blk-softirq.c
3782 --- linux-4.4.46.orig/block/blk-softirq.c       2017-02-01 08:31:11.000000000 +0100
3783 +++ linux-4.4.46/block/blk-softirq.c    2017-02-03 17:18:05.667415866 +0100
3784 @@ -51,6 +51,7 @@
3785                 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3787         local_irq_restore(flags);
3788 +       preempt_check_resched_rt();
3791  /*
3792 @@ -93,6 +94,7 @@
3793                                  this_cpu_ptr(&blk_cpu_done));
3794                 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3795                 local_irq_enable();
3796 +               preempt_check_resched_rt();
3797         }
3799         return NOTIFY_OK;
3800 @@ -150,6 +152,7 @@
3801                 goto do_local;
3803         local_irq_restore(flags);
3804 +       preempt_check_resched_rt();
3807  /**
3808 diff -Nur linux-4.4.46.orig/block/bounce.c linux-4.4.46/block/bounce.c
3809 --- linux-4.4.46.orig/block/bounce.c    2017-02-01 08:31:11.000000000 +0100
3810 +++ linux-4.4.46/block/bounce.c 2017-02-03 17:18:05.667415866 +0100
3811 @@ -55,11 +55,11 @@
3812         unsigned long flags;
3813         unsigned char *vto;
3815 -       local_irq_save(flags);
3816 +       local_irq_save_nort(flags);
3817         vto = kmap_atomic(to->bv_page);
3818         memcpy(vto + to->bv_offset, vfrom, to->bv_len);
3819         kunmap_atomic(vto);
3820 -       local_irq_restore(flags);
3821 +       local_irq_restore_nort(flags);
3824  #else /* CONFIG_HIGHMEM */
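
The bounce-buffer hunk above (and the libata-sff.c hunks later in this patch) use the _nort variants of the IRQ helpers, which are defined elsewhere in this patch: on a non-RT kernel they behave like local_irq_save()/local_irq_restore(), while on PREEMPT_RT_FULL they do not actually disable interrupts, because the protected region (kmap_atomic plus a memcpy) is allowed to be preemptible there. A hedged sketch of the calling pattern; the copy routine is hypothetical and assumes <linux/highmem.h>:

    static void copy_to_highmem_page(struct page *to, const void *vfrom, size_t len)
    {
            unsigned long flags;
            void *vto;

            local_irq_save_nort(flags);     /* real IRQ-off only when !PREEMPT_RT_FULL */
            vto = kmap_atomic(to);
            memcpy(vto, vfrom, len);
            kunmap_atomic(vto);
            local_irq_restore_nort(flags);
    }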
3825 diff -Nur linux-4.4.46.orig/crypto/algapi.c linux-4.4.46/crypto/algapi.c
3826 --- linux-4.4.46.orig/crypto/algapi.c   2017-02-01 08:31:11.000000000 +0100
3827 +++ linux-4.4.46/crypto/algapi.c        2017-02-03 17:18:05.667415866 +0100
3828 @@ -719,13 +719,13 @@
3830  int crypto_register_notifier(struct notifier_block *nb)
3832 -       return blocking_notifier_chain_register(&crypto_chain, nb);
3833 +       return srcu_notifier_chain_register(&crypto_chain, nb);
3835  EXPORT_SYMBOL_GPL(crypto_register_notifier);
3837  int crypto_unregister_notifier(struct notifier_block *nb)
3839 -       return blocking_notifier_chain_unregister(&crypto_chain, nb);
3840 +       return srcu_notifier_chain_unregister(&crypto_chain, nb);
3842  EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
3844 diff -Nur linux-4.4.46.orig/crypto/api.c linux-4.4.46/crypto/api.c
3845 --- linux-4.4.46.orig/crypto/api.c      2017-02-01 08:31:11.000000000 +0100
3846 +++ linux-4.4.46/crypto/api.c   2017-02-03 17:18:05.671416021 +0100
3847 @@ -31,7 +31,7 @@
3848  DECLARE_RWSEM(crypto_alg_sem);
3849  EXPORT_SYMBOL_GPL(crypto_alg_sem);
3851 -BLOCKING_NOTIFIER_HEAD(crypto_chain);
3852 +SRCU_NOTIFIER_HEAD(crypto_chain);
3853  EXPORT_SYMBOL_GPL(crypto_chain);
3855  static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
3856 @@ -236,10 +236,10 @@
3858         int ok;
3860 -       ok = blocking_notifier_call_chain(&crypto_chain, val, v);
3861 +       ok = srcu_notifier_call_chain(&crypto_chain, val, v);
3862         if (ok == NOTIFY_DONE) {
3863                 request_module("cryptomgr");
3864 -               ok = blocking_notifier_call_chain(&crypto_chain, val, v);
3865 +               ok = srcu_notifier_call_chain(&crypto_chain, val, v);
3866         }
3868         return ok;
3869 diff -Nur linux-4.4.46.orig/crypto/internal.h linux-4.4.46/crypto/internal.h
3870 --- linux-4.4.46.orig/crypto/internal.h 2017-02-01 08:31:11.000000000 +0100
3871 +++ linux-4.4.46/crypto/internal.h      2017-02-03 17:18:05.671416021 +0100
3872 @@ -47,7 +47,7 @@
3874  extern struct list_head crypto_alg_list;
3875  extern struct rw_semaphore crypto_alg_sem;
3876 -extern struct blocking_notifier_head crypto_chain;
3877 +extern struct srcu_notifier_head crypto_chain;
3879  #ifdef CONFIG_PROC_FS
3880  void __init crypto_init_proc(void);
3881 @@ -143,7 +143,7 @@
3883  static inline void crypto_notify(unsigned long val, void *v)
3885 -       blocking_notifier_call_chain(&crypto_chain, val, v);
3886 +       srcu_notifier_call_chain(&crypto_chain, val, v);
3889  #endif /* _CRYPTO_INTERNAL_H */
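
The crypto hunks above switch crypto_chain from a blocking notifier (rwsem-protected) to an SRCU notifier, whose call path only takes an SRCU read lock and so does not block callers on RT. The API mirrors the blocking variant, which is why the conversion is mechanical. A minimal sketch with a hypothetical chain and callback; the SRCU_NOTIFIER_HEAD static initializer is used here exactly as in the api.c hunk above:

    SRCU_NOTIFIER_HEAD(my_chain);

    static int my_event_cb(struct notifier_block *nb, unsigned long val, void *data)
    {
            /* ... react to the event ... */
            return NOTIFY_OK;
    }

    static struct notifier_block my_nb = { .notifier_call = my_event_cb };

    static void my_setup(void)
    {
            srcu_notifier_chain_register(&my_chain, &my_nb);
    }

    static void my_fire(void *data)
    {
            srcu_notifier_call_chain(&my_chain, 0, data);
    }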
3890 diff -Nur linux-4.4.46.orig/Documentation/hwlat_detector.txt linux-4.4.46/Documentation/hwlat_detector.txt
3891 --- linux-4.4.46.orig/Documentation/hwlat_detector.txt  1970-01-01 01:00:00.000000000 +0100
3892 +++ linux-4.4.46/Documentation/hwlat_detector.txt       2017-02-03 17:18:05.623414168 +0100
3893 @@ -0,0 +1,64 @@
3894 +Introduction:
3895 +-------------
3897 +The module hwlat_detector is a special purpose kernel module that is used to
3898 +detect large system latencies induced by the behavior of certain underlying
3899 +hardware or firmware, independent of Linux itself. The code was developed
3900 +originally to detect SMIs (System Management Interrupts) on x86 systems,
3901 +however there is nothing x86 specific about this patchset. It was
3902 +originally written for use by the "RT" patch since the Real Time
3903 +kernel is highly latency sensitive.
3905 +SMIs are usually not serviced by the Linux kernel, which typically does not
3906 +even know that they are occuring. SMIs are instead are set up by BIOS code
3907 +and are serviced by BIOS code, usually for "critical" events such as
3908 +management of thermal sensors and fans. Sometimes though, SMIs are used for
3909 +other tasks and those tasks can spend an inordinate amount of time in the
3910 +handler (sometimes measured in milliseconds). Obviously this is a problem if
3911 +you are trying to keep event service latencies down in the microsecond range.
3913 +The hardware latency detector works by hogging all of the cpus for configurable
3914 +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
3915 +for some period, then looking for gaps in the TSC data. Any gap indicates a
3916 +time when the polling was interrupted and since the machine is stopped and
3917 +interrupts turned off the only thing that could do that would be an SMI.
3919 +Note that the SMI detector should *NEVER* be used in a production environment.
3920 +It is intended to be run manually to determine if the hardware platform has a
3921 +problem with long system firmware service routines.
3923 +Usage:
3924 +------
3926 +Loading the module hwlat_detector passing the parameter "enabled=1" (or by
3927 +setting the "enable" entry in "hwlat_detector" debugfs toggled on) is the only
3928 +step required to start the hwlat_detector. It is possible to redefine the
3929 +threshold in microseconds (us) above which latency spikes will be taken
3930 +into account (parameter "threshold=").
3932 +Example:
3934 +       # modprobe hwlat_detector enabled=1 threshold=100
3936 +After the module is loaded, it creates a directory named "hwlat_detector" under
3937 +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary
3938 +to have debugfs mounted, which might be on /sys/debug on your system.
3940 +The /debug/hwlat_detector interface contains the following files:
3942 +count                  - number of latency spikes observed since last reset
3943 +enable                 - a global enable/disable toggle (0/1), resets count
3944 +max                    - maximum hardware latency actually observed (usecs)
3945 +sample                 - a pipe from which to read current raw sample data
3946 +                         in the format <timestamp> <latency observed usecs>
3947 +                         (can be opened O_NONBLOCK for a single sample)
3948 +threshold              - minimum latency value to be considered (usecs)
3949 +width                  - time period to sample with CPUs held (usecs)
3950 +                         must be less than the total window size (enforced)
3951 +window                 - total period of sampling, width being inside (usecs)
3953 +By default we will set width to 500,000 and window to 1,000,000, meaning that
3954 +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
3955 +observe any latencies that exceed the threshold (initially 100 usecs),
3956 +then we write to a global sample ring buffer of 8K samples, which is
3957 +consumed by reading from the "sample" (pipe) debugfs file interface.
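
To make the "sample" pipe format described above concrete, here is a small, hypothetical user-space reader; it is not part of the patch, and the path assumes the /debug debugfs mountpoint used in this document (adjust it to wherever debugfs is mounted on your system):

    #include <stdio.h>

    int main(void)
    {
            /* Each line is "<timestamp> <latency observed usecs>". */
            FILE *f = fopen("/debug/hwlat_detector/sample", "r");
            char line[128];

            if (!f) {
                    perror("hwlat_detector sample");
                    return 1;
            }
            /* Without O_NONBLOCK the read blocks until a spike is logged. */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }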
3958 diff -Nur linux-4.4.46.orig/Documentation/kernel-parameters.txt linux-4.4.46/Documentation/kernel-parameters.txt
3959 --- linux-4.4.46.orig/Documentation/kernel-parameters.txt       2017-02-01 08:31:11.000000000 +0100
3960 +++ linux-4.4.46/Documentation/kernel-parameters.txt    2017-02-03 17:18:05.623414168 +0100
3961 @@ -1636,6 +1636,15 @@
3962         ip=             [IP_PNP]
3963                         See Documentation/filesystems/nfs/nfsroot.txt.
3965 +       irqaffinity=    [SMP] Set the default irq affinity mask
3966 +                       Format:
3967 +                       <cpu number>,...,<cpu number>
3968 +                       or
3969 +                       <cpu number>-<cpu number>
3970 +                       (must be a positive range in ascending order)
3971 +                       or a mixture
3972 +                       <cpu number>,...,<cpu number>-<cpu number>
3974         irqfixup        [HW]
3975                         When an interrupt is not handled search all handlers
3976                         for it. Intended to get systems with badly broken
3977 diff -Nur linux-4.4.46.orig/Documentation/sysrq.txt linux-4.4.46/Documentation/sysrq.txt
3978 --- linux-4.4.46.orig/Documentation/sysrq.txt   2017-02-01 08:31:11.000000000 +0100
3979 +++ linux-4.4.46/Documentation/sysrq.txt        2017-02-03 17:18:05.623414168 +0100
3980 @@ -59,10 +59,17 @@
3981  On other - If you know of the key combos for other architectures, please
3982             let me know so I can add them to this section.
3984 -On all -  write a character to /proc/sysrq-trigger.  e.g.:
3986 +On all -  write a character to /proc/sysrq-trigger, e.g.:
3987                 echo t > /proc/sysrq-trigger
3989 +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
3990 +               echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
3991 +        Send an ICMP echo request with this pattern plus the particular
3992 +        SysRq command key. Example:
3993 +               # ping -c1 -s57 -p0102030468
3994 +        will trigger the SysRq-H (help) command.
3997  *  What are the 'command' keys?
3998  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3999  'b'     - Will immediately reboot the system without syncing or unmounting
4000 diff -Nur linux-4.4.46.orig/Documentation/trace/histograms.txt linux-4.4.46/Documentation/trace/histograms.txt
4001 --- linux-4.4.46.orig/Documentation/trace/histograms.txt        1970-01-01 01:00:00.000000000 +0100
4002 +++ linux-4.4.46/Documentation/trace/histograms.txt     2017-02-03 17:18:05.623414168 +0100
4003 @@ -0,0 +1,186 @@
4004 +               Using the Linux Kernel Latency Histograms
4007 +This document gives a short explanation how to enable, configure and use
4008 +latency histograms. Latency histograms are primarily relevant in the
4009 +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
4010 +and are used in the quality management of the Linux real-time
4011 +capabilities.
4014 +* Purpose of latency histograms
4016 +A latency histogram continuously accumulates the frequencies of latency
4017 +data. There are two types of histograms
4018 +- potential sources of latencies
4019 +- effective latencies
4022 +* Potential sources of latencies
4024 +Potential sources of latencies are code segments where interrupts,
4025 +preemption or both are disabled (aka critical sections). To create
4026 +histograms of potential sources of latency, the kernel stores the time
4027 +stamp at the start of a critical section, determines the time elapsed
4028 +when the end of the section is reached, and increments the frequency
4029 +counter of that latency value - irrespective of whether any concurrently
4030 +running process is affected by latency or not.
4031 +- Configuration items (in the Kernel hacking/Tracers submenu)
4032 +  CONFIG_INTERRUPT_OFF_LATENCY
4033 +  CONFIG_PREEMPT_OFF_LATENCY
4036 +* Effective latencies
4038 +Effective latencies are actually occurring during wakeup of a process. To
4039 +determine effective latencies, the kernel stores the time stamp when a
4040 +process is scheduled to be woken up, and determines the duration of the
4041 +wakeup time shortly before control is passed over to this process. Note
4042 +that the apparent latency in user space may be somewhat longer, since the
4043 +process may be interrupted after control is passed over to it but before
4044 +the execution in user space takes place. Simply measuring the interval
4045 +between enqueuing and wakeup may also not be appropriate in cases when a
4046 +process is scheduled as a result of a timer expiration. The timer may have
4047 +missed its deadline, e.g. due to disabled interrupts, but this latency
4048 +would not be registered. Therefore, the offsets of missed timers are
4049 +recorded in a separate histogram. If both wakeup latency and missed timer
4050 +offsets are configured and enabled, a third histogram may be enabled that
4051 +records the overall latency as a sum of the timer latency, if any, and the
4052 +wakeup latency. This histogram is called "timerandwakeup".
4053 +- Configuration items (in the Kernel hacking/Tracers submenu)
4054 +  CONFIG_WAKEUP_LATENCY
4055 +  CONFIG_MISSED_TIMER_OFSETS
4058 +* Usage
4060 +The interface to the administration of the latency histograms is located
4061 +in the debugfs file system. To mount it, either enter
4063 +mount -t sysfs nodev /sys
4064 +mount -t debugfs nodev /sys/kernel/debug
4066 +from shell command line level, or add
4068 +nodev  /sys                    sysfs   defaults        0 0
4069 +nodev  /sys/kernel/debug       debugfs defaults        0 0
4071 +to the file /etc/fstab. All latency histogram related files are then
4072 +available in the directory /sys/kernel/debug/tracing/latency_hist. A
4073 +particular histogram type is enabled by writing non-zero to the related
4074 +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
4075 +Select "preemptirqsoff" for the histograms of potential sources of
4076 +latencies and "wakeup" for histograms of effective latencies etc. The
4077 +histogram data - one per CPU - are available in the files
4079 +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
4080 +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
4081 +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
4082 +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
4083 +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
4084 +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
4085 +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
4087 +The histograms are reset by writing non-zero to the file "reset" in a
4088 +particular latency directory. To reset all latency data, use
4090 +#!/bin/sh
4092 +TRACINGDIR=/sys/kernel/debug/tracing
4093 +HISTDIR=$TRACINGDIR/latency_hist
4095 +if test -d $HISTDIR
4096 +then
4097 +  cd $HISTDIR
4098 +  for i in `find . | grep /reset$`
4099 +  do
4100 +    echo 1 >$i
4101 +  done
4105 +* Data format
4107 +Latency data are stored with a resolution of one microsecond. The
4108 +maximum latency is 10,240 microseconds. The data are only valid, if the
4109 +overflow register is empty. Every output line contains the latency in
4110 +microseconds in the first row and the number of samples in the second
4111 +row. To display only lines with a positive latency count, use, for
4112 +example,
4114 +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
4116 +#Minimum latency: 0 microseconds.
4117 +#Average latency: 0 microseconds.
4118 +#Maximum latency: 25 microseconds.
4119 +#Total samples: 3104770694
4120 +#There are 0 samples greater or equal than 10240 microseconds
4121 +#usecs          samples
4122 +    0        2984486876
4123 +    1          49843506
4124 +    2          58219047
4125 +    3           5348126
4126 +    4           2187960
4127 +    5           3388262
4128 +    6            959289
4129 +    7            208294
4130 +    8             40420
4131 +    9              4485
4132 +   10             14918
4133 +   11             18340
4134 +   12             25052
4135 +   13             19455
4136 +   14              5602
4137 +   15               969
4138 +   16                47
4139 +   17                18
4140 +   18                14
4141 +   19                 1
4142 +   20                 3
4143 +   21                 2
4144 +   22                 5
4145 +   23                 2
4146 +   25                 1
4149 +* Wakeup latency of a selected process
4151 +To only collect wakeup latency data of a particular process, write the
4152 +PID of the requested process to
4154 +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
4156 +PIDs are not considered if this variable is set to 0.
4159 +* Details of the process with the highest wakeup latency so far
4161 +Selected data of the process that suffered from the highest wakeup
4162 +latency that occurred in a particular CPU are available in the file
4164 +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
4166 +In addition, other relevant system data at the time when the
4167 +latency occurred are given.
4169 +The format of the data is (all in one line):
4170 +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
4171 +<- <PID> <Priority> <Command> <Timestamp>
4173 +The value of <Timeroffset> is only relevant in the combined timer
4174 +and wakeup latency recording. In the wakeup recording, it is
4175 +always 0, in the missed_timer_offsets recording, it is the same
4176 +as <Latency>.
4178 +When retrospectively searching for the origin of a latency and
4179 +tracing was not enabled, it may be helpful to know the name and
4180 +some basic data of the task that (finally) was switching to the
4181 +late real-time task. In addition to the victim's data, the
4182 +data of the possible culprit are therefore displayed after the
4183 +"<-" symbol.
4185 +Finally, the timestamp of the time when the latency occurred
4186 +in <seconds>.<microseconds> after the most recent system boot
4187 +is provided.
4189 +These data are also reset when the wakeup histogram is reset.
4190 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/acglobal.h linux-4.4.46/drivers/acpi/acpica/acglobal.h
4191 --- linux-4.4.46.orig/drivers/acpi/acpica/acglobal.h    2017-02-01 08:31:11.000000000 +0100
4192 +++ linux-4.4.46/drivers/acpi/acpica/acglobal.h 2017-02-03 17:18:05.671416021 +0100
4193 @@ -116,7 +116,7 @@
4194   * interrupt level
4195   */
4196  ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
4197 -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock);    /* For ACPI H/W except GPE registers */
4198 +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock);        /* For ACPI H/W except GPE registers */
4199  ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
4201  /* Mutex for _OSI support */
4202 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/hwregs.c linux-4.4.46/drivers/acpi/acpica/hwregs.c
4203 --- linux-4.4.46.orig/drivers/acpi/acpica/hwregs.c      2017-02-01 08:31:11.000000000 +0100
4204 +++ linux-4.4.46/drivers/acpi/acpica/hwregs.c   2017-02-03 17:18:05.671416021 +0100
4205 @@ -269,14 +269,14 @@
4206                           ACPI_BITMASK_ALL_FIXED_STATUS,
4207                           ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
4209 -       lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
4210 +       raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
4212         /* Clear the fixed events in PM1 A/B */
4214         status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
4215                                         ACPI_BITMASK_ALL_FIXED_STATUS);
4217 -       acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
4218 +       raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
4220         if (ACPI_FAILURE(status)) {
4221                 goto exit;
4222 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/hwxface.c linux-4.4.46/drivers/acpi/acpica/hwxface.c
4223 --- linux-4.4.46.orig/drivers/acpi/acpica/hwxface.c     2017-02-01 08:31:11.000000000 +0100
4224 +++ linux-4.4.46/drivers/acpi/acpica/hwxface.c  2017-02-03 17:18:05.671416021 +0100
4225 @@ -374,7 +374,7 @@
4226                 return_ACPI_STATUS(AE_BAD_PARAMETER);
4227         }
4229 -       lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
4230 +       raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
4232         /*
4233          * At this point, we know that the parent register is one of the
4234 @@ -435,7 +435,7 @@
4236  unlock_and_exit:
4238 -       acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
4239 +       raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
4240         return_ACPI_STATUS(status);
4243 diff -Nur linux-4.4.46.orig/drivers/acpi/acpica/utmutex.c linux-4.4.46/drivers/acpi/acpica/utmutex.c
4244 --- linux-4.4.46.orig/drivers/acpi/acpica/utmutex.c     2017-02-01 08:31:11.000000000 +0100
4245 +++ linux-4.4.46/drivers/acpi/acpica/utmutex.c  2017-02-03 17:18:05.671416021 +0100
4246 @@ -88,7 +88,7 @@
4247                 return_ACPI_STATUS (status);
4248         }
4250 -       status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
4251 +       status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
4252         if (ACPI_FAILURE (status)) {
4253                 return_ACPI_STATUS (status);
4254         }
4255 @@ -156,7 +156,7 @@
4256         /* Delete the spinlocks */
4258         acpi_os_delete_lock(acpi_gbl_gpe_lock);
4259 -       acpi_os_delete_lock(acpi_gbl_hardware_lock);
4260 +       acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
4261         acpi_os_delete_lock(acpi_gbl_reference_count_lock);
4263         /* Delete the reader/writer lock */
4264 diff -Nur linux-4.4.46.orig/drivers/ata/libata-sff.c linux-4.4.46/drivers/ata/libata-sff.c
4265 --- linux-4.4.46.orig/drivers/ata/libata-sff.c  2017-02-01 08:31:11.000000000 +0100
4266 +++ linux-4.4.46/drivers/ata/libata-sff.c       2017-02-03 17:18:05.671416021 +0100
4267 @@ -678,9 +678,9 @@
4268         unsigned long flags;
4269         unsigned int consumed;
4271 -       local_irq_save(flags);
4272 +       local_irq_save_nort(flags);
4273         consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
4274 -       local_irq_restore(flags);
4275 +       local_irq_restore_nort(flags);
4277         return consumed;
4279 @@ -719,7 +719,7 @@
4280                 unsigned long flags;
4282                 /* FIXME: use a bounce buffer */
4283 -               local_irq_save(flags);
4284 +               local_irq_save_nort(flags);
4285                 buf = kmap_atomic(page);
4287                 /* do the actual data transfer */
4288 @@ -727,7 +727,7 @@
4289                                        do_write);
4291                 kunmap_atomic(buf);
4292 -               local_irq_restore(flags);
4293 +               local_irq_restore_nort(flags);
4294         } else {
4295                 buf = page_address(page);
4296                 ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
4297 @@ -864,7 +864,7 @@
4298                 unsigned long flags;
4300                 /* FIXME: use bounce buffer */
4301 -               local_irq_save(flags);
4302 +               local_irq_save_nort(flags);
4303                 buf = kmap_atomic(page);
4305                 /* do the actual data transfer */
4306 @@ -872,7 +872,7 @@
4307                                                                 count, rw);
4309                 kunmap_atomic(buf);
4310 -               local_irq_restore(flags);
4311 +               local_irq_restore_nort(flags);
4312         } else {
4313                 buf = page_address(page);
4314                 consumed = ap->ops->sff_data_xfer(dev,  buf + offset,
4315 diff -Nur linux-4.4.46.orig/drivers/block/zram/zram_drv.c linux-4.4.46/drivers/block/zram/zram_drv.c
4316 --- linux-4.4.46.orig/drivers/block/zram/zram_drv.c     2017-02-01 08:31:11.000000000 +0100
4317 +++ linux-4.4.46/drivers/block/zram/zram_drv.c  2017-02-03 17:18:05.671416021 +0100
4318 @@ -520,6 +520,8 @@
4319                 goto out_error;
4320         }
4322 +       zram_meta_init_table_locks(meta, disksize);
4324         return meta;
4326  out_error:
4327 @@ -568,12 +570,12 @@
4328         unsigned long handle;
4329         size_t size;
4331 -       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4332 +       zram_lock_table(&meta->table[index]);
4333         handle = meta->table[index].handle;
4334         size = zram_get_obj_size(meta, index);
4336         if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
4337 -               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4338 +               zram_unlock_table(&meta->table[index]);
4339                 clear_page(mem);
4340                 return 0;
4341         }
4342 @@ -584,7 +586,7 @@
4343         else
4344                 ret = zcomp_decompress(zram->comp, cmem, size, mem);
4345         zs_unmap_object(meta->mem_pool, handle);
4346 -       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4347 +       zram_unlock_table(&meta->table[index]);
4349         /* Should NEVER happen. Return bio error if it does. */
4350         if (unlikely(ret)) {
4351 @@ -604,14 +606,14 @@
4352         struct zram_meta *meta = zram->meta;
4353         page = bvec->bv_page;
4355 -       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4356 +       zram_lock_table(&meta->table[index]);
4357         if (unlikely(!meta->table[index].handle) ||
4358                         zram_test_flag(meta, index, ZRAM_ZERO)) {
4359 -               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4360 +               zram_unlock_table(&meta->table[index]);
4361                 handle_zero_page(bvec);
4362                 return 0;
4363         }
4364 -       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4365 +       zram_unlock_table(&meta->table[index]);
4367         if (is_partial_io(bvec))
4368                 /* Use  a temporary buffer to decompress the page */
4369 @@ -689,10 +691,10 @@
4370                 if (user_mem)
4371                         kunmap_atomic(user_mem);
4372                 /* Free memory associated with this sector now. */
4373 -               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4374 +               zram_lock_table(&meta->table[index]);
4375                 zram_free_page(zram, index);
4376                 zram_set_flag(meta, index, ZRAM_ZERO);
4377 -               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4378 +               zram_unlock_table(&meta->table[index]);
4380                 atomic64_inc(&zram->stats.zero_pages);
4381                 ret = 0;
4382 @@ -752,12 +754,12 @@
4383          * Free memory associated with this sector
4384          * before overwriting unused sectors.
4385          */
4386 -       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4387 +       zram_lock_table(&meta->table[index]);
4388         zram_free_page(zram, index);
4390         meta->table[index].handle = handle;
4391         zram_set_obj_size(meta, index, clen);
4392 -       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4393 +       zram_unlock_table(&meta->table[index]);
4395         /* Update stats */
4396         atomic64_add(clen, &zram->stats.compr_data_size);
4397 @@ -800,9 +802,9 @@
4398         }
4400         while (n >= PAGE_SIZE) {
4401 -               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4402 +               zram_lock_table(&meta->table[index]);
4403                 zram_free_page(zram, index);
4404 -               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4405 +               zram_unlock_table(&meta->table[index]);
4406                 atomic64_inc(&zram->stats.notify_free);
4407                 index++;
4408                 n -= PAGE_SIZE;
4409 @@ -928,9 +930,9 @@
4410         zram = bdev->bd_disk->private_data;
4411         meta = zram->meta;
4413 -       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
4414 +       zram_lock_table(&meta->table[index]);
4415         zram_free_page(zram, index);
4416 -       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
4417 +       zram_unlock_table(&meta->table[index]);
4418         atomic64_inc(&zram->stats.notify_free);
4421 diff -Nur linux-4.4.46.orig/drivers/block/zram/zram_drv.h linux-4.4.46/drivers/block/zram/zram_drv.h
4422 --- linux-4.4.46.orig/drivers/block/zram/zram_drv.h     2017-02-01 08:31:11.000000000 +0100
4423 +++ linux-4.4.46/drivers/block/zram/zram_drv.h  2017-02-03 17:18:05.671416021 +0100
4424 @@ -72,6 +72,9 @@
4425  struct zram_table_entry {
4426         unsigned long handle;
4427         unsigned long value;
4428 +#ifdef CONFIG_PREEMPT_RT_BASE
4429 +       spinlock_t lock;
4430 +#endif
4431  };
4433  struct zram_stats {
4434 @@ -119,4 +122,42 @@
4435          */
4436         bool claim; /* Protected by bdev->bd_mutex */
4437  };
4438 +
4439 +#ifndef CONFIG_PREEMPT_RT_BASE
4440 +static inline void zram_lock_table(struct zram_table_entry *table)
4441 +{
4442 +       bit_spin_lock(ZRAM_ACCESS, &table->value);
4443 +}
4444 +
4445 +static inline void zram_unlock_table(struct zram_table_entry *table)
4446 +{
4447 +       bit_spin_unlock(ZRAM_ACCESS, &table->value);
4448 +}
4449 +
4450 +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
4451 +#else /* CONFIG_PREEMPT_RT_BASE */
4452 +static inline void zram_lock_table(struct zram_table_entry *table)
4453 +{
4454 +       spin_lock(&table->lock);
4455 +       __set_bit(ZRAM_ACCESS, &table->value);
4456 +}
4457 +
4458 +static inline void zram_unlock_table(struct zram_table_entry *table)
4459 +{
4460 +       __clear_bit(ZRAM_ACCESS, &table->value);
4461 +       spin_unlock(&table->lock);
4462 +}
4463 +
4464 +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
4465 +{
4466 +        size_t num_pages = disksize >> PAGE_SHIFT;
4467 +        size_t index;
4468 +
4469 +        for (index = 0; index < num_pages; index++) {
4470 +               spinlock_t *lock = &meta->table[index].lock;
4471 +               spin_lock_init(lock);
4472 +        }
4473 +}
4474 +#endif /* CONFIG_PREEMPT_RT_BASE */
4475 +
4476  #endif
4477 diff -Nur linux-4.4.46.orig/drivers/char/random.c linux-4.4.46/drivers/char/random.c
4478 --- linux-4.4.46.orig/drivers/char/random.c     2017-02-01 08:31:11.000000000 +0100
4479 +++ linux-4.4.46/drivers/char/random.c  2017-02-03 17:18:05.671416021 +0100
4480 @@ -799,8 +799,6 @@
4481         } sample;
4482         long delta, delta2, delta3;
4484 -       preempt_disable();
4486         sample.jiffies = jiffies;
4487         sample.cycles = random_get_entropy();
4488         sample.num = num;
4489 @@ -841,7 +839,6 @@
4490                  */
4491                 credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
4492         }
4493 -       preempt_enable();
4496  void add_input_randomness(unsigned int type, unsigned int code,
4497 @@ -894,28 +891,27 @@
4498         return *(ptr + f->reg_idx++);
4501 -void add_interrupt_randomness(int irq, int irq_flags)
4502 +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
4504         struct entropy_store    *r;
4505         struct fast_pool        *fast_pool = this_cpu_ptr(&irq_randomness);
4506 -       struct pt_regs          *regs = get_irq_regs();
4507         unsigned long           now = jiffies;
4508         cycles_t                cycles = random_get_entropy();
4509         __u32                   c_high, j_high;
4510 -       __u64                   ip;
4511         unsigned long           seed;
4512         int                     credit = 0;
4514         if (cycles == 0)
4515 -               cycles = get_reg(fast_pool, regs);
4516 +               cycles = get_reg(fast_pool, NULL);
4517         c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
4518         j_high = (sizeof(now) > 4) ? now >> 32 : 0;
4519         fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
4520         fast_pool->pool[1] ^= now ^ c_high;
4521 -       ip = regs ? instruction_pointer(regs) : _RET_IP_;
4522 +       if (!ip)
4523 +               ip = _RET_IP_;
4524         fast_pool->pool[2] ^= ip;
4525         fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
4526 -               get_reg(fast_pool, regs);
4527 +               get_reg(fast_pool, NULL);
4529         fast_mix(fast_pool);
4530         add_interrupt_bench(cycles);
4531 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-generated.c linux-4.4.46/drivers/clk/at91/clk-generated.c
4532 --- linux-4.4.46.orig/drivers/clk/at91/clk-generated.c  2017-02-01 08:31:11.000000000 +0100
4533 +++ linux-4.4.46/drivers/clk/at91/clk-generated.c       2017-02-03 17:18:05.671416021 +0100
4534 @@ -15,8 +15,8 @@
4535  #include <linux/clkdev.h>
4536  #include <linux/clk/at91_pmc.h>
4537  #include <linux/of.h>
4538 -#include <linux/of_address.h>
4539 -#include <linux/io.h>
4540 +#include <linux/mfd/syscon.h>
4541 +#include <linux/regmap.h>
4543  #include "pmc.h"
4545 @@ -28,8 +28,9 @@
4547  struct clk_generated {
4548         struct clk_hw hw;
4549 -       struct at91_pmc *pmc;
4550 +       struct regmap *regmap;
4551         struct clk_range range;
4552 +       spinlock_t *lock;
4553         u32 id;
4554         u32 gckdiv;
4555         u8 parent_id;
4556 @@ -41,49 +42,52 @@
4557  static int clk_generated_enable(struct clk_hw *hw)
4559         struct clk_generated *gck = to_clk_generated(hw);
4560 -       struct at91_pmc *pmc = gck->pmc;
4561 -       u32 tmp;
4562 +       unsigned long flags;
4564         pr_debug("GCLK: %s, gckdiv = %d, parent id = %d\n",
4565                  __func__, gck->gckdiv, gck->parent_id);
4567 -       pmc_lock(pmc);
4568 -       pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4569 -       tmp = pmc_read(pmc, AT91_PMC_PCR) &
4570 -                       ~(AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK);
4571 -       pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_GCKCSS(gck->parent_id)
4572 -                                        | AT91_PMC_PCR_CMD
4573 -                                        | AT91_PMC_PCR_GCKDIV(gck->gckdiv)
4574 -                                        | AT91_PMC_PCR_GCKEN);
4575 -       pmc_unlock(pmc);
4576 +       spin_lock_irqsave(gck->lock, flags);
4577 +       regmap_write(gck->regmap, AT91_PMC_PCR,
4578 +                    (gck->id & AT91_PMC_PCR_PID_MASK));
4579 +       regmap_update_bits(gck->regmap, AT91_PMC_PCR,
4580 +                          AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK |
4581 +                          AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
4582 +                          AT91_PMC_PCR_GCKCSS(gck->parent_id) |
4583 +                          AT91_PMC_PCR_CMD |
4584 +                          AT91_PMC_PCR_GCKDIV(gck->gckdiv) |
4585 +                          AT91_PMC_PCR_GCKEN);
4586 +       spin_unlock_irqrestore(gck->lock, flags);
4587         return 0;
4590  static void clk_generated_disable(struct clk_hw *hw)
4592         struct clk_generated *gck = to_clk_generated(hw);
4593 -       struct at91_pmc *pmc = gck->pmc;
4594 -       u32 tmp;
4595 +       unsigned long flags;
4597 -       pmc_lock(pmc);
4598 -       pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4599 -       tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_GCKEN;
4600 -       pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
4601 -       pmc_unlock(pmc);
4602 +       spin_lock_irqsave(gck->lock, flags);
4603 +       regmap_write(gck->regmap, AT91_PMC_PCR,
4604 +                    (gck->id & AT91_PMC_PCR_PID_MASK));
4605 +       regmap_update_bits(gck->regmap, AT91_PMC_PCR,
4606 +                          AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
4607 +                          AT91_PMC_PCR_CMD);
4608 +       spin_unlock_irqrestore(gck->lock, flags);
4611  static int clk_generated_is_enabled(struct clk_hw *hw)
4613         struct clk_generated *gck = to_clk_generated(hw);
4614 -       struct at91_pmc *pmc = gck->pmc;
4615 -       int ret;
4616 +       unsigned long flags;
4617 +       unsigned int status;
4619 -       pmc_lock(pmc);
4620 -       pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4621 -       ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_GCKEN);
4622 -       pmc_unlock(pmc);
4623 +       spin_lock_irqsave(gck->lock, flags);
4624 +       regmap_write(gck->regmap, AT91_PMC_PCR,
4625 +                    (gck->id & AT91_PMC_PCR_PID_MASK));
4626 +       regmap_read(gck->regmap, AT91_PMC_PCR, &status);
4627 +       spin_unlock_irqrestore(gck->lock, flags);
4629 -       return ret;
4630 +       return status & AT91_PMC_PCR_GCKEN ? 1 : 0;
4633  static unsigned long
4634 @@ -214,13 +218,14 @@
4635   */
4636  static void clk_generated_startup(struct clk_generated *gck)
4638 -       struct at91_pmc *pmc = gck->pmc;
4639         u32 tmp;
4640 +       unsigned long flags;
4642 -       pmc_lock(pmc);
4643 -       pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
4644 -       tmp = pmc_read(pmc, AT91_PMC_PCR);
4645 -       pmc_unlock(pmc);
4646 +       spin_lock_irqsave(gck->lock, flags);
4647 +       regmap_write(gck->regmap, AT91_PMC_PCR,
4648 +                    (gck->id & AT91_PMC_PCR_PID_MASK));
4649 +       regmap_read(gck->regmap, AT91_PMC_PCR, &tmp);
4650 +       spin_unlock_irqrestore(gck->lock, flags);
4652         gck->parent_id = (tmp & AT91_PMC_PCR_GCKCSS_MASK)
4653                                         >> AT91_PMC_PCR_GCKCSS_OFFSET;
4654 @@ -229,8 +234,8 @@
4657  static struct clk * __init
4658 -at91_clk_register_generated(struct at91_pmc *pmc, const char *name,
4659 -                           const char **parent_names, u8 num_parents,
4660 +at91_clk_register_generated(struct regmap *regmap,  spinlock_t *lock, const char
4661 +                           *name, const char **parent_names, u8 num_parents,
4662                             u8 id, const struct clk_range *range)
4664         struct clk_generated *gck;
4665 @@ -249,7 +254,8 @@
4667         gck->id = id;
4668         gck->hw.init = &init;
4669 -       gck->pmc = pmc;
4670 +       gck->regmap = regmap;
4671 +       gck->lock = lock;
4672         gck->range = *range;
4674         clk = clk_register(NULL, &gck->hw);
4675 @@ -261,8 +267,7 @@
4676         return clk;
4679 -void __init of_sama5d2_clk_generated_setup(struct device_node *np,
4680 -                                          struct at91_pmc *pmc)
4681 +void __init of_sama5d2_clk_generated_setup(struct device_node *np)
4683         int num;
4684         u32 id;
4685 @@ -272,6 +277,7 @@
4686         const char *parent_names[GENERATED_SOURCE_MAX];
4687         struct device_node *gcknp;
4688         struct clk_range range = CLK_RANGE(0, 0);
4689 +       struct regmap *regmap;
4691         num_parents = of_clk_get_parent_count(np);
4692         if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX)
4693 @@ -283,6 +289,10 @@
4694         if (!num || num > PERIPHERAL_MAX)
4695                 return;
4697 +       regmap = syscon_node_to_regmap(of_get_parent(np));
4698 +       if (IS_ERR(regmap))
4699 +               return;
4701         for_each_child_of_node(np, gcknp) {
4702                 if (of_property_read_u32(gcknp, "reg", &id))
4703                         continue;
4704 @@ -296,11 +306,14 @@
4705                 of_at91_get_clk_range(gcknp, "atmel,clk-output-range",
4706                                       &range);
4708 -               clk = at91_clk_register_generated(pmc, name, parent_names,
4709 -                                                 num_parents, id, &range);
4710 +               clk = at91_clk_register_generated(regmap, &pmc_pcr_lock, name,
4711 +                                                 parent_names, num_parents,
4712 +                                                 id, &range);
4713                 if (IS_ERR(clk))
4714                         continue;
4716                 of_clk_add_provider(gcknp, of_clk_src_simple_get, clk);
4717         }
4719 +CLK_OF_DECLARE(of_sama5d2_clk_generated_setup, "atmel,sama5d2-clk-generated",
4720 +              of_sama5d2_clk_generated_setup);
4721 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-h32mx.c linux-4.4.46/drivers/clk/at91/clk-h32mx.c
4722 --- linux-4.4.46.orig/drivers/clk/at91/clk-h32mx.c      2017-02-01 08:31:11.000000000 +0100
4723 +++ linux-4.4.46/drivers/clk/at91/clk-h32mx.c   2017-02-03 17:18:05.671416021 +0100
4724 @@ -15,15 +15,9 @@
4725  #include <linux/clk-provider.h>
4726  #include <linux/clkdev.h>
4727  #include <linux/clk/at91_pmc.h>
4728 -#include <linux/delay.h>
4729  #include <linux/of.h>
4730 -#include <linux/of_address.h>
4731 -#include <linux/of_irq.h>
4732 -#include <linux/io.h>
4733 -#include <linux/interrupt.h>
4734 -#include <linux/irq.h>
4735 -#include <linux/sched.h>
4736 -#include <linux/wait.h>
4737 +#include <linux/regmap.h>
4738 +#include <linux/mfd/syscon.h>
4740  #include "pmc.h"
4742 @@ -31,7 +25,7 @@
4744  struct clk_sama5d4_h32mx {
4745         struct clk_hw hw;
4746 -       struct at91_pmc *pmc;
4747 +       struct regmap *regmap;
4748  };
4750  #define to_clk_sama5d4_h32mx(hw) container_of(hw, struct clk_sama5d4_h32mx, hw)
4751 @@ -40,8 +34,10 @@
4752                                                  unsigned long parent_rate)
4754         struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
4755 +       unsigned int mckr;
4757 -       if (pmc_read(h32mxclk->pmc, AT91_PMC_MCKR) & AT91_PMC_H32MXDIV)
4758 +       regmap_read(h32mxclk->regmap, AT91_PMC_MCKR, &mckr);
4759 +       if (mckr & AT91_PMC_H32MXDIV)
4760                 return parent_rate / 2;
4762         if (parent_rate > H32MX_MAX_FREQ)
4763 @@ -70,18 +66,16 @@
4764                                     unsigned long parent_rate)
4766         struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
4767 -       struct at91_pmc *pmc = h32mxclk->pmc;
4768 -       u32 tmp;
4769 +       u32 mckr = 0;
4771         if (parent_rate != rate && (parent_rate / 2) != rate)
4772                 return -EINVAL;
4774 -       pmc_lock(pmc);
4775 -       tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_H32MXDIV;
4776         if ((parent_rate / 2) == rate)
4777 -               tmp |= AT91_PMC_H32MXDIV;
4778 -       pmc_write(pmc, AT91_PMC_MCKR, tmp);
4779 -       pmc_unlock(pmc);
4780 +               mckr = AT91_PMC_H32MXDIV;
4782 +       regmap_update_bits(h32mxclk->regmap, AT91_PMC_MCKR,
4783 +                          AT91_PMC_H32MXDIV, mckr);
4785         return 0;
4787 @@ -92,14 +86,18 @@
4788         .set_rate = clk_sama5d4_h32mx_set_rate,
4789  };
4791 -void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
4792 -                                    struct at91_pmc *pmc)
4793 +static void __init of_sama5d4_clk_h32mx_setup(struct device_node *np)
4795         struct clk_sama5d4_h32mx *h32mxclk;
4796         struct clk_init_data init;
4797         const char *parent_name;
4798 +       struct regmap *regmap;
4799         struct clk *clk;
4801 +       regmap = syscon_node_to_regmap(of_get_parent(np));
4802 +       if (IS_ERR(regmap))
4803 +               return;
4805         h32mxclk = kzalloc(sizeof(*h32mxclk), GFP_KERNEL);
4806         if (!h32mxclk)
4807                 return;
4808 @@ -113,7 +111,7 @@
4809         init.flags = CLK_SET_RATE_GATE;
4811         h32mxclk->hw.init = &init;
4812 -       h32mxclk->pmc = pmc;
4813 +       h32mxclk->regmap = regmap;
4815         clk = clk_register(NULL, &h32mxclk->hw);
4816         if (IS_ERR(clk)) {
4817 @@ -123,3 +121,5 @@
4819         of_clk_add_provider(np, of_clk_src_simple_get, clk);
4821 +CLK_OF_DECLARE(of_sama5d4_clk_h32mx_setup, "atmel,sama5d4-clk-h32mx",
4822 +              of_sama5d4_clk_h32mx_setup);
4823 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-main.c linux-4.4.46/drivers/clk/at91/clk-main.c
4824 --- linux-4.4.46.orig/drivers/clk/at91/clk-main.c       2017-02-01 08:31:11.000000000 +0100
4825 +++ linux-4.4.46/drivers/clk/at91/clk-main.c    2017-02-03 17:18:05.671416021 +0100
4826 @@ -13,13 +13,8 @@
4827  #include <linux/clk/at91_pmc.h>
4828  #include <linux/delay.h>
4829  #include <linux/of.h>
4830 -#include <linux/of_address.h>
4831 -#include <linux/of_irq.h>
4832 -#include <linux/io.h>
4833 -#include <linux/interrupt.h>
4834 -#include <linux/irq.h>
4835 -#include <linux/sched.h>
4836 -#include <linux/wait.h>
4837 +#include <linux/mfd/syscon.h>
4838 +#include <linux/regmap.h>
4840  #include "pmc.h"
4842 @@ -34,18 +29,14 @@
4844  struct clk_main_osc {
4845         struct clk_hw hw;
4846 -       struct at91_pmc *pmc;
4847 -       unsigned int irq;
4848 -       wait_queue_head_t wait;
4849 +       struct regmap *regmap;
4850  };
4852  #define to_clk_main_osc(hw) container_of(hw, struct clk_main_osc, hw)
4854  struct clk_main_rc_osc {
4855         struct clk_hw hw;
4856 -       struct at91_pmc *pmc;
4857 -       unsigned int irq;
4858 -       wait_queue_head_t wait;
4859 +       struct regmap *regmap;
4860         unsigned long frequency;
4861         unsigned long accuracy;
4862  };
4863 @@ -54,51 +45,47 @@
4865  struct clk_rm9200_main {
4866         struct clk_hw hw;
4867 -       struct at91_pmc *pmc;
4868 +       struct regmap *regmap;
4869  };
4871  #define to_clk_rm9200_main(hw) container_of(hw, struct clk_rm9200_main, hw)
4873  struct clk_sam9x5_main {
4874         struct clk_hw hw;
4875 -       struct at91_pmc *pmc;
4876 -       unsigned int irq;
4877 -       wait_queue_head_t wait;
4878 +       struct regmap *regmap;
4879         u8 parent;
4880  };
4882  #define to_clk_sam9x5_main(hw) container_of(hw, struct clk_sam9x5_main, hw)
4884 -static irqreturn_t clk_main_osc_irq_handler(int irq, void *dev_id)
4885 +static inline bool clk_main_osc_ready(struct regmap *regmap)
4887 -       struct clk_main_osc *osc = dev_id;
4888 +       unsigned int status;
4890 -       wake_up(&osc->wait);
4891 -       disable_irq_nosync(osc->irq);
4892 +       regmap_read(regmap, AT91_PMC_SR, &status);
4894 -       return IRQ_HANDLED;
4895 +       return status & AT91_PMC_MOSCS;
4898  static int clk_main_osc_prepare(struct clk_hw *hw)
4900         struct clk_main_osc *osc = to_clk_main_osc(hw);
4901 -       struct at91_pmc *pmc = osc->pmc;
4902 +       struct regmap *regmap = osc->regmap;
4903         u32 tmp;
4905 -       tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
4906 +       regmap_read(regmap, AT91_CKGR_MOR, &tmp);
4907 +       tmp &= ~MOR_KEY_MASK;
4909         if (tmp & AT91_PMC_OSCBYPASS)
4910                 return 0;
4912         if (!(tmp & AT91_PMC_MOSCEN)) {
4913                 tmp |= AT91_PMC_MOSCEN | AT91_PMC_KEY;
4914 -               pmc_write(pmc, AT91_CKGR_MOR, tmp);
4915 +               regmap_write(regmap, AT91_CKGR_MOR, tmp);
4916         }
4918 -       while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS)) {
4919 -               enable_irq(osc->irq);
4920 -               wait_event(osc->wait,
4921 -                          pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS);
4922 -       }
4923 +       while (!clk_main_osc_ready(regmap))
4924 +               cpu_relax();
4926         return 0;
4928 @@ -106,9 +93,10 @@
4929  static void clk_main_osc_unprepare(struct clk_hw *hw)
4931         struct clk_main_osc *osc = to_clk_main_osc(hw);
4932 -       struct at91_pmc *pmc = osc->pmc;
4933 -       u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
4934 +       struct regmap *regmap = osc->regmap;
4935 +       u32 tmp;
4937 +       regmap_read(regmap, AT91_CKGR_MOR, &tmp);
4938         if (tmp & AT91_PMC_OSCBYPASS)
4939                 return;
4941 @@ -116,20 +104,22 @@
4942                 return;
4944         tmp &= ~(AT91_PMC_KEY | AT91_PMC_MOSCEN);
4945 -       pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
4946 +       regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
4949  static int clk_main_osc_is_prepared(struct clk_hw *hw)
4951         struct clk_main_osc *osc = to_clk_main_osc(hw);
4952 -       struct at91_pmc *pmc = osc->pmc;
4953 -       u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
4954 +       struct regmap *regmap = osc->regmap;
4955 +       u32 tmp, status;
4957 +       regmap_read(regmap, AT91_CKGR_MOR, &tmp);
4958         if (tmp & AT91_PMC_OSCBYPASS)
4959                 return 1;
4961 -       return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS) &&
4962 -                 (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN));
4963 +       regmap_read(regmap, AT91_PMC_SR, &status);
4965 +       return (status & AT91_PMC_MOSCS) && (tmp & AT91_PMC_MOSCEN);
4968  static const struct clk_ops main_osc_ops = {
4969 @@ -139,18 +129,16 @@
4970  };
4972  static struct clk * __init
4973 -at91_clk_register_main_osc(struct at91_pmc *pmc,
4974 -                          unsigned int irq,
4975 +at91_clk_register_main_osc(struct regmap *regmap,
4976                            const char *name,
4977                            const char *parent_name,
4978                            bool bypass)
4980 -       int ret;
4981         struct clk_main_osc *osc;
4982         struct clk *clk = NULL;
4983         struct clk_init_data init;
4985 -       if (!pmc || !irq || !name || !parent_name)
4986 +       if (!name || !parent_name)
4987                 return ERR_PTR(-EINVAL);
4989         osc = kzalloc(sizeof(*osc), GFP_KERNEL);
4990 @@ -164,85 +152,70 @@
4991         init.flags = CLK_IGNORE_UNUSED;
4993         osc->hw.init = &init;
4994 -       osc->pmc = pmc;
4995 -       osc->irq = irq;
4997 -       init_waitqueue_head(&osc->wait);
4998 -       irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
4999 -       ret = request_irq(osc->irq, clk_main_osc_irq_handler,
5000 -                         IRQF_TRIGGER_HIGH, name, osc);
5001 -       if (ret) {
5002 -               kfree(osc);
5003 -               return ERR_PTR(ret);
5004 -       }
5005 +       osc->regmap = regmap;
5007         if (bypass)
5008 -               pmc_write(pmc, AT91_CKGR_MOR,
5009 -                         (pmc_read(pmc, AT91_CKGR_MOR) &
5010 -                          ~(MOR_KEY_MASK | AT91_PMC_MOSCEN)) |
5011 -                         AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
5012 +               regmap_update_bits(regmap,
5013 +                                  AT91_CKGR_MOR, MOR_KEY_MASK |
5014 +                                  AT91_PMC_MOSCEN,
5015 +                                  AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
5017         clk = clk_register(NULL, &osc->hw);
5018 -       if (IS_ERR(clk)) {
5019 -               free_irq(irq, osc);
5020 +       if (IS_ERR(clk))
5021                 kfree(osc);
5022 -       }
5024         return clk;
5027 -void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np,
5028 -                                            struct at91_pmc *pmc)
5029 +static void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np)
5031         struct clk *clk;
5032 -       unsigned int irq;
5033         const char *name = np->name;
5034         const char *parent_name;
5035 +       struct regmap *regmap;
5036         bool bypass;
5038         of_property_read_string(np, "clock-output-names", &name);
5039         bypass = of_property_read_bool(np, "atmel,osc-bypass");
5040         parent_name = of_clk_get_parent_name(np, 0);
5042 -       irq = irq_of_parse_and_map(np, 0);
5043 -       if (!irq)
5044 +       regmap = syscon_node_to_regmap(of_get_parent(np));
5045 +       if (IS_ERR(regmap))
5046                 return;
5048 -       clk = at91_clk_register_main_osc(pmc, irq, name, parent_name, bypass);
5049 +       clk = at91_clk_register_main_osc(regmap, name, parent_name, bypass);
5050         if (IS_ERR(clk))
5051                 return;
5053         of_clk_add_provider(np, of_clk_src_simple_get, clk);
5055 +CLK_OF_DECLARE(at91rm9200_clk_main_osc, "atmel,at91rm9200-clk-main-osc",
5056 +              of_at91rm9200_clk_main_osc_setup);
5058 -static irqreturn_t clk_main_rc_osc_irq_handler(int irq, void *dev_id)
5059 +static bool clk_main_rc_osc_ready(struct regmap *regmap)
5061 -       struct clk_main_rc_osc *osc = dev_id;
5062 +       unsigned int status;
5064 -       wake_up(&osc->wait);
5065 -       disable_irq_nosync(osc->irq);
5066 +       regmap_read(regmap, AT91_PMC_SR, &status);
5068 -       return IRQ_HANDLED;
5069 +       return status & AT91_PMC_MOSCRCS;
5072  static int clk_main_rc_osc_prepare(struct clk_hw *hw)
5074         struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
5075 -       struct at91_pmc *pmc = osc->pmc;
5076 -       u32 tmp;
5077 +       struct regmap *regmap = osc->regmap;
5078 +       unsigned int mor;
5080 -       tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
5081 +       regmap_read(regmap, AT91_CKGR_MOR, &mor);
5083 -       if (!(tmp & AT91_PMC_MOSCRCEN)) {
5084 -               tmp |= AT91_PMC_MOSCRCEN | AT91_PMC_KEY;
5085 -               pmc_write(pmc, AT91_CKGR_MOR, tmp);
5086 -       }
5087 +       if (!(mor & AT91_PMC_MOSCRCEN))
5088 +               regmap_update_bits(regmap, AT91_CKGR_MOR,
5089 +                                  MOR_KEY_MASK | AT91_PMC_MOSCRCEN,
5090 +                                  AT91_PMC_MOSCRCEN | AT91_PMC_KEY);
5092 -       while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS)) {
5093 -               enable_irq(osc->irq);
5094 -               wait_event(osc->wait,
5095 -                          pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS);
5096 -       }
5097 +       while (!clk_main_rc_osc_ready(regmap))
5098 +               cpu_relax();
5100         return 0;
5102 @@ -250,23 +223,28 @@
5103  static void clk_main_rc_osc_unprepare(struct clk_hw *hw)
5105         struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
5106 -       struct at91_pmc *pmc = osc->pmc;
5107 -       u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
5108 +       struct regmap *regmap = osc->regmap;
5109 +       unsigned int mor;
5111 -       if (!(tmp & AT91_PMC_MOSCRCEN))
5112 +       regmap_read(regmap, AT91_CKGR_MOR, &mor);
5114 +       if (!(mor & AT91_PMC_MOSCRCEN))
5115                 return;
5117 -       tmp &= ~(MOR_KEY_MASK | AT91_PMC_MOSCRCEN);
5118 -       pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
5119 +       regmap_update_bits(regmap, AT91_CKGR_MOR,
5120 +                          MOR_KEY_MASK | AT91_PMC_MOSCRCEN, AT91_PMC_KEY);
5123  static int clk_main_rc_osc_is_prepared(struct clk_hw *hw)
5125         struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
5126 -       struct at91_pmc *pmc = osc->pmc;
5127 +       struct regmap *regmap = osc->regmap;
5128 +       unsigned int mor, status;
5130 +       regmap_read(regmap, AT91_CKGR_MOR, &mor);
5131 +       regmap_read(regmap, AT91_PMC_SR, &status);
5133 -       return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS) &&
5134 -                 (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCRCEN));
5135 +       return (mor & AT91_PMC_MOSCRCEN) && (status & AT91_PMC_MOSCRCS);
5138  static unsigned long clk_main_rc_osc_recalc_rate(struct clk_hw *hw,
5139 @@ -294,17 +272,15 @@
5140  };
5142  static struct clk * __init
5143 -at91_clk_register_main_rc_osc(struct at91_pmc *pmc,
5144 -                             unsigned int irq,
5145 +at91_clk_register_main_rc_osc(struct regmap *regmap,
5146                               const char *name,
5147                               u32 frequency, u32 accuracy)
5149 -       int ret;
5150         struct clk_main_rc_osc *osc;
5151         struct clk *clk = NULL;
5152         struct clk_init_data init;
5154 -       if (!pmc || !irq || !name || !frequency)
5155 +       if (!name || !frequency)
5156                 return ERR_PTR(-EINVAL);
5158         osc = kzalloc(sizeof(*osc), GFP_KERNEL);
5159 @@ -318,63 +294,53 @@
5160         init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
5162         osc->hw.init = &init;
5163 -       osc->pmc = pmc;
5164 -       osc->irq = irq;
5165 +       osc->regmap = regmap;
5166         osc->frequency = frequency;
5167         osc->accuracy = accuracy;
5169 -       init_waitqueue_head(&osc->wait);
5170 -       irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
5171 -       ret = request_irq(osc->irq, clk_main_rc_osc_irq_handler,
5172 -                         IRQF_TRIGGER_HIGH, name, osc);
5173 -       if (ret)
5174 -               return ERR_PTR(ret);
5176         clk = clk_register(NULL, &osc->hw);
5177 -       if (IS_ERR(clk)) {
5178 -               free_irq(irq, osc);
5179 +       if (IS_ERR(clk))
5180                 kfree(osc);
5181 -       }
5183         return clk;
5186 -void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
5187 -                                               struct at91_pmc *pmc)
5188 +static void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np)
5190         struct clk *clk;
5191 -       unsigned int irq;
5192         u32 frequency = 0;
5193         u32 accuracy = 0;
5194         const char *name = np->name;
5195 +       struct regmap *regmap;
5197         of_property_read_string(np, "clock-output-names", &name);
5198         of_property_read_u32(np, "clock-frequency", &frequency);
5199         of_property_read_u32(np, "clock-accuracy", &accuracy);
5201 -       irq = irq_of_parse_and_map(np, 0);
5202 -       if (!irq)
5203 +       regmap = syscon_node_to_regmap(of_get_parent(np));
5204 +       if (IS_ERR(regmap))
5205                 return;
5207 -       clk = at91_clk_register_main_rc_osc(pmc, irq, name, frequency,
5208 -                                           accuracy);
5209 +       clk = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy);
5210         if (IS_ERR(clk))
5211                 return;
5213         of_clk_add_provider(np, of_clk_src_simple_get, clk);
5215 +CLK_OF_DECLARE(at91sam9x5_clk_main_rc_osc, "atmel,at91sam9x5-clk-main-rc-osc",
5216 +              of_at91sam9x5_clk_main_rc_osc_setup);
5219 -static int clk_main_probe_frequency(struct at91_pmc *pmc)
5220 +static int clk_main_probe_frequency(struct regmap *regmap)
5222         unsigned long prep_time, timeout;
5223 -       u32 tmp;
5224 +       unsigned int mcfr;
5226         timeout = jiffies + usecs_to_jiffies(MAINFRDY_TIMEOUT);
5227         do {
5228                 prep_time = jiffies;
5229 -               tmp = pmc_read(pmc, AT91_CKGR_MCFR);
5230 -               if (tmp & AT91_PMC_MAINRDY)
5231 +               regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
5232 +               if (mcfr & AT91_PMC_MAINRDY)
5233                         return 0;
5234                 usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
5235         } while (time_before(prep_time, timeout));
5236 @@ -382,34 +348,37 @@
5237         return -ETIMEDOUT;
5240 -static unsigned long clk_main_recalc_rate(struct at91_pmc *pmc,
5241 +static unsigned long clk_main_recalc_rate(struct regmap *regmap,
5242                                           unsigned long parent_rate)
5244 -       u32 tmp;
5245 +       unsigned int mcfr;
5247         if (parent_rate)
5248                 return parent_rate;
5250         pr_warn("Main crystal frequency not set, using approximate value\n");
5251 -       tmp = pmc_read(pmc, AT91_CKGR_MCFR);
5252 -       if (!(tmp & AT91_PMC_MAINRDY))
5253 +       regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
5254 +       if (!(mcfr & AT91_PMC_MAINRDY))
5255                 return 0;
5257 -       return ((tmp & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
5258 +       return ((mcfr & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
5261  static int clk_rm9200_main_prepare(struct clk_hw *hw)
5263         struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
5265 -       return clk_main_probe_frequency(clkmain->pmc);
5266 +       return clk_main_probe_frequency(clkmain->regmap);
5269  static int clk_rm9200_main_is_prepared(struct clk_hw *hw)
5271         struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
5272 +       unsigned int status;
5274 +       regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status);
5276 -       return !!(pmc_read(clkmain->pmc, AT91_CKGR_MCFR) & AT91_PMC_MAINRDY);
5277 +       return status & AT91_PMC_MAINRDY ? 1 : 0;
5280  static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw,
5281 @@ -417,7 +386,7 @@
5283         struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
5285 -       return clk_main_recalc_rate(clkmain->pmc, parent_rate);
5286 +       return clk_main_recalc_rate(clkmain->regmap, parent_rate);
5289  static const struct clk_ops rm9200_main_ops = {
5290 @@ -427,7 +396,7 @@
5291  };
5293  static struct clk * __init
5294 -at91_clk_register_rm9200_main(struct at91_pmc *pmc,
5295 +at91_clk_register_rm9200_main(struct regmap *regmap,
5296                               const char *name,
5297                               const char *parent_name)
5299 @@ -435,7 +404,7 @@
5300         struct clk *clk = NULL;
5301         struct clk_init_data init;
5303 -       if (!pmc || !name)
5304 +       if (!name)
5305                 return ERR_PTR(-EINVAL);
5307         if (!parent_name)
5308 @@ -452,7 +421,7 @@
5309         init.flags = 0;
5311         clkmain->hw.init = &init;
5312 -       clkmain->pmc = pmc;
5313 +       clkmain->regmap = regmap;
5315         clk = clk_register(NULL, &clkmain->hw);
5316         if (IS_ERR(clk))
5317 @@ -461,52 +430,54 @@
5318         return clk;
5321 -void __init of_at91rm9200_clk_main_setup(struct device_node *np,
5322 -                                        struct at91_pmc *pmc)
5323 +static void __init of_at91rm9200_clk_main_setup(struct device_node *np)
5325         struct clk *clk;
5326         const char *parent_name;
5327         const char *name = np->name;
5328 +       struct regmap *regmap;
5330         parent_name = of_clk_get_parent_name(np, 0);
5331         of_property_read_string(np, "clock-output-names", &name);
5333 -       clk = at91_clk_register_rm9200_main(pmc, name, parent_name);
5334 +       regmap = syscon_node_to_regmap(of_get_parent(np));
5335 +       if (IS_ERR(regmap))
5336 +               return;
5338 +       clk = at91_clk_register_rm9200_main(regmap, name, parent_name);
5339         if (IS_ERR(clk))
5340                 return;
5342         of_clk_add_provider(np, of_clk_src_simple_get, clk);
5344 +CLK_OF_DECLARE(at91rm9200_clk_main, "atmel,at91rm9200-clk-main",
5345 +              of_at91rm9200_clk_main_setup);
5347 -static irqreturn_t clk_sam9x5_main_irq_handler(int irq, void *dev_id)
5348 +static inline bool clk_sam9x5_main_ready(struct regmap *regmap)
5350 -       struct clk_sam9x5_main *clkmain = dev_id;
5351 +       unsigned int status;
5353 -       wake_up(&clkmain->wait);
5354 -       disable_irq_nosync(clkmain->irq);
5355 +       regmap_read(regmap, AT91_PMC_SR, &status);
5357 -       return IRQ_HANDLED;
5358 +       return status & AT91_PMC_MOSCSELS ? 1 : 0;
5361  static int clk_sam9x5_main_prepare(struct clk_hw *hw)
5363         struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5364 -       struct at91_pmc *pmc = clkmain->pmc;
5365 +       struct regmap *regmap = clkmain->regmap;
5367 -       while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
5368 -               enable_irq(clkmain->irq);
5369 -               wait_event(clkmain->wait,
5370 -                          pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
5371 -       }
5372 +       while (!clk_sam9x5_main_ready(regmap))
5373 +               cpu_relax();
5375 -       return clk_main_probe_frequency(pmc);
5376 +       return clk_main_probe_frequency(regmap);
5379  static int clk_sam9x5_main_is_prepared(struct clk_hw *hw)
5381         struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5383 -       return !!(pmc_read(clkmain->pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
5384 +       return clk_sam9x5_main_ready(clkmain->regmap);
5387  static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw,
5388 @@ -514,30 +485,28 @@
5390         struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5392 -       return clk_main_recalc_rate(clkmain->pmc, parent_rate);
5393 +       return clk_main_recalc_rate(clkmain->regmap, parent_rate);
5396  static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index)
5398         struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5399 -       struct at91_pmc *pmc = clkmain->pmc;
5400 -       u32 tmp;
5401 +       struct regmap *regmap = clkmain->regmap;
5402 +       unsigned int tmp;
5404         if (index > 1)
5405                 return -EINVAL;
5407 -       tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
5408 +       regmap_read(regmap, AT91_CKGR_MOR, &tmp);
5409 +       tmp &= ~MOR_KEY_MASK;
5411         if (index && !(tmp & AT91_PMC_MOSCSEL))
5412 -               pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
5413 +               regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
5414         else if (!index && (tmp & AT91_PMC_MOSCSEL))
5415 -               pmc_write(pmc, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
5416 +               regmap_write(regmap, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
5418 -       while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
5419 -               enable_irq(clkmain->irq);
5420 -               wait_event(clkmain->wait,
5421 -                          pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
5422 -       }
5423 +       while (!clk_sam9x5_main_ready(regmap))
5424 +               cpu_relax();
5426         return 0;
5428 @@ -545,8 +514,11 @@
5429  static u8 clk_sam9x5_main_get_parent(struct clk_hw *hw)
5431         struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
5432 +       unsigned int status;
5434 +       regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
5436 -       return !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN);
5437 +       return status & AT91_PMC_MOSCEN ? 1 : 0;
5440  static const struct clk_ops sam9x5_main_ops = {
5441 @@ -558,18 +530,17 @@
5442  };
5444  static struct clk * __init
5445 -at91_clk_register_sam9x5_main(struct at91_pmc *pmc,
5446 -                             unsigned int irq,
5447 +at91_clk_register_sam9x5_main(struct regmap *regmap,
5448                               const char *name,
5449                               const char **parent_names,
5450                               int num_parents)
5452 -       int ret;
5453         struct clk_sam9x5_main *clkmain;
5454         struct clk *clk = NULL;
5455         struct clk_init_data init;
5456 +       unsigned int status;
5458 -       if (!pmc || !irq || !name)
5459 +       if (!name)
5460                 return ERR_PTR(-EINVAL);
5462         if (!parent_names || !num_parents)
5463 @@ -586,51 +557,42 @@
5464         init.flags = CLK_SET_PARENT_GATE;
5466         clkmain->hw.init = &init;
5467 -       clkmain->pmc = pmc;
5468 -       clkmain->irq = irq;
5469 -       clkmain->parent = !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) &
5470 -                            AT91_PMC_MOSCEN);
5471 -       init_waitqueue_head(&clkmain->wait);
5472 -       irq_set_status_flags(clkmain->irq, IRQ_NOAUTOEN);
5473 -       ret = request_irq(clkmain->irq, clk_sam9x5_main_irq_handler,
5474 -                         IRQF_TRIGGER_HIGH, name, clkmain);
5475 -       if (ret)
5476 -               return ERR_PTR(ret);
5477 +       clkmain->regmap = regmap;
5478 +       regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
5479 +       clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0;
5481         clk = clk_register(NULL, &clkmain->hw);
5482 -       if (IS_ERR(clk)) {
5483 -               free_irq(clkmain->irq, clkmain);
5484 +       if (IS_ERR(clk))
5485                 kfree(clkmain);
5486 -       }
5488         return clk;
5491 -void __init of_at91sam9x5_clk_main_setup(struct device_node *np,
5492 -                                        struct at91_pmc *pmc)
5493 +static void __init of_at91sam9x5_clk_main_setup(struct device_node *np)
5495         struct clk *clk;
5496         const char *parent_names[2];
5497         int num_parents;
5498 -       unsigned int irq;
5499         const char *name = np->name;
5500 +       struct regmap *regmap;
5502         num_parents = of_clk_get_parent_count(np);
5503         if (num_parents <= 0 || num_parents > 2)
5504                 return;
5506         of_clk_parent_fill(np, parent_names, num_parents);
5507 +       regmap = syscon_node_to_regmap(of_get_parent(np));
5508 +       if (IS_ERR(regmap))
5509 +               return;
5511         of_property_read_string(np, "clock-output-names", &name);
5513 -       irq = irq_of_parse_and_map(np, 0);
5514 -       if (!irq)
5515 -               return;
5517 -       clk = at91_clk_register_sam9x5_main(pmc, irq, name, parent_names,
5518 +       clk = at91_clk_register_sam9x5_main(regmap, name, parent_names,
5519                                             num_parents);
5520         if (IS_ERR(clk))
5521                 return;
5523         of_clk_add_provider(np, of_clk_src_simple_get, clk);
5525 +CLK_OF_DECLARE(at91sam9x5_clk_main, "atmel,at91sam9x5-clk-main",
5526 +              of_at91sam9x5_clk_main_setup);
5527 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-master.c linux-4.4.46/drivers/clk/at91/clk-master.c
5528 --- linux-4.4.46.orig/drivers/clk/at91/clk-master.c     2017-02-01 08:31:11.000000000 +0100
5529 +++ linux-4.4.46/drivers/clk/at91/clk-master.c  2017-02-03 17:18:05.671416021 +0100
5530 @@ -12,13 +12,8 @@
5531  #include <linux/clkdev.h>
5532  #include <linux/clk/at91_pmc.h>
5533  #include <linux/of.h>
5534 -#include <linux/of_address.h>
5535 -#include <linux/of_irq.h>
5536 -#include <linux/io.h>
5537 -#include <linux/wait.h>
5538 -#include <linux/sched.h>
5539 -#include <linux/interrupt.h>
5540 -#include <linux/irq.h>
5541 +#include <linux/mfd/syscon.h>
5542 +#include <linux/regmap.h>
5544  #include "pmc.h"
5546 @@ -44,32 +39,26 @@
5548  struct clk_master {
5549         struct clk_hw hw;
5550 -       struct at91_pmc *pmc;
5551 -       unsigned int irq;
5552 -       wait_queue_head_t wait;
5553 +       struct regmap *regmap;
5554         const struct clk_master_layout *layout;
5555         const struct clk_master_characteristics *characteristics;
5556  };
5558 -static irqreturn_t clk_master_irq_handler(int irq, void *dev_id)
5559 +static inline bool clk_master_ready(struct regmap *regmap)
5561 -       struct clk_master *master = (struct clk_master *)dev_id;
5562 +       unsigned int status;
5564 -       wake_up(&master->wait);
5565 -       disable_irq_nosync(master->irq);
5566 +       regmap_read(regmap, AT91_PMC_SR, &status);
5568 -       return IRQ_HANDLED;
5569 +       return status & AT91_PMC_MCKRDY ? 1 : 0;
5572  static int clk_master_prepare(struct clk_hw *hw)
5574         struct clk_master *master = to_clk_master(hw);
5575 -       struct at91_pmc *pmc = master->pmc;
5577 -       while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY)) {
5578 -               enable_irq(master->irq);
5579 -               wait_event(master->wait,
5580 -                          pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
5581 -       }
5582 +       while (!clk_master_ready(master->regmap))
5583 +               cpu_relax();
5585         return 0;
5587 @@ -78,7 +67,7 @@
5589         struct clk_master *master = to_clk_master(hw);
5591 -       return !!(pmc_read(master->pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
5592 +       return clk_master_ready(master->regmap);
5595  static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
5596 @@ -88,18 +77,16 @@
5597         u8 div;
5598         unsigned long rate = parent_rate;
5599         struct clk_master *master = to_clk_master(hw);
5600 -       struct at91_pmc *pmc = master->pmc;
5601         const struct clk_master_layout *layout = master->layout;
5602         const struct clk_master_characteristics *characteristics =
5603                                                 master->characteristics;
5604 -       u32 tmp;
5605 +       unsigned int mckr;
5607 -       pmc_lock(pmc);
5608 -       tmp = pmc_read(pmc, AT91_PMC_MCKR) & layout->mask;
5609 -       pmc_unlock(pmc);
5610 +       regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
5611 +       mckr &= layout->mask;
5613 -       pres = (tmp >> layout->pres_shift) & MASTER_PRES_MASK;
5614 -       div = (tmp >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
5615 +       pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK;
5616 +       div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
5618         if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX)
5619                 rate /= 3;
5620 @@ -119,9 +106,11 @@
5621  static u8 clk_master_get_parent(struct clk_hw *hw)
5623         struct clk_master *master = to_clk_master(hw);
5624 -       struct at91_pmc *pmc = master->pmc;
5625 +       unsigned int mckr;
5627 -       return pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_CSS;
5628 +       regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
5630 +       return mckr & AT91_PMC_CSS;
5633  static const struct clk_ops master_ops = {
5634 @@ -132,18 +121,17 @@
5635  };
5637  static struct clk * __init
5638 -at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq,
5639 +at91_clk_register_master(struct regmap *regmap,
5640                 const char *name, int num_parents,
5641                 const char **parent_names,
5642                 const struct clk_master_layout *layout,
5643                 const struct clk_master_characteristics *characteristics)
5645 -       int ret;
5646         struct clk_master *master;
5647         struct clk *clk = NULL;
5648         struct clk_init_data init;
5650 -       if (!pmc || !irq || !name || !num_parents || !parent_names)
5651 +       if (!name || !num_parents || !parent_names)
5652                 return ERR_PTR(-EINVAL);
5654         master = kzalloc(sizeof(*master), GFP_KERNEL);
5655 @@ -159,20 +147,10 @@
5656         master->hw.init = &init;
5657         master->layout = layout;
5658         master->characteristics = characteristics;
5659 -       master->pmc = pmc;
5660 -       master->irq = irq;
5661 -       init_waitqueue_head(&master->wait);
5662 -       irq_set_status_flags(master->irq, IRQ_NOAUTOEN);
5663 -       ret = request_irq(master->irq, clk_master_irq_handler,
5664 -                         IRQF_TRIGGER_HIGH, "clk-master", master);
5665 -       if (ret) {
5666 -               kfree(master);
5667 -               return ERR_PTR(ret);
5668 -       }
5669 +       master->regmap = regmap;
5671         clk = clk_register(NULL, &master->hw);
5672         if (IS_ERR(clk)) {
5673 -               free_irq(master->irq, master);
5674                 kfree(master);
5675         }
5677 @@ -217,15 +195,15 @@
5680  static void __init
5681 -of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc,
5682 +of_at91_clk_master_setup(struct device_node *np,
5683                          const struct clk_master_layout *layout)
5685         struct clk *clk;
5686         int num_parents;
5687 -       unsigned int irq;
5688         const char *parent_names[MASTER_SOURCE_MAX];
5689         const char *name = np->name;
5690         struct clk_master_characteristics *characteristics;
5691 +       struct regmap *regmap;
5693         num_parents = of_clk_get_parent_count(np);
5694         if (num_parents <= 0 || num_parents > MASTER_SOURCE_MAX)
5695 @@ -239,11 +217,11 @@
5696         if (!characteristics)
5697                 return;
5699 -       irq = irq_of_parse_and_map(np, 0);
5700 -       if (!irq)
5701 -               goto out_free_characteristics;
5702 +       regmap = syscon_node_to_regmap(of_get_parent(np));
5703 +       if (IS_ERR(regmap))
5704 +               return;
5706 -       clk = at91_clk_register_master(pmc, irq, name, num_parents,
5707 +       clk = at91_clk_register_master(regmap, name, num_parents,
5708                                        parent_names, layout,
5709                                        characteristics);
5710         if (IS_ERR(clk))
5711 @@ -256,14 +234,16 @@
5712         kfree(characteristics);
5715 -void __init of_at91rm9200_clk_master_setup(struct device_node *np,
5716 -                                          struct at91_pmc *pmc)
5717 +static void __init of_at91rm9200_clk_master_setup(struct device_node *np)
5719 -       of_at91_clk_master_setup(np, pmc, &at91rm9200_master_layout);
5720 +       of_at91_clk_master_setup(np, &at91rm9200_master_layout);
5722 +CLK_OF_DECLARE(at91rm9200_clk_master, "atmel,at91rm9200-clk-master",
5723 +              of_at91rm9200_clk_master_setup);
5725 -void __init of_at91sam9x5_clk_master_setup(struct device_node *np,
5726 -                                          struct at91_pmc *pmc)
5727 +static void __init of_at91sam9x5_clk_master_setup(struct device_node *np)
5729 -       of_at91_clk_master_setup(np, pmc, &at91sam9x5_master_layout);
5730 +       of_at91_clk_master_setup(np, &at91sam9x5_master_layout);
5732 +CLK_OF_DECLARE(at91sam9x5_clk_master, "atmel,at91sam9x5-clk-master",
5733 +              of_at91sam9x5_clk_master_setup);
5734 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-peripheral.c linux-4.4.46/drivers/clk/at91/clk-peripheral.c
5735 --- linux-4.4.46.orig/drivers/clk/at91/clk-peripheral.c 2017-02-01 08:31:11.000000000 +0100
5736 +++ linux-4.4.46/drivers/clk/at91/clk-peripheral.c      2017-02-03 17:18:05.671416021 +0100
5737 @@ -12,11 +12,13 @@
5738  #include <linux/clkdev.h>
5739  #include <linux/clk/at91_pmc.h>
5740  #include <linux/of.h>
5741 -#include <linux/of_address.h>
5742 -#include <linux/io.h>
5743 +#include <linux/mfd/syscon.h>
5744 +#include <linux/regmap.h>
5746  #include "pmc.h"
5748 +DEFINE_SPINLOCK(pmc_pcr_lock);
5750  #define PERIPHERAL_MAX         64
5752  #define PERIPHERAL_AT91RM9200  0
5753 @@ -33,7 +35,7 @@
5755  struct clk_peripheral {
5756         struct clk_hw hw;
5757 -       struct at91_pmc *pmc;
5758 +       struct regmap *regmap;
5759         u32 id;
5760  };
5762 @@ -41,8 +43,9 @@
5764  struct clk_sam9x5_peripheral {
5765         struct clk_hw hw;
5766 -       struct at91_pmc *pmc;
5767 +       struct regmap *regmap;
5768         struct clk_range range;
5769 +       spinlock_t *lock;
5770         u32 id;
5771         u32 div;
5772         bool auto_div;
5773 @@ -54,7 +57,6 @@
5774  static int clk_peripheral_enable(struct clk_hw *hw)
5776         struct clk_peripheral *periph = to_clk_peripheral(hw);
5777 -       struct at91_pmc *pmc = periph->pmc;
5778         int offset = AT91_PMC_PCER;
5779         u32 id = periph->id;
5781 @@ -62,14 +64,14 @@
5782                 return 0;
5783         if (id > PERIPHERAL_ID_MAX)
5784                 offset = AT91_PMC_PCER1;
5785 -       pmc_write(pmc, offset, PERIPHERAL_MASK(id));
5786 +       regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
5788         return 0;
5791  static void clk_peripheral_disable(struct clk_hw *hw)
5793         struct clk_peripheral *periph = to_clk_peripheral(hw);
5794 -       struct at91_pmc *pmc = periph->pmc;
5795         int offset = AT91_PMC_PCDR;
5796         u32 id = periph->id;
5798 @@ -77,21 +79,23 @@
5799                 return;
5800         if (id > PERIPHERAL_ID_MAX)
5801                 offset = AT91_PMC_PCDR1;
5802 -       pmc_write(pmc, offset, PERIPHERAL_MASK(id));
5803 +       regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
5806  static int clk_peripheral_is_enabled(struct clk_hw *hw)
5808         struct clk_peripheral *periph = to_clk_peripheral(hw);
5809 -       struct at91_pmc *pmc = periph->pmc;
5810         int offset = AT91_PMC_PCSR;
5811 +       unsigned int status;
5812         u32 id = periph->id;
5814         if (id < PERIPHERAL_ID_MIN)
5815                 return 1;
5816         if (id > PERIPHERAL_ID_MAX)
5817                 offset = AT91_PMC_PCSR1;
5818 -       return !!(pmc_read(pmc, offset) & PERIPHERAL_MASK(id));
5819 +       regmap_read(periph->regmap, offset, &status);
5821 +       return status & PERIPHERAL_MASK(id) ? 1 : 0;
5824  static const struct clk_ops peripheral_ops = {
5825 @@ -101,14 +105,14 @@
5826  };
5828  static struct clk * __init
5829 -at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name,
5830 +at91_clk_register_peripheral(struct regmap *regmap, const char *name,
5831                              const char *parent_name, u32 id)
5833         struct clk_peripheral *periph;
5834         struct clk *clk = NULL;
5835         struct clk_init_data init;
5837 -       if (!pmc || !name || !parent_name || id > PERIPHERAL_ID_MAX)
5838 +       if (!name || !parent_name || id > PERIPHERAL_ID_MAX)
5839                 return ERR_PTR(-EINVAL);
5841         periph = kzalloc(sizeof(*periph), GFP_KERNEL);
5842 @@ -123,7 +127,7 @@
5844         periph->id = id;
5845         periph->hw.init = &init;
5846 -       periph->pmc = pmc;
5847 +       periph->regmap = regmap;
5849         clk = clk_register(NULL, &periph->hw);
5850         if (IS_ERR(clk))
5851 @@ -160,53 +164,58 @@
5852  static int clk_sam9x5_peripheral_enable(struct clk_hw *hw)
5854         struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5855 -       struct at91_pmc *pmc = periph->pmc;
5856 -       u32 tmp;
5857 +       unsigned long flags;
5859         if (periph->id < PERIPHERAL_ID_MIN)
5860                 return 0;
5862 -       pmc_lock(pmc);
5863 -       pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5864 -       tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_DIV_MASK;
5865 -       pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_DIV(periph->div)
5866 -                                        | AT91_PMC_PCR_CMD
5867 -                                        | AT91_PMC_PCR_EN);
5868 -       pmc_unlock(pmc);
5869 +       spin_lock_irqsave(periph->lock, flags);
5870 +       regmap_write(periph->regmap, AT91_PMC_PCR,
5871 +                    (periph->id & AT91_PMC_PCR_PID_MASK));
5872 +       regmap_update_bits(periph->regmap, AT91_PMC_PCR,
5873 +                          AT91_PMC_PCR_DIV_MASK | AT91_PMC_PCR_CMD |
5874 +                          AT91_PMC_PCR_EN,
5875 +                          AT91_PMC_PCR_DIV(periph->div) |
5876 +                          AT91_PMC_PCR_CMD |
5877 +                          AT91_PMC_PCR_EN);
5878 +       spin_unlock_irqrestore(periph->lock, flags);
5880         return 0;
5883  static void clk_sam9x5_peripheral_disable(struct clk_hw *hw)
5885         struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5886 -       struct at91_pmc *pmc = periph->pmc;
5887 -       u32 tmp;
5888 +       unsigned long flags;
5890         if (periph->id < PERIPHERAL_ID_MIN)
5891                 return;
5893 -       pmc_lock(pmc);
5894 -       pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5895 -       tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_EN;
5896 -       pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
5897 -       pmc_unlock(pmc);
5898 +       spin_lock_irqsave(periph->lock, flags);
5899 +       regmap_write(periph->regmap, AT91_PMC_PCR,
5900 +                    (periph->id & AT91_PMC_PCR_PID_MASK));
5901 +       regmap_update_bits(periph->regmap, AT91_PMC_PCR,
5902 +                          AT91_PMC_PCR_EN | AT91_PMC_PCR_CMD,
5903 +                          AT91_PMC_PCR_CMD);
5904 +       spin_unlock_irqrestore(periph->lock, flags);
5907  static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw)
5909         struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5910 -       struct at91_pmc *pmc = periph->pmc;
5911 -       int ret;
5912 +       unsigned long flags;
5913 +       unsigned int status;
5915         if (periph->id < PERIPHERAL_ID_MIN)
5916                 return 1;
5918 -       pmc_lock(pmc);
5919 -       pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5920 -       ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_EN);
5921 -       pmc_unlock(pmc);
5922 +       spin_lock_irqsave(periph->lock, flags);
5923 +       regmap_write(periph->regmap, AT91_PMC_PCR,
5924 +                    (periph->id & AT91_PMC_PCR_PID_MASK));
5925 +       regmap_read(periph->regmap, AT91_PMC_PCR, &status);
5926 +       spin_unlock_irqrestore(periph->lock, flags);
5928 -       return ret;
5929 +       return status & AT91_PMC_PCR_EN ? 1 : 0;
5932  static unsigned long
5933 @@ -214,19 +223,20 @@
5934                                   unsigned long parent_rate)
5936         struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
5937 -       struct at91_pmc *pmc = periph->pmc;
5938 -       u32 tmp;
5939 +       unsigned long flags;
5940 +       unsigned int status;
5942         if (periph->id < PERIPHERAL_ID_MIN)
5943                 return parent_rate;
5945 -       pmc_lock(pmc);
5946 -       pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
5947 -       tmp = pmc_read(pmc, AT91_PMC_PCR);
5948 -       pmc_unlock(pmc);
5949 +       spin_lock_irqsave(periph->lock, flags);
5950 +       regmap_write(periph->regmap, AT91_PMC_PCR,
5951 +                    (periph->id & AT91_PMC_PCR_PID_MASK));
5952 +       regmap_read(periph->regmap, AT91_PMC_PCR, &status);
5953 +       spin_unlock_irqrestore(periph->lock, flags);
5955 -       if (tmp & AT91_PMC_PCR_EN) {
5956 -               periph->div = PERIPHERAL_RSHIFT(tmp);
5957 +       if (status & AT91_PMC_PCR_EN) {
5958 +               periph->div = PERIPHERAL_RSHIFT(status);
5959                 periph->auto_div = false;
5960         } else {
5961                 clk_sam9x5_peripheral_autodiv(periph);
5962 @@ -318,15 +328,15 @@
5963  };
5965  static struct clk * __init
5966 -at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name,
5967 -                                   const char *parent_name, u32 id,
5968 -                                   const struct clk_range *range)
5969 +at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock,
5970 +                                   const char *name, const char *parent_name,
5971 +                                   u32 id, const struct clk_range *range)
5973         struct clk_sam9x5_peripheral *periph;
5974         struct clk *clk = NULL;
5975         struct clk_init_data init;
5977 -       if (!pmc || !name || !parent_name)
5978 +       if (!name || !parent_name)
5979                 return ERR_PTR(-EINVAL);
5981         periph = kzalloc(sizeof(*periph), GFP_KERNEL);
5982 @@ -342,7 +352,8 @@
5983         periph->id = id;
5984         periph->hw.init = &init;
5985         periph->div = 0;
5986 -       periph->pmc = pmc;
5987 +       periph->regmap = regmap;
5988 +       periph->lock = lock;
5989         periph->auto_div = true;
5990         periph->range = *range;
5992 @@ -356,7 +367,7 @@
5995  static void __init
5996 -of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
5997 +of_at91_clk_periph_setup(struct device_node *np, u8 type)
5999         int num;
6000         u32 id;
6001 @@ -364,6 +375,7 @@
6002         const char *parent_name;
6003         const char *name;
6004         struct device_node *periphclknp;
6005 +       struct regmap *regmap;
6007         parent_name = of_clk_get_parent_name(np, 0);
6008         if (!parent_name)
6009 @@ -373,6 +385,10 @@
6010         if (!num || num > PERIPHERAL_MAX)
6011                 return;
6013 +       regmap = syscon_node_to_regmap(of_get_parent(np));
6014 +       if (IS_ERR(regmap))
6015 +               return;
6017         for_each_child_of_node(np, periphclknp) {
6018                 if (of_property_read_u32(periphclknp, "reg", &id))
6019                         continue;
6020 @@ -384,7 +400,7 @@
6021                         name = periphclknp->name;
6023                 if (type == PERIPHERAL_AT91RM9200) {
6024 -                       clk = at91_clk_register_peripheral(pmc, name,
6025 +                       clk = at91_clk_register_peripheral(regmap, name,
6026                                                            parent_name, id);
6027                 } else {
6028                         struct clk_range range = CLK_RANGE(0, 0);
6029 @@ -393,7 +409,9 @@
6030                                               "atmel,clk-output-range",
6031                                               &range);
6033 -                       clk = at91_clk_register_sam9x5_peripheral(pmc, name,
6034 +                       clk = at91_clk_register_sam9x5_peripheral(regmap,
6035 +                                                                 &pmc_pcr_lock,
6036 +                                                                 name,
6037                                                                   parent_name,
6038                                                                   id, &range);
6039                 }
6040 @@ -405,14 +423,16 @@
6041         }
6044 -void __init of_at91rm9200_clk_periph_setup(struct device_node *np,
6045 -                                          struct at91_pmc *pmc)
6046 +static void __init of_at91rm9200_clk_periph_setup(struct device_node *np)
6048 -       of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91RM9200);
6049 +       of_at91_clk_periph_setup(np, PERIPHERAL_AT91RM9200);
6051 +CLK_OF_DECLARE(at91rm9200_clk_periph, "atmel,at91rm9200-clk-peripheral",
6052 +              of_at91rm9200_clk_periph_setup);
6054 -void __init of_at91sam9x5_clk_periph_setup(struct device_node *np,
6055 -                                          struct at91_pmc *pmc)
6056 +static void __init of_at91sam9x5_clk_periph_setup(struct device_node *np)
6058 -       of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91SAM9X5);
6059 +       of_at91_clk_periph_setup(np, PERIPHERAL_AT91SAM9X5);
6061 +CLK_OF_DECLARE(at91sam9x5_clk_periph, "atmel,at91sam9x5-clk-peripheral",
6062 +              of_at91sam9x5_clk_periph_setup);
6063 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-pll.c linux-4.4.46/drivers/clk/at91/clk-pll.c
6064 --- linux-4.4.46.orig/drivers/clk/at91/clk-pll.c        2017-02-01 08:31:11.000000000 +0100
6065 +++ linux-4.4.46/drivers/clk/at91/clk-pll.c     2017-02-03 17:18:05.671416021 +0100
6066 @@ -12,14 +12,8 @@
6067  #include <linux/clkdev.h>
6068  #include <linux/clk/at91_pmc.h>
6069  #include <linux/of.h>
6070 -#include <linux/of_address.h>
6071 -#include <linux/of_irq.h>
6072 -#include <linux/io.h>
6073 -#include <linux/kernel.h>
6074 -#include <linux/wait.h>
6075 -#include <linux/sched.h>
6076 -#include <linux/interrupt.h>
6077 -#include <linux/irq.h>
6078 +#include <linux/mfd/syscon.h>
6079 +#include <linux/regmap.h>
6081  #include "pmc.h"
6083 @@ -58,9 +52,7 @@
6085  struct clk_pll {
6086         struct clk_hw hw;
6087 -       struct at91_pmc *pmc;
6088 -       unsigned int irq;
6089 -       wait_queue_head_t wait;
6090 +       struct regmap *regmap;
6091         u8 id;
6092         u8 div;
6093         u8 range;
6094 @@ -69,20 +61,19 @@
6095         const struct clk_pll_characteristics *characteristics;
6096  };
6098 -static irqreturn_t clk_pll_irq_handler(int irq, void *dev_id)
6099 +static inline bool clk_pll_ready(struct regmap *regmap, int id)
6101 -       struct clk_pll *pll = (struct clk_pll *)dev_id;
6102 +       unsigned int status;
6104 -       wake_up(&pll->wait);
6105 -       disable_irq_nosync(pll->irq);
6106 +       regmap_read(regmap, AT91_PMC_SR, &status);
6108 -       return IRQ_HANDLED;
6109 +       return status & PLL_STATUS_MASK(id) ? 1 : 0;
6112  static int clk_pll_prepare(struct clk_hw *hw)
6114         struct clk_pll *pll = to_clk_pll(hw);
6115 -       struct at91_pmc *pmc = pll->pmc;
6116 +       struct regmap *regmap = pll->regmap;
6117         const struct clk_pll_layout *layout = pll->layout;
6118         const struct clk_pll_characteristics *characteristics =
6119                                                         pll->characteristics;
6120 @@ -90,39 +81,34 @@
6121         u32 mask = PLL_STATUS_MASK(id);
6122         int offset = PLL_REG(id);
6123         u8 out = 0;
6124 -       u32 pllr, icpr;
6125 +       unsigned int pllr;
6126 +       unsigned int status;
6127         u8 div;
6128         u16 mul;
6130 -       pllr = pmc_read(pmc, offset);
6131 +       regmap_read(regmap, offset, &pllr);
6132         div = PLL_DIV(pllr);
6133         mul = PLL_MUL(pllr, layout);
6135 -       if ((pmc_read(pmc, AT91_PMC_SR) & mask) &&
6136 +       regmap_read(regmap, AT91_PMC_SR, &status);
6137 +       if ((status & mask) &&
6138             (div == pll->div && mul == pll->mul))
6139                 return 0;
6141         if (characteristics->out)
6142                 out = characteristics->out[pll->range];
6143 -       if (characteristics->icpll) {
6144 -               icpr = pmc_read(pmc, AT91_PMC_PLLICPR) & ~PLL_ICPR_MASK(id);
6145 -               icpr |= (characteristics->icpll[pll->range] <<
6146 -                       PLL_ICPR_SHIFT(id));
6147 -               pmc_write(pmc, AT91_PMC_PLLICPR, icpr);
6148 -       }
6150 -       pllr &= ~layout->pllr_mask;
6151 -       pllr |= layout->pllr_mask &
6152 -              (pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
6153 -               (out << PLL_OUT_SHIFT) |
6154 -               ((pll->mul & layout->mul_mask) << layout->mul_shift));
6155 -       pmc_write(pmc, offset, pllr);
6157 -       while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
6158 -               enable_irq(pll->irq);
6159 -               wait_event(pll->wait,
6160 -                          pmc_read(pmc, AT91_PMC_SR) & mask);
6161 -       }
6162 +       if (characteristics->icpll)
6163 +               regmap_update_bits(regmap, AT91_PMC_PLLICPR, PLL_ICPR_MASK(id),
6164 +                       characteristics->icpll[pll->range] << PLL_ICPR_SHIFT(id));
6166 +       regmap_update_bits(regmap, offset, layout->pllr_mask,
6167 +                       pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
6168 +                       (out << PLL_OUT_SHIFT) |
6169 +                       ((pll->mul & layout->mul_mask) << layout->mul_shift));
6171 +       while (!clk_pll_ready(regmap, pll->id))
6172 +               cpu_relax();
6174         return 0;
6176 @@ -130,32 +116,35 @@
6177  static int clk_pll_is_prepared(struct clk_hw *hw)
6179         struct clk_pll *pll = to_clk_pll(hw);
6180 -       struct at91_pmc *pmc = pll->pmc;
6182 -       return !!(pmc_read(pmc, AT91_PMC_SR) &
6183 -                 PLL_STATUS_MASK(pll->id));
6184 +       return clk_pll_ready(pll->regmap, pll->id);
6187  static void clk_pll_unprepare(struct clk_hw *hw)
6189         struct clk_pll *pll = to_clk_pll(hw);
6190 -       struct at91_pmc *pmc = pll->pmc;
6191 -       const struct clk_pll_layout *layout = pll->layout;
6192 -       int offset = PLL_REG(pll->id);
6193 -       u32 tmp = pmc_read(pmc, offset) & ~(layout->pllr_mask);
6194 +       unsigned int mask = pll->layout->pllr_mask;
6196 -       pmc_write(pmc, offset, tmp);
6197 +       regmap_update_bits(pll->regmap, PLL_REG(pll->id), mask, ~mask);
6200  static unsigned long clk_pll_recalc_rate(struct clk_hw *hw,
6201                                          unsigned long parent_rate)
6203         struct clk_pll *pll = to_clk_pll(hw);
6204 +       unsigned int pllr;
6205 +       u16 mul;
6206 +       u8 div;
6208 +       regmap_read(pll->regmap, PLL_REG(pll->id), &pllr);
6210 +       div = PLL_DIV(pllr);
6211 +       mul = PLL_MUL(pllr, pll->layout);
6213 -       if (!pll->div || !pll->mul)
6214 +       if (!div || !mul)
6215                 return 0;
6217 -       return (parent_rate / pll->div) * (pll->mul + 1);
6218 +       return (parent_rate / div) * (mul + 1);
6221  static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
6222 @@ -308,7 +297,7 @@
6223  };
6225  static struct clk * __init
6226 -at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name,
6227 +at91_clk_register_pll(struct regmap *regmap, const char *name,
6228                       const char *parent_name, u8 id,
6229                       const struct clk_pll_layout *layout,
6230                       const struct clk_pll_characteristics *characteristics)
6231 @@ -316,9 +305,8 @@
6232         struct clk_pll *pll;
6233         struct clk *clk = NULL;
6234         struct clk_init_data init;
6235 -       int ret;
6236         int offset = PLL_REG(id);
6237 -       u32 tmp;
6238 +       unsigned int pllr;
6240         if (id > PLL_MAX_ID)
6241                 return ERR_PTR(-EINVAL);
6242 @@ -337,23 +325,13 @@
6243         pll->hw.init = &init;
6244         pll->layout = layout;
6245         pll->characteristics = characteristics;
6246 -       pll->pmc = pmc;
6247 -       pll->irq = irq;
6248 -       tmp = pmc_read(pmc, offset) & layout->pllr_mask;
6249 -       pll->div = PLL_DIV(tmp);
6250 -       pll->mul = PLL_MUL(tmp, layout);
6251 -       init_waitqueue_head(&pll->wait);
6252 -       irq_set_status_flags(pll->irq, IRQ_NOAUTOEN);
6253 -       ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH,
6254 -                         id ? "clk-pllb" : "clk-plla", pll);
6255 -       if (ret) {
6256 -               kfree(pll);
6257 -               return ERR_PTR(ret);
6258 -       }
6259 +       pll->regmap = regmap;
6260 +       regmap_read(regmap, offset, &pllr);
6261 +       pll->div = PLL_DIV(pllr);
6262 +       pll->mul = PLL_MUL(pllr, layout);
6264         clk = clk_register(NULL, &pll->hw);
6265         if (IS_ERR(clk)) {
6266 -               free_irq(pll->irq, pll);
6267                 kfree(pll);
6268         }
6270 @@ -483,12 +461,12 @@
6273  static void __init
6274 -of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc,
6275 +of_at91_clk_pll_setup(struct device_node *np,
6276                       const struct clk_pll_layout *layout)
6278         u32 id;
6279 -       unsigned int irq;
6280         struct clk *clk;
6281 +       struct regmap *regmap;
6282         const char *parent_name;
6283         const char *name = np->name;
6284         struct clk_pll_characteristics *characteristics;
6285 @@ -500,15 +478,15 @@
6287         of_property_read_string(np, "clock-output-names", &name);
6289 -       characteristics = of_at91_clk_pll_get_characteristics(np);
6290 -       if (!characteristics)
6291 +       regmap = syscon_node_to_regmap(of_get_parent(np));
6292 +       if (IS_ERR(regmap))
6293                 return;
6295 -       irq = irq_of_parse_and_map(np, 0);
6296 -       if (!irq)
6297 +       characteristics = of_at91_clk_pll_get_characteristics(np);
6298 +       if (!characteristics)
6299                 return;
6301 -       clk = at91_clk_register_pll(pmc, irq, name, parent_name, id, layout,
6302 +       clk = at91_clk_register_pll(regmap, name, parent_name, id, layout,
6303                                     characteristics);
6304         if (IS_ERR(clk))
6305                 goto out_free_characteristics;
6306 @@ -520,26 +498,30 @@
6307         kfree(characteristics);
6310 -void __init of_at91rm9200_clk_pll_setup(struct device_node *np,
6311 -                                              struct at91_pmc *pmc)
6312 +static void __init of_at91rm9200_clk_pll_setup(struct device_node *np)
6314 -       of_at91_clk_pll_setup(np, pmc, &at91rm9200_pll_layout);
6315 +       of_at91_clk_pll_setup(np, &at91rm9200_pll_layout);
6317 +CLK_OF_DECLARE(at91rm9200_clk_pll, "atmel,at91rm9200-clk-pll",
6318 +              of_at91rm9200_clk_pll_setup);
6320 -void __init of_at91sam9g45_clk_pll_setup(struct device_node *np,
6321 -                                               struct at91_pmc *pmc)
6322 +static void __init of_at91sam9g45_clk_pll_setup(struct device_node *np)
6324 -       of_at91_clk_pll_setup(np, pmc, &at91sam9g45_pll_layout);
6325 +       of_at91_clk_pll_setup(np, &at91sam9g45_pll_layout);
6327 +CLK_OF_DECLARE(at91sam9g45_clk_pll, "atmel,at91sam9g45-clk-pll",
6328 +              of_at91sam9g45_clk_pll_setup);
6330 -void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np,
6331 -                                                struct at91_pmc *pmc)
6332 +static void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np)
6334 -       of_at91_clk_pll_setup(np, pmc, &at91sam9g20_pllb_layout);
6335 +       of_at91_clk_pll_setup(np, &at91sam9g20_pllb_layout);
6337 +CLK_OF_DECLARE(at91sam9g20_clk_pllb, "atmel,at91sam9g20-clk-pllb",
6338 +              of_at91sam9g20_clk_pllb_setup);
6340 -void __init of_sama5d3_clk_pll_setup(struct device_node *np,
6341 -                                           struct at91_pmc *pmc)
6342 +static void __init of_sama5d3_clk_pll_setup(struct device_node *np)
6344 -       of_at91_clk_pll_setup(np, pmc, &sama5d3_pll_layout);
6345 +       of_at91_clk_pll_setup(np, &sama5d3_pll_layout);
6347 +CLK_OF_DECLARE(sama5d3_clk_pll, "atmel,sama5d3-clk-pll",
6348 +              of_sama5d3_clk_pll_setup);
6349 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-plldiv.c linux-4.4.46/drivers/clk/at91/clk-plldiv.c
6350 --- linux-4.4.46.orig/drivers/clk/at91/clk-plldiv.c     2017-02-01 08:31:11.000000000 +0100
6351 +++ linux-4.4.46/drivers/clk/at91/clk-plldiv.c  2017-02-03 17:18:05.671416021 +0100
6352 @@ -12,8 +12,8 @@
6353  #include <linux/clkdev.h>
6354  #include <linux/clk/at91_pmc.h>
6355  #include <linux/of.h>
6356 -#include <linux/of_address.h>
6357 -#include <linux/io.h>
6358 +#include <linux/mfd/syscon.h>
6359 +#include <linux/regmap.h>
6361  #include "pmc.h"
6363 @@ -21,16 +21,18 @@
6365  struct clk_plldiv {
6366         struct clk_hw hw;
6367 -       struct at91_pmc *pmc;
6368 +       struct regmap *regmap;
6369  };
6371  static unsigned long clk_plldiv_recalc_rate(struct clk_hw *hw,
6372                                             unsigned long parent_rate)
6374         struct clk_plldiv *plldiv = to_clk_plldiv(hw);
6375 -       struct at91_pmc *pmc = plldiv->pmc;
6376 +       unsigned int mckr;
6378 -       if (pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_PLLADIV2)
6379 +       regmap_read(plldiv->regmap, AT91_PMC_MCKR, &mckr);
6381 +       if (mckr & AT91_PMC_PLLADIV2)
6382                 return parent_rate / 2;
6384         return parent_rate;
6385 @@ -57,18 +59,12 @@
6386                                unsigned long parent_rate)
6388         struct clk_plldiv *plldiv = to_clk_plldiv(hw);
6389 -       struct at91_pmc *pmc = plldiv->pmc;
6390 -       u32 tmp;
6392 -       if (parent_rate != rate && (parent_rate / 2) != rate)
6393 +       if ((parent_rate != rate) && (parent_rate / 2 != rate))
6394                 return -EINVAL;
6396 -       pmc_lock(pmc);
6397 -       tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_PLLADIV2;
6398 -       if ((parent_rate / 2) == rate)
6399 -               tmp |= AT91_PMC_PLLADIV2;
6400 -       pmc_write(pmc, AT91_PMC_MCKR, tmp);
6401 -       pmc_unlock(pmc);
6402 +       regmap_update_bits(plldiv->regmap, AT91_PMC_MCKR, AT91_PMC_PLLADIV2,
6403 +                          parent_rate != rate ? AT91_PMC_PLLADIV2 : 0);
6405         return 0;
6407 @@ -80,7 +76,7 @@
6408  };
6410  static struct clk * __init
6411 -at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name,
6412 +at91_clk_register_plldiv(struct regmap *regmap, const char *name,
6413                          const char *parent_name)
6415         struct clk_plldiv *plldiv;
6416 @@ -98,7 +94,7 @@
6417         init.flags = CLK_SET_RATE_GATE;
6419         plldiv->hw.init = &init;
6420 -       plldiv->pmc = pmc;
6421 +       plldiv->regmap = regmap;
6423         clk = clk_register(NULL, &plldiv->hw);
6425 @@ -109,27 +105,27 @@
6428  static void __init
6429 -of_at91_clk_plldiv_setup(struct device_node *np, struct at91_pmc *pmc)
6430 +of_at91sam9x5_clk_plldiv_setup(struct device_node *np)
6432         struct clk *clk;
6433         const char *parent_name;
6434         const char *name = np->name;
6435 +       struct regmap *regmap;
6437         parent_name = of_clk_get_parent_name(np, 0);
6439         of_property_read_string(np, "clock-output-names", &name);
6441 -       clk = at91_clk_register_plldiv(pmc, name, parent_name);
6442 +       regmap = syscon_node_to_regmap(of_get_parent(np));
6443 +       if (IS_ERR(regmap))
6444 +               return;
6446 +       clk = at91_clk_register_plldiv(regmap, name, parent_name);
6447         if (IS_ERR(clk))
6448                 return;
6450         of_clk_add_provider(np, of_clk_src_simple_get, clk);
6451         return;
6454 -void __init of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
6455 -                                          struct at91_pmc *pmc)
6457 -       of_at91_clk_plldiv_setup(np, pmc);
6459 +CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv",
6460 +              of_at91sam9x5_clk_plldiv_setup);
6461 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-programmable.c linux-4.4.46/drivers/clk/at91/clk-programmable.c
6462 --- linux-4.4.46.orig/drivers/clk/at91/clk-programmable.c       2017-02-01 08:31:11.000000000 +0100
6463 +++ linux-4.4.46/drivers/clk/at91/clk-programmable.c    2017-02-03 17:18:05.675416176 +0100
6464 @@ -12,10 +12,8 @@
6465  #include <linux/clkdev.h>
6466  #include <linux/clk/at91_pmc.h>
6467  #include <linux/of.h>
6468 -#include <linux/of_address.h>
6469 -#include <linux/io.h>
6470 -#include <linux/wait.h>
6471 -#include <linux/sched.h>
6472 +#include <linux/mfd/syscon.h>
6473 +#include <linux/regmap.h>
6475  #include "pmc.h"
6477 @@ -24,6 +22,7 @@
6479  #define PROG_STATUS_MASK(id)   (1 << ((id) + 8))
6480  #define PROG_PRES_MASK         0x7
6481 +#define PROG_PRES(layout, pckr)        ((pckr >> layout->pres_shift) & PROG_PRES_MASK)
6482  #define PROG_MAX_RM9200_CSS    3
6484  struct clk_programmable_layout {
6485 @@ -34,7 +33,7 @@
6487  struct clk_programmable {
6488         struct clk_hw hw;
6489 -       struct at91_pmc *pmc;
6490 +       struct regmap *regmap;
6491         u8 id;
6492         const struct clk_programmable_layout *layout;
6493  };
6494 @@ -44,14 +43,12 @@
6495  static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw,
6496                                                   unsigned long parent_rate)
6498 -       u32 pres;
6499         struct clk_programmable *prog = to_clk_programmable(hw);
6500 -       struct at91_pmc *pmc = prog->pmc;
6501 -       const struct clk_programmable_layout *layout = prog->layout;
6502 +       unsigned int pckr;
6504 +       regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
6506 -       pres = (pmc_read(pmc, AT91_PMC_PCKR(prog->id)) >> layout->pres_shift) &
6507 -              PROG_PRES_MASK;
6508 -       return parent_rate >> pres;
6509 +       return parent_rate >> PROG_PRES(prog->layout, pckr);
6512  static int clk_programmable_determine_rate(struct clk_hw *hw,
6513 @@ -101,36 +98,36 @@
6515         struct clk_programmable *prog = to_clk_programmable(hw);
6516         const struct clk_programmable_layout *layout = prog->layout;
6517 -       struct at91_pmc *pmc = prog->pmc;
6518 -       u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) & ~layout->css_mask;
6519 +       unsigned int mask = layout->css_mask;
6520 +       unsigned int pckr = 0;
6522         if (layout->have_slck_mck)
6523 -               tmp &= AT91_PMC_CSSMCK_MCK;
6524 +               mask |= AT91_PMC_CSSMCK_MCK;
6526         if (index > layout->css_mask) {
6527 -               if (index > PROG_MAX_RM9200_CSS && layout->have_slck_mck) {
6528 -                       tmp |= AT91_PMC_CSSMCK_MCK;
6529 -                       return 0;
6530 -               } else {
6531 +               if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck)
6532                         return -EINVAL;
6533 -               }
6535 +               pckr |= AT91_PMC_CSSMCK_MCK;
6536         }
6538 -       pmc_write(pmc, AT91_PMC_PCKR(prog->id), tmp | index);
6539 +       regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), mask, pckr);
6541         return 0;
6544  static u8 clk_programmable_get_parent(struct clk_hw *hw)
6546 -       u32 tmp;
6547 -       u8 ret;
6548         struct clk_programmable *prog = to_clk_programmable(hw);
6549 -       struct at91_pmc *pmc = prog->pmc;
6550         const struct clk_programmable_layout *layout = prog->layout;
6551 +       unsigned int pckr;
6552 +       u8 ret;
6554 +       regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
6556 +       ret = pckr & layout->css_mask;
6558 -       tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id));
6559 -       ret = tmp & layout->css_mask;
6560 -       if (layout->have_slck_mck && (tmp & AT91_PMC_CSSMCK_MCK) && !ret)
6561 +       if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret)
6562                 ret = PROG_MAX_RM9200_CSS + 1;
6564         return ret;
6565 @@ -140,26 +137,27 @@
6566                                      unsigned long parent_rate)
6568         struct clk_programmable *prog = to_clk_programmable(hw);
6569 -       struct at91_pmc *pmc = prog->pmc;
6570         const struct clk_programmable_layout *layout = prog->layout;
6571         unsigned long div = parent_rate / rate;
6572 +       unsigned int pckr;
6573         int shift = 0;
6574 -       u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) &
6575 -                 ~(PROG_PRES_MASK << layout->pres_shift);
6577 +       regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
6579         if (!div)
6580                 return -EINVAL;
6582         shift = fls(div) - 1;
6584 -       if (div != (1<<shift))
6585 +       if (div != (1 << shift))
6586                 return -EINVAL;
6588         if (shift >= PROG_PRES_MASK)
6589                 return -EINVAL;
6591 -       pmc_write(pmc, AT91_PMC_PCKR(prog->id),
6592 -                 tmp | (shift << layout->pres_shift));
6593 +       regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id),
6594 +                          PROG_PRES_MASK << layout->pres_shift,
6595 +                          shift << layout->pres_shift);
6597         return 0;
6599 @@ -173,7 +171,7 @@
6600  };
6602  static struct clk * __init
6603 -at91_clk_register_programmable(struct at91_pmc *pmc,
6604 +at91_clk_register_programmable(struct regmap *regmap,
6605                                const char *name, const char **parent_names,
6606                                u8 num_parents, u8 id,
6607                                const struct clk_programmable_layout *layout)
6608 @@ -198,7 +196,7 @@
6609         prog->id = id;
6610         prog->layout = layout;
6611         prog->hw.init = &init;
6612 -       prog->pmc = pmc;
6613 +       prog->regmap = regmap;
6615         clk = clk_register(NULL, &prog->hw);
6616         if (IS_ERR(clk))
6617 @@ -226,7 +224,7 @@
6618  };
6620  static void __init
6621 -of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
6622 +of_at91_clk_prog_setup(struct device_node *np,
6623                        const struct clk_programmable_layout *layout)
6625         int num;
6626 @@ -236,6 +234,7 @@
6627         const char *parent_names[PROG_SOURCE_MAX];
6628         const char *name;
6629         struct device_node *progclknp;
6630 +       struct regmap *regmap;
6632         num_parents = of_clk_get_parent_count(np);
6633         if (num_parents <= 0 || num_parents > PROG_SOURCE_MAX)
6634 @@ -247,6 +246,10 @@
6635         if (!num || num > (PROG_ID_MAX + 1))
6636                 return;
6638 +       regmap = syscon_node_to_regmap(of_get_parent(np));
6639 +       if (IS_ERR(regmap))
6640 +               return;
6642         for_each_child_of_node(np, progclknp) {
6643                 if (of_property_read_u32(progclknp, "reg", &id))
6644                         continue;
6645 @@ -254,7 +257,7 @@
6646                 if (of_property_read_string(np, "clock-output-names", &name))
6647                         name = progclknp->name;
6649 -               clk = at91_clk_register_programmable(pmc, name,
6650 +               clk = at91_clk_register_programmable(regmap, name,
6651                                                      parent_names, num_parents,
6652                                                      id, layout);
6653                 if (IS_ERR(clk))
6654 @@ -265,20 +268,23 @@
6658 -void __init of_at91rm9200_clk_prog_setup(struct device_node *np,
6659 -                                        struct at91_pmc *pmc)
6660 +static void __init of_at91rm9200_clk_prog_setup(struct device_node *np)
6662 -       of_at91_clk_prog_setup(np, pmc, &at91rm9200_programmable_layout);
6663 +       of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout);
6665 +CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable",
6666 +              of_at91rm9200_clk_prog_setup);
6668 -void __init of_at91sam9g45_clk_prog_setup(struct device_node *np,
6669 -                                         struct at91_pmc *pmc)
6670 +static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np)
6672 -       of_at91_clk_prog_setup(np, pmc, &at91sam9g45_programmable_layout);
6673 +       of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout);
6675 +CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable",
6676 +              of_at91sam9g45_clk_prog_setup);
6678 -void __init of_at91sam9x5_clk_prog_setup(struct device_node *np,
6679 -                                        struct at91_pmc *pmc)
6680 +static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np)
6682 -       of_at91_clk_prog_setup(np, pmc, &at91sam9x5_programmable_layout);
6683 +       of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout);
6685 +CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable",
6686 +              of_at91sam9x5_clk_prog_setup);
6687 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-slow.c linux-4.4.46/drivers/clk/at91/clk-slow.c
6688 --- linux-4.4.46.orig/drivers/clk/at91/clk-slow.c       2017-02-01 08:31:11.000000000 +0100
6689 +++ linux-4.4.46/drivers/clk/at91/clk-slow.c    2017-02-03 17:18:05.675416176 +0100
6690 @@ -13,17 +13,11 @@
6691  #include <linux/clk.h>
6692  #include <linux/clk-provider.h>
6693  #include <linux/clkdev.h>
6694 -#include <linux/slab.h>
6695  #include <linux/clk/at91_pmc.h>
6696  #include <linux/delay.h>
6697  #include <linux/of.h>
6698 -#include <linux/of_address.h>
6699 -#include <linux/of_irq.h>
6700 -#include <linux/io.h>
6701 -#include <linux/interrupt.h>
6702 -#include <linux/irq.h>
6703 -#include <linux/sched.h>
6704 -#include <linux/wait.h>
6705 +#include <linux/mfd/syscon.h>
6706 +#include <linux/regmap.h>
6708  #include "pmc.h"
6709  #include "sckc.h"
6710 @@ -59,7 +53,7 @@
6712  struct clk_sam9260_slow {
6713         struct clk_hw hw;
6714 -       struct at91_pmc *pmc;
6715 +       struct regmap *regmap;
6716  };
6718  #define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw)
6719 @@ -393,8 +387,11 @@
6720  static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw)
6722         struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw);
6723 +       unsigned int status;
6725 -       return !!(pmc_read(slowck->pmc, AT91_PMC_SR) & AT91_PMC_OSCSEL);
6726 +       regmap_read(slowck->regmap, AT91_PMC_SR, &status);
6728 +       return status & AT91_PMC_OSCSEL ? 1 : 0;
6731  static const struct clk_ops sam9260_slow_ops = {
6732 @@ -402,7 +399,7 @@
6733  };
6735  static struct clk * __init
6736 -at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
6737 +at91_clk_register_sam9260_slow(struct regmap *regmap,
6738                                const char *name,
6739                                const char **parent_names,
6740                                int num_parents)
6741 @@ -411,7 +408,7 @@
6742         struct clk *clk = NULL;
6743         struct clk_init_data init;
6745 -       if (!pmc || !name)
6746 +       if (!name)
6747                 return ERR_PTR(-EINVAL);
6749         if (!parent_names || !num_parents)
6750 @@ -428,7 +425,7 @@
6751         init.flags = 0;
6753         slowck->hw.init = &init;
6754 -       slowck->pmc = pmc;
6755 +       slowck->regmap = regmap;
6757         clk = clk_register(NULL, &slowck->hw);
6758         if (IS_ERR(clk))
6759 @@ -439,29 +436,34 @@
6760         return clk;
6763 -void __init of_at91sam9260_clk_slow_setup(struct device_node *np,
6764 -                                         struct at91_pmc *pmc)
6765 +static void __init of_at91sam9260_clk_slow_setup(struct device_node *np)
6767         struct clk *clk;
6768         const char *parent_names[2];
6769         int num_parents;
6770         const char *name = np->name;
6771 +       struct regmap *regmap;
6773         num_parents = of_clk_get_parent_count(np);
6774         if (num_parents != 2)
6775                 return;
6777         of_clk_parent_fill(np, parent_names, num_parents);
6778 +       regmap = syscon_node_to_regmap(of_get_parent(np));
6779 +       if (IS_ERR(regmap))
6780 +               return;
6782         of_property_read_string(np, "clock-output-names", &name);
6784 -       clk = at91_clk_register_sam9260_slow(pmc, name, parent_names,
6785 +       clk = at91_clk_register_sam9260_slow(regmap, name, parent_names,
6786                                              num_parents);
6787         if (IS_ERR(clk))
6788                 return;
6790         of_clk_add_provider(np, of_clk_src_simple_get, clk);
6792 +CLK_OF_DECLARE(at91sam9260_clk_slow, "atmel,at91sam9260-clk-slow",
6793 +              of_at91sam9260_clk_slow_setup);
6795  /*
6796   * FIXME: All slow clk users are not properly claiming it (get + prepare +
6797 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-smd.c linux-4.4.46/drivers/clk/at91/clk-smd.c
6798 --- linux-4.4.46.orig/drivers/clk/at91/clk-smd.c        2017-02-01 08:31:11.000000000 +0100
6799 +++ linux-4.4.46/drivers/clk/at91/clk-smd.c     2017-02-03 17:18:05.675416176 +0100
6800 @@ -12,8 +12,8 @@
6801  #include <linux/clkdev.h>
6802  #include <linux/clk/at91_pmc.h>
6803  #include <linux/of.h>
6804 -#include <linux/of_address.h>
6805 -#include <linux/io.h>
6806 +#include <linux/mfd/syscon.h>
6807 +#include <linux/regmap.h>
6809  #include "pmc.h"
6811 @@ -24,7 +24,7 @@
6813  struct at91sam9x5_clk_smd {
6814         struct clk_hw hw;
6815 -       struct at91_pmc *pmc;
6816 +       struct regmap *regmap;
6817  };
6819  #define to_at91sam9x5_clk_smd(hw) \
6820 @@ -33,13 +33,13 @@
6821  static unsigned long at91sam9x5_clk_smd_recalc_rate(struct clk_hw *hw,
6822                                                     unsigned long parent_rate)
6824 -       u32 tmp;
6825 -       u8 smddiv;
6826         struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6827 -       struct at91_pmc *pmc = smd->pmc;
6828 +       unsigned int smdr;
6829 +       u8 smddiv;
6831 +       regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
6832 +       smddiv = (smdr & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
6834 -       tmp = pmc_read(pmc, AT91_PMC_SMD);
6835 -       smddiv = (tmp & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
6836         return parent_rate / (smddiv + 1);
6839 @@ -67,40 +67,38 @@
6841  static int at91sam9x5_clk_smd_set_parent(struct clk_hw *hw, u8 index)
6843 -       u32 tmp;
6844         struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6845 -       struct at91_pmc *pmc = smd->pmc;
6847         if (index > 1)
6848                 return -EINVAL;
6849 -       tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMDS;
6850 -       if (index)
6851 -               tmp |= AT91_PMC_SMDS;
6852 -       pmc_write(pmc, AT91_PMC_SMD, tmp);
6854 +       regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMDS,
6855 +                          index ? AT91_PMC_SMDS : 0);
6857         return 0;
6860  static u8 at91sam9x5_clk_smd_get_parent(struct clk_hw *hw)
6862         struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6863 -       struct at91_pmc *pmc = smd->pmc;
6864 +       unsigned int smdr;
6866 -       return pmc_read(pmc, AT91_PMC_SMD) & AT91_PMC_SMDS;
6867 +       regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
6869 +       return smdr & AT91_PMC_SMDS;
6872  static int at91sam9x5_clk_smd_set_rate(struct clk_hw *hw, unsigned long rate,
6873                                        unsigned long parent_rate)
6875 -       u32 tmp;
6876         struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
6877 -       struct at91_pmc *pmc = smd->pmc;
6878         unsigned long div = parent_rate / rate;
6880         if (parent_rate % rate || div < 1 || div > (SMD_MAX_DIV + 1))
6881                 return -EINVAL;
6882 -       tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMD_DIV;
6883 -       tmp |= (div - 1) << SMD_DIV_SHIFT;
6884 -       pmc_write(pmc, AT91_PMC_SMD, tmp);
6886 +       regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMD_DIV,
6887 +                          (div - 1) << SMD_DIV_SHIFT);
6889         return 0;
6891 @@ -114,7 +112,7 @@
6892  };
6894  static struct clk * __init
6895 -at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name,
6896 +at91sam9x5_clk_register_smd(struct regmap *regmap, const char *name,
6897                             const char **parent_names, u8 num_parents)
6899         struct at91sam9x5_clk_smd *smd;
6900 @@ -132,7 +130,7 @@
6901         init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
6903         smd->hw.init = &init;
6904 -       smd->pmc = pmc;
6905 +       smd->regmap = regmap;
6907         clk = clk_register(NULL, &smd->hw);
6908         if (IS_ERR(clk))
6909 @@ -141,13 +139,13 @@
6910         return clk;
6913 -void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
6914 -                                       struct at91_pmc *pmc)
6915 +static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np)
6917         struct clk *clk;
6918         int num_parents;
6919         const char *parent_names[SMD_SOURCE_MAX];
6920         const char *name = np->name;
6921 +       struct regmap *regmap;
6923         num_parents = of_clk_get_parent_count(np);
6924         if (num_parents <= 0 || num_parents > SMD_SOURCE_MAX)
6925 @@ -157,10 +155,16 @@
6927         of_property_read_string(np, "clock-output-names", &name);
6929 -       clk = at91sam9x5_clk_register_smd(pmc, name, parent_names,
6930 +       regmap = syscon_node_to_regmap(of_get_parent(np));
6931 +       if (IS_ERR(regmap))
6932 +               return;
6934 +       clk = at91sam9x5_clk_register_smd(regmap, name, parent_names,
6935                                           num_parents);
6936         if (IS_ERR(clk))
6937                 return;
6939         of_clk_add_provider(np, of_clk_src_simple_get, clk);
6941 +CLK_OF_DECLARE(at91sam9x5_clk_smd, "atmel,at91sam9x5-clk-smd",
6942 +              of_at91sam9x5_clk_smd_setup);
6943 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-system.c linux-4.4.46/drivers/clk/at91/clk-system.c
6944 --- linux-4.4.46.orig/drivers/clk/at91/clk-system.c     2017-02-01 08:31:11.000000000 +0100
6945 +++ linux-4.4.46/drivers/clk/at91/clk-system.c  2017-02-03 17:18:05.675416176 +0100
6946 @@ -12,13 +12,8 @@
6947  #include <linux/clkdev.h>
6948  #include <linux/clk/at91_pmc.h>
6949  #include <linux/of.h>
6950 -#include <linux/of_address.h>
6951 -#include <linux/io.h>
6952 -#include <linux/irq.h>
6953 -#include <linux/of_irq.h>
6954 -#include <linux/interrupt.h>
6955 -#include <linux/wait.h>
6956 -#include <linux/sched.h>
6957 +#include <linux/mfd/syscon.h>
6958 +#include <linux/regmap.h>
6960  #include "pmc.h"
6962 @@ -29,9 +24,7 @@
6963  #define to_clk_system(hw) container_of(hw, struct clk_system, hw)
6964  struct clk_system {
6965         struct clk_hw hw;
6966 -       struct at91_pmc *pmc;
6967 -       unsigned int irq;
6968 -       wait_queue_head_t wait;
6969 +       struct regmap *regmap;
6970         u8 id;
6971  };
6973 @@ -39,58 +32,54 @@
6975         return (id >= 8) && (id <= 15);
6977 -static irqreturn_t clk_system_irq_handler(int irq, void *dev_id)
6979 +static inline bool clk_system_ready(struct regmap *regmap, int id)
6981 -       struct clk_system *sys = (struct clk_system *)dev_id;
6982 +       unsigned int status;
6984 -       wake_up(&sys->wait);
6985 -       disable_irq_nosync(sys->irq);
6986 +       regmap_read(regmap, AT91_PMC_SR, &status);
6988 -       return IRQ_HANDLED;
6989 +       return status & (1 << id) ? 1 : 0;
6992  static int clk_system_prepare(struct clk_hw *hw)
6994         struct clk_system *sys = to_clk_system(hw);
6995 -       struct at91_pmc *pmc = sys->pmc;
6996 -       u32 mask = 1 << sys->id;
6998 -       pmc_write(pmc, AT91_PMC_SCER, mask);
6999 +       regmap_write(sys->regmap, AT91_PMC_SCER, 1 << sys->id);
7001         if (!is_pck(sys->id))
7002                 return 0;
7004 -       while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
7005 -               if (sys->irq) {
7006 -                       enable_irq(sys->irq);
7007 -                       wait_event(sys->wait,
7008 -                                  pmc_read(pmc, AT91_PMC_SR) & mask);
7009 -               } else
7010 -                       cpu_relax();
7011 -       }
7012 +       while (!clk_system_ready(sys->regmap, sys->id))
7013 +               cpu_relax();
7015         return 0;
7018  static void clk_system_unprepare(struct clk_hw *hw)
7020         struct clk_system *sys = to_clk_system(hw);
7021 -       struct at91_pmc *pmc = sys->pmc;
7023 -       pmc_write(pmc, AT91_PMC_SCDR, 1 << sys->id);
7024 +       regmap_write(sys->regmap, AT91_PMC_SCDR, 1 << sys->id);
7027  static int clk_system_is_prepared(struct clk_hw *hw)
7029         struct clk_system *sys = to_clk_system(hw);
7030 -       struct at91_pmc *pmc = sys->pmc;
7031 +       unsigned int status;
7033 +       regmap_read(sys->regmap, AT91_PMC_SCSR, &status);
7035 -       if (!(pmc_read(pmc, AT91_PMC_SCSR) & (1 << sys->id)))
7036 +       if (!(status & (1 << sys->id)))
7037                 return 0;
7039         if (!is_pck(sys->id))
7040                 return 1;
7042 -       return !!(pmc_read(pmc, AT91_PMC_SR) & (1 << sys->id));
7043 +       regmap_read(sys->regmap, AT91_PMC_SR, &status);
7045 +       return status & (1 << sys->id) ? 1 : 0;
7048  static const struct clk_ops system_ops = {
7049 @@ -100,13 +89,12 @@
7050  };
7052  static struct clk * __init
7053 -at91_clk_register_system(struct at91_pmc *pmc, const char *name,
7054 -                        const char *parent_name, u8 id, int irq)
7055 +at91_clk_register_system(struct regmap *regmap, const char *name,
7056 +                        const char *parent_name, u8 id)
7058         struct clk_system *sys;
7059         struct clk *clk = NULL;
7060         struct clk_init_data init;
7061 -       int ret;
7063         if (!parent_name || id > SYSTEM_MAX_ID)
7064                 return ERR_PTR(-EINVAL);
7065 @@ -123,44 +111,33 @@
7067         sys->id = id;
7068         sys->hw.init = &init;
7069 -       sys->pmc = pmc;
7070 -       sys->irq = irq;
7071 -       if (irq) {
7072 -               init_waitqueue_head(&sys->wait);
7073 -               irq_set_status_flags(sys->irq, IRQ_NOAUTOEN);
7074 -               ret = request_irq(sys->irq, clk_system_irq_handler,
7075 -                               IRQF_TRIGGER_HIGH, name, sys);
7076 -               if (ret) {
7077 -                       kfree(sys);
7078 -                       return ERR_PTR(ret);
7079 -               }
7080 -       }
7081 +       sys->regmap = regmap;
7083         clk = clk_register(NULL, &sys->hw);
7084 -       if (IS_ERR(clk)) {
7085 -               if (irq)
7086 -                       free_irq(sys->irq, sys);
7087 +       if (IS_ERR(clk))
7088                 kfree(sys);
7089 -       }
7091         return clk;
7094 -static void __init
7095 -of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc)
7096 +static void __init of_at91rm9200_clk_sys_setup(struct device_node *np)
7098         int num;
7099 -       int irq = 0;
7100         u32 id;
7101         struct clk *clk;
7102         const char *name;
7103         struct device_node *sysclknp;
7104         const char *parent_name;
7105 +       struct regmap *regmap;
7107         num = of_get_child_count(np);
7108         if (num > (SYSTEM_MAX_ID + 1))
7109                 return;
7111 +       regmap = syscon_node_to_regmap(of_get_parent(np));
7112 +       if (IS_ERR(regmap))
7113 +               return;
7115         for_each_child_of_node(np, sysclknp) {
7116                 if (of_property_read_u32(sysclknp, "reg", &id))
7117                         continue;
7118 @@ -168,21 +145,14 @@
7119                 if (of_property_read_string(np, "clock-output-names", &name))
7120                         name = sysclknp->name;
7122 -               if (is_pck(id))
7123 -                       irq = irq_of_parse_and_map(sysclknp, 0);
7125                 parent_name = of_clk_get_parent_name(sysclknp, 0);
7127 -               clk = at91_clk_register_system(pmc, name, parent_name, id, irq);
7128 +               clk = at91_clk_register_system(regmap, name, parent_name, id);
7129                 if (IS_ERR(clk))
7130                         continue;
7132                 of_clk_add_provider(sysclknp, of_clk_src_simple_get, clk);
7133         }
7136 -void __init of_at91rm9200_clk_sys_setup(struct device_node *np,
7137 -                                       struct at91_pmc *pmc)
7139 -       of_at91_clk_sys_setup(np, pmc);
7141 +CLK_OF_DECLARE(at91rm9200_clk_sys, "atmel,at91rm9200-clk-system",
7142 +              of_at91rm9200_clk_sys_setup);
7143 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-usb.c linux-4.4.46/drivers/clk/at91/clk-usb.c
7144 --- linux-4.4.46.orig/drivers/clk/at91/clk-usb.c        2017-02-01 08:31:11.000000000 +0100
7145 +++ linux-4.4.46/drivers/clk/at91/clk-usb.c     2017-02-03 17:18:05.675416176 +0100
7146 @@ -12,8 +12,8 @@
7147  #include <linux/clkdev.h>
7148  #include <linux/clk/at91_pmc.h>
7149  #include <linux/of.h>
7150 -#include <linux/of_address.h>
7151 -#include <linux/io.h>
7152 +#include <linux/mfd/syscon.h>
7153 +#include <linux/regmap.h>
7155  #include "pmc.h"
7157 @@ -27,7 +27,7 @@
7159  struct at91sam9x5_clk_usb {
7160         struct clk_hw hw;
7161 -       struct at91_pmc *pmc;
7162 +       struct regmap *regmap;
7163  };
7165  #define to_at91sam9x5_clk_usb(hw) \
7166 @@ -35,7 +35,7 @@
7168  struct at91rm9200_clk_usb {
7169         struct clk_hw hw;
7170 -       struct at91_pmc *pmc;
7171 +       struct regmap *regmap;
7172         u32 divisors[4];
7173  };
7175 @@ -45,13 +45,12 @@
7176  static unsigned long at91sam9x5_clk_usb_recalc_rate(struct clk_hw *hw,
7177                                                     unsigned long parent_rate)
7179 -       u32 tmp;
7180 -       u8 usbdiv;
7181         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7182 -       struct at91_pmc *pmc = usb->pmc;
7183 +       unsigned int usbr;
7184 +       u8 usbdiv;
7186 -       tmp = pmc_read(pmc, AT91_PMC_USB);
7187 -       usbdiv = (tmp & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
7188 +       regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
7189 +       usbdiv = (usbr & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
7191         return DIV_ROUND_CLOSEST(parent_rate, (usbdiv + 1));
7193 @@ -109,33 +108,31 @@
7195  static int at91sam9x5_clk_usb_set_parent(struct clk_hw *hw, u8 index)
7197 -       u32 tmp;
7198         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7199 -       struct at91_pmc *pmc = usb->pmc;
7201         if (index > 1)
7202                 return -EINVAL;
7203 -       tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS;
7204 -       if (index)
7205 -               tmp |= AT91_PMC_USBS;
7206 -       pmc_write(pmc, AT91_PMC_USB, tmp);
7208 +       regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
7209 +                          index ? AT91_PMC_USBS : 0);
7211         return 0;
7214  static u8 at91sam9x5_clk_usb_get_parent(struct clk_hw *hw)
7216         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7217 -       struct at91_pmc *pmc = usb->pmc;
7218 +       unsigned int usbr;
7220 -       return pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS;
7221 +       regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
7223 +       return usbr & AT91_PMC_USBS;
7226  static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
7227                                        unsigned long parent_rate)
7229 -       u32 tmp;
7230         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7231 -       struct at91_pmc *pmc = usb->pmc;
7232         unsigned long div;
7234         if (!rate)
7235 @@ -145,9 +142,8 @@
7236         if (div > SAM9X5_USB_MAX_DIV + 1 || !div)
7237                 return -EINVAL;
7239 -       tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_OHCIUSBDIV;
7240 -       tmp |= (div - 1) << SAM9X5_USB_DIV_SHIFT;
7241 -       pmc_write(pmc, AT91_PMC_USB, tmp);
7242 +       regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_OHCIUSBDIV,
7243 +                          (div - 1) << SAM9X5_USB_DIV_SHIFT);
7245         return 0;
7247 @@ -163,28 +159,28 @@
7248  static int at91sam9n12_clk_usb_enable(struct clk_hw *hw)
7250         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7251 -       struct at91_pmc *pmc = usb->pmc;
7253 -       pmc_write(pmc, AT91_PMC_USB,
7254 -                 pmc_read(pmc, AT91_PMC_USB) | AT91_PMC_USBS);
7255 +       regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
7256 +                          AT91_PMC_USBS);
7258         return 0;
7261  static void at91sam9n12_clk_usb_disable(struct clk_hw *hw)
7263         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7264 -       struct at91_pmc *pmc = usb->pmc;
7266 -       pmc_write(pmc, AT91_PMC_USB,
7267 -                 pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS);
7268 +       regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, 0);
7271  static int at91sam9n12_clk_usb_is_enabled(struct clk_hw *hw)
7273         struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
7274 -       struct at91_pmc *pmc = usb->pmc;
7275 +       unsigned int usbr;
7277 -       return !!(pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS);
7278 +       regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
7280 +       return usbr & AT91_PMC_USBS;
7283  static const struct clk_ops at91sam9n12_usb_ops = {
7284 @@ -197,7 +193,7 @@
7285  };
7287  static struct clk * __init
7288 -at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name,
7289 +at91sam9x5_clk_register_usb(struct regmap *regmap, const char *name,
7290                             const char **parent_names, u8 num_parents)
7292         struct at91sam9x5_clk_usb *usb;
7293 @@ -216,7 +212,7 @@
7294                      CLK_SET_RATE_PARENT;
7296         usb->hw.init = &init;
7297 -       usb->pmc = pmc;
7298 +       usb->regmap = regmap;
7300         clk = clk_register(NULL, &usb->hw);
7301         if (IS_ERR(clk))
7302 @@ -226,7 +222,7 @@
7305  static struct clk * __init
7306 -at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name,
7307 +at91sam9n12_clk_register_usb(struct regmap *regmap, const char *name,
7308                              const char *parent_name)
7310         struct at91sam9x5_clk_usb *usb;
7311 @@ -244,7 +240,7 @@
7312         init.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT;
7314         usb->hw.init = &init;
7315 -       usb->pmc = pmc;
7316 +       usb->regmap = regmap;
7318         clk = clk_register(NULL, &usb->hw);
7319         if (IS_ERR(clk))
7320 @@ -257,12 +253,12 @@
7321                                                     unsigned long parent_rate)
7323         struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
7324 -       struct at91_pmc *pmc = usb->pmc;
7325 -       u32 tmp;
7326 +       unsigned int pllbr;
7327         u8 usbdiv;
7329 -       tmp = pmc_read(pmc, AT91_CKGR_PLLBR);
7330 -       usbdiv = (tmp & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
7331 +       regmap_read(usb->regmap, AT91_CKGR_PLLBR, &pllbr);
7333 +       usbdiv = (pllbr & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
7334         if (usb->divisors[usbdiv])
7335                 return parent_rate / usb->divisors[usbdiv];
7337 @@ -310,10 +306,8 @@
7338  static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
7339                                        unsigned long parent_rate)
7341 -       u32 tmp;
7342         int i;
7343         struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
7344 -       struct at91_pmc *pmc = usb->pmc;
7345         unsigned long div;
7347         if (!rate)
7348 @@ -323,10 +317,10 @@
7350         for (i = 0; i < RM9200_USB_DIV_TAB_SIZE; i++) {
7351                 if (usb->divisors[i] == div) {
7352 -                       tmp = pmc_read(pmc, AT91_CKGR_PLLBR) &
7353 -                             ~AT91_PMC_USBDIV;
7354 -                       tmp |= i << RM9200_USB_DIV_SHIFT;
7355 -                       pmc_write(pmc, AT91_CKGR_PLLBR, tmp);
7356 +                       regmap_update_bits(usb->regmap, AT91_CKGR_PLLBR,
7357 +                                          AT91_PMC_USBDIV,
7358 +                                          i << RM9200_USB_DIV_SHIFT);
7360                         return 0;
7361                 }
7362         }
7363 @@ -341,7 +335,7 @@
7364  };
7366  static struct clk * __init
7367 -at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name,
7368 +at91rm9200_clk_register_usb(struct regmap *regmap, const char *name,
7369                             const char *parent_name, const u32 *divisors)
7371         struct at91rm9200_clk_usb *usb;
7372 @@ -359,7 +353,7 @@
7373         init.flags = CLK_SET_RATE_PARENT;
7375         usb->hw.init = &init;
7376 -       usb->pmc = pmc;
7377 +       usb->regmap = regmap;
7378         memcpy(usb->divisors, divisors, sizeof(usb->divisors));
7380         clk = clk_register(NULL, &usb->hw);
7381 @@ -369,13 +363,13 @@
7382         return clk;
7385 -void __init of_at91sam9x5_clk_usb_setup(struct device_node *np,
7386 -                                       struct at91_pmc *pmc)
7387 +static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np)
7389         struct clk *clk;
7390         int num_parents;
7391         const char *parent_names[USB_SOURCE_MAX];
7392         const char *name = np->name;
7393 +       struct regmap *regmap;
7395         num_parents = of_clk_get_parent_count(np);
7396         if (num_parents <= 0 || num_parents > USB_SOURCE_MAX)
7397 @@ -385,19 +379,26 @@
7399         of_property_read_string(np, "clock-output-names", &name);
7401 -       clk = at91sam9x5_clk_register_usb(pmc, name, parent_names, num_parents);
7402 +       regmap = syscon_node_to_regmap(of_get_parent(np));
7403 +       if (IS_ERR(regmap))
7404 +               return;
7406 +       clk = at91sam9x5_clk_register_usb(regmap, name, parent_names,
7407 +                                         num_parents);
7408         if (IS_ERR(clk))
7409                 return;
7411         of_clk_add_provider(np, of_clk_src_simple_get, clk);
7413 +CLK_OF_DECLARE(at91sam9x5_clk_usb, "atmel,at91sam9x5-clk-usb",
7414 +              of_at91sam9x5_clk_usb_setup);
7416 -void __init of_at91sam9n12_clk_usb_setup(struct device_node *np,
7417 -                                        struct at91_pmc *pmc)
7418 +static void __init of_at91sam9n12_clk_usb_setup(struct device_node *np)
7420         struct clk *clk;
7421         const char *parent_name;
7422         const char *name = np->name;
7423 +       struct regmap *regmap;
7425         parent_name = of_clk_get_parent_name(np, 0);
7426         if (!parent_name)
7427 @@ -405,20 +406,26 @@
7429         of_property_read_string(np, "clock-output-names", &name);
7431 -       clk = at91sam9n12_clk_register_usb(pmc, name, parent_name);
7432 +       regmap = syscon_node_to_regmap(of_get_parent(np));
7433 +       if (IS_ERR(regmap))
7434 +               return;
7436 +       clk = at91sam9n12_clk_register_usb(regmap, name, parent_name);
7437         if (IS_ERR(clk))
7438                 return;
7440         of_clk_add_provider(np, of_clk_src_simple_get, clk);
7442 +CLK_OF_DECLARE(at91sam9n12_clk_usb, "atmel,at91sam9n12-clk-usb",
7443 +              of_at91sam9n12_clk_usb_setup);
7445 -void __init of_at91rm9200_clk_usb_setup(struct device_node *np,
7446 -                                       struct at91_pmc *pmc)
7447 +static void __init of_at91rm9200_clk_usb_setup(struct device_node *np)
7449         struct clk *clk;
7450         const char *parent_name;
7451         const char *name = np->name;
7452         u32 divisors[4] = {0, 0, 0, 0};
7453 +       struct regmap *regmap;
7455         parent_name = of_clk_get_parent_name(np, 0);
7456         if (!parent_name)
7457 @@ -430,9 +437,15 @@
7459         of_property_read_string(np, "clock-output-names", &name);
7461 -       clk = at91rm9200_clk_register_usb(pmc, name, parent_name, divisors);
7462 +       regmap = syscon_node_to_regmap(of_get_parent(np));
7463 +       if (IS_ERR(regmap))
7464 +               return;
7466 +       clk = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors);
7467         if (IS_ERR(clk))
7468                 return;
7470         of_clk_add_provider(np, of_clk_src_simple_get, clk);
7472 +CLK_OF_DECLARE(at91rm9200_clk_usb, "atmel,at91rm9200-clk-usb",
7473 +              of_at91rm9200_clk_usb_setup);
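
Editorial annotation (not part of the patch hunks): the clk-usb conversion above swaps the driver-private pmc_read()/pmc_write() read-modify-write sequences for accesses through the PMC syscon regmap. A minimal sketch of the equivalent idiom, assuming a regmap already obtained from the PMC node; register and bit names are the stock definitions from <linux/clk/at91_pmc.h>, while the function names are illustrative only.

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>

/* Old:  pmc_write(pmc, AT91_PMC_USB,
 *                 pmc_read(pmc, AT91_PMC_USB) | AT91_PMC_USBS);
 * New:  one locked read-modify-write through the shared regmap.
 */
static int usb_clk_select_upll(struct regmap *regmap)
{
	return regmap_update_bits(regmap, AT91_PMC_USB,
				  AT91_PMC_USBS, AT91_PMC_USBS);
}

/* Old:  return !!(pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS); */
static int usb_clk_uses_upll(struct regmap *regmap)
{
	unsigned int usbr;

	regmap_read(regmap, AT91_PMC_USB, &usbr);
	return usbr & AT91_PMC_USBS;
}
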
7474 diff -Nur linux-4.4.46.orig/drivers/clk/at91/clk-utmi.c linux-4.4.46/drivers/clk/at91/clk-utmi.c
7475 --- linux-4.4.46.orig/drivers/clk/at91/clk-utmi.c       2017-02-01 08:31:11.000000000 +0100
7476 +++ linux-4.4.46/drivers/clk/at91/clk-utmi.c    2017-02-03 17:18:05.675416176 +0100
7477 @@ -11,14 +11,9 @@
7478  #include <linux/clk-provider.h>
7479  #include <linux/clkdev.h>
7480  #include <linux/clk/at91_pmc.h>
7481 -#include <linux/interrupt.h>
7482 -#include <linux/irq.h>
7483  #include <linux/of.h>
7484 -#include <linux/of_address.h>
7485 -#include <linux/of_irq.h>
7486 -#include <linux/io.h>
7487 -#include <linux/sched.h>
7488 -#include <linux/wait.h>
7489 +#include <linux/mfd/syscon.h>
7490 +#include <linux/regmap.h>
7492  #include "pmc.h"
7494 @@ -26,37 +21,30 @@
7496  struct clk_utmi {
7497         struct clk_hw hw;
7498 -       struct at91_pmc *pmc;
7499 -       unsigned int irq;
7500 -       wait_queue_head_t wait;
7501 +       struct regmap *regmap;
7502  };
7504  #define to_clk_utmi(hw) container_of(hw, struct clk_utmi, hw)
7506 -static irqreturn_t clk_utmi_irq_handler(int irq, void *dev_id)
7507 +static inline bool clk_utmi_ready(struct regmap *regmap)
7509 -       struct clk_utmi *utmi = (struct clk_utmi *)dev_id;
7510 +       unsigned int status;
7512 -       wake_up(&utmi->wait);
7513 -       disable_irq_nosync(utmi->irq);
7514 +       regmap_read(regmap, AT91_PMC_SR, &status);
7516 -       return IRQ_HANDLED;
7517 +       return status & AT91_PMC_LOCKU;
7520  static int clk_utmi_prepare(struct clk_hw *hw)
7522         struct clk_utmi *utmi = to_clk_utmi(hw);
7523 -       struct at91_pmc *pmc = utmi->pmc;
7524 -       u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) | AT91_PMC_UPLLEN |
7525 -                 AT91_PMC_UPLLCOUNT | AT91_PMC_BIASEN;
7527 -       pmc_write(pmc, AT91_CKGR_UCKR, tmp);
7529 -       while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU)) {
7530 -               enable_irq(utmi->irq);
7531 -               wait_event(utmi->wait,
7532 -                          pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
7533 -       }
7534 +       unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT |
7535 +                           AT91_PMC_BIASEN;
7537 +       regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, uckr, uckr);
7539 +       while (!clk_utmi_ready(utmi->regmap))
7540 +               cpu_relax();
7542         return 0;
7544 @@ -64,18 +52,15 @@
7545  static int clk_utmi_is_prepared(struct clk_hw *hw)
7547         struct clk_utmi *utmi = to_clk_utmi(hw);
7548 -       struct at91_pmc *pmc = utmi->pmc;
7550 -       return !!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
7551 +       return clk_utmi_ready(utmi->regmap);
7554  static void clk_utmi_unprepare(struct clk_hw *hw)
7556         struct clk_utmi *utmi = to_clk_utmi(hw);
7557 -       struct at91_pmc *pmc = utmi->pmc;
7558 -       u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) & ~AT91_PMC_UPLLEN;
7560 -       pmc_write(pmc, AT91_CKGR_UCKR, tmp);
7561 +       regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, AT91_PMC_UPLLEN, 0);
7564  static unsigned long clk_utmi_recalc_rate(struct clk_hw *hw,
7565 @@ -93,10 +78,9 @@
7566  };
7568  static struct clk * __init
7569 -at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq,
7570 +at91_clk_register_utmi(struct regmap *regmap,
7571                        const char *name, const char *parent_name)
7573 -       int ret;
7574         struct clk_utmi *utmi;
7575         struct clk *clk = NULL;
7576         struct clk_init_data init;
7577 @@ -112,52 +96,36 @@
7578         init.flags = CLK_SET_RATE_GATE;
7580         utmi->hw.init = &init;
7581 -       utmi->pmc = pmc;
7582 -       utmi->irq = irq;
7583 -       init_waitqueue_head(&utmi->wait);
7584 -       irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN);
7585 -       ret = request_irq(utmi->irq, clk_utmi_irq_handler,
7586 -                         IRQF_TRIGGER_HIGH, "clk-utmi", utmi);
7587 -       if (ret) {
7588 -               kfree(utmi);
7589 -               return ERR_PTR(ret);
7590 -       }
7591 +       utmi->regmap = regmap;
7593         clk = clk_register(NULL, &utmi->hw);
7594 -       if (IS_ERR(clk)) {
7595 -               free_irq(utmi->irq, utmi);
7596 +       if (IS_ERR(clk))
7597                 kfree(utmi);
7598 -       }
7600         return clk;
7603 -static void __init
7604 -of_at91_clk_utmi_setup(struct device_node *np, struct at91_pmc *pmc)
7605 +static void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np)
7607 -       unsigned int irq;
7608         struct clk *clk;
7609         const char *parent_name;
7610         const char *name = np->name;
7611 +       struct regmap *regmap;
7613         parent_name = of_clk_get_parent_name(np, 0);
7615         of_property_read_string(np, "clock-output-names", &name);
7617 -       irq = irq_of_parse_and_map(np, 0);
7618 -       if (!irq)
7619 +       regmap = syscon_node_to_regmap(of_get_parent(np));
7620 +       if (IS_ERR(regmap))
7621                 return;
7623 -       clk = at91_clk_register_utmi(pmc, irq, name, parent_name);
7624 +       clk = at91_clk_register_utmi(regmap, name, parent_name);
7625         if (IS_ERR(clk))
7626                 return;
7628         of_clk_add_provider(np, of_clk_src_simple_get, clk);
7629         return;
7632 -void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np,
7633 -                                        struct at91_pmc *pmc)
7635 -       of_at91_clk_utmi_setup(np, pmc);
7637 +CLK_OF_DECLARE(at91sam9x5_clk_utmi, "atmel,at91sam9x5-clk-utmi",
7638 +              of_at91sam9x5_clk_utmi_setup);
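
Editorial annotation (not part of the patch hunks): the clk-utmi hunks drop the PMC interrupt plus waitqueue handshake and instead poll the UPLL lock bit through the regmap, as clk_utmi_prepare() now does. A sketch of that polling idiom, assuming the same PMC regmap and the stock AT91_PMC_SR/AT91_PMC_LOCKU definitions; the helper name is illustrative.

#include <linux/clk/at91_pmc.h>
#include <linux/regmap.h>
#include <asm/processor.h>	/* cpu_relax() */

/* Busy-wait until the UPLL reports lock. */
static void wait_upll_lock(struct regmap *regmap)
{
	unsigned int status;

	do {
		regmap_read(regmap, AT91_PMC_SR, &status);
		cpu_relax();
	} while (!(status & AT91_PMC_LOCKU));
}
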
7639 diff -Nur linux-4.4.46.orig/drivers/clk/at91/pmc.c linux-4.4.46/drivers/clk/at91/pmc.c
7640 --- linux-4.4.46.orig/drivers/clk/at91/pmc.c    2017-02-01 08:31:11.000000000 +0100
7641 +++ linux-4.4.46/drivers/clk/at91/pmc.c 2017-02-03 17:18:05.675416176 +0100
7642 @@ -12,36 +12,13 @@
7643  #include <linux/clkdev.h>
7644  #include <linux/clk/at91_pmc.h>
7645  #include <linux/of.h>
7646 -#include <linux/of_address.h>
7647 -#include <linux/io.h>
7648 -#include <linux/interrupt.h>
7649 -#include <linux/irq.h>
7650 -#include <linux/irqchip/chained_irq.h>
7651 -#include <linux/irqdomain.h>
7652 -#include <linux/of_irq.h>
7653 +#include <linux/mfd/syscon.h>
7654 +#include <linux/regmap.h>
7656  #include <asm/proc-fns.h>
7658  #include "pmc.h"
7660 -void __iomem *at91_pmc_base;
7661 -EXPORT_SYMBOL_GPL(at91_pmc_base);
7663 -void at91rm9200_idle(void)
7665 -       /*
7666 -        * Disable the processor clock.  The processor will be automatically
7667 -        * re-enabled by an interrupt or by a reset.
7668 -        */
7669 -       at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
7672 -void at91sam9_idle(void)
7674 -       at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
7675 -       cpu_do_idle();
7678  int of_at91_get_clk_range(struct device_node *np, const char *propname,
7679                           struct clk_range *range)
7681 @@ -64,402 +41,3 @@
7682         return 0;
7684  EXPORT_SYMBOL_GPL(of_at91_get_clk_range);
7686 -static void pmc_irq_mask(struct irq_data *d)
7688 -       struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7690 -       pmc_write(pmc, AT91_PMC_IDR, 1 << d->hwirq);
7693 -static void pmc_irq_unmask(struct irq_data *d)
7695 -       struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7697 -       pmc_write(pmc, AT91_PMC_IER, 1 << d->hwirq);
7700 -static int pmc_irq_set_type(struct irq_data *d, unsigned type)
7702 -       if (type != IRQ_TYPE_LEVEL_HIGH) {
7703 -               pr_warn("PMC: type not supported (support only IRQ_TYPE_LEVEL_HIGH type)\n");
7704 -               return -EINVAL;
7705 -       }
7707 -       return 0;
7710 -static void pmc_irq_suspend(struct irq_data *d)
7712 -       struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7714 -       pmc->imr = pmc_read(pmc, AT91_PMC_IMR);
7715 -       pmc_write(pmc, AT91_PMC_IDR, pmc->imr);
7718 -static void pmc_irq_resume(struct irq_data *d)
7720 -       struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
7722 -       pmc_write(pmc, AT91_PMC_IER, pmc->imr);
7725 -static struct irq_chip pmc_irq = {
7726 -       .name = "PMC",
7727 -       .irq_disable = pmc_irq_mask,
7728 -       .irq_mask = pmc_irq_mask,
7729 -       .irq_unmask = pmc_irq_unmask,
7730 -       .irq_set_type = pmc_irq_set_type,
7731 -       .irq_suspend = pmc_irq_suspend,
7732 -       .irq_resume = pmc_irq_resume,
7735 -static struct lock_class_key pmc_lock_class;
7737 -static int pmc_irq_map(struct irq_domain *h, unsigned int virq,
7738 -                      irq_hw_number_t hw)
7740 -       struct at91_pmc *pmc = h->host_data;
7742 -       irq_set_lockdep_class(virq, &pmc_lock_class);
7744 -       irq_set_chip_and_handler(virq, &pmc_irq,
7745 -                                handle_level_irq);
7746 -       irq_set_chip_data(virq, pmc);
7748 -       return 0;
7751 -static int pmc_irq_domain_xlate(struct irq_domain *d,
7752 -                               struct device_node *ctrlr,
7753 -                               const u32 *intspec, unsigned int intsize,
7754 -                               irq_hw_number_t *out_hwirq,
7755 -                               unsigned int *out_type)
7757 -       struct at91_pmc *pmc = d->host_data;
7758 -       const struct at91_pmc_caps *caps = pmc->caps;
7760 -       if (WARN_ON(intsize < 1))
7761 -               return -EINVAL;
7763 -       *out_hwirq = intspec[0];
7765 -       if (!(caps->available_irqs & (1 << *out_hwirq)))
7766 -               return -EINVAL;
7768 -       *out_type = IRQ_TYPE_LEVEL_HIGH;
7770 -       return 0;
7773 -static const struct irq_domain_ops pmc_irq_ops = {
7774 -       .map    = pmc_irq_map,
7775 -       .xlate  = pmc_irq_domain_xlate,
7778 -static irqreturn_t pmc_irq_handler(int irq, void *data)
7780 -       struct at91_pmc *pmc = (struct at91_pmc *)data;
7781 -       unsigned long sr;
7782 -       int n;
7784 -       sr = pmc_read(pmc, AT91_PMC_SR) & pmc_read(pmc, AT91_PMC_IMR);
7785 -       if (!sr)
7786 -               return IRQ_NONE;
7788 -       for_each_set_bit(n, &sr, BITS_PER_LONG)
7789 -               generic_handle_irq(irq_find_mapping(pmc->irqdomain, n));
7791 -       return IRQ_HANDLED;
7794 -static const struct at91_pmc_caps at91rm9200_caps = {
7795 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
7796 -                         AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
7797 -                         AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
7798 -                         AT91_PMC_PCK3RDY,
7801 -static const struct at91_pmc_caps at91sam9260_caps = {
7802 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
7803 -                         AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
7804 -                         AT91_PMC_PCK1RDY,
7807 -static const struct at91_pmc_caps at91sam9g45_caps = {
7808 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7809 -                         AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7810 -                         AT91_PMC_PCK1RDY,
7813 -static const struct at91_pmc_caps at91sam9n12_caps = {
7814 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
7815 -                         AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
7816 -                         AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
7817 -                         AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
7820 -static const struct at91_pmc_caps at91sam9x5_caps = {
7821 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7822 -                         AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7823 -                         AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
7824 -                         AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
7827 -static const struct at91_pmc_caps sama5d2_caps = {
7828 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7829 -                         AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7830 -                         AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
7831 -                         AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
7832 -                         AT91_PMC_CFDEV | AT91_PMC_GCKRDY,
7835 -static const struct at91_pmc_caps sama5d3_caps = {
7836 -       .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
7837 -                         AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
7838 -                         AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
7839 -                         AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
7840 -                         AT91_PMC_CFDEV,
7843 -static struct at91_pmc *__init at91_pmc_init(struct device_node *np,
7844 -                                            void __iomem *regbase, int virq,
7845 -                                            const struct at91_pmc_caps *caps)
7847 -       struct at91_pmc *pmc;
7849 -       if (!regbase || !virq ||  !caps)
7850 -               return NULL;
7852 -       at91_pmc_base = regbase;
7854 -       pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
7855 -       if (!pmc)
7856 -               return NULL;
7858 -       spin_lock_init(&pmc->lock);
7859 -       pmc->regbase = regbase;
7860 -       pmc->virq = virq;
7861 -       pmc->caps = caps;
7863 -       pmc->irqdomain = irq_domain_add_linear(np, 32, &pmc_irq_ops, pmc);
7865 -       if (!pmc->irqdomain)
7866 -               goto out_free_pmc;
7868 -       pmc_write(pmc, AT91_PMC_IDR, 0xffffffff);
7869 -       if (request_irq(pmc->virq, pmc_irq_handler,
7870 -                       IRQF_SHARED | IRQF_COND_SUSPEND, "pmc", pmc))
7871 -               goto out_remove_irqdomain;
7873 -       return pmc;
7875 -out_remove_irqdomain:
7876 -       irq_domain_remove(pmc->irqdomain);
7877 -out_free_pmc:
7878 -       kfree(pmc);
7880 -       return NULL;
7883 -static const struct of_device_id pmc_clk_ids[] __initconst = {
7884 -       /* Slow oscillator */
7885 -       {
7886 -               .compatible = "atmel,at91sam9260-clk-slow",
7887 -               .data = of_at91sam9260_clk_slow_setup,
7888 -       },
7889 -       /* Main clock */
7890 -       {
7891 -               .compatible = "atmel,at91rm9200-clk-main-osc",
7892 -               .data = of_at91rm9200_clk_main_osc_setup,
7893 -       },
7894 -       {
7895 -               .compatible = "atmel,at91sam9x5-clk-main-rc-osc",
7896 -               .data = of_at91sam9x5_clk_main_rc_osc_setup,
7897 -       },
7898 -       {
7899 -               .compatible = "atmel,at91rm9200-clk-main",
7900 -               .data = of_at91rm9200_clk_main_setup,
7901 -       },
7902 -       {
7903 -               .compatible = "atmel,at91sam9x5-clk-main",
7904 -               .data = of_at91sam9x5_clk_main_setup,
7905 -       },
7906 -       /* PLL clocks */
7907 -       {
7908 -               .compatible = "atmel,at91rm9200-clk-pll",
7909 -               .data = of_at91rm9200_clk_pll_setup,
7910 -       },
7911 -       {
7912 -               .compatible = "atmel,at91sam9g45-clk-pll",
7913 -               .data = of_at91sam9g45_clk_pll_setup,
7914 -       },
7915 -       {
7916 -               .compatible = "atmel,at91sam9g20-clk-pllb",
7917 -               .data = of_at91sam9g20_clk_pllb_setup,
7918 -       },
7919 -       {
7920 -               .compatible = "atmel,sama5d3-clk-pll",
7921 -               .data = of_sama5d3_clk_pll_setup,
7922 -       },
7923 -       {
7924 -               .compatible = "atmel,at91sam9x5-clk-plldiv",
7925 -               .data = of_at91sam9x5_clk_plldiv_setup,
7926 -       },
7927 -       /* Master clock */
7928 -       {
7929 -               .compatible = "atmel,at91rm9200-clk-master",
7930 -               .data = of_at91rm9200_clk_master_setup,
7931 -       },
7932 -       {
7933 -               .compatible = "atmel,at91sam9x5-clk-master",
7934 -               .data = of_at91sam9x5_clk_master_setup,
7935 -       },
7936 -       /* System clocks */
7937 -       {
7938 -               .compatible = "atmel,at91rm9200-clk-system",
7939 -               .data = of_at91rm9200_clk_sys_setup,
7940 -       },
7941 -       /* Peripheral clocks */
7942 -       {
7943 -               .compatible = "atmel,at91rm9200-clk-peripheral",
7944 -               .data = of_at91rm9200_clk_periph_setup,
7945 -       },
7946 -       {
7947 -               .compatible = "atmel,at91sam9x5-clk-peripheral",
7948 -               .data = of_at91sam9x5_clk_periph_setup,
7949 -       },
7950 -       /* Programmable clocks */
7951 -       {
7952 -               .compatible = "atmel,at91rm9200-clk-programmable",
7953 -               .data = of_at91rm9200_clk_prog_setup,
7954 -       },
7955 -       {
7956 -               .compatible = "atmel,at91sam9g45-clk-programmable",
7957 -               .data = of_at91sam9g45_clk_prog_setup,
7958 -       },
7959 -       {
7960 -               .compatible = "atmel,at91sam9x5-clk-programmable",
7961 -               .data = of_at91sam9x5_clk_prog_setup,
7962 -       },
7963 -       /* UTMI clock */
7964 -#if defined(CONFIG_HAVE_AT91_UTMI)
7965 -       {
7966 -               .compatible = "atmel,at91sam9x5-clk-utmi",
7967 -               .data = of_at91sam9x5_clk_utmi_setup,
7968 -       },
7969 -#endif
7970 -       /* USB clock */
7971 -#if defined(CONFIG_HAVE_AT91_USB_CLK)
7972 -       {
7973 -               .compatible = "atmel,at91rm9200-clk-usb",
7974 -               .data = of_at91rm9200_clk_usb_setup,
7975 -       },
7976 -       {
7977 -               .compatible = "atmel,at91sam9x5-clk-usb",
7978 -               .data = of_at91sam9x5_clk_usb_setup,
7979 -       },
7980 -       {
7981 -               .compatible = "atmel,at91sam9n12-clk-usb",
7982 -               .data = of_at91sam9n12_clk_usb_setup,
7983 -       },
7984 -#endif
7985 -       /* SMD clock */
7986 -#if defined(CONFIG_HAVE_AT91_SMD)
7987 -       {
7988 -               .compatible = "atmel,at91sam9x5-clk-smd",
7989 -               .data = of_at91sam9x5_clk_smd_setup,
7990 -       },
7991 -#endif
7992 -#if defined(CONFIG_HAVE_AT91_H32MX)
7993 -       {
7994 -               .compatible = "atmel,sama5d4-clk-h32mx",
7995 -               .data = of_sama5d4_clk_h32mx_setup,
7996 -       },
7997 -#endif
7998 -#if defined(CONFIG_HAVE_AT91_GENERATED_CLK)
7999 -       {
8000 -               .compatible = "atmel,sama5d2-clk-generated",
8001 -               .data = of_sama5d2_clk_generated_setup,
8002 -       },
8003 -#endif
8004 -       { /*sentinel*/ }
8007 -static void __init of_at91_pmc_setup(struct device_node *np,
8008 -                                    const struct at91_pmc_caps *caps)
8010 -       struct at91_pmc *pmc;
8011 -       struct device_node *childnp;
8012 -       void (*clk_setup)(struct device_node *, struct at91_pmc *);
8013 -       const struct of_device_id *clk_id;
8014 -       void __iomem *regbase = of_iomap(np, 0);
8015 -       int virq;
8017 -       if (!regbase)
8018 -               return;
8020 -       virq = irq_of_parse_and_map(np, 0);
8021 -       if (!virq)
8022 -               return;
8024 -       pmc = at91_pmc_init(np, regbase, virq, caps);
8025 -       if (!pmc)
8026 -               return;
8027 -       for_each_child_of_node(np, childnp) {
8028 -               clk_id = of_match_node(pmc_clk_ids, childnp);
8029 -               if (!clk_id)
8030 -                       continue;
8031 -               clk_setup = clk_id->data;
8032 -               clk_setup(childnp, pmc);
8033 -       }
8036 -static void __init of_at91rm9200_pmc_setup(struct device_node *np)
8038 -       of_at91_pmc_setup(np, &at91rm9200_caps);
8040 -CLK_OF_DECLARE(at91rm9200_clk_pmc, "atmel,at91rm9200-pmc",
8041 -              of_at91rm9200_pmc_setup);
8043 -static void __init of_at91sam9260_pmc_setup(struct device_node *np)
8045 -       of_at91_pmc_setup(np, &at91sam9260_caps);
8047 -CLK_OF_DECLARE(at91sam9260_clk_pmc, "atmel,at91sam9260-pmc",
8048 -              of_at91sam9260_pmc_setup);
8050 -static void __init of_at91sam9g45_pmc_setup(struct device_node *np)
8052 -       of_at91_pmc_setup(np, &at91sam9g45_caps);
8054 -CLK_OF_DECLARE(at91sam9g45_clk_pmc, "atmel,at91sam9g45-pmc",
8055 -              of_at91sam9g45_pmc_setup);
8057 -static void __init of_at91sam9n12_pmc_setup(struct device_node *np)
8059 -       of_at91_pmc_setup(np, &at91sam9n12_caps);
8061 -CLK_OF_DECLARE(at91sam9n12_clk_pmc, "atmel,at91sam9n12-pmc",
8062 -              of_at91sam9n12_pmc_setup);
8064 -static void __init of_at91sam9x5_pmc_setup(struct device_node *np)
8066 -       of_at91_pmc_setup(np, &at91sam9x5_caps);
8068 -CLK_OF_DECLARE(at91sam9x5_clk_pmc, "atmel,at91sam9x5-pmc",
8069 -              of_at91sam9x5_pmc_setup);
8071 -static void __init of_sama5d2_pmc_setup(struct device_node *np)
8073 -       of_at91_pmc_setup(np, &sama5d2_caps);
8075 -CLK_OF_DECLARE(sama5d2_clk_pmc, "atmel,sama5d2-pmc",
8076 -              of_sama5d2_pmc_setup);
8078 -static void __init of_sama5d3_pmc_setup(struct device_node *np)
8080 -       of_at91_pmc_setup(np, &sama5d3_caps);
8082 -CLK_OF_DECLARE(sama5d3_clk_pmc, "atmel,sama5d3-pmc",
8083 -              of_sama5d3_pmc_setup);
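
Editorial annotation (not part of the patch hunks): with the PMC irqchip and the central pmc_clk_ids dispatch table gone, each sub-clock driver now declares itself with CLK_OF_DECLARE and pulls the PMC regmap from its parent syscon node, exactly as the clk-usb and clk-utmi hunks above do. A bare skeleton of that registration pattern; the compatible string and function name below are placeholders, not taken from this patch.

#include <linux/clk-provider.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/regmap.h>

static void __init of_example_pmc_child_setup(struct device_node *np)
{
	struct regmap *regmap;

	/* The clock node now sits under the PMC node, which is a syscon. */
	regmap = syscon_node_to_regmap(of_get_parent(np));
	if (IS_ERR(regmap))
		return;

	/*
	 * ...allocate the clk_hw wrapper, stash the regmap in it, then
	 * clk_register() and of_clk_add_provider() as in the hunks above...
	 */
}
CLK_OF_DECLARE(example_pmc_child, "vendor,example-clk",
	       of_example_pmc_child_setup);
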
8084 diff -Nur linux-4.4.46.orig/drivers/clk/at91/pmc.h linux-4.4.46/drivers/clk/at91/pmc.h
8085 --- linux-4.4.46.orig/drivers/clk/at91/pmc.h    2017-02-01 08:31:11.000000000 +0100
8086 +++ linux-4.4.46/drivers/clk/at91/pmc.h 2017-02-03 17:18:05.675416176 +0100
8087 @@ -14,8 +14,11 @@
8089  #include <linux/io.h>
8090  #include <linux/irqdomain.h>
8091 +#include <linux/regmap.h>
8092  #include <linux/spinlock.h>
8094 +extern spinlock_t pmc_pcr_lock;
8096  struct clk_range {
8097         unsigned long min;
8098         unsigned long max;
8099 @@ -23,102 +26,7 @@
8101  #define CLK_RANGE(MIN, MAX) {.min = MIN, .max = MAX,}
8103 -struct at91_pmc_caps {
8104 -       u32 available_irqs;
8107 -struct at91_pmc {
8108 -       void __iomem *regbase;
8109 -       int virq;
8110 -       spinlock_t lock;
8111 -       const struct at91_pmc_caps *caps;
8112 -       struct irq_domain *irqdomain;
8113 -       u32 imr;
8116 -static inline void pmc_lock(struct at91_pmc *pmc)
8118 -       spin_lock(&pmc->lock);
8121 -static inline void pmc_unlock(struct at91_pmc *pmc)
8123 -       spin_unlock(&pmc->lock);
8126 -static inline u32 pmc_read(struct at91_pmc *pmc, int offset)
8128 -       return readl(pmc->regbase + offset);
8131 -static inline void pmc_write(struct at91_pmc *pmc, int offset, u32 value)
8133 -       writel(value, pmc->regbase + offset);
8136  int of_at91_get_clk_range(struct device_node *np, const char *propname,
8137                           struct clk_range *range);
8139 -void of_at91sam9260_clk_slow_setup(struct device_node *np,
8140 -                                  struct at91_pmc *pmc);
8142 -void of_at91rm9200_clk_main_osc_setup(struct device_node *np,
8143 -                                     struct at91_pmc *pmc);
8144 -void of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
8145 -                                        struct at91_pmc *pmc);
8146 -void of_at91rm9200_clk_main_setup(struct device_node *np,
8147 -                                 struct at91_pmc *pmc);
8148 -void of_at91sam9x5_clk_main_setup(struct device_node *np,
8149 -                                 struct at91_pmc *pmc);
8151 -void of_at91rm9200_clk_pll_setup(struct device_node *np,
8152 -                                struct at91_pmc *pmc);
8153 -void of_at91sam9g45_clk_pll_setup(struct device_node *np,
8154 -                                 struct at91_pmc *pmc);
8155 -void of_at91sam9g20_clk_pllb_setup(struct device_node *np,
8156 -                                  struct at91_pmc *pmc);
8157 -void of_sama5d3_clk_pll_setup(struct device_node *np,
8158 -                             struct at91_pmc *pmc);
8159 -void of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
8160 -                                   struct at91_pmc *pmc);
8162 -void of_at91rm9200_clk_master_setup(struct device_node *np,
8163 -                                   struct at91_pmc *pmc);
8164 -void of_at91sam9x5_clk_master_setup(struct device_node *np,
8165 -                                   struct at91_pmc *pmc);
8167 -void of_at91rm9200_clk_sys_setup(struct device_node *np,
8168 -                                struct at91_pmc *pmc);
8170 -void of_at91rm9200_clk_periph_setup(struct device_node *np,
8171 -                                   struct at91_pmc *pmc);
8172 -void of_at91sam9x5_clk_periph_setup(struct device_node *np,
8173 -                                   struct at91_pmc *pmc);
8175 -void of_at91rm9200_clk_prog_setup(struct device_node *np,
8176 -                                 struct at91_pmc *pmc);
8177 -void of_at91sam9g45_clk_prog_setup(struct device_node *np,
8178 -                                  struct at91_pmc *pmc);
8179 -void of_at91sam9x5_clk_prog_setup(struct device_node *np,
8180 -                                 struct at91_pmc *pmc);
8182 -void of_at91sam9x5_clk_utmi_setup(struct device_node *np,
8183 -                                 struct at91_pmc *pmc);
8185 -void of_at91rm9200_clk_usb_setup(struct device_node *np,
8186 -                                struct at91_pmc *pmc);
8187 -void of_at91sam9x5_clk_usb_setup(struct device_node *np,
8188 -                                struct at91_pmc *pmc);
8189 -void of_at91sam9n12_clk_usb_setup(struct device_node *np,
8190 -                                 struct at91_pmc *pmc);
8192 -void of_at91sam9x5_clk_smd_setup(struct device_node *np,
8193 -                                struct at91_pmc *pmc);
8195 -void of_sama5d4_clk_h32mx_setup(struct device_node *np,
8196 -                               struct at91_pmc *pmc);
8198 -void of_sama5d2_clk_generated_setup(struct device_node *np,
8199 -                                   struct at91_pmc *pmc);
8201  #endif /* __PMC_H_ */
8202 diff -Nur linux-4.4.46.orig/drivers/clocksource/tcb_clksrc.c linux-4.4.46/drivers/clocksource/tcb_clksrc.c
8203 --- linux-4.4.46.orig/drivers/clocksource/tcb_clksrc.c  2017-02-01 08:31:11.000000000 +0100
8204 +++ linux-4.4.46/drivers/clocksource/tcb_clksrc.c       2017-02-03 17:18:05.675416176 +0100
8205 @@ -23,8 +23,7 @@
8206   *     this 32 bit free-running counter. the second channel is not used.
8207   *
8208   *   - The third channel may be used to provide a 16-bit clockevent
8209 - *     source, used in either periodic or oneshot mode.  This runs
8210 - *     at 32 KiHZ, and can handle delays of up to two seconds.
8211 + *     source, used in either periodic or oneshot mode.
8212   *
8213   * A boot clocksource and clockevent source are also currently needed,
8214   * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
8215 @@ -74,6 +73,8 @@
8216  struct tc_clkevt_device {
8217         struct clock_event_device       clkevt;
8218         struct clk                      *clk;
8219 +       bool                            clk_enabled;
8220 +       u32                             freq;
8221         void __iomem                    *regs;
8222  };
8224 @@ -82,15 +83,26 @@
8225         return container_of(clkevt, struct tc_clkevt_device, clkevt);
8228 -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
8229 - * because using one of the divided clocks would usually mean the
8230 - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
8231 - *
8232 - * A divided clock could be good for high resolution timers, since
8233 - * 30.5 usec resolution can seem "low".
8234 - */
8235  static u32 timer_clock;
8237 +static void tc_clk_disable(struct clock_event_device *d)
8239 +       struct tc_clkevt_device *tcd = to_tc_clkevt(d);
8241 +       clk_disable(tcd->clk);
8242 +       tcd->clk_enabled = false;
8245 +static void tc_clk_enable(struct clock_event_device *d)
8247 +       struct tc_clkevt_device *tcd = to_tc_clkevt(d);
8249 +       if (tcd->clk_enabled)
8250 +               return;
8251 +       clk_enable(tcd->clk);
8252 +       tcd->clk_enabled = true;
8255  static int tc_shutdown(struct clock_event_device *d)
8257         struct tc_clkevt_device *tcd = to_tc_clkevt(d);
8258 @@ -98,8 +110,14 @@
8260         __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
8261         __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
8262 +       return 0;
8265 +static int tc_shutdown_clk_off(struct clock_event_device *d)
8267 +       tc_shutdown(d);
8268         if (!clockevent_state_detached(d))
8269 -               clk_disable(tcd->clk);
8270 +               tc_clk_disable(d);
8272         return 0;
8274 @@ -112,9 +130,9 @@
8275         if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
8276                 tc_shutdown(d);
8278 -       clk_enable(tcd->clk);
8279 +       tc_clk_enable(d);
8281 -       /* slow clock, count up to RC, then irq and stop */
8282 +       /* count up to RC, then irq and stop */
8283         __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
8284                      ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
8285         __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
8286 @@ -134,12 +152,12 @@
8287         /* By not making the gentime core emulate periodic mode on top
8288          * of oneshot, we get lower overhead and improved accuracy.
8289          */
8290 -       clk_enable(tcd->clk);
8291 +       tc_clk_enable(d);
8293 -       /* slow clock, count up to RC, then irq and restart */
8294 +       /* count up to RC, then irq and restart */
8295         __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
8296                      regs + ATMEL_TC_REG(2, CMR));
8297 -       __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
8298 +       __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
8300         /* Enable clock and interrupts on RC compare */
8301         __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
8302 @@ -166,9 +184,13 @@
8303                 .features               = CLOCK_EVT_FEAT_PERIODIC |
8304                                           CLOCK_EVT_FEAT_ONESHOT,
8305                 /* Should be lower than at91rm9200's system timer */
8306 +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
8307                 .rating                 = 125,
8308 +#else
8309 +               .rating                 = 200,
8310 +#endif
8311                 .set_next_event         = tc_next_event,
8312 -               .set_state_shutdown     = tc_shutdown,
8313 +               .set_state_shutdown     = tc_shutdown_clk_off,
8314                 .set_state_periodic     = tc_set_periodic,
8315                 .set_state_oneshot      = tc_set_oneshot,
8316         },
8317 @@ -188,8 +210,9 @@
8318         return IRQ_NONE;
8321 -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
8322 +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
8324 +       unsigned divisor = atmel_tc_divisors[divisor_idx];
8325         int ret;
8326         struct clk *t2_clk = tc->clk[2];
8327         int irq = tc->irq[2];
8328 @@ -210,7 +233,11 @@
8329         clkevt.regs = tc->regs;
8330         clkevt.clk = t2_clk;
8332 -       timer_clock = clk32k_divisor_idx;
8333 +       timer_clock = divisor_idx;
8334 +       if (!divisor)
8335 +               clkevt.freq = 32768;
8336 +       else
8337 +               clkevt.freq = clk_get_rate(t2_clk) / divisor;
8339         clkevt.clkevt.cpumask = cpumask_of(0);
8341 @@ -221,7 +248,7 @@
8342                 return ret;
8343         }
8345 -       clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
8346 +       clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
8348         return ret;
8350 @@ -358,7 +385,11 @@
8351                 goto err_disable_t1;
8353         /* channel 2:  periodic and oneshot timer support */
8354 +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
8355         ret = setup_clkevents(tc, clk32k_divisor_idx);
8356 +#else
8357 +       ret = setup_clkevents(tc, best_divisor_idx);
8358 +#endif
8359         if (ret)
8360                 goto err_unregister_clksrc;
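
Editorial annotation (not part of the patch hunks): the tcb_clksrc change lets the clockevent channel run from a divided master clock instead of the fixed 32.768 kHz slow clock (unless CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK keeps the old behaviour), so the frequency passed to clockevents_config_and_register() has to be derived from the chosen divisor. A compact sketch of that calculation as the hunks perform it; a zero entry in atmel_tc_divisors denotes the slow clock.

#include <linux/atmel_tc.h>
#include <linux/clk.h>
#include <linux/types.h>

/* Frequency the clockevent on TC channel 2 will actually tick at. */
static u32 tc_clkevt_freq(struct clk *t2_clk, int divisor_idx)
{
	unsigned divisor = atmel_tc_divisors[divisor_idx];

	if (!divisor)		/* zero marks the 32.768 kHz slow clock */
		return 32768;
	return clk_get_rate(t2_clk) / divisor;
}

/* ...then: clockevents_config_and_register(&clkevt.clkevt,
 *			tc_clkevt_freq(t2_clk, idx), 1, 0xffff); */
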
8362 diff -Nur linux-4.4.46.orig/drivers/clocksource/timer-atmel-pit.c linux-4.4.46/drivers/clocksource/timer-atmel-pit.c
8363 --- linux-4.4.46.orig/drivers/clocksource/timer-atmel-pit.c     2017-02-01 08:31:11.000000000 +0100
8364 +++ linux-4.4.46/drivers/clocksource/timer-atmel-pit.c  2017-02-03 17:18:05.675416176 +0100
8365 @@ -46,6 +46,7 @@
8366         u32             cycle;
8367         u32             cnt;
8368         unsigned int    irq;
8369 +       bool            irq_requested;
8370         struct clk      *mck;
8371  };
8373 @@ -96,15 +97,29 @@
8375         /* disable irq, leaving the clocksource active */
8376         pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
8377 +       if (data->irq_requested) {
8378 +               free_irq(data->irq, data);
8379 +               data->irq_requested = false;
8380 +       }
8381         return 0;
8384 +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
8385  /*
8386   * Clockevent device:  interrupts every 1/HZ (== pit_cycles * MCK/16)
8387   */
8388  static int pit_clkevt_set_periodic(struct clock_event_device *dev)
8390         struct pit_data *data = clkevt_to_pit_data(dev);
8391 +       int ret;
8393 +       ret = request_irq(data->irq, at91sam926x_pit_interrupt,
8394 +                         IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8395 +                         "at91_tick", data);
8396 +       if (ret)
8397 +               panic(pr_fmt("Unable to setup IRQ\n"));
8399 +       data->irq_requested = true;
8401         /* update clocksource counter */
8402         data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
8403 @@ -181,7 +196,6 @@
8405         unsigned long   pit_rate;
8406         unsigned        bits;
8407 -       int             ret;
8409         /*
8410          * Use our actual MCK to figure out how many MCK/16 ticks per
8411 @@ -206,13 +220,6 @@
8412         data->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
8413         clocksource_register_hz(&data->clksrc, pit_rate);
8415 -       /* Set up irq handler */
8416 -       ret = request_irq(data->irq, at91sam926x_pit_interrupt,
8417 -                         IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8418 -                         "at91_tick", data);
8419 -       if (ret)
8420 -               panic(pr_fmt("Unable to setup IRQ\n"));
8422         /* Set up and register clockevents */
8423         data->clkevt.name = "pit";
8424         data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC;
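
Editorial annotation (not part of the patch hunks): the PIT hunks stop requesting the tick interrupt once at probe time; instead the line is taken in pit_clkevt_set_periodic() and released again in the shutdown path, so the IRQF_TIMER handler is only installed while the clockevent is actually delivering ticks. A reduced sketch of that request/release pairing; struct tick_irq_state and the example_* names are stand-ins for the driver's pit_data bookkeeping.

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>

/* Stand-in for the irq/irq_requested fields added to struct pit_data. */
struct tick_irq_state {
	unsigned int irq;
	bool requested;
};

static irqreturn_t example_tick_handler(int irq, void *dev_id)
{
	/* ...acknowledge the timer and invoke the clockevent handler... */
	return IRQ_HANDLED;
}

/* From set_state_periodic(): install the handler only when ticking. */
static void example_tick_irq_on(struct tick_irq_state *st, void *dev_id)
{
	if (request_irq(st->irq, example_tick_handler,
			IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
			"at91_tick", dev_id))
		panic("Unable to setup IRQ\n");
	st->requested = true;
}

/* From set_state_shutdown(): release the line once the device is idle. */
static void example_tick_irq_off(struct tick_irq_state *st, void *dev_id)
{
	if (st->requested) {
		free_irq(st->irq, dev_id);
		st->requested = false;
	}
}
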
8425 diff -Nur linux-4.4.46.orig/drivers/clocksource/timer-atmel-st.c linux-4.4.46/drivers/clocksource/timer-atmel-st.c
8426 --- linux-4.4.46.orig/drivers/clocksource/timer-atmel-st.c      2017-02-01 08:31:11.000000000 +0100
8427 +++ linux-4.4.46/drivers/clocksource/timer-atmel-st.c   2017-02-03 17:18:05.675416176 +0100
8428 @@ -115,18 +115,29 @@
8429         last_crtr = read_CRTR();
8432 +static int atmel_st_irq;
8434  static int clkevt32k_shutdown(struct clock_event_device *evt)
8436         clkdev32k_disable_and_flush_irq();
8437         irqmask = 0;
8438         regmap_write(regmap_st, AT91_ST_IER, irqmask);
8439 +       free_irq(atmel_st_irq, regmap_st);
8440         return 0;
8443  static int clkevt32k_set_oneshot(struct clock_event_device *dev)
8445 +       int ret;
8447         clkdev32k_disable_and_flush_irq();
8449 +       ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
8450 +                         IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8451 +                         "at91_tick", regmap_st);
8452 +       if (ret)
8453 +               panic(pr_fmt("Unable to setup IRQ\n"));
8455         /*
8456          * ALM for oneshot irqs, set by next_event()
8457          * before 32 seconds have passed.
8458 @@ -139,8 +150,16 @@
8460  static int clkevt32k_set_periodic(struct clock_event_device *dev)
8462 +       int ret;
8464         clkdev32k_disable_and_flush_irq();
8466 +       ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
8467 +                         IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8468 +                         "at91_tick", regmap_st);
8469 +       if (ret)
8470 +               panic(pr_fmt("Unable to setup IRQ\n"));
8472         /* PIT for periodic irqs; fixed rate of 1/HZ */
8473         irqmask = AT91_ST_PITS;
8474         regmap_write(regmap_st, AT91_ST_PIMR, timer_latch);
8475 @@ -198,7 +217,7 @@
8477         struct clk *sclk;
8478         unsigned int sclk_rate, val;
8479 -       int irq, ret;
8480 +       int ret;
8482         regmap_st = syscon_node_to_regmap(node);
8483         if (IS_ERR(regmap_st))
8484 @@ -210,17 +229,10 @@
8485         regmap_read(regmap_st, AT91_ST_SR, &val);
8487         /* Get the interrupts property */
8488 -       irq  = irq_of_parse_and_map(node, 0);
8489 -       if (!irq)
8490 +       atmel_st_irq  = irq_of_parse_and_map(node, 0);
8491 +       if (!atmel_st_irq)
8492                 panic(pr_fmt("Unable to get IRQ from DT\n"));
8494 -       /* Make IRQs happen for the system timer */
8495 -       ret = request_irq(irq, at91rm9200_timer_interrupt,
8496 -                         IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
8497 -                         "at91_tick", regmap_st);
8498 -       if (ret)
8499 -               panic(pr_fmt("Unable to setup IRQ\n"));
8501         sclk = of_clk_get(node, 0);
8502         if (IS_ERR(sclk))
8503                 panic(pr_fmt("Unable to get slow clock\n"));
8504 diff -Nur linux-4.4.46.orig/drivers/cpufreq/Kconfig.x86 linux-4.4.46/drivers/cpufreq/Kconfig.x86
8505 --- linux-4.4.46.orig/drivers/cpufreq/Kconfig.x86       2017-02-01 08:31:11.000000000 +0100
8506 +++ linux-4.4.46/drivers/cpufreq/Kconfig.x86    2017-02-03 17:18:05.675416176 +0100
8507 @@ -123,7 +123,7 @@
8509  config X86_POWERNOW_K8
8510         tristate "AMD Opteron/Athlon64 PowerNow!"
8511 -       depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
8512 +       depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
8513         help
8514           This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
8515           Support for K10 and newer processors is now in acpi-cpufreq.
8516 diff -Nur linux-4.4.46.orig/drivers/cpuidle/coupled.c linux-4.4.46/drivers/cpuidle/coupled.c
8517 --- linux-4.4.46.orig/drivers/cpuidle/coupled.c 2017-02-01 08:31:11.000000000 +0100
8518 +++ linux-4.4.46/drivers/cpuidle/coupled.c      2017-02-03 17:18:05.675416176 +0100
8519 @@ -119,7 +119,6 @@
8521  #define CPUIDLE_COUPLED_NOT_IDLE       (-1)
8523 -static DEFINE_MUTEX(cpuidle_coupled_lock);
8524  static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
8526  /*
8527 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c linux-4.4.46/drivers/gpu/drm/i915/i915_gem_execbuffer.c
8528 --- linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c        2017-02-01 08:31:11.000000000 +0100
8529 +++ linux-4.4.46/drivers/gpu/drm/i915/i915_gem_execbuffer.c     2017-02-03 17:18:05.675416176 +0100
8530 @@ -1264,7 +1264,9 @@
8531         if (ret)
8532                 return ret;
8534 +#ifndef CONFIG_PREEMPT_RT_BASE
8535         trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
8536 +#endif
8538         i915_gem_execbuffer_move_to_active(vmas, params->request);
8539         i915_gem_execbuffer_retire_commands(params);
8540 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c linux-4.4.46/drivers/gpu/drm/i915/i915_gem_shrinker.c
8541 --- linux-4.4.46.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c  2017-02-01 08:31:11.000000000 +0100
8542 +++ linux-4.4.46/drivers/gpu/drm/i915/i915_gem_shrinker.c       2017-02-03 17:18:05.675416176 +0100
8543 @@ -39,7 +39,7 @@
8544         if (!mutex_is_locked(mutex))
8545                 return false;
8547 -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
8548 +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
8549         return mutex->owner == task;
8550  #else
8551         /* Since UP may be pre-empted, we cannot assume that we own the lock */
8552 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/i915_irq.c linux-4.4.46/drivers/gpu/drm/i915/i915_irq.c
8553 --- linux-4.4.46.orig/drivers/gpu/drm/i915/i915_irq.c   2017-02-01 08:31:11.000000000 +0100
8554 +++ linux-4.4.46/drivers/gpu/drm/i915/i915_irq.c        2017-02-03 17:18:05.675416176 +0100
8555 @@ -812,6 +812,7 @@
8556         spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
8558         /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
8559 +       preempt_disable_rt();
8561         /* Get optional system timestamp before query. */
8562         if (stime)
8563 @@ -863,6 +864,7 @@
8564                 *etime = ktime_get();
8566         /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
8567 +       preempt_enable_rt();
8569         spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
8571 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/intel_display.c linux-4.4.46/drivers/gpu/drm/i915/intel_display.c
8572 --- linux-4.4.46.orig/drivers/gpu/drm/i915/intel_display.c      2017-02-01 08:31:11.000000000 +0100
8573 +++ linux-4.4.46/drivers/gpu/drm/i915/intel_display.c   2017-02-03 17:18:05.679416330 +0100
8574 @@ -11400,7 +11400,7 @@
8575         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
8576         struct intel_unpin_work *work;
8578 -       WARN_ON(!in_interrupt());
8579 +       WARN_ON_NONRT(!in_interrupt());
8581         if (crtc == NULL)
8582                 return;
8583 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/i915/intel_sprite.c linux-4.4.46/drivers/gpu/drm/i915/intel_sprite.c
8584 --- linux-4.4.46.orig/drivers/gpu/drm/i915/intel_sprite.c       2017-02-01 08:31:11.000000000 +0100
8585 +++ linux-4.4.46/drivers/gpu/drm/i915/intel_sprite.c    2017-02-03 17:18:05.679416330 +0100
8586 @@ -38,6 +38,7 @@
8587  #include "intel_drv.h"
8588  #include <drm/i915_drm.h>
8589  #include "i915_drv.h"
8590 +#include <linux/locallock.h>
8592  static bool
8593  format_is_yuv(uint32_t format)
8594 @@ -64,6 +65,8 @@
8595                             1000 * adjusted_mode->crtc_htotal);
8598 +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
8600  /**
8601   * intel_pipe_update_start() - start update of a set of display registers
8602   * @crtc: the crtc of which the registers are going to be updated
8603 @@ -96,7 +99,7 @@
8604         min = vblank_start - usecs_to_scanlines(adjusted_mode, 100);
8605         max = vblank_start - 1;
8607 -       local_irq_disable();
8608 +       local_lock_irq(pipe_update_lock);
8610         if (min <= 0 || max <= 0)
8611                 return;
8612 @@ -126,11 +129,11 @@
8613                         break;
8614                 }
8616 -               local_irq_enable();
8617 +               local_unlock_irq(pipe_update_lock);
8619                 timeout = schedule_timeout(timeout);
8621 -               local_irq_disable();
8622 +               local_lock_irq(pipe_update_lock);
8623         }
8625         finish_wait(wq, &wait);
8626 @@ -164,7 +167,7 @@
8628         trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
8630 -       local_irq_enable();
8631 +       local_unlock_irq(pipe_update_lock);
8633         if (crtc->debug.start_vbl_count &&
8634             crtc->debug.start_vbl_count != end_vbl_count) {
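
Editorial annotation (not part of the patch hunks): intel_pipe_update_start() and its matching end path used plain local_irq_disable()/enable() around the vblank-evasion window; the hunks above convert that to a local lock from <linux/locallock.h>, which this patch introduces elsewhere. Roughly, local_lock_irq() still disables interrupts on a non-RT build, while on PREEMPT_RT it takes a per-CPU lock so the section remains preemptible. A minimal usage sketch; the lock and function names are illustrative.

#include <linux/locallock.h>

/* One lock per pinned section; here it covers a register-update window. */
static DEFINE_LOCAL_IRQ_LOCK(example_update_lock);

static void example_critical_update(void)
{
	local_lock_irq(example_update_lock);

	/* ...program the registers that must not be torn mid-update... */

	local_unlock_irq(example_update_lock);
}
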
8635 diff -Nur linux-4.4.46.orig/drivers/gpu/drm/radeon/radeon_display.c linux-4.4.46/drivers/gpu/drm/radeon/radeon_display.c
8636 --- linux-4.4.46.orig/drivers/gpu/drm/radeon/radeon_display.c   2017-02-01 08:31:11.000000000 +0100
8637 +++ linux-4.4.46/drivers/gpu/drm/radeon/radeon_display.c        2017-02-03 17:18:05.679416330 +0100
8638 @@ -1862,6 +1862,7 @@
8639         struct radeon_device *rdev = dev->dev_private;
8641         /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
8642 +       preempt_disable_rt();
8644         /* Get optional system timestamp before query. */
8645         if (stime)
8646 @@ -1954,6 +1955,7 @@
8647                 *etime = ktime_get();
8649         /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
8650 +       preempt_enable_rt();
8652         /* Decode into vertical and horizontal scanout position. */
8653         *vpos = position & 0x1fff;
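
Editorial annotation (not part of the patch hunks): the i915 and radeon scanout-position helpers above add real preempt_disable_rt()/preempt_enable_rt() calls at the points the existing placeholder comments ("should go right here in PREEMPT_RT patchset") already marked. Those helpers come from this patch's preempt-header changes; the idea, roughly, is that they disable preemption only on an RT kernel, so the two ktime samples and the hardware position read stay tightly grouped without adding a preempt-off region to a stock kernel. A sketch of the bracketing pattern; the read_pos callback is a stand-in for the hardware query.

#include <linux/ktime.h>
#include <linux/preempt.h>	/* preempt_{disable,enable}_rt() are added
				 * to the preempt headers by this patch */
#include <linux/timekeeping.h>
#include <linux/types.h>

static void example_sample_position(ktime_t *stime, ktime_t *etime,
				    u32 *pos, u32 (*read_pos)(void))
{
	preempt_disable_rt();

	if (stime)
		*stime = ktime_get();
	*pos = read_pos();
	if (etime)
		*etime = ktime_get();

	preempt_enable_rt();
}
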
8654 diff -Nur linux-4.4.46.orig/drivers/hv/vmbus_drv.c linux-4.4.46/drivers/hv/vmbus_drv.c
8655 --- linux-4.4.46.orig/drivers/hv/vmbus_drv.c    2017-02-01 08:31:11.000000000 +0100
8656 +++ linux-4.4.46/drivers/hv/vmbus_drv.c 2017-02-03 17:18:05.679416330 +0100
8657 @@ -820,7 +820,7 @@
8658                         tasklet_schedule(&msg_dpc);
8659         }
8661 -       add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
8662 +       add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, 0);
8666 diff -Nur linux-4.4.46.orig/drivers/i2c/busses/i2c-omap.c linux-4.4.46/drivers/i2c/busses/i2c-omap.c
8667 --- linux-4.4.46.orig/drivers/i2c/busses/i2c-omap.c     2017-02-01 08:31:11.000000000 +0100
8668 +++ linux-4.4.46/drivers/i2c/busses/i2c-omap.c  2017-02-03 17:18:05.679416330 +0100
8669 @@ -995,15 +995,12 @@
8670         u16 mask;
8671         u16 stat;
8673 -       spin_lock(&omap->lock);
8674 -       mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
8675         stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
8676 +       mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
8678         if (stat & mask)
8679                 ret = IRQ_WAKE_THREAD;
8681 -       spin_unlock(&omap->lock);
8683         return ret;
8686 diff -Nur linux-4.4.46.orig/drivers/ide/alim15x3.c linux-4.4.46/drivers/ide/alim15x3.c
8687 --- linux-4.4.46.orig/drivers/ide/alim15x3.c    2017-02-01 08:31:11.000000000 +0100
8688 +++ linux-4.4.46/drivers/ide/alim15x3.c 2017-02-03 17:18:05.679416330 +0100
8689 @@ -234,7 +234,7 @@
8691         isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
8693 -       local_irq_save(flags);
8694 +       local_irq_save_nort(flags);
8696         if (m5229_revision < 0xC2) {
8697                 /*
8698 @@ -325,7 +325,7 @@
8699         }
8700         pci_dev_put(north);
8701         pci_dev_put(isa_dev);
8702 -       local_irq_restore(flags);
8703 +       local_irq_restore_nort(flags);
8704         return 0;
8707 diff -Nur linux-4.4.46.orig/drivers/ide/hpt366.c linux-4.4.46/drivers/ide/hpt366.c
8708 --- linux-4.4.46.orig/drivers/ide/hpt366.c      2017-02-01 08:31:11.000000000 +0100
8709 +++ linux-4.4.46/drivers/ide/hpt366.c   2017-02-03 17:18:05.679416330 +0100
8710 @@ -1241,7 +1241,7 @@
8712         dma_old = inb(base + 2);
8714 -       local_irq_save(flags);
8715 +       local_irq_save_nort(flags);
8717         dma_new = dma_old;
8718         pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
8719 @@ -1252,7 +1252,7 @@
8720         if (dma_new != dma_old)
8721                 outb(dma_new, base + 2);
8723 -       local_irq_restore(flags);
8724 +       local_irq_restore_nort(flags);
8726         printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
8727                          hwif->name, base, base + 7);
8728 diff -Nur linux-4.4.46.orig/drivers/ide/ide-io.c linux-4.4.46/drivers/ide/ide-io.c
8729 --- linux-4.4.46.orig/drivers/ide/ide-io.c      2017-02-01 08:31:11.000000000 +0100
8730 +++ linux-4.4.46/drivers/ide/ide-io.c   2017-02-03 17:18:05.679416330 +0100
8731 @@ -659,7 +659,7 @@
8732                 /* disable_irq_nosync ?? */
8733                 disable_irq(hwif->irq);
8734                 /* local CPU only, as if we were handling an interrupt */
8735 -               local_irq_disable();
8736 +               local_irq_disable_nort();
8737                 if (hwif->polling) {
8738                         startstop = handler(drive);
8739                 } else if (drive_is_ready(drive)) {
8740 diff -Nur linux-4.4.46.orig/drivers/ide/ide-iops.c linux-4.4.46/drivers/ide/ide-iops.c
8741 --- linux-4.4.46.orig/drivers/ide/ide-iops.c    2017-02-01 08:31:11.000000000 +0100
8742 +++ linux-4.4.46/drivers/ide/ide-iops.c 2017-02-03 17:18:05.679416330 +0100
8743 @@ -129,12 +129,12 @@
8744                                 if ((stat & ATA_BUSY) == 0)
8745                                         break;
8747 -                               local_irq_restore(flags);
8748 +                               local_irq_restore_nort(flags);
8749                                 *rstat = stat;
8750                                 return -EBUSY;
8751                         }
8752                 }
8753 -               local_irq_restore(flags);
8754 +               local_irq_restore_nort(flags);
8755         }
8756         /*
8757          * Allow status to settle, then read it again.
8758 diff -Nur linux-4.4.46.orig/drivers/ide/ide-io-std.c linux-4.4.46/drivers/ide/ide-io-std.c
8759 --- linux-4.4.46.orig/drivers/ide/ide-io-std.c  2017-02-01 08:31:11.000000000 +0100
8760 +++ linux-4.4.46/drivers/ide/ide-io-std.c       2017-02-03 17:18:05.679416330 +0100
8761 @@ -175,7 +175,7 @@
8762                 unsigned long uninitialized_var(flags);
8764                 if ((io_32bit & 2) && !mmio) {
8765 -                       local_irq_save(flags);
8766 +                       local_irq_save_nort(flags);
8767                         ata_vlb_sync(io_ports->nsect_addr);
8768                 }
8770 @@ -186,7 +186,7 @@
8771                         insl(data_addr, buf, words);
8773                 if ((io_32bit & 2) && !mmio)
8774 -                       local_irq_restore(flags);
8775 +                       local_irq_restore_nort(flags);
8777                 if (((len + 1) & 3) < 2)
8778                         return;
8779 @@ -219,7 +219,7 @@
8780                 unsigned long uninitialized_var(flags);
8782                 if ((io_32bit & 2) && !mmio) {
8783 -                       local_irq_save(flags);
8784 +                       local_irq_save_nort(flags);
8785                         ata_vlb_sync(io_ports->nsect_addr);
8786                 }
8788 @@ -230,7 +230,7 @@
8789                         outsl(data_addr, buf, words);
8791                 if ((io_32bit & 2) && !mmio)
8792 -                       local_irq_restore(flags);
8793 +                       local_irq_restore_nort(flags);
8795                 if (((len + 1) & 3) < 2)
8796                         return;
8797 diff -Nur linux-4.4.46.orig/drivers/ide/ide-probe.c linux-4.4.46/drivers/ide/ide-probe.c
8798 --- linux-4.4.46.orig/drivers/ide/ide-probe.c   2017-02-01 08:31:11.000000000 +0100
8799 +++ linux-4.4.46/drivers/ide/ide-probe.c        2017-02-03 17:18:05.679416330 +0100
8800 @@ -196,10 +196,10 @@
8801         int bswap = 1;
8803         /* local CPU only; some systems need this */
8804 -       local_irq_save(flags);
8805 +       local_irq_save_nort(flags);
8806         /* read 512 bytes of id info */
8807         hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
8808 -       local_irq_restore(flags);
8809 +       local_irq_restore_nort(flags);
8811         drive->dev_flags |= IDE_DFLAG_ID_READ;
8812  #ifdef DEBUG
8813 diff -Nur linux-4.4.46.orig/drivers/ide/ide-taskfile.c linux-4.4.46/drivers/ide/ide-taskfile.c
8814 --- linux-4.4.46.orig/drivers/ide/ide-taskfile.c        2017-02-01 08:31:11.000000000 +0100
8815 +++ linux-4.4.46/drivers/ide/ide-taskfile.c     2017-02-03 17:18:05.683416484 +0100
8816 @@ -250,7 +250,7 @@
8818                 page_is_high = PageHighMem(page);
8819                 if (page_is_high)
8820 -                       local_irq_save(flags);
8821 +                       local_irq_save_nort(flags);
8823                 buf = kmap_atomic(page) + offset;
8825 @@ -271,7 +271,7 @@
8826                 kunmap_atomic(buf);
8828                 if (page_is_high)
8829 -                       local_irq_restore(flags);
8830 +                       local_irq_restore_nort(flags);
8832                 len -= nr_bytes;
8833         }
8834 @@ -414,7 +414,7 @@
8835         }
8837         if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
8838 -               local_irq_disable();
8839 +               local_irq_disable_nort();
8841         ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
8843 diff -Nur linux-4.4.46.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-4.4.46/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
8844 --- linux-4.4.46.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c    2017-02-01 08:31:11.000000000 +0100
8845 +++ linux-4.4.46/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-02-03 17:18:05.683416484 +0100
8846 @@ -862,7 +862,7 @@
8848         ipoib_dbg_mcast(priv, "restarting multicast task\n");
8850 -       local_irq_save(flags);
8851 +       local_irq_save_nort(flags);
8852         netif_addr_lock(dev);
8853         spin_lock(&priv->lock);
8855 @@ -944,7 +944,7 @@
8857         spin_unlock(&priv->lock);
8858         netif_addr_unlock(dev);
8859 -       local_irq_restore(flags);
8860 +       local_irq_restore_nort(flags);
8862         /*
8863          * make sure the in-flight joins have finished before we attempt
8864 diff -Nur linux-4.4.46.orig/drivers/input/gameport/gameport.c linux-4.4.46/drivers/input/gameport/gameport.c
8865 --- linux-4.4.46.orig/drivers/input/gameport/gameport.c 2017-02-01 08:31:11.000000000 +0100
8866 +++ linux-4.4.46/drivers/input/gameport/gameport.c      2017-02-03 17:18:05.683416484 +0100
8867 @@ -91,13 +91,13 @@
8868         tx = ~0;
8870         for (i = 0; i < 50; i++) {
8871 -               local_irq_save(flags);
8872 +               local_irq_save_nort(flags);
8873                 t1 = ktime_get_ns();
8874                 for (t = 0; t < 50; t++)
8875                         gameport_read(gameport);
8876                 t2 = ktime_get_ns();
8877                 t3 = ktime_get_ns();
8878 -               local_irq_restore(flags);
8879 +               local_irq_restore_nort(flags);
8880                 udelay(i * 10);
8881                 t = (t2 - t1) - (t3 - t2);
8882                 if (t < tx)
8883 @@ -124,12 +124,12 @@
8884         tx = 1 << 30;
8886         for(i = 0; i < 50; i++) {
8887 -               local_irq_save(flags);
8888 +               local_irq_save_nort(flags);
8889                 GET_TIME(t1);
8890                 for (t = 0; t < 50; t++) gameport_read(gameport);
8891                 GET_TIME(t2);
8892                 GET_TIME(t3);
8893 -               local_irq_restore(flags);
8894 +               local_irq_restore_nort(flags);
8895                 udelay(i * 10);
8896                 if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
8897         }
8898 @@ -148,11 +148,11 @@
8899         tx = 1 << 30;
8901         for(i = 0; i < 50; i++) {
8902 -               local_irq_save(flags);
8903 +               local_irq_save_nort(flags);
8904                 t1 = rdtsc();
8905                 for (t = 0; t < 50; t++) gameport_read(gameport);
8906                 t2 = rdtsc();
8907 -               local_irq_restore(flags);
8908 +               local_irq_restore_nort(flags);
8909                 udelay(i * 10);
8910                 if (t2 - t1 < tx) tx = t2 - t1;
8911         }
8912 diff -Nur linux-4.4.46.orig/drivers/iommu/amd_iommu.c linux-4.4.46/drivers/iommu/amd_iommu.c
8913 --- linux-4.4.46.orig/drivers/iommu/amd_iommu.c 2017-02-01 08:31:11.000000000 +0100
8914 +++ linux-4.4.46/drivers/iommu/amd_iommu.c      2017-02-03 17:18:05.683416484 +0100
8915 @@ -2022,10 +2022,10 @@
8916         int ret;
8918         /*
8919 -        * Must be called with IRQs disabled. Warn here to detect early
8920 -        * when its not.
8921 +        * Must be called with IRQs disabled on a non-RT kernel. Warn here to
8922 +        * detect early when it's not.
8923          */
8924 -       WARN_ON(!irqs_disabled());
8925 +       WARN_ON_NONRT(!irqs_disabled());
8927         /* lock domain */
8928         spin_lock(&domain->lock);
8929 @@ -2188,10 +2188,10 @@
8930         struct protection_domain *domain;
8932         /*
8933 -        * Must be called with IRQs disabled. Warn here to detect early
8934 -        * when its not.
8935 +        * Must be called with IRQs disabled on a non-RT kernel. Warn here to
8936 +        * detect early when it's not.
8937          */
8938 -       WARN_ON(!irqs_disabled());
8939 +       WARN_ON_NONRT(!irqs_disabled());
8941         if (WARN_ON(!dev_data->domain))
8942                 return;
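WARN_ON_NONRT() here, like BUG_ON_NONRT() in the drivers/md/dm.c hunk below, only fires when PREEMPT_RT is not enabled. A sketch of the expected definitions, assumed to match the asm-generic/bug.h change made elsewhere in the RT series:

#ifdef CONFIG_PREEMPT_RT_BASE
# define BUG_ON_NONRT(c)                do { } while (0)
# define WARN_ON_NONRT(condition)       do { } while (0)
#else
# define BUG_ON_NONRT(c)                BUG_ON(c)
# define WARN_ON_NONRT(condition)       WARN_ON(condition)
#endif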
8943 diff -Nur linux-4.4.46.orig/drivers/leds/trigger/Kconfig linux-4.4.46/drivers/leds/trigger/Kconfig
8944 --- linux-4.4.46.orig/drivers/leds/trigger/Kconfig      2017-02-01 08:31:11.000000000 +0100
8945 +++ linux-4.4.46/drivers/leds/trigger/Kconfig   2017-02-03 17:18:05.683416484 +0100
8946 @@ -61,7 +61,7 @@
8948  config LEDS_TRIGGER_CPU
8949         bool "LED CPU Trigger"
8950 -       depends on LEDS_TRIGGERS
8951 +       depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
8952         help
8953           This allows LEDs to be controlled by active CPUs. This shows
8954           the active CPUs across an array of LEDs so you can see which
8955 diff -Nur linux-4.4.46.orig/drivers/md/bcache/Kconfig linux-4.4.46/drivers/md/bcache/Kconfig
8956 --- linux-4.4.46.orig/drivers/md/bcache/Kconfig 2017-02-01 08:31:11.000000000 +0100
8957 +++ linux-4.4.46/drivers/md/bcache/Kconfig      2017-02-03 17:18:05.683416484 +0100
8958 @@ -1,6 +1,7 @@
8960  config BCACHE
8961         tristate "Block device as cache"
8962 +       depends on !PREEMPT_RT_FULL
8963         ---help---
8964         Allows a block device to be used as cache for other devices; uses
8965         a btree for indexing and the layout is optimized for SSDs.
8966 diff -Nur linux-4.4.46.orig/drivers/md/dm.c linux-4.4.46/drivers/md/dm.c
8967 --- linux-4.4.46.orig/drivers/md/dm.c   2017-02-01 08:31:11.000000000 +0100
8968 +++ linux-4.4.46/drivers/md/dm.c        2017-02-03 17:18:05.683416484 +0100
8969 @@ -2127,7 +2127,7 @@
8970                 /* Establish tio->ti before queuing work (map_tio_request) */
8971                 tio->ti = ti;
8972                 queue_kthread_work(&md->kworker, &tio->work);
8973 -               BUG_ON(!irqs_disabled());
8974 +               BUG_ON_NONRT(!irqs_disabled());
8975         }
8977         goto out;
8978 diff -Nur linux-4.4.46.orig/drivers/md/raid5.c linux-4.4.46/drivers/md/raid5.c
8979 --- linux-4.4.46.orig/drivers/md/raid5.c        2017-02-01 08:31:11.000000000 +0100
8980 +++ linux-4.4.46/drivers/md/raid5.c     2017-02-03 17:18:05.683416484 +0100
8981 @@ -1920,8 +1920,9 @@
8982         struct raid5_percpu *percpu;
8983         unsigned long cpu;
8985 -       cpu = get_cpu();
8986 +       cpu = get_cpu_light();
8987         percpu = per_cpu_ptr(conf->percpu, cpu);
8988 +       spin_lock(&percpu->lock);
8989         if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
8990                 ops_run_biofill(sh);
8991                 overlap_clear++;
8992 @@ -1977,7 +1978,8 @@
8993                         if (test_and_clear_bit(R5_Overlap, &dev->flags))
8994                                 wake_up(&sh->raid_conf->wait_for_overlap);
8995                 }
8996 -       put_cpu();
8997 +       spin_unlock(&percpu->lock);
8998 +       put_cpu_light();
9001  static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
9002 @@ -6414,6 +6416,7 @@
9003                                __func__, cpu);
9004                         break;
9005                 }
9006 +               spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
9007         }
9008         put_online_cpus();
9010 diff -Nur linux-4.4.46.orig/drivers/md/raid5.h linux-4.4.46/drivers/md/raid5.h
9011 --- linux-4.4.46.orig/drivers/md/raid5.h        2017-02-01 08:31:11.000000000 +0100
9012 +++ linux-4.4.46/drivers/md/raid5.h     2017-02-03 17:18:05.683416484 +0100
9013 @@ -504,6 +504,7 @@
9014         int                     recovery_disabled;
9015         /* per cpu variables */
9016         struct raid5_percpu {
9017 +               spinlock_t      lock;           /* Protection for -RT */
9018                 struct page     *spare_page; /* Used when checking P/Q in raid6 */
9019                 struct flex_array *scribble;   /* space for constructing buffer
9020                                               * lists and performing address
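The raid5 change above is an instance of a recurring RT conversion: get_cpu()/put_cpu(), which disable preemption, become get_cpu_light()/put_cpu_light(), which on RT only disable migration, so the per-CPU state must additionally be serialized by an explicit per-CPU lock. A generic sketch of the pattern, with hypothetical names (my_percpu, my_percpu_op) used purely for illustration:

struct my_percpu {
        spinlock_t lock;        /* protects the per-CPU scratch state on RT */
        /* ... per-CPU scratch buffers ... */
};

static void my_percpu_op(struct my_percpu __percpu *pcpu)
{
        struct my_percpu *p;
        int cpu;

        cpu = get_cpu_light();          /* only disables migration on RT */
        p = per_cpu_ptr(pcpu, cpu);
        spin_lock(&p->lock);            /* sleeping "spinlock" on RT keeps this preemptible */
        /* ... operate on p ... */
        spin_unlock(&p->lock);
        put_cpu_light();
}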
9021 diff -Nur linux-4.4.46.orig/drivers/media/platform/vsp1/vsp1_video.c linux-4.4.46/drivers/media/platform/vsp1/vsp1_video.c
9022 --- linux-4.4.46.orig/drivers/media/platform/vsp1/vsp1_video.c  2017-02-01 08:31:11.000000000 +0100
9023 +++ linux-4.4.46/drivers/media/platform/vsp1/vsp1_video.c       2017-02-03 17:18:05.683416484 +0100
9024 @@ -520,7 +520,7 @@
9025         bool stopped;
9027         spin_lock_irqsave(&pipe->irqlock, flags);
9028 -       stopped = pipe->state == VSP1_PIPELINE_STOPPED,
9029 +       stopped = pipe->state == VSP1_PIPELINE_STOPPED;
9030         spin_unlock_irqrestore(&pipe->irqlock, flags);
9032         return stopped;
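The vsp1 hunk above replaces a stray comma with a semicolon. The comma happened to compile because it fused the assignment and the following statement into one comma expression; a standalone illustration of the hazard (comma_demo is a hypothetical function, not driver code):

/* With a trailing comma, the next expression becomes part of the same
 * statement and its value is discarded - easy to misread and fragile. */
static int comma_demo(int state)
{
        int stopped;

        stopped = state == 1,   /* assignment binds tighter than the comma... */
                  42;           /* ...so 42 is evaluated and thrown away      */

        return stopped;         /* returns (state == 1), not 42 */
}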
9033 diff -Nur linux-4.4.46.orig/drivers/misc/hwlat_detector.c linux-4.4.46/drivers/misc/hwlat_detector.c
9034 --- linux-4.4.46.orig/drivers/misc/hwlat_detector.c     1970-01-01 01:00:00.000000000 +0100
9035 +++ linux-4.4.46/drivers/misc/hwlat_detector.c  2017-02-03 17:18:05.687416638 +0100
9036 @@ -0,0 +1,1240 @@
9038 + * hwlat_detector.c - A simple Hardware Latency detector.
9039 + *
9040 + * Use this module to detect large system latencies induced by the behavior of
9041 + * certain underlying system hardware or firmware, independent of Linux itself.
9042 + * The code was developed originally to detect the presence of SMIs on Intel
9043 + * and AMD systems, although there is no dependency upon x86 herein.
9044 + *
9045 + * The classical example usage of this module is in detecting the presence of
9046 + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
9047 + * somewhat special form of hardware interrupt spawned from earlier CPU debug
9048 + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
9049 + * LPC (or other device) to generate a special interrupt under certain
9050 + * circumstances, for example, upon expiration of a special SMI timer device,
9051 + * due to certain external thermal readings, on certain I/O address accesses,
9052 + * and other situations. An SMI hits a special CPU pin, triggers a special
9053 + * SMI mode (complete with special memory map), and the OS is unaware.
9054 + *
9055 + * Although certain hardware-induced latencies are necessary (for example,
9056 + * a modern system often requires an SMI handler for correct thermal control
9057 + * and remote management), they can wreak havoc upon any OS-level low-latency
9058 + * performance guarantees, especially when the OS is not even made
9059 + * aware of the presence of these interrupts. For this reason, we need a
9060 + * somewhat brute force mechanism to detect these interrupts. In this case,
9061 + * we do it by hogging all of the CPU(s) for configurable timer intervals,
9062 + * sampling the built-in CPU timer, looking for discontiguous readings.
9063 + *
9064 + * WARNING: This implementation necessarily introduces latencies. Therefore,
9065 + *          you should NEVER use this module in a production environment
9066 + *          requiring any kind of low-latency performance guarantee(s).
9067 + *
9068 + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
9069 + *
9070 + * Includes useful feedback from Clark Williams <clark@redhat.com>
9071 + *
9072 + * This file is licensed under the terms of the GNU General Public
9073 + * License version 2. This program is licensed "as is" without any
9074 + * warranty of any kind, whether express or implied.
9075 + */
9077 +#include <linux/module.h>
9078 +#include <linux/init.h>
9079 +#include <linux/ring_buffer.h>
9080 +#include <linux/time.h>
9081 +#include <linux/hrtimer.h>
9082 +#include <linux/kthread.h>
9083 +#include <linux/debugfs.h>
9084 +#include <linux/seq_file.h>
9085 +#include <linux/uaccess.h>
9086 +#include <linux/version.h>
9087 +#include <linux/delay.h>
9088 +#include <linux/slab.h>
9089 +#include <linux/trace_clock.h>
9091 +#define BUF_SIZE_DEFAULT       262144UL                /* 8K*(sizeof(entry)) */
9092 +#define BUF_FLAGS              (RB_FL_OVERWRITE)       /* no block on full */
9093 +#define U64STR_SIZE            22                      /* 20 digits max */
9095 +#define VERSION                        "1.0.0"
9096 +#define BANNER                 "hwlat_detector: "
9097 +#define DRVNAME                        "hwlat_detector"
9098 +#define DEFAULT_SAMPLE_WINDOW  1000000                 /* 1s */
9099 +#define DEFAULT_SAMPLE_WIDTH   500000                  /* 0.5s */
9100 +#define DEFAULT_LAT_THRESHOLD  10                      /* 10us */
9102 +/* Module metadata */
9104 +MODULE_LICENSE("GPL");
9105 +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
9106 +MODULE_DESCRIPTION("A simple hardware latency detector");
9107 +MODULE_VERSION(VERSION);
9109 +/* Module parameters */
9111 +static int debug;
9112 +static int enabled;
9113 +static int threshold;
9115 +module_param(debug, int, 0);                   /* enable debug */
9116 +module_param(enabled, int, 0);                 /* enable detector */
9117 +module_param(threshold, int, 0);               /* latency threshold */
9119 +/* Buffering and sampling */
9121 +static struct ring_buffer *ring_buffer;                /* sample buffer */
9122 +static DEFINE_MUTEX(ring_buffer_mutex);                /* lock changes */
9123 +static unsigned long buf_size = BUF_SIZE_DEFAULT;
9124 +static struct task_struct *kthread;            /* sampling thread */
9126 +/* DebugFS filesystem entries */
9128 +static struct dentry *debug_dir;               /* debugfs directory */
9129 +static struct dentry *debug_max;               /* maximum TSC delta */
9130 +static struct dentry *debug_count;             /* total detect count */
9131 +static struct dentry *debug_sample_width;      /* sample width us */
9132 +static struct dentry *debug_sample_window;     /* sample window us */
9133 +static struct dentry *debug_sample;            /* raw samples us */
9134 +static struct dentry *debug_threshold;         /* threshold us */
9135 +static struct dentry *debug_enable;            /* enable/disable */
9137 +/* Individual samples and global state */
9139 +struct sample;                                 /* latency sample */
9140 +struct data;                                   /* Global state */
9142 +/* Sampling functions */
9143 +static int __buffer_add_sample(struct sample *sample);
9144 +static struct sample *buffer_get_sample(struct sample *sample);
9146 +/* Threading and state */
9147 +static int kthread_fn(void *unused);
9148 +static int start_kthread(void);
9149 +static int stop_kthread(void);
9150 +static void __reset_stats(void);
9151 +static int init_stats(void);
9153 +/* Debugfs interface */
9154 +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
9155 +                               size_t cnt, loff_t *ppos, const u64 *entry);
9156 +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
9157 +                                size_t cnt, loff_t *ppos, u64 *entry);
9158 +static int debug_sample_fopen(struct inode *inode, struct file *filp);
9159 +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
9160 +                                 size_t cnt, loff_t *ppos);
9161 +static int debug_sample_release(struct inode *inode, struct file *filp);
9162 +static int debug_enable_fopen(struct inode *inode, struct file *filp);
9163 +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
9164 +                                 size_t cnt, loff_t *ppos);
9165 +static ssize_t debug_enable_fwrite(struct file *file,
9166 +                                  const char __user *user_buffer,
9167 +                                  size_t user_size, loff_t *offset);
9169 +/* Initialization functions */
9170 +static int init_debugfs(void);
9171 +static void free_debugfs(void);
9172 +static int detector_init(void);
9173 +static void detector_exit(void);
9175 +/* Individual latency samples are stored here when detected and packed into
9176 + * the ring_buffer circular buffer, where they are overwritten when
9177 + * more than buf_size/sizeof(sample) samples are received. */
9178 +struct sample {
9179 +       u64             seqnum;         /* unique sequence */
9180 +       u64             duration;       /* ktime delta */
9181 +       u64             outer_duration; /* ktime delta (outer loop) */
9182 +       struct timespec timestamp;      /* wall time */
9183 +       unsigned long   lost;
9186 +/* keep the global state somewhere. */
9187 +static struct data {
9189 +       struct mutex lock;              /* protect changes */
9191 +       u64     count;                  /* total since reset */
9192 +       u64     max_sample;             /* max hardware latency */
9193 +       u64     threshold;              /* sample threshold level */
9195 +       u64     sample_window;          /* total sampling window (on+off) */
9196 +       u64     sample_width;           /* active sampling portion of window */
9198 +       atomic_t sample_open;           /* whether the sample file is open */
9200 +       wait_queue_head_t wq;           /* waitqueue for new sample values */
9202 +} data;
9204 +/**
9205 + * __buffer_add_sample - add a new latency sample recording to the ring buffer
9206 + * @sample: The new latency sample value
9207 + *
9208 + * This receives a new latency sample and records it in a global ring buffer.
9209 + * No additional locking is used in this case.
9210 + */
9211 +static int __buffer_add_sample(struct sample *sample)
9213 +       return ring_buffer_write(ring_buffer,
9214 +                                sizeof(struct sample), sample);
9217 +/**
9218 + * buffer_get_sample - remove a hardware latency sample from the ring buffer
9219 + * @sample: Pre-allocated storage for the sample
9220 + *
9221 + * This retrieves a hardware latency sample from the global circular buffer
9222 + */
9223 +static struct sample *buffer_get_sample(struct sample *sample)
9225 +       struct ring_buffer_event *e = NULL;
9226 +       struct sample *s = NULL;
9227 +       unsigned int cpu = 0;
9229 +       if (!sample)
9230 +               return NULL;
9232 +       mutex_lock(&ring_buffer_mutex);
9233 +       for_each_online_cpu(cpu) {
9234 +               e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
9235 +               if (e)
9236 +                       break;
9237 +       }
9239 +       if (e) {
9240 +               s = ring_buffer_event_data(e);
9241 +               memcpy(sample, s, sizeof(struct sample));
9242 +       } else
9243 +               sample = NULL;
9244 +       mutex_unlock(&ring_buffer_mutex);
9246 +       return sample;
9249 +#ifndef CONFIG_TRACING
9250 +#define time_type      ktime_t
9251 +#define time_get()     ktime_get()
9252 +#define time_to_us(x)  ktime_to_us(x)
9253 +#define time_sub(a, b) ktime_sub(a, b)
9254 +#define init_time(a, b)        (a).tv64 = b
9255 +#define time_u64(a)    ((a).tv64)
9256 +#else
9257 +#define time_type      u64
9258 +#define time_get()     trace_clock_local()
9259 +#define time_to_us(x)  div_u64(x, 1000)
9260 +#define time_sub(a, b) ((a) - (b))
9261 +#define init_time(a, b)        (a = b)
9262 +#define time_u64(a)    a
9263 +#endif
9264 +/**
9265 + * get_sample - sample the CPU TSC and look for likely hardware latencies
9266 + *
9267 + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
9268 + * hardware-induced latency. Called with interrupts disabled and with
9269 + * data.lock held.
9270 + */
9271 +static int get_sample(void)
9273 +       time_type start, t1, t2, last_t2;
9274 +       s64 diff, total = 0;
9275 +       u64 sample = 0;
9276 +       u64 outer_sample = 0;
9277 +       int ret = -1;
9279 +       init_time(last_t2, 0);
9280 +       start = time_get(); /* start timestamp */
9282 +       do {
9284 +               t1 = time_get();        /* we'll look for a discontinuity */
9285 +               t2 = time_get();
9287 +               if (time_u64(last_t2)) {
9288 +                       /* Check the delta from outer loop (t2 to next t1) */
9289 +                       diff = time_to_us(time_sub(t1, last_t2));
9290 +                       /* This shouldn't happen */
9291 +                       if (diff < 0) {
9292 +                               pr_err(BANNER "time running backwards\n");
9293 +                               goto out;
9294 +                       }
9295 +                       if (diff > outer_sample)
9296 +                               outer_sample = diff;
9297 +               }
9298 +               last_t2 = t2;
9300 +               total = time_to_us(time_sub(t2, start)); /* sample width */
9302 +               /* This checks the inner loop (t1 to t2) */
9303 +               diff = time_to_us(time_sub(t2, t1));     /* current diff */
9305 +               /* This shouldn't happen */
9306 +               if (diff < 0) {
9307 +                       pr_err(BANNER "time running backwards\n");
9308 +                       goto out;
9309 +               }
9311 +               if (diff > sample)
9312 +                       sample = diff; /* only want highest value */
9314 +       } while (total <= data.sample_width);
9316 +       ret = 0;
9318 +       /* If we exceed the threshold value, we have found a hardware latency */
9319 +       if (sample > data.threshold || outer_sample > data.threshold) {
9320 +               struct sample s;
9322 +               ret = 1;
9324 +               data.count++;
9325 +               s.seqnum = data.count;
9326 +               s.duration = sample;
9327 +               s.outer_duration = outer_sample;
9328 +               s.timestamp = CURRENT_TIME;
9329 +               __buffer_add_sample(&s);
9331 +               /* Keep a running maximum ever recorded hardware latency */
9332 +               if (sample > data.max_sample)
9333 +                       data.max_sample = sample;
9334 +       }
9336 +out:
9337 +       return ret;
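get_sample() above looks for gaps both inside a pair of back-to-back reads (t1..t2) and between consecutive pairs (previous t2..next t1). A rough userspace analogue of the same idea, offered only as an illustration and using CLOCK_MONOTONIC in place of the kernel time_get() macros:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_us(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000ull + ts.tv_nsec / 1000;
}

int main(void)
{
        uint64_t start = now_us(), last_t2 = 0, max_inner = 0, max_outer = 0;

        do {
                uint64_t t1 = now_us();
                uint64_t t2 = now_us();

                if (last_t2 && t1 - last_t2 > max_outer)
                        max_outer = t1 - last_t2;       /* outer gap */
                if (t2 - t1 > max_inner)
                        max_inner = t2 - t1;            /* inner gap */
                last_t2 = t2;
        } while (now_us() - start < 500000);            /* ~0.5 s sample width */

        printf("max inner %llu us, max outer %llu us\n",
               (unsigned long long)max_inner, (unsigned long long)max_outer);
        return 0;
}

A large inner or outer gap means the CPU was taken away by something the OS never saw, e.g. an SMI; in userspace, scheduling noise dominates, which is exactly why the kernel thread runs with interrupts disabled.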
9341 + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
9342 + * @unused: A required part of the kthread API.
9343 + *
9344 + * Used to periodically sample the CPU TSC via a call to get_sample. We
9345 + * disable interrupts, which does (intentionally) introduce latency since we
9346 + * need to ensure nothing else might be running (and thus pre-empting).
9347 + * Obviously this should never be used in production environments.
9348 + *
9349 + * Currently this runs on whichever CPU it was scheduled on, but most
9350 + * real-world hardware latency situations occur across several CPUs;
9351 + * we might later generalize this if we find there are any actual
9352 + * systems with alternate SMI delivery or other hardware latencies.
9353 + */
9354 +static int kthread_fn(void *unused)
9356 +       int ret;
9357 +       u64 interval;
9359 +       while (!kthread_should_stop()) {
9361 +               mutex_lock(&data.lock);
9363 +               local_irq_disable();
9364 +               ret = get_sample();
9365 +               local_irq_enable();
9367 +               if (ret > 0)
9368 +                       wake_up(&data.wq); /* wake up reader(s) */
9370 +               interval = data.sample_window - data.sample_width;
9371 +               do_div(interval, USEC_PER_MSEC); /* modifies interval value */
9373 +               mutex_unlock(&data.lock);
9375 +               if (msleep_interruptible(interval))
9376 +                       break;
9377 +       }
9379 +       return 0;
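With the defaults defined above (DEFAULT_SAMPLE_WINDOW = 1,000,000 us, DEFAULT_SAMPLE_WIDTH = 500,000 us), the interval computation in kthread_fn() yields (1,000,000 - 500,000) / USEC_PER_MSEC = 500, so the thread busy-samples with interrupts disabled for roughly 0.5 s and then sleeps for 500 ms of every 1 s window.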
9382 +/**
9383 + * start_kthread - Kick off the hardware latency sampling/detector kthread
9384 + *
9385 + * This starts a kernel thread that will sit and sample the CPU timestamp
9386 + * counter (TSC or similar) and look for potential hardware latencies.
9387 + */
9388 +static int start_kthread(void)
9390 +       kthread = kthread_run(kthread_fn, NULL,
9391 +                                       DRVNAME);
9392 +       if (IS_ERR(kthread)) {
9393 +               pr_err(BANNER "could not start sampling thread\n");
9394 +               enabled = 0;
9395 +               return -ENOMEM;
9396 +       }
9398 +       return 0;
9401 +/**
9402 + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
9403 + *
9404 + * This kicks the running hardware latency sampling/detector kernel thread and
9405 + * tells it to stop sampling now. Use this on unload and at system shutdown.
9406 + */
9407 +static int stop_kthread(void)
9409 +       int ret;
9411 +       ret = kthread_stop(kthread);
9413 +       return ret;
9416 +/**
9417 + * __reset_stats - Reset statistics for the hardware latency detector
9418 + *
9419 + * We use data to store various statistics and global state. We call this
9420 + * function in order to reset those when "enable" is toggled on or off, and
9421 + * also at initialization. Should be called with data.lock held.
9422 + */
9423 +static void __reset_stats(void)
9425 +       data.count = 0;
9426 +       data.max_sample = 0;
9427 +       ring_buffer_reset(ring_buffer); /* flush out old sample entries */
9430 +/**
9431 + * init_stats - Setup global state statistics for the hardware latency detector
9432 + *
9433 + * We use data to store various statistics and global state. We also use
9434 + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
9435 + * induced system latencies. This function initializes these structures and
9436 + * allocates the global ring buffer also.
9437 + */
9438 +static int init_stats(void)
9440 +       int ret = -ENOMEM;
9442 +       mutex_init(&data.lock);
9443 +       init_waitqueue_head(&data.wq);
9444 +       atomic_set(&data.sample_open, 0);
9446 +       ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
9448 +       if (WARN(!ring_buffer, KERN_ERR BANNER
9449 +                              "failed to allocate ring buffer!\n"))
9450 +               goto out;
9452 +       __reset_stats();
9453 +       data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
9454 +       data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
9455 +       data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */
9457 +       ret = 0;
9459 +out:
9460 +       return ret;
9465 + * simple_data_read - Wrapper read function for global state debugfs entries
9466 + * @filp: The active open file structure for the debugfs "file"
9467 + * @ubuf: The userspace provided buffer to read value into
9468 + * @cnt: The maximum number of bytes to read
9469 + * @ppos: The current "file" position
9470 + * @entry: The entry to read from
9471 + *
9472 + * This function provides a generic read implementation for the global state
9473 + * "data" structure debugfs filesystem entries. It would be nice to use
9474 + * simple_attr_read directly, but we need to make sure that the data.lock
9475 + * is held during the actual read.
9476 + */
9477 +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
9478 +                               size_t cnt, loff_t *ppos, const u64 *entry)
9480 +       char buf[U64STR_SIZE];
9481 +       u64 val = 0;
9482 +       int len = 0;
9484 +       memset(buf, 0, sizeof(buf));
9486 +       if (!entry)
9487 +               return -EFAULT;
9489 +       mutex_lock(&data.lock);
9490 +       val = *entry;
9491 +       mutex_unlock(&data.lock);
9493 +       len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
9495 +       return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
9500 + * simple_data_write - Wrapper write function for global state debugfs entries
9501 + * @filp: The active open file structure for the debugfs "file"
9502 + * @ubuf: The userspace provided buffer to write value from
9503 + * @cnt: The maximum number of bytes to write
9504 + * @ppos: The current "file" position
9505 + * @entry: The entry to write to
9506 + *
9507 + * This function provides a generic write implementation for the global state
9508 + * "data" structure debugfs filesystem entries. It would be nice to use
9509 + * simple_attr_write directly, but we need to make sure that the data.lock
9510 + * is held during the actual write.
9511 + */
9512 +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
9513 +                                size_t cnt, loff_t *ppos, u64 *entry)
9515 +       char buf[U64STR_SIZE];
9516 +       int csize = min(cnt, sizeof(buf));
9517 +       u64 val = 0;
9518 +       int err = 0;
9520 +       memset(buf, '\0', sizeof(buf));
9521 +       if (copy_from_user(buf, ubuf, csize))
9522 +               return -EFAULT;
9524 +       buf[U64STR_SIZE-1] = '\0';                      /* just in case */
9525 +       err = kstrtoull(buf, 10, &val);
9526 +       if (err)
9527 +               return -EINVAL;
9529 +       mutex_lock(&data.lock);
9530 +       *entry = val;
9531 +       mutex_unlock(&data.lock);
9533 +       return csize;
9536 +/**
9537 + * debug_count_fopen - Open function for "count" debugfs entry
9538 + * @inode: The in-kernel inode representation of the debugfs "file"
9539 + * @filp: The active open file structure for the debugfs "file"
9540 + *
9541 + * This function provides an open implementation for the "count" debugfs
9542 + * interface to the hardware latency detector.
9543 + */
9544 +static int debug_count_fopen(struct inode *inode, struct file *filp)
9546 +       return 0;
9549 +/**
9550 + * debug_count_fread - Read function for "count" debugfs entry
9551 + * @filp: The active open file structure for the debugfs "file"
9552 + * @ubuf: The userspace provided buffer to read value into
9553 + * @cnt: The maximum number of bytes to read
9554 + * @ppos: The current "file" position
9555 + *
9556 + * This function provides a read implementation for the "count" debugfs
9557 + * interface to the hardware latency detector. Can be used to read the
9558 + * number of latency readings exceeding the configured threshold since
9559 + * the detector was last reset (e.g. by writing a zero into "count").
9560 + */
9561 +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
9562 +                                    size_t cnt, loff_t *ppos)
9564 +       return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
9567 +/**
9568 + * debug_count_fwrite - Write function for "count" debugfs entry
9569 + * @filp: The active open file structure for the debugfs "file"
9570 + * @ubuf: The user buffer that contains the value to write
9571 + * @cnt: The maximum number of bytes to write to "file"
9572 + * @ppos: The current position in the debugfs "file"
9573 + *
9574 + * This function provides a write implementation for the "count" debugfs
9575 + * interface to the hardware latency detector. Can be used to write a
9576 + * desired value, especially to zero the total count.
9577 + */
9578 +static ssize_t  debug_count_fwrite(struct file *filp,
9579 +                                      const char __user *ubuf,
9580 +                                      size_t cnt,
9581 +                                      loff_t *ppos)
9583 +       return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
9586 +/**
9587 + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
9588 + * @inode: The in-kernel inode representation of the debugfs "file"
9589 + * @filp: The active open file structure for the debugfs "file"
9590 + *
9591 + * This function provides an open implementation for the "enable" debugfs
9592 + * interface to the hardware latency detector.
9593 + */
9594 +static int debug_enable_fopen(struct inode *inode, struct file *filp)
9596 +       return 0;
9599 +/**
9600 + * debug_enable_fread - Read function for "enable" debugfs interface
9601 + * @filp: The active open file structure for the debugfs "file"
9602 + * @ubuf: The userspace provided buffer to read value into
9603 + * @cnt: The maximum number of bytes to read
9604 + * @ppos: The current "file" position
9605 + *
9606 + * This function provides a read implementation for the "enable" debugfs
9607 + * interface to the hardware latency detector. Can be used to determine
9608 + * whether the detector is currently enabled ("0\n" or "1\n" returned).
9609 + */
9610 +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
9611 +                                     size_t cnt, loff_t *ppos)
9613 +       char buf[4];
9615 +       if ((cnt < sizeof(buf)) || (*ppos))
9616 +               return 0;
9618 +       buf[0] = enabled ? '1' : '0';
9619 +       buf[1] = '\n';
9620 +       buf[2] = '\0';
9621 +       if (copy_to_user(ubuf, buf, strlen(buf)))
9622 +               return -EFAULT;
9623 +       return *ppos = strlen(buf);
9626 +/**
9627 + * debug_enable_fwrite - Write function for "enable" debugfs interface
9628 + * @filp: The active open file structure for the debugfs "file"
9629 + * @ubuf: The user buffer that contains the value to write
9630 + * @cnt: The maximum number of bytes to write to "file"
9631 + * @ppos: The current position in the debugfs "file"
9632 + *
9633 + * This function provides a write implementation for the "enable" debugfs
9634 + * interface to the hardware latency detector. Can be used to enable or
9635 + * disable the detector, which will have the side-effect of possibly
9636 + * also resetting the global stats and kicking off the measuring
9637 + * kthread (on an enable) or the converse (upon a disable).
9638 + */
9639 +static ssize_t  debug_enable_fwrite(struct file *filp,
9640 +                                       const char __user *ubuf,
9641 +                                       size_t cnt,
9642 +                                       loff_t *ppos)
9644 +       char buf[4];
9645 +       int csize = min(cnt, sizeof(buf));
9646 +       long val = 0;
9647 +       int err = 0;
9649 +       memset(buf, '\0', sizeof(buf));
9650 +       if (copy_from_user(buf, ubuf, csize))
9651 +               return -EFAULT;
9653 +       buf[sizeof(buf)-1] = '\0';                      /* just in case */
9654 +       err = kstrtoul(buf, 10, &val);
9655 +       if (err)
9656 +               return -EINVAL;
9658 +       if (val) {
9659 +               if (enabled)
9660 +                       goto unlock;
9661 +               enabled = 1;
9662 +               __reset_stats();
9663 +               if (start_kthread())
9664 +                       return -EFAULT;
9665 +       } else {
9666 +               if (!enabled)
9667 +                       goto unlock;
9668 +               enabled = 0;
9669 +               err = stop_kthread();
9670 +               if (err) {
9671 +                       pr_err(BANNER "cannot stop kthread\n");
9672 +                       return -EFAULT;
9673 +               }
9674 +               wake_up(&data.wq);              /* reader(s) should return */
9675 +       }
9676 +unlock:
9677 +       return csize;
9680 +/**
9681 + * debug_max_fopen - Open function for "max" debugfs entry
9682 + * @inode: The in-kernel inode representation of the debugfs "file"
9683 + * @filp: The active open file structure for the debugfs "file"
9684 + *
9685 + * This function provides an open implementation for the "max" debugfs
9686 + * interface to the hardware latency detector.
9687 + */
9688 +static int debug_max_fopen(struct inode *inode, struct file *filp)
9690 +       return 0;
9693 +/**
9694 + * debug_max_fread - Read function for "max" debugfs entry
9695 + * @filp: The active open file structure for the debugfs "file"
9696 + * @ubuf: The userspace provided buffer to read value into
9697 + * @cnt: The maximum number of bytes to read
9698 + * @ppos: The current "file" position
9699 + *
9700 + * This function provides a read implementation for the "max" debugfs
9701 + * interface to the hardware latency detector. Can be used to determine
9702 + * the maximum latency value observed since it was last reset.
9703 + */
9704 +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
9705 +                                  size_t cnt, loff_t *ppos)
9707 +       return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
9710 +/**
9711 + * debug_max_fwrite - Write function for "max" debugfs entry
9712 + * @filp: The active open file structure for the debugfs "file"
9713 + * @ubuf: The user buffer that contains the value to write
9714 + * @cnt: The maximum number of bytes to write to "file"
9715 + * @ppos: The current position in the debugfs "file"
9716 + *
9717 + * This function provides a write implementation for the "max" debugfs
9718 + * interface to the hardware latency detector. Can be used to reset the
9719 + * maximum or set it to some other desired value - if, then, subsequent
9720 + * measurements exceed this value, the maximum will be updated.
9721 + */
9722 +static ssize_t  debug_max_fwrite(struct file *filp,
9723 +                                    const char __user *ubuf,
9724 +                                    size_t cnt,
9725 +                                    loff_t *ppos)
9727 +       return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
9731 +/**
9732 + * debug_sample_fopen - An open function for "sample" debugfs interface
9733 + * @inode: The in-kernel inode representation of this debugfs "file"
9734 + * @filp: The active open file structure for the debugfs "file"
9735 + *
9736 + * This function handles opening the "sample" file within the hardware
9737 + * latency detector debugfs directory interface. This file is used to read
9738 + * raw samples from the global ring_buffer and allows the user to see a
9739 + * running latency history. Can be opened blocking or non-blocking,
9740 + * which determines whether it behaves as a blocking read pipe or not.
9741 + * Implements simple locking to prevent multiple simultaneous use.
9742 + */
9743 +static int debug_sample_fopen(struct inode *inode, struct file *filp)
9745 +       if (!atomic_add_unless(&data.sample_open, 1, 1))
9746 +               return -EBUSY;
9747 +       else
9748 +               return 0;
9751 +/**
9752 + * debug_sample_fread - A read function for "sample" debugfs interface
9753 + * @filp: The active open file structure for the debugfs "file"
9754 + * @ubuf: The user buffer that will contain the samples read
9755 + * @cnt: The maximum bytes to read from the debugfs "file"
9756 + * @ppos: The current position in the debugfs "file"
9757 + *
9758 + * This function handles reading from the "sample" file within the hardware
9759 + * latency detector debugfs directory interface. This file is used to read
9760 + * raw samples from the global ring_buffer and allows the user to see a
9761 + * running latency history. By default this will block pending a new
9762 + * value written into the sample buffer, unless there are already a
9763 + * number of value(s) waiting in the buffer, or the sample file was
9764 + * previously opened in a non-blocking mode of operation.
9765 + */
9766 +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
9767 +                                       size_t cnt, loff_t *ppos)
9769 +       int len = 0;
9770 +       char buf[64];
9771 +       struct sample *sample = NULL;
9773 +       if (!enabled)
9774 +               return 0;
9776 +       sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
9777 +       if (!sample)
9778 +               return -ENOMEM;
9780 +       while (!buffer_get_sample(sample)) {
9782 +               DEFINE_WAIT(wait);
9784 +               if (filp->f_flags & O_NONBLOCK) {
9785 +                       len = -EAGAIN;
9786 +                       goto out;
9787 +               }
9789 +               prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
9790 +               schedule();
9791 +               finish_wait(&data.wq, &wait);
9793 +               if (signal_pending(current)) {
9794 +                       len = -EINTR;
9795 +                       goto out;
9796 +               }
9798 +               if (!enabled) {                 /* enable was toggled */
9799 +                       len = 0;
9800 +                       goto out;
9801 +               }
9802 +       }
9804 +       len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
9805 +                      sample->timestamp.tv_sec,
9806 +                      sample->timestamp.tv_nsec,
9807 +                      sample->duration,
9808 +                      sample->outer_duration);
9811 +       /* handling partial reads is more trouble than it's worth */
9812 +       if (len > cnt)
9813 +               goto out;
9815 +       if (copy_to_user(ubuf, buf, len))
9816 +               len = -EFAULT;
9818 +out:
9819 +       kfree(sample);
9820 +       return len;
9823 +/**
9824 + * debug_sample_release - Release function for "sample" debugfs interface
9825 + * @inode: The in-kernel inode representation of the debugfs "file"
9826 + * @filp: The active open file structure for the debugfs "file"
9827 + *
9828 + * This function completes the close of the debugfs interface "sample" file.
9829 + * Frees the sample_open "lock" so that other users may open the interface.
9830 + */
9831 +static int debug_sample_release(struct inode *inode, struct file *filp)
9833 +       atomic_dec(&data.sample_open);
9835 +       return 0;
9838 +/**
9839 + * debug_threshold_fopen - Open function for "threshold" debugfs entry
9840 + * @inode: The in-kernel inode representation of the debugfs "file"
9841 + * @filp: The active open file structure for the debugfs "file"
9842 + *
9843 + * This function provides an open implementation for the "threshold" debugfs
9844 + * interface to the hardware latency detector.
9845 + */
9846 +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
9848 +       return 0;
9851 +/**
9852 + * debug_threshold_fread - Read function for "threshold" debugfs entry
9853 + * @filp: The active open file structure for the debugfs "file"
9854 + * @ubuf: The userspace provided buffer to read value into
9855 + * @cnt: The maximum number of bytes to read
9856 + * @ppos: The current "file" position
9857 + *
9858 + * This function provides a read implementation for the "threshold" debugfs
9859 + * interface to the hardware latency detector. It can be used to determine
9860 + * the current threshold level at which a latency will be recorded in the
9861 + * global ring buffer, typically on the order of 10us.
9862 + */
9863 +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
9864 +                                        size_t cnt, loff_t *ppos)
9866 +       return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
9869 +/**
9870 + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
9871 + * @filp: The active open file structure for the debugfs "file"
9872 + * @ubuf: The user buffer that contains the value to write
9873 + * @cnt: The maximum number of bytes to write to "file"
9874 + * @ppos: The current position in the debugfs "file"
9875 + *
9876 + * This function provides a write implementation for the "threshold" debugfs
9877 + * interface to the hardware latency detector. It can be used to configure
9878 + * the threshold level at which any subsequently detected latencies will
9879 + * be recorded into the global ring buffer.
9880 + */
9881 +static ssize_t  debug_threshold_fwrite(struct file *filp,
9882 +                                       const char __user *ubuf,
9883 +                                       size_t cnt,
9884 +                                       loff_t *ppos)
9886 +       int ret;
9888 +       ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
9890 +       if (enabled)
9891 +               wake_up_process(kthread);
9893 +       return ret;
9896 +/**
9897 + * debug_width_fopen - Open function for "width" debugfs entry
9898 + * @inode: The in-kernel inode representation of the debugfs "file"
9899 + * @filp: The active open file structure for the debugfs "file"
9900 + *
9901 + * This function provides an open implementation for the "width" debugfs
9902 + * interface to the hardware latency detector.
9903 + */
9904 +static int debug_width_fopen(struct inode *inode, struct file *filp)
9906 +       return 0;
9909 +/**
9910 + * debug_width_fread - Read function for "width" debugfs entry
9911 + * @filp: The active open file structure for the debugfs "file"
9912 + * @ubuf: The userspace provided buffer to read value into
9913 + * @cnt: The maximum number of bytes to read
9914 + * @ppos: The current "file" position
9915 + *
9916 + * This function provides a read implementation for the "width" debugfs
9917 + * interface to the hardware latency detector. It can be used to determine
9918 + * for how many us of the total sampling window we will actively sample
9919 + * for any hardware-induced latency periods. Obviously, sampling constantly
9920 + * would leave the system unable to respond to a sample reader or, worse,
9921 + * make the whole system appear to have gone out to lunch.
9922 + */
9923 +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
9924 +                                    size_t cnt, loff_t *ppos)
9926 +       return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
9929 +/**
9930 + * debug_width_fwrite - Write function for "width" debugfs entry
9931 + * @filp: The active open file structure for the debugfs "file"
9932 + * @ubuf: The user buffer that contains the value to write
9933 + * @cnt: The maximum number of bytes to write to "file"
9934 + * @ppos: The current position in the debugfs "file"
9935 + *
9936 + * This function provides a write implementation for the "width" debugfs
9937 + * interface to the hardware latency detector. It can be used to configure
9938 + * for how many us of the total sampling window we will actively sample
9939 + * for any hardware-induced latency periods. Obviously, sampling constantly
9940 + * would leave the system unable to respond to a sample reader or, worse,
9941 + * make the whole system appear to have gone out to lunch. It is enforced
9942 + * that the width is less than the total window size.
9943 + */
9944 +static ssize_t  debug_width_fwrite(struct file *filp,
9945 +                                      const char __user *ubuf,
9946 +                                      size_t cnt,
9947 +                                      loff_t *ppos)
9949 +       char buf[U64STR_SIZE];
9950 +       int csize = min(cnt, sizeof(buf));
9951 +       u64 val = 0;
9952 +       int err = 0;
9954 +       memset(buf, '\0', sizeof(buf));
9955 +       if (copy_from_user(buf, ubuf, csize))
9956 +               return -EFAULT;
9958 +       buf[U64STR_SIZE-1] = '\0';                      /* just in case */
9959 +       err = kstrtoull(buf, 10, &val);
9960 +       if (err)
9961 +               return -EINVAL;
9963 +       mutex_lock(&data.lock);
9964 +       if (val < data.sample_window)
9965 +               data.sample_width = val;
9966 +       else {
9967 +               mutex_unlock(&data.lock);
9968 +               return -EINVAL;
9969 +       }
9970 +       mutex_unlock(&data.lock);
9972 +       if (enabled)
9973 +               wake_up_process(kthread);
9975 +       return csize;
9978 +/**
9979 + * debug_window_fopen - Open function for "window" debugfs entry
9980 + * @inode: The in-kernel inode representation of the debugfs "file"
9981 + * @filp: The active open file structure for the debugfs "file"
9982 + *
9983 + * This function provides an open implementation for the "window" debugfs
9984 + * interface to the hardware latency detector. The window is the total time
9985 + * in us that will be considered one sample period. Conceptually, windows
9986 + * occur back-to-back and contain a sample width period during which
9987 + * actual sampling occurs.
9988 + */
9989 +static int debug_window_fopen(struct inode *inode, struct file *filp)
9991 +       return 0;
9994 +/**
9995 + * debug_window_fread - Read function for "window" debugfs entry
9996 + * @filp: The active open file structure for the debugfs "file"
9997 + * @ubuf: The userspace provided buffer to read value into
9998 + * @cnt: The maximum number of bytes to read
9999 + * @ppos: The current "file" position
10000 + *
10001 + * This function provides a read implementation for the "window" debugfs
10002 + * interface to the hardware latency detector. The window is the total time
10003 + * in us that will be considered one sample period. Conceptually, windows
10004 + * occur back-to-back and contain a sample width period during which
10005 + * actual sampling occurs. Can be used to read the total window size.
10006 + */
10007 +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
10008 +                                     size_t cnt, loff_t *ppos)
10010 +       return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
10013 +/**
10014 + * debug_window_fwrite - Write function for "window" debugfs entry
10015 + * @filp: The active open file structure for the debugfs "file"
10016 + * @ubuf: The user buffer that contains the value to write
10017 + * @cnt: The maximum number of bytes to write to "file"
10018 + * @ppos: The current position in the debugfs "file"
10019 + *
10020 + * This function provides a write implementation for the "window" debugfs
10021 + * interface to the hardware latency detector. The window is the total time
10022 + * in us that will be considered one sample period. Conceptually, windows
10023 + * occur back-to-back and contain a sample width period during which
10024 + * actual sampling occurs. Can be used to write a new total window size. It
10025 + * is enforced that any value written must be greater than the sample width
10026 + * size, or an error results.
10027 + */
10028 +static ssize_t  debug_window_fwrite(struct file *filp,
10029 +                                       const char __user *ubuf,
10030 +                                       size_t cnt,
10031 +                                       loff_t *ppos)
10033 +       char buf[U64STR_SIZE];
10034 +       int csize = min(cnt, sizeof(buf));
10035 +       u64 val = 0;
10036 +       int err = 0;
10038 +       memset(buf, '\0', sizeof(buf));
10039 +       if (copy_from_user(buf, ubuf, csize))
10040 +               return -EFAULT;
10042 +       buf[U64STR_SIZE-1] = '\0';                      /* just in case */
10043 +       err = kstrtoull(buf, 10, &val);
10044 +       if (err)
10045 +               return -EINVAL;
10047 +       mutex_lock(&data.lock);
10048 +       if (data.sample_width < val)
10049 +               data.sample_window = val;
10050 +       else {
10051 +               mutex_unlock(&data.lock);
10052 +               return -EINVAL;
10053 +       }
10054 +       mutex_unlock(&data.lock);
10056 +       return csize;
10060 + * Function pointers for the "count" debugfs file operations
10061 + */
10062 +static const struct file_operations count_fops = {
10063 +       .open           = debug_count_fopen,
10064 +       .read           = debug_count_fread,
10065 +       .write          = debug_count_fwrite,
10066 +       .owner          = THIS_MODULE,
10070 + * Function pointers for the "enable" debugfs file operations
10071 + */
10072 +static const struct file_operations enable_fops = {
10073 +       .open           = debug_enable_fopen,
10074 +       .read           = debug_enable_fread,
10075 +       .write          = debug_enable_fwrite,
10076 +       .owner          = THIS_MODULE,
10080 + * Function pointers for the "max" debugfs file operations
10081 + */
10082 +static const struct file_operations max_fops = {
10083 +       .open           = debug_max_fopen,
10084 +       .read           = debug_max_fread,
10085 +       .write          = debug_max_fwrite,
10086 +       .owner          = THIS_MODULE,
10090 + * Function pointers for the "sample" debugfs file operations
10091 + */
10092 +static const struct file_operations sample_fops = {
10093 +       .open           = debug_sample_fopen,
10094 +       .read           = debug_sample_fread,
10095 +       .release        = debug_sample_release,
10096 +       .owner          = THIS_MODULE,
10100 + * Function pointers for the "threshold" debugfs file operations
10101 + */
10102 +static const struct file_operations threshold_fops = {
10103 +       .open           = debug_threshold_fopen,
10104 +       .read           = debug_threshold_fread,
10105 +       .write          = debug_threshold_fwrite,
10106 +       .owner          = THIS_MODULE,
10110 + * Function pointers for the "width" debugfs file operations
10111 + */
10112 +static const struct file_operations width_fops = {
10113 +       .open           = debug_width_fopen,
10114 +       .read           = debug_width_fread,
10115 +       .write          = debug_width_fwrite,
10116 +       .owner          = THIS_MODULE,
10120 + * Function pointers for the "window" debugfs file operations
10121 + */
10122 +static const struct file_operations window_fops = {
10123 +       .open           = debug_window_fopen,
10124 +       .read           = debug_window_fread,
10125 +       .write          = debug_window_fwrite,
10126 +       .owner          = THIS_MODULE,
10129 +/**
10130 + * init_debugfs - A function to initialize the debugfs interface files
10131 + *
10132 + * This function creates entries in debugfs for "hwlat_detector", including
10133 + * files to read values from the detector, current samples, and the
10134 + * maximum sample that has been captured since the hardware latency
10135 + * detector was started.
10136 + */
10137 +static int init_debugfs(void)
10139 +       int ret = -ENOMEM;
10141 +       debug_dir = debugfs_create_dir(DRVNAME, NULL);
10142 +       if (!debug_dir)
10143 +               goto err_debug_dir;
10145 +       debug_sample = debugfs_create_file("sample", 0444,
10146 +                                              debug_dir, NULL,
10147 +                                              &sample_fops);
10148 +       if (!debug_sample)
10149 +               goto err_sample;
10151 +       debug_count = debugfs_create_file("count", 0444,
10152 +                                             debug_dir, NULL,
10153 +                                             &count_fops);
10154 +       if (!debug_count)
10155 +               goto err_count;
10157 +       debug_max = debugfs_create_file("max", 0444,
10158 +                                           debug_dir, NULL,
10159 +                                           &max_fops);
10160 +       if (!debug_max)
10161 +               goto err_max;
10163 +       debug_sample_window = debugfs_create_file("window", 0644,
10164 +                                                     debug_dir, NULL,
10165 +                                                     &window_fops);
10166 +       if (!debug_sample_window)
10167 +               goto err_window;
10169 +       debug_sample_width = debugfs_create_file("width", 0644,
10170 +                                                    debug_dir, NULL,
10171 +                                                    &width_fops);
10172 +       if (!debug_sample_width)
10173 +               goto err_width;
10175 +       debug_threshold = debugfs_create_file("threshold", 0644,
10176 +                                                 debug_dir, NULL,
10177 +                                                 &threshold_fops);
10178 +       if (!debug_threshold)
10179 +               goto err_threshold;
10181 +       debug_enable = debugfs_create_file("enable", 0644,
10182 +                                              debug_dir, &enabled,
10183 +                                              &enable_fops);
10184 +       if (!debug_enable)
10185 +               goto err_enable;
10187 +       else {
10188 +               ret = 0;
10189 +               goto out;
10190 +       }
10192 +err_enable:
10193 +       debugfs_remove(debug_threshold);
10194 +err_threshold:
10195 +       debugfs_remove(debug_sample_width);
10196 +err_width:
10197 +       debugfs_remove(debug_sample_window);
10198 +err_window:
10199 +       debugfs_remove(debug_max);
10200 +err_max:
10201 +       debugfs_remove(debug_count);
10202 +err_count:
10203 +       debugfs_remove(debug_sample);
10204 +err_sample:
10205 +       debugfs_remove(debug_dir);
10206 +err_debug_dir:
10207 +out:
10208 +       return ret;
10211 +/**
10212 + * free_debugfs - A function to cleanup the debugfs file interface
10213 + */
10214 +static void free_debugfs(void)
10216 +       /* could also use a debugfs_remove_recursive */
10217 +       debugfs_remove(debug_enable);
10218 +       debugfs_remove(debug_threshold);
10219 +       debugfs_remove(debug_sample_width);
10220 +       debugfs_remove(debug_sample_window);
10221 +       debugfs_remove(debug_max);
10222 +       debugfs_remove(debug_count);
10223 +       debugfs_remove(debug_sample);
10224 +       debugfs_remove(debug_dir);
10227 +/**
10228 + * detector_init - Standard module initialization code
10229 + */
10230 +static int detector_init(void)
10232 +       int ret = -ENOMEM;
10234 +       pr_info(BANNER "version %s\n", VERSION);
10236 +       ret = init_stats();
10237 +       if (ret)
10238 +               goto out;
10240 +       ret = init_debugfs();
10241 +       if (ret)
10242 +               goto err_stats;
10244 +       if (enabled)
10245 +               ret = start_kthread();
10247 +       goto out;
10249 +err_stats:
10250 +       ring_buffer_free(ring_buffer);
10251 +out:
10252 +       return ret;
10256 +/**
10257 + * detector_exit - Standard module cleanup code
10258 + */
10259 +static void detector_exit(void)
10261 +       int err;
10263 +       if (enabled) {
10264 +               enabled = 0;
10265 +               err = stop_kthread();
10266 +               if (err)
10267 +                       pr_err(BANNER "cannot stop kthread\n");
10268 +       }
10270 +       free_debugfs();
10271 +       ring_buffer_free(ring_buffer);  /* free up the ring buffer */
10275 +module_init(detector_init);
10276 +module_exit(detector_exit);
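The free_debugfs() comment above notes that debugfs_remove_recursive() could replace the file-by-file cleanup. The following is a minimal illustrative sketch of that alternative, not part of the patch; the name free_debugfs_recursive is invented for the example.

	#include <linux/debugfs.h>

	/*
	 * Alternative hinted at by the comment in free_debugfs(): remove the
	 * whole "hwlat_detector" directory and everything below it in one
	 * call instead of deleting each file individually. Illustrative only.
	 */
	static void free_debugfs_recursive(struct dentry *debug_dir)
	{
		debugfs_remove_recursive(debug_dir);
	}

Keeping the per-file debugfs_remove() calls, as the patch does, mirrors the error unwind labels in init_debugfs() above.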
10277 diff -Nur linux-4.4.46.orig/drivers/misc/Kconfig linux-4.4.46/drivers/misc/Kconfig
10278 --- linux-4.4.46.orig/drivers/misc/Kconfig      2017-02-01 08:31:11.000000000 +0100
10279 +++ linux-4.4.46/drivers/misc/Kconfig   2017-02-03 17:18:05.683416484 +0100
10280 @@ -54,6 +54,7 @@
10281  config ATMEL_TCLIB
10282         bool "Atmel AT32/AT91 Timer/Counter Library"
10283         depends on (AVR32 || ARCH_AT91)
10284 +       default y if PREEMPT_RT_FULL
10285         help
10286           Select this if you want a library to allocate the Timer/Counter
10287           blocks found on many Atmel processors.  This facilitates using
10288 @@ -69,8 +70,7 @@
10289           are combined to make a single 32-bit timer.
10291           When GENERIC_CLOCKEVENTS is defined, the third timer channel
10292 -         may be used as a clock event device supporting oneshot mode
10293 -         (delays of up to two seconds) based on the 32 KiHz clock.
10294 +         may be used as a clock event device supporting oneshot mode.
10296  config ATMEL_TCB_CLKSRC_BLOCK
10297         int
10298 @@ -84,6 +84,15 @@
10299           TC can be used for other purposes, such as PWM generation and
10300           interval timing.
10302 +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
10303 +       bool "TC Block use 32 KiHz clock"
10304 +       depends on ATMEL_TCB_CLKSRC
10305 +       default y if !PREEMPT_RT_FULL
10306 +       help
10307 +         Select this to use the 32 KiHz base clock rate as TC block clock
10308 +         source for clock events.
10311  config DUMMY_IRQ
10312         tristate "Dummy IRQ handler"
10313         default n
10314 @@ -113,6 +122,35 @@
10315           for information on the specific driver level and support statement
10316           for your IBM server.
10318 +config HWLAT_DETECTOR
10319 +       tristate "Testing module to detect hardware-induced latencies"
10320 +       depends on DEBUG_FS
10321 +       depends on RING_BUFFER
10322 +       default m
10323 +       ---help---
10324 +         A simple hardware latency detector. Use this module to detect
10325 +         large latencies introduced by the behavior of the underlying
10326 +         system firmware external to Linux. We do this using periodic
10327 +         use of stop_machine to grab all available CPUs and measure
10328 +         for unexplainable gaps in the CPU timestamp counter(s). By
10329 +         default, the module is not enabled until the "enable" file
10330 +         within the "hwlat_detector" debugfs directory is toggled.
10332 +         This module is often used to detect SMI (System Management
10333 +         Interrupts) on x86 systems, though it is not x86 specific. To
10334 +         this end, we default to using a sample window of 1 second,
10335 +         during which we will sample for 0.5 seconds. If an SMI or
10336 +         similar event occurs during that time, it is recorded
10337 +         into an 8K samples global ring buffer until retrieved.
10339 +         WARNING: This software should never be enabled (it can be built
10340 +         but should not be turned on after it is loaded) in a production
10341 +         environment where high latencies are a concern since the
10342 +         sampling mechanism actually introduces latencies for
10343 +         regular tasks while the CPU(s) are being held.
10345 +         If unsure, say N
10347  config PHANTOM
10348         tristate "Sensable PHANToM (PCI)"
10349         depends on PCI
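The HWLAT_DETECTOR help text above describes the mechanism: periodically hold the CPUs for a sampling window and look for unexplained gaps between consecutive timestamp reads. A minimal sketch of that sampling idea follows; the function and variable names, and the use of ktime_get_ns(), are illustrative assumptions and do not match the module's real code.

	#include <linux/ktime.h>
	#include <linux/timekeeping.h>
	#include <linux/types.h>

	/*
	 * Spin for width_ns nanoseconds, recording the largest gap seen
	 * between two consecutive clock reads. A gap at or above
	 * threshold_ns is the kind of event the module would push into its
	 * ring buffer. Illustrative sketch only.
	 */
	static u64 hwlat_sketch_sample(u64 width_ns, u64 threshold_ns)
	{
		u64 start, t1, t2, max_gap = 0;

		start = t1 = ktime_get_ns();
		do {
			t2 = ktime_get_ns();
			if (t2 - t1 > max_gap)
				max_gap = t2 - t1;
			t1 = t2;
		} while (t2 - start < width_ns);

		return max_gap >= threshold_ns ? max_gap : 0;
	}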
10350 diff -Nur linux-4.4.46.orig/drivers/misc/Makefile linux-4.4.46/drivers/misc/Makefile
10351 --- linux-4.4.46.orig/drivers/misc/Makefile     2017-02-01 08:31:11.000000000 +0100
10352 +++ linux-4.4.46/drivers/misc/Makefile  2017-02-03 17:18:05.683416484 +0100
10353 @@ -39,6 +39,7 @@
10354  obj-$(CONFIG_HMC6352)          += hmc6352.o
10355  obj-y                          += eeprom/
10356  obj-y                          += cb710/
10357 +obj-$(CONFIG_HWLAT_DETECTOR)   += hwlat_detector.o
10358  obj-$(CONFIG_SPEAR13XX_PCIE_GADGET)    += spear13xx_pcie_gadget.o
10359  obj-$(CONFIG_VMWARE_BALLOON)   += vmw_balloon.o
10360  obj-$(CONFIG_ARM_CHARLCD)      += arm-charlcd.o
10361 diff -Nur linux-4.4.46.orig/drivers/mmc/host/mmci.c linux-4.4.46/drivers/mmc/host/mmci.c
10362 --- linux-4.4.46.orig/drivers/mmc/host/mmci.c   2017-02-01 08:31:11.000000000 +0100
10363 +++ linux-4.4.46/drivers/mmc/host/mmci.c        2017-02-03 17:18:05.687416638 +0100
10364 @@ -1155,15 +1155,12 @@
10365         struct sg_mapping_iter *sg_miter = &host->sg_miter;
10366         struct variant_data *variant = host->variant;
10367         void __iomem *base = host->base;
10368 -       unsigned long flags;
10369         u32 status;
10371         status = readl(base + MMCISTATUS);
10373         dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
10375 -       local_irq_save(flags);
10377         do {
10378                 unsigned int remain, len;
10379                 char *buffer;
10380 @@ -1203,8 +1200,6 @@
10382         sg_miter_stop(sg_miter);
10384 -       local_irq_restore(flags);
10386         /*
10387          * If we have less than the fifo 'half-full' threshold to transfer,
10388          * trigger a PIO interrupt as soon as any data is available.
10389 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/3com/3c59x.c linux-4.4.46/drivers/net/ethernet/3com/3c59x.c
10390 --- linux-4.4.46.orig/drivers/net/ethernet/3com/3c59x.c 2017-02-01 08:31:11.000000000 +0100
10391 +++ linux-4.4.46/drivers/net/ethernet/3com/3c59x.c      2017-02-03 17:18:05.687416638 +0100
10392 @@ -842,9 +842,9 @@
10394         struct vortex_private *vp = netdev_priv(dev);
10395         unsigned long flags;
10396 -       local_irq_save(flags);
10397 +       local_irq_save_nort(flags);
10398         (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
10399 -       local_irq_restore(flags);
10400 +       local_irq_restore_nort(flags);
10402  #endif
10404 @@ -1916,12 +1916,12 @@
10405                          * Block interrupts because vortex_interrupt does a bare spin_lock()
10406                          */
10407                         unsigned long flags;
10408 -                       local_irq_save(flags);
10409 +                       local_irq_save_nort(flags);
10410                         if (vp->full_bus_master_tx)
10411                                 boomerang_interrupt(dev->irq, dev);
10412                         else
10413                                 vortex_interrupt(dev->irq, dev);
10414 -                       local_irq_restore(flags);
10415 +                       local_irq_restore_nort(flags);
10416                 }
10417         }
10419 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c linux-4.4.46/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
10420 --- linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c   2017-02-01 08:31:11.000000000 +0100
10421 +++ linux-4.4.46/drivers/net/ethernet/atheros/atl1c/atl1c_main.c        2017-02-03 17:18:05.687416638 +0100
10422 @@ -2221,11 +2221,7 @@
10423         }
10425         tpd_req = atl1c_cal_tpd_req(skb);
10426 -       if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
10427 -               if (netif_msg_pktdata(adapter))
10428 -                       dev_info(&adapter->pdev->dev, "tx locked\n");
10429 -               return NETDEV_TX_LOCKED;
10430 -       }
10431 +       spin_lock_irqsave(&adapter->tx_lock, flags);
10433         if (atl1c_tpd_avail(adapter, type) < tpd_req) {
10434                 /* no enough descriptor, just stop queue */
10435 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c linux-4.4.46/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
10436 --- linux-4.4.46.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c   2017-02-01 08:31:11.000000000 +0100
10437 +++ linux-4.4.46/drivers/net/ethernet/atheros/atl1e/atl1e_main.c        2017-02-03 17:18:05.687416638 +0100
10438 @@ -1880,8 +1880,7 @@
10439                 return NETDEV_TX_OK;
10440         }
10441         tpd_req = atl1e_cal_tdp_req(skb);
10442 -       if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
10443 -               return NETDEV_TX_LOCKED;
10444 +       spin_lock_irqsave(&adapter->tx_lock, flags);
10446         if (atl1e_tpd_avail(adapter) < tpd_req) {
10447                 /* no enough descriptor, just stop queue */
10448 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/chelsio/cxgb/sge.c linux-4.4.46/drivers/net/ethernet/chelsio/cxgb/sge.c
10449 --- linux-4.4.46.orig/drivers/net/ethernet/chelsio/cxgb/sge.c   2017-02-01 08:31:11.000000000 +0100
10450 +++ linux-4.4.46/drivers/net/ethernet/chelsio/cxgb/sge.c        2017-02-03 17:18:05.691416794 +0100
10451 @@ -1664,8 +1664,7 @@
10452         struct cmdQ *q = &sge->cmdQ[qid];
10453         unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
10455 -       if (!spin_trylock(&q->lock))
10456 -               return NETDEV_TX_LOCKED;
10457 +       spin_lock(&q->lock);
10459         reclaim_completed_tx(sge, q);
10461 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/neterion/s2io.c linux-4.4.46/drivers/net/ethernet/neterion/s2io.c
10462 --- linux-4.4.46.orig/drivers/net/ethernet/neterion/s2io.c      2017-02-01 08:31:11.000000000 +0100
10463 +++ linux-4.4.46/drivers/net/ethernet/neterion/s2io.c   2017-02-03 17:18:05.691416794 +0100
10464 @@ -4084,12 +4084,7 @@
10465                         [skb->priority & (MAX_TX_FIFOS - 1)];
10466         fifo = &mac_control->fifos[queue];
10468 -       if (do_spin_lock)
10469 -               spin_lock_irqsave(&fifo->tx_lock, flags);
10470 -       else {
10471 -               if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
10472 -                       return NETDEV_TX_LOCKED;
10473 -       }
10474 +       spin_lock_irqsave(&fifo->tx_lock, flags);
10476         if (sp->config.multiq) {
10477                 if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
10478 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c linux-4.4.46/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
10479 --- linux-4.4.46.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c      2017-02-01 08:31:11.000000000 +0100
10480 +++ linux-4.4.46/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c   2017-02-03 17:18:05.691416794 +0100
10481 @@ -2137,10 +2137,8 @@
10482         struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
10483         unsigned long flags;
10485 -       if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
10486 -               /* Collision - tell upper layer to requeue */
10487 -               return NETDEV_TX_LOCKED;
10488 -       }
10489 +       spin_lock_irqsave(&tx_ring->tx_lock, flags);
10491         if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
10492                 netif_stop_queue(netdev);
10493                 spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
10494 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/realtek/8139too.c linux-4.4.46/drivers/net/ethernet/realtek/8139too.c
10495 --- linux-4.4.46.orig/drivers/net/ethernet/realtek/8139too.c    2017-02-01 08:31:11.000000000 +0100
10496 +++ linux-4.4.46/drivers/net/ethernet/realtek/8139too.c 2017-02-03 17:18:05.691416794 +0100
10497 @@ -2229,7 +2229,7 @@
10498         struct rtl8139_private *tp = netdev_priv(dev);
10499         const int irq = tp->pci_dev->irq;
10501 -       disable_irq(irq);
10502 +       disable_irq_nosync(irq);
10503         rtl8139_interrupt(irq, dev);
10504         enable_irq(irq);
10506 diff -Nur linux-4.4.46.orig/drivers/net/ethernet/tehuti/tehuti.c linux-4.4.46/drivers/net/ethernet/tehuti/tehuti.c
10507 --- linux-4.4.46.orig/drivers/net/ethernet/tehuti/tehuti.c      2017-02-01 08:31:11.000000000 +0100
10508 +++ linux-4.4.46/drivers/net/ethernet/tehuti/tehuti.c   2017-02-03 17:18:05.691416794 +0100
10509 @@ -1629,13 +1629,8 @@
10510         unsigned long flags;
10512         ENTER;
10513 -       local_irq_save(flags);
10514 -       if (!spin_trylock(&priv->tx_lock)) {
10515 -               local_irq_restore(flags);
10516 -               DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
10517 -                   BDX_DRV_NAME, ndev->name);
10518 -               return NETDEV_TX_LOCKED;
10519 -       }
10521 +       spin_lock_irqsave(&priv->tx_lock, flags);
10523         /* build tx descriptor */
10524         BDX_ASSERT(f->m.wptr >= f->m.memsz);    /* started with valid wptr */
10525 diff -Nur linux-4.4.46.orig/drivers/net/rionet.c linux-4.4.46/drivers/net/rionet.c
10526 --- linux-4.4.46.orig/drivers/net/rionet.c      2017-02-01 08:31:11.000000000 +0100
10527 +++ linux-4.4.46/drivers/net/rionet.c   2017-02-03 17:18:05.691416794 +0100
10528 @@ -174,11 +174,7 @@
10529         unsigned long flags;
10530         int add_num = 1;
10532 -       local_irq_save(flags);
10533 -       if (!spin_trylock(&rnet->tx_lock)) {
10534 -               local_irq_restore(flags);
10535 -               return NETDEV_TX_LOCKED;
10536 -       }
10537 +       spin_lock_irqsave(&rnet->tx_lock, flags);
10539         if (is_multicast_ether_addr(eth->h_dest))
10540                 add_num = nets[rnet->mport->id].nact;
10541 diff -Nur linux-4.4.46.orig/drivers/net/wireless/orinoco/orinoco_usb.c linux-4.4.46/drivers/net/wireless/orinoco/orinoco_usb.c
10542 --- linux-4.4.46.orig/drivers/net/wireless/orinoco/orinoco_usb.c        2017-02-01 08:31:11.000000000 +0100
10543 +++ linux-4.4.46/drivers/net/wireless/orinoco/orinoco_usb.c     2017-02-03 17:18:05.695416949 +0100
10544 @@ -697,7 +697,7 @@
10545                         while (!ctx->done.done && msecs--)
10546                                 udelay(1000);
10547                 } else {
10548 -                       wait_event_interruptible(ctx->done.wait,
10549 +                       swait_event_interruptible(ctx->done.wait,
10550                                                  ctx->done.done);
10551                 }
10552                 break;
10553 diff -Nur linux-4.4.46.orig/drivers/pci/access.c linux-4.4.46/drivers/pci/access.c
10554 --- linux-4.4.46.orig/drivers/pci/access.c      2017-02-01 08:31:11.000000000 +0100
10555 +++ linux-4.4.46/drivers/pci/access.c   2017-02-03 17:18:05.695416949 +0100
10556 @@ -561,7 +561,7 @@
10557         WARN_ON(!dev->block_cfg_access);
10559         dev->block_cfg_access = 0;
10560 -       wake_up_all(&pci_cfg_wait);
10561 +       wake_up_all_locked(&pci_cfg_wait);
10562         raw_spin_unlock_irqrestore(&pci_lock, flags);
10564  EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
10565 diff -Nur linux-4.4.46.orig/drivers/scsi/fcoe/fcoe.c linux-4.4.46/drivers/scsi/fcoe/fcoe.c
10566 --- linux-4.4.46.orig/drivers/scsi/fcoe/fcoe.c  2017-02-01 08:31:11.000000000 +0100
10567 +++ linux-4.4.46/drivers/scsi/fcoe/fcoe.c       2017-02-03 17:18:05.695416949 +0100
10568 @@ -1286,7 +1286,7 @@
10569         struct sk_buff *skb;
10570  #ifdef CONFIG_SMP
10571         struct fcoe_percpu_s *p0;
10572 -       unsigned targ_cpu = get_cpu();
10573 +       unsigned targ_cpu = get_cpu_light();
10574  #endif /* CONFIG_SMP */
10576         FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
10577 @@ -1342,7 +1342,7 @@
10578                         kfree_skb(skb);
10579                 spin_unlock_bh(&p->fcoe_rx_list.lock);
10580         }
10581 -       put_cpu();
10582 +       put_cpu_light();
10583  #else
10584         /*
10585          * This a non-SMP scenario where the singular Rx thread is
10586 @@ -1566,11 +1566,11 @@
10587  static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
10589         struct fcoe_percpu_s *fps;
10590 -       int rc;
10591 +       int rc, cpu = get_cpu_light();
10593 -       fps = &get_cpu_var(fcoe_percpu);
10594 +       fps = &per_cpu(fcoe_percpu, cpu);
10595         rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
10596 -       put_cpu_var(fcoe_percpu);
10597 +       put_cpu_light();
10599         return rc;
10601 @@ -1766,11 +1766,11 @@
10602                 return 0;
10603         }
10605 -       stats = per_cpu_ptr(lport->stats, get_cpu());
10606 +       stats = per_cpu_ptr(lport->stats, get_cpu_light());
10607         stats->InvalidCRCCount++;
10608         if (stats->InvalidCRCCount < 5)
10609                 printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
10610 -       put_cpu();
10611 +       put_cpu_light();
10612         return -EINVAL;
10615 @@ -1814,7 +1814,7 @@
10616          */
10617         hp = (struct fcoe_hdr *) skb_network_header(skb);
10619 -       stats = per_cpu_ptr(lport->stats, get_cpu());
10620 +       stats = per_cpu_ptr(lport->stats, get_cpu_light());
10621         if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
10622                 if (stats->ErrorFrames < 5)
10623                         printk(KERN_WARNING "fcoe: FCoE version "
10624 @@ -1846,13 +1846,13 @@
10625                 goto drop;
10627         if (!fcoe_filter_frames(lport, fp)) {
10628 -               put_cpu();
10629 +               put_cpu_light();
10630                 fc_exch_recv(lport, fp);
10631                 return;
10632         }
10633  drop:
10634         stats->ErrorFrames++;
10635 -       put_cpu();
10636 +       put_cpu_light();
10637         kfree_skb(skb);
10640 diff -Nur linux-4.4.46.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-4.4.46/drivers/scsi/fcoe/fcoe_ctlr.c
10641 --- linux-4.4.46.orig/drivers/scsi/fcoe/fcoe_ctlr.c     2017-02-01 08:31:11.000000000 +0100
10642 +++ linux-4.4.46/drivers/scsi/fcoe/fcoe_ctlr.c  2017-02-03 17:18:05.695416949 +0100
10643 @@ -831,7 +831,7 @@
10645         INIT_LIST_HEAD(&del_list);
10647 -       stats = per_cpu_ptr(fip->lp->stats, get_cpu());
10648 +       stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
10650         list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
10651                 deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
10652 @@ -867,7 +867,7 @@
10653                                 sel_time = fcf->time;
10654                 }
10655         }
10656 -       put_cpu();
10657 +       put_cpu_light();
10659         list_for_each_entry_safe(fcf, next, &del_list, list) {
10660                 /* Removes fcf from current list */
10661 diff -Nur linux-4.4.46.orig/drivers/scsi/libfc/fc_exch.c linux-4.4.46/drivers/scsi/libfc/fc_exch.c
10662 --- linux-4.4.46.orig/drivers/scsi/libfc/fc_exch.c      2017-02-01 08:31:11.000000000 +0100
10663 +++ linux-4.4.46/drivers/scsi/libfc/fc_exch.c   2017-02-03 17:18:05.695416949 +0100
10664 @@ -814,10 +814,10 @@
10665         }
10666         memset(ep, 0, sizeof(*ep));
10668 -       cpu = get_cpu();
10669 +       cpu = get_cpu_light();
10670         pool = per_cpu_ptr(mp->pool, cpu);
10671         spin_lock_bh(&pool->lock);
10672 -       put_cpu();
10673 +       put_cpu_light();
10675         /* peek cache of free slot */
10676         if (pool->left != FC_XID_UNKNOWN) {
10677 diff -Nur linux-4.4.46.orig/drivers/scsi/libsas/sas_ata.c linux-4.4.46/drivers/scsi/libsas/sas_ata.c
10678 --- linux-4.4.46.orig/drivers/scsi/libsas/sas_ata.c     2017-02-01 08:31:11.000000000 +0100
10679 +++ linux-4.4.46/drivers/scsi/libsas/sas_ata.c  2017-02-03 17:18:05.695416949 +0100
10680 @@ -190,7 +190,7 @@
10681         /* TODO: audit callers to ensure they are ready for qc_issue to
10682          * unconditionally re-enable interrupts
10683          */
10684 -       local_irq_save(flags);
10685 +       local_irq_save_nort(flags);
10686         spin_unlock(ap->lock);
10688         /* If the device fell off, no sense in issuing commands */
10689 @@ -255,7 +255,7 @@
10691   out:
10692         spin_lock(ap->lock);
10693 -       local_irq_restore(flags);
10694 +       local_irq_restore_nort(flags);
10695         return ret;
10698 diff -Nur linux-4.4.46.orig/drivers/scsi/qla2xxx/qla_inline.h linux-4.4.46/drivers/scsi/qla2xxx/qla_inline.h
10699 --- linux-4.4.46.orig/drivers/scsi/qla2xxx/qla_inline.h 2017-02-01 08:31:11.000000000 +0100
10700 +++ linux-4.4.46/drivers/scsi/qla2xxx/qla_inline.h      2017-02-03 17:18:05.695416949 +0100
10701 @@ -59,12 +59,12 @@
10703         unsigned long flags;
10704         struct qla_hw_data *ha = rsp->hw;
10705 -       local_irq_save(flags);
10706 +       local_irq_save_nort(flags);
10707         if (IS_P3P_TYPE(ha))
10708                 qla82xx_poll(0, rsp);
10709         else
10710                 ha->isp_ops->intr_handler(0, rsp);
10711 -       local_irq_restore(flags);
10712 +       local_irq_restore_nort(flags);
10715  static inline uint8_t *
10716 diff -Nur linux-4.4.46.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-4.4.46/drivers/thermal/x86_pkg_temp_thermal.c
10717 --- linux-4.4.46.orig/drivers/thermal/x86_pkg_temp_thermal.c    2017-02-01 08:31:11.000000000 +0100
10718 +++ linux-4.4.46/drivers/thermal/x86_pkg_temp_thermal.c 2017-02-03 17:18:05.695416949 +0100
10719 @@ -29,6 +29,7 @@
10720  #include <linux/pm.h>
10721  #include <linux/thermal.h>
10722  #include <linux/debugfs.h>
10723 +#include <linux/swork.h>
10724  #include <asm/cpu_device_id.h>
10725  #include <asm/mce.h>
10727 @@ -352,7 +353,7 @@
10728         }
10731 -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
10732 +static void platform_thermal_notify_work(struct swork_event *event)
10734         unsigned long flags;
10735         int cpu = smp_processor_id();
10736 @@ -369,7 +370,7 @@
10737                         pkg_work_scheduled[phy_id]) {
10738                 disable_pkg_thres_interrupt();
10739                 spin_unlock_irqrestore(&pkg_work_lock, flags);
10740 -               return -EINVAL;
10741 +               return;
10742         }
10743         pkg_work_scheduled[phy_id] = 1;
10744         spin_unlock_irqrestore(&pkg_work_lock, flags);
10745 @@ -378,9 +379,48 @@
10746         schedule_delayed_work_on(cpu,
10747                                 &per_cpu(pkg_temp_thermal_threshold_work, cpu),
10748                                 msecs_to_jiffies(notify_delay_ms));
10751 +#ifdef CONFIG_PREEMPT_RT_FULL
10752 +static struct swork_event notify_work;
10754 +static int thermal_notify_work_init(void)
10756 +       int err;
10758 +       err = swork_get();
10759 +       if (err)
10760 +               return err;
10762 +       INIT_SWORK(&notify_work, platform_thermal_notify_work);
10763         return 0;
10766 +static void thermal_notify_work_cleanup(void)
10768 +       swork_put();
10771 +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
10773 +       swork_queue(&notify_work);
10774 +       return 0;
10777 +#else  /* !CONFIG_PREEMPT_RT_FULL */
10779 +static int thermal_notify_work_init(void) { return 0; }
10781 +static void thermal_notify_work_cleanup(void) {  }
10783 +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
10785 +       platform_thermal_notify_work(NULL);
10787 +       return 0;
10789 +#endif /* CONFIG_PREEMPT_RT_FULL */
10791  static int find_siblings_cpu(int cpu)
10793         int i;
10794 @@ -584,6 +624,9 @@
10795         if (!x86_match_cpu(pkg_temp_thermal_ids))
10796                 return -ENODEV;
10798 +       if (!thermal_notify_work_init())
10799 +               return -ENODEV;
10801         spin_lock_init(&pkg_work_lock);
10802         platform_thermal_package_notify =
10803                         pkg_temp_thermal_platform_thermal_notify;
10804 @@ -608,7 +651,7 @@
10805         kfree(pkg_work_scheduled);
10806         platform_thermal_package_notify = NULL;
10807         platform_thermal_package_rate_control = NULL;
10809 +       thermal_notify_work_cleanup();
10810         return -ENODEV;
10813 @@ -633,6 +676,7 @@
10814         mutex_unlock(&phy_dev_list_mutex);
10815         platform_thermal_package_notify = NULL;
10816         platform_thermal_package_rate_control = NULL;
10817 +       thermal_notify_work_cleanup();
10818         for_each_online_cpu(i)
10819                 cancel_delayed_work_sync(
10820                         &per_cpu(pkg_temp_thermal_threshold_work, i));
10821 diff -Nur linux-4.4.46.orig/drivers/tty/serial/8250/8250_core.c linux-4.4.46/drivers/tty/serial/8250/8250_core.c
10822 --- linux-4.4.46.orig/drivers/tty/serial/8250/8250_core.c       2017-02-01 08:31:11.000000000 +0100
10823 +++ linux-4.4.46/drivers/tty/serial/8250/8250_core.c    2017-02-03 17:18:05.695416949 +0100
10824 @@ -58,7 +58,16 @@
10826  static unsigned int skip_txen_test; /* force skip of txen test at init time */
10828 -#define PASS_LIMIT     512
10830 + * On -rt we can have more delays, and legitimately
10831 + * so - so don't drop work spuriously and spam the
10832 + * syslog:
10833 + */
10834 +#ifdef CONFIG_PREEMPT_RT_FULL
10835 +# define PASS_LIMIT    1000000
10836 +#else
10837 +# define PASS_LIMIT    512
10838 +#endif
10840  #include <asm/serial.h>
10841  /*
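The new PASS_LIMIT comment above reasons that on -rt the 8250 interrupt service loop may legitimately spin more often, so the limit is raised to keep the "too much work" message from firing spuriously. The sketch below only illustrates the kind of bounded loop such a limit guards; port_has_pending_work(), service_port() and SKETCH_PASS_LIMIT are invented for the example and are not the driver's symbols.

	#include <linux/printk.h>
	#include <linux/types.h>

	#define SKETCH_PASS_LIMIT 512	/* stands in for PASS_LIMIT above */

	/* hypothetical helpers, defined trivially so the sketch compiles */
	static bool port_has_pending_work(void) { return false; }
	static void service_port(void) { }

	static void irq_loop_sketch(int irq)
	{
		int pass_counter = 0;

		while (port_has_pending_work()) {
			service_port();
			if (pass_counter++ > SKETCH_PASS_LIMIT) {
				/* on -rt this bound fires too easily, hence the larger limit */
				pr_err_ratelimited("sketch: too much work for irq%d\n", irq);
				break;
			}
		}
	}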
10842 diff -Nur linux-4.4.46.orig/drivers/tty/serial/8250/8250_port.c linux-4.4.46/drivers/tty/serial/8250/8250_port.c
10843 --- linux-4.4.46.orig/drivers/tty/serial/8250/8250_port.c       2017-02-01 08:31:11.000000000 +0100
10844 +++ linux-4.4.46/drivers/tty/serial/8250/8250_port.c    2017-02-03 17:18:05.695416949 +0100
10845 @@ -35,6 +35,7 @@
10846  #include <linux/nmi.h>
10847  #include <linux/mutex.h>
10848  #include <linux/slab.h>
10849 +#include <linux/kdb.h>
10850  #include <linux/uaccess.h>
10851  #include <linux/pm_runtime.h>
10853 @@ -2843,9 +2844,9 @@
10855         serial8250_rpm_get(up);
10857 -       if (port->sysrq)
10858 +       if (port->sysrq || oops_in_progress)
10859                 locked = 0;
10860 -       else if (oops_in_progress)
10861 +       else if (in_kdb_printk())
10862                 locked = spin_trylock_irqsave(&port->lock, flags);
10863         else
10864                 spin_lock_irqsave(&port->lock, flags);
10865 diff -Nur linux-4.4.46.orig/drivers/tty/serial/amba-pl011.c linux-4.4.46/drivers/tty/serial/amba-pl011.c
10866 --- linux-4.4.46.orig/drivers/tty/serial/amba-pl011.c   2017-02-01 08:31:11.000000000 +0100
10867 +++ linux-4.4.46/drivers/tty/serial/amba-pl011.c        2017-02-03 17:18:05.695416949 +0100
10868 @@ -2067,13 +2067,19 @@
10870         clk_enable(uap->clk);
10872 -       local_irq_save(flags);
10873 +       /*
10874 +        * local_irq_save(flags);
10875 +        *
10876 +        * This local_irq_save() is nonsense. If we come in via sysrq
10877 +        * handling then interrupts are already disabled. Aside of
10878 +        * that the port.sysrq check is racy on SMP regardless.
10879 +       */
10880         if (uap->port.sysrq)
10881                 locked = 0;
10882         else if (oops_in_progress)
10883 -               locked = spin_trylock(&uap->port.lock);
10884 +               locked = spin_trylock_irqsave(&uap->port.lock, flags);
10885         else
10886 -               spin_lock(&uap->port.lock);
10887 +               spin_lock_irqsave(&uap->port.lock, flags);
10889         /*
10890          *      First save the CR then disable the interrupts
10891 @@ -2098,8 +2104,7 @@
10892                 writew(old_cr, uap->port.membase + UART011_CR);
10894         if (locked)
10895 -               spin_unlock(&uap->port.lock);
10896 -       local_irq_restore(flags);
10897 +               spin_unlock_irqrestore(&uap->port.lock, flags);
10899         clk_disable(uap->clk);
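The comment added above explains why the pl011 console write path stops disabling interrupts around the whole function and instead takes the port lock with irqsave, falling back to a trylock while an oops is in progress. A generic sketch of that console-write locking pattern follows; console_write_sketch() is an invented name and the body is illustrative, not the driver's code.

	#include <linux/kernel.h>
	#include <linux/serial_core.h>
	#include <linux/spinlock.h>

	static void console_write_sketch(struct uart_port *port)
	{
		unsigned long flags;
		int locked = 1;

		if (port->sysrq)
			locked = 0;	/* lock is already held on this path */
		else if (oops_in_progress)
			locked = spin_trylock_irqsave(&port->lock, flags);
		else
			spin_lock_irqsave(&port->lock, flags);

		/* ... emit characters to the UART FIFO here ... */

		if (locked)
			spin_unlock_irqrestore(&port->lock, flags);
	}

The trylock keeps a CPU that crashed while holding the port lock from deadlocking against its own console output.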
10901 diff -Nur linux-4.4.46.orig/drivers/tty/serial/omap-serial.c linux-4.4.46/drivers/tty/serial/omap-serial.c
10902 --- linux-4.4.46.orig/drivers/tty/serial/omap-serial.c  2017-02-01 08:31:11.000000000 +0100
10903 +++ linux-4.4.46/drivers/tty/serial/omap-serial.c       2017-02-03 17:18:05.695416949 +0100
10904 @@ -1257,13 +1257,10 @@
10906         pm_runtime_get_sync(up->dev);
10908 -       local_irq_save(flags);
10909 -       if (up->port.sysrq)
10910 -               locked = 0;
10911 -       else if (oops_in_progress)
10912 -               locked = spin_trylock(&up->port.lock);
10913 +       if (up->port.sysrq || oops_in_progress)
10914 +               locked = spin_trylock_irqsave(&up->port.lock, flags);
10915         else
10916 -               spin_lock(&up->port.lock);
10917 +               spin_lock_irqsave(&up->port.lock, flags);
10919         /*
10920          * First save the IER then disable the interrupts
10921 @@ -1292,8 +1289,7 @@
10922         pm_runtime_mark_last_busy(up->dev);
10923         pm_runtime_put_autosuspend(up->dev);
10924         if (locked)
10925 -               spin_unlock(&up->port.lock);
10926 -       local_irq_restore(flags);
10927 +               spin_unlock_irqrestore(&up->port.lock, flags);
10930  static int __init
10931 diff -Nur linux-4.4.46.orig/drivers/usb/core/hcd.c linux-4.4.46/drivers/usb/core/hcd.c
10932 --- linux-4.4.46.orig/drivers/usb/core/hcd.c    2017-02-01 08:31:11.000000000 +0100
10933 +++ linux-4.4.46/drivers/usb/core/hcd.c 2017-02-03 17:18:10.895617822 +0100
10934 @@ -1735,9 +1735,9 @@
10935          * and no one may trigger the above deadlock situation when
10936          * running complete() in tasklet.
10937          */
10938 -       local_irq_save(flags);
10939 +       local_irq_save_nort(flags);
10940         urb->complete(urb);
10941 -       local_irq_restore(flags);
10942 +       local_irq_restore_nort(flags);
10944         usb_anchor_resume_wakeups(anchor);
10945         atomic_dec(&urb->use_count);
10946 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/function/f_fs.c linux-4.4.46/drivers/usb/gadget/function/f_fs.c
10947 --- linux-4.4.46.orig/drivers/usb/gadget/function/f_fs.c        2017-02-01 08:31:11.000000000 +0100
10948 +++ linux-4.4.46/drivers/usb/gadget/function/f_fs.c     2017-02-03 17:18:10.895617822 +0100
10949 @@ -1404,7 +1404,7 @@
10950                 pr_info("%s(): freeing\n", __func__);
10951                 ffs_data_clear(ffs);
10952                 BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
10953 -                      waitqueue_active(&ffs->ep0req_completion.wait));
10954 +                      swait_active(&ffs->ep0req_completion.wait));
10955                 kfree(ffs->dev_name);
10956                 kfree(ffs);
10957         }
10958 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/legacy/inode.c linux-4.4.46/drivers/usb/gadget/legacy/inode.c
10959 --- linux-4.4.46.orig/drivers/usb/gadget/legacy/inode.c 2017-02-01 08:31:11.000000000 +0100
10960 +++ linux-4.4.46/drivers/usb/gadget/legacy/inode.c      2017-02-03 17:18:10.895617822 +0100
10961 @@ -345,7 +345,7 @@
10962         spin_unlock_irq (&epdata->dev->lock);
10964         if (likely (value == 0)) {
10965 -               value = wait_event_interruptible (done.wait, done.done);
10966 +               value = swait_event_interruptible (done.wait, done.done);
10967                 if (value != 0) {
10968                         spin_lock_irq (&epdata->dev->lock);
10969                         if (likely (epdata->ep != NULL)) {
10970 @@ -354,7 +354,7 @@
10971                                 usb_ep_dequeue (epdata->ep, epdata->req);
10972                                 spin_unlock_irq (&epdata->dev->lock);
10974 -                               wait_event (done.wait, done.done);
10975 +                               swait_event (done.wait, done.done);
10976                                 if (epdata->status == -ECONNRESET)
10977                                         epdata->status = -EINTR;
10978                         } else {
10979 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.c linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.c
10980 --- linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.c   2017-02-01 08:31:11.000000000 +0100
10981 +++ linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.c        2017-02-03 17:18:10.895617822 +0100
10982 @@ -17,7 +17,9 @@
10983  #include <linux/device.h>
10984  #include <linux/dma-mapping.h>
10985  #include <linux/list.h>
10986 +#include <linux/mfd/syscon.h>
10987  #include <linux/platform_device.h>
10988 +#include <linux/regmap.h>
10989  #include <linux/usb/ch9.h>
10990  #include <linux/usb/gadget.h>
10991  #include <linux/usb/atmel_usba_udc.h>
10992 @@ -1888,20 +1890,15 @@
10993  #ifdef CONFIG_OF
10994  static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on)
10996 -       unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
10998 -       if (is_on)
10999 -               at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
11000 -       else
11001 -               at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
11002 +       regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
11003 +                          is_on ? AT91_PMC_BIASEN : 0);
11006  static void at91sam9g45_pulse_bias(struct usba_udc *udc)
11008 -       unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
11010 -       at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
11011 -       at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
11012 +       regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0);
11013 +       regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
11014 +                          AT91_PMC_BIASEN);
11017  static const struct usba_udc_errata at91sam9rl_errata = {
11018 @@ -1938,6 +1935,9 @@
11019                 return ERR_PTR(-EINVAL);
11021         udc->errata = match->data;
11022 +       udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
11023 +       if (udc->errata && IS_ERR(udc->pmc))
11024 +               return ERR_CAST(udc->pmc);
11026         udc->num_ep = 0;
11028 diff -Nur linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.h linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.h
11029 --- linux-4.4.46.orig/drivers/usb/gadget/udc/atmel_usba_udc.h   2017-02-01 08:31:11.000000000 +0100
11030 +++ linux-4.4.46/drivers/usb/gadget/udc/atmel_usba_udc.h        2017-02-03 17:18:10.895617822 +0100
11031 @@ -354,6 +354,8 @@
11032         struct dentry *debugfs_root;
11033         struct dentry *debugfs_regs;
11034  #endif
11036 +       struct regmap *pmc;
11037  };
11039  static inline struct usba_ep *to_usba_ep(struct usb_ep *ep)
11040 diff -Nur linux-4.4.46.orig/fs/aio.c linux-4.4.46/fs/aio.c
11041 --- linux-4.4.46.orig/fs/aio.c  2017-02-01 08:31:11.000000000 +0100
11042 +++ linux-4.4.46/fs/aio.c       2017-02-03 17:18:10.899617976 +0100
11043 @@ -40,6 +40,7 @@
11044  #include <linux/ramfs.h>
11045  #include <linux/percpu-refcount.h>
11046  #include <linux/mount.h>
11047 +#include <linux/swork.h>
11049  #include <asm/kmap_types.h>
11050  #include <asm/uaccess.h>
11051 @@ -115,7 +116,7 @@
11052         struct page             **ring_pages;
11053         long                    nr_pages;
11055 -       struct work_struct      free_work;
11056 +       struct swork_event      free_work;
11058         /*
11059          * signals when all in-flight requests are done
11060 @@ -258,6 +259,7 @@
11061                 .mount          = aio_mount,
11062                 .kill_sb        = kill_anon_super,
11063         };
11064 +       BUG_ON(swork_get());
11065         aio_mnt = kern_mount(&aio_fs);
11066         if (IS_ERR(aio_mnt))
11067                 panic("Failed to create aio fs mount.");
11068 @@ -573,9 +575,9 @@
11069         return cancel(&kiocb->common);
11072 -static void free_ioctx(struct work_struct *work)
11073 +static void free_ioctx(struct swork_event *sev)
11075 -       struct kioctx *ctx = container_of(work, struct kioctx, free_work);
11076 +       struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
11078         pr_debug("freeing %p\n", ctx);
11080 @@ -594,8 +596,8 @@
11081         if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
11082                 complete(&ctx->rq_wait->comp);
11084 -       INIT_WORK(&ctx->free_work, free_ioctx);
11085 -       schedule_work(&ctx->free_work);
11086 +       INIT_SWORK(&ctx->free_work, free_ioctx);
11087 +       swork_queue(&ctx->free_work);
11090  /*
11091 @@ -603,9 +605,9 @@
11092   * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
11093   * now it's safe to cancel any that need to be.
11094   */
11095 -static void free_ioctx_users(struct percpu_ref *ref)
11096 +static void free_ioctx_users_work(struct swork_event *sev)
11098 -       struct kioctx *ctx = container_of(ref, struct kioctx, users);
11099 +       struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
11100         struct aio_kiocb *req;
11102         spin_lock_irq(&ctx->ctx_lock);
11103 @@ -624,6 +626,14 @@
11104         percpu_ref_put(&ctx->reqs);
11107 +static void free_ioctx_users(struct percpu_ref *ref)
11109 +       struct kioctx *ctx = container_of(ref, struct kioctx, users);
11111 +       INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
11112 +       swork_queue(&ctx->free_work);
11115  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
11117         unsigned i, new_nr;
11118 diff -Nur linux-4.4.46.orig/fs/autofs4/autofs_i.h linux-4.4.46/fs/autofs4/autofs_i.h
11119 --- linux-4.4.46.orig/fs/autofs4/autofs_i.h     2017-02-01 08:31:11.000000000 +0100
11120 +++ linux-4.4.46/fs/autofs4/autofs_i.h  2017-02-03 17:18:10.899617976 +0100
11121 @@ -34,6 +34,7 @@
11122  #include <linux/sched.h>
11123  #include <linux/mount.h>
11124  #include <linux/namei.h>
11125 +#include <linux/delay.h>
11126  #include <asm/current.h>
11127  #include <asm/uaccess.h>
11129 diff -Nur linux-4.4.46.orig/fs/autofs4/expire.c linux-4.4.46/fs/autofs4/expire.c
11130 --- linux-4.4.46.orig/fs/autofs4/expire.c       2017-02-01 08:31:11.000000000 +0100
11131 +++ linux-4.4.46/fs/autofs4/expire.c    2017-02-03 17:18:10.899617976 +0100
11132 @@ -150,7 +150,7 @@
11133                         parent = p->d_parent;
11134                         if (!spin_trylock(&parent->d_lock)) {
11135                                 spin_unlock(&p->d_lock);
11136 -                               cpu_relax();
11137 +                               cpu_chill();
11138                                 goto relock;
11139                         }
11140                         spin_unlock(&p->d_lock);
11141 diff -Nur linux-4.4.46.orig/fs/buffer.c linux-4.4.46/fs/buffer.c
11142 --- linux-4.4.46.orig/fs/buffer.c       2017-02-01 08:31:11.000000000 +0100
11143 +++ linux-4.4.46/fs/buffer.c    2017-02-03 17:18:10.899617976 +0100
11144 @@ -305,8 +305,7 @@
11145          * decide that the page is now completely done.
11146          */
11147         first = page_buffers(page);
11148 -       local_irq_save(flags);
11149 -       bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
11150 +       flags = bh_uptodate_lock_irqsave(first);
11151         clear_buffer_async_read(bh);
11152         unlock_buffer(bh);
11153         tmp = bh;
11154 @@ -319,8 +318,7 @@
11155                 }
11156                 tmp = tmp->b_this_page;
11157         } while (tmp != bh);
11158 -       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11159 -       local_irq_restore(flags);
11160 +       bh_uptodate_unlock_irqrestore(first, flags);
11162         /*
11163          * If none of the buffers had errors and they are all
11164 @@ -332,9 +330,7 @@
11165         return;
11167  still_busy:
11168 -       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11169 -       local_irq_restore(flags);
11170 -       return;
11171 +       bh_uptodate_unlock_irqrestore(first, flags);
11174  /*
11175 @@ -362,8 +358,7 @@
11176         }
11178         first = page_buffers(page);
11179 -       local_irq_save(flags);
11180 -       bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
11181 +       flags = bh_uptodate_lock_irqsave(first);
11183         clear_buffer_async_write(bh);
11184         unlock_buffer(bh);
11185 @@ -375,15 +370,12 @@
11186                 }
11187                 tmp = tmp->b_this_page;
11188         }
11189 -       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11190 -       local_irq_restore(flags);
11191 +       bh_uptodate_unlock_irqrestore(first, flags);
11192         end_page_writeback(page);
11193         return;
11195  still_busy:
11196 -       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11197 -       local_irq_restore(flags);
11198 -       return;
11199 +       bh_uptodate_unlock_irqrestore(first, flags);
11201  EXPORT_SYMBOL(end_buffer_async_write);
11203 @@ -3325,6 +3317,7 @@
11204         struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
11205         if (ret) {
11206                 INIT_LIST_HEAD(&ret->b_assoc_buffers);
11207 +               buffer_head_init_locks(ret);
11208                 preempt_disable();
11209                 __this_cpu_inc(bh_accounting.nr);
11210                 recalc_bh_state();
11211 diff -Nur linux-4.4.46.orig/fs/dcache.c linux-4.4.46/fs/dcache.c
11212 --- linux-4.4.46.orig/fs/dcache.c       2017-02-01 08:31:11.000000000 +0100
11213 +++ linux-4.4.46/fs/dcache.c    2017-02-03 17:18:10.899617976 +0100
11214 @@ -19,6 +19,7 @@
11215  #include <linux/mm.h>
11216  #include <linux/fs.h>
11217  #include <linux/fsnotify.h>
11218 +#include <linux/delay.h>
11219  #include <linux/slab.h>
11220  #include <linux/init.h>
11221  #include <linux/hash.h>
11222 @@ -747,6 +748,8 @@
11223   */
11224  void dput(struct dentry *dentry)
11226 +       struct dentry *parent;
11228         if (unlikely(!dentry))
11229                 return;
11231 @@ -783,9 +786,18 @@
11232         return;
11234  kill_it:
11235 -       dentry = dentry_kill(dentry);
11236 -       if (dentry) {
11237 -               cond_resched();
11238 +       parent = dentry_kill(dentry);
11239 +       if (parent) {
11240 +               int r;
11242 +               if (parent == dentry) {
11243 +                       /* the task with the highest priority won't schedule */
11244 +                       r = cond_resched();
11245 +                       if (!r)
11246 +                               cpu_chill();
11247 +               } else {
11248 +                       dentry = parent;
11249 +               }
11250                 goto repeat;
11251         }
11253 @@ -2397,7 +2409,7 @@
11254         if (dentry->d_lockref.count == 1) {
11255                 if (!spin_trylock(&inode->i_lock)) {
11256                         spin_unlock(&dentry->d_lock);
11257 -                       cpu_relax();
11258 +                       cpu_chill();
11259                         goto again;
11260                 }
11261                 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
11262 diff -Nur linux-4.4.46.orig/fs/eventpoll.c linux-4.4.46/fs/eventpoll.c
11263 --- linux-4.4.46.orig/fs/eventpoll.c    2017-02-01 08:31:11.000000000 +0100
11264 +++ linux-4.4.46/fs/eventpoll.c 2017-02-03 17:18:10.899617976 +0100
11265 @@ -505,12 +505,12 @@
11266   */
11267  static void ep_poll_safewake(wait_queue_head_t *wq)
11269 -       int this_cpu = get_cpu();
11270 +       int this_cpu = get_cpu_light();
11272         ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
11273                        ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
11275 -       put_cpu();
11276 +       put_cpu_light();
11279  static void ep_remove_wait_queue(struct eppoll_entry *pwq)
11280 diff -Nur linux-4.4.46.orig/fs/exec.c linux-4.4.46/fs/exec.c
11281 --- linux-4.4.46.orig/fs/exec.c 2017-02-01 08:31:11.000000000 +0100
11282 +++ linux-4.4.46/fs/exec.c      2017-02-03 17:18:10.899617976 +0100
11283 @@ -866,12 +866,14 @@
11284                 }
11285         }
11286         task_lock(tsk);
11287 +       preempt_disable_rt();
11288         active_mm = tsk->active_mm;
11289         tsk->mm = mm;
11290         tsk->active_mm = mm;
11291         activate_mm(active_mm, mm);
11292         tsk->mm->vmacache_seqnum = 0;
11293         vmacache_flush(tsk);
11294 +       preempt_enable_rt();
11295         task_unlock(tsk);
11296         if (old_mm) {
11297                 up_read(&old_mm->mmap_sem);
11298 diff -Nur linux-4.4.46.orig/fs/f2fs/f2fs.h linux-4.4.46/fs/f2fs/f2fs.h
11299 --- linux-4.4.46.orig/fs/f2fs/f2fs.h    2017-02-01 08:31:11.000000000 +0100
11300 +++ linux-4.4.46/fs/f2fs/f2fs.h 2017-02-03 17:18:10.899617976 +0100
11301 @@ -24,7 +24,6 @@
11303  #ifdef CONFIG_F2FS_CHECK_FS
11304  #define f2fs_bug_on(sbi, condition)    BUG_ON(condition)
11305 -#define f2fs_down_write(x, y)  down_write_nest_lock(x, y)
11306  #else
11307  #define f2fs_bug_on(sbi, condition)                                    \
11308         do {                                                            \
11309 @@ -33,7 +32,6 @@
11310                         set_sbi_flag(sbi, SBI_NEED_FSCK);               \
11311                 }                                                       \
11312         } while (0)
11313 -#define f2fs_down_write(x, y)  down_write(x)
11314  #endif
11316  /*
11317 @@ -959,7 +957,7 @@
11319  static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
11321 -       f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
11322 +       down_write(&sbi->cp_rwsem);
11325  static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
11326 diff -Nur linux-4.4.46.orig/fs/jbd2/checkpoint.c linux-4.4.46/fs/jbd2/checkpoint.c
11327 --- linux-4.4.46.orig/fs/jbd2/checkpoint.c      2017-02-01 08:31:11.000000000 +0100
11328 +++ linux-4.4.46/fs/jbd2/checkpoint.c   2017-02-03 17:18:10.899617976 +0100
11329 @@ -116,6 +116,8 @@
11330         nblocks = jbd2_space_needed(journal);
11331         while (jbd2_log_space_left(journal) < nblocks) {
11332                 write_unlock(&journal->j_state_lock);
11333 +               if (current->plug)
11334 +                       io_schedule();
11335                 mutex_lock(&journal->j_checkpoint_mutex);
11337                 /*
11338 diff -Nur linux-4.4.46.orig/fs/namespace.c linux-4.4.46/fs/namespace.c
11339 --- linux-4.4.46.orig/fs/namespace.c    2017-02-01 08:31:11.000000000 +0100
11340 +++ linux-4.4.46/fs/namespace.c 2017-02-03 17:18:10.899617976 +0100
11341 @@ -14,6 +14,7 @@
11342  #include <linux/mnt_namespace.h>
11343  #include <linux/user_namespace.h>
11344  #include <linux/namei.h>
11345 +#include <linux/delay.h>
11346  #include <linux/security.h>
11347  #include <linux/idr.h>
11348  #include <linux/init.h>                /* init_rootfs */
11349 @@ -353,8 +354,11 @@
11350          * incremented count after it has set MNT_WRITE_HOLD.
11351          */
11352         smp_mb();
11353 -       while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
11354 -               cpu_relax();
11355 +       while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
11356 +               preempt_enable();
11357 +               cpu_chill();
11358 +               preempt_disable();
11359 +       }
11360         /*
11361          * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
11362          * be set to match its requirements. So we must not load that until
11363 diff -Nur linux-4.4.46.orig/fs/ntfs/aops.c linux-4.4.46/fs/ntfs/aops.c
11364 --- linux-4.4.46.orig/fs/ntfs/aops.c    2017-02-01 08:31:11.000000000 +0100
11365 +++ linux-4.4.46/fs/ntfs/aops.c 2017-02-03 17:18:10.899617976 +0100
11366 @@ -107,8 +107,7 @@
11367                                 "0x%llx.", (unsigned long long)bh->b_blocknr);
11368         }
11369         first = page_buffers(page);
11370 -       local_irq_save(flags);
11371 -       bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
11372 +       flags = bh_uptodate_lock_irqsave(first);
11373         clear_buffer_async_read(bh);
11374         unlock_buffer(bh);
11375         tmp = bh;
11376 @@ -123,8 +122,7 @@
11377                 }
11378                 tmp = tmp->b_this_page;
11379         } while (tmp != bh);
11380 -       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11381 -       local_irq_restore(flags);
11382 +       bh_uptodate_unlock_irqrestore(first, flags);
11383         /*
11384          * If none of the buffers had errors then we can set the page uptodate,
11385          * but we first have to perform the post read mst fixups, if the
11386 @@ -145,13 +143,13 @@
11387                 recs = PAGE_CACHE_SIZE / rec_size;
11388                 /* Should have been verified before we got here... */
11389                 BUG_ON(!recs);
11390 -               local_irq_save(flags);
11391 +               local_irq_save_nort(flags);
11392                 kaddr = kmap_atomic(page);
11393                 for (i = 0; i < recs; i++)
11394                         post_read_mst_fixup((NTFS_RECORD*)(kaddr +
11395                                         i * rec_size), rec_size);
11396                 kunmap_atomic(kaddr);
11397 -               local_irq_restore(flags);
11398 +               local_irq_restore_nort(flags);
11399                 flush_dcache_page(page);
11400                 if (likely(page_uptodate && !PageError(page)))
11401                         SetPageUptodate(page);
11402 @@ -159,9 +157,7 @@
11403         unlock_page(page);
11404         return;
11405  still_busy:
11406 -       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
11407 -       local_irq_restore(flags);
11408 -       return;
11409 +       bh_uptodate_unlock_irqrestore(first, flags);
11412  /**
11413 diff -Nur linux-4.4.46.orig/fs/timerfd.c linux-4.4.46/fs/timerfd.c
11414 --- linux-4.4.46.orig/fs/timerfd.c      2017-02-01 08:31:11.000000000 +0100
11415 +++ linux-4.4.46/fs/timerfd.c   2017-02-03 17:18:10.899617976 +0100
11416 @@ -450,7 +450,10 @@
11417                                 break;
11418                 }
11419                 spin_unlock_irq(&ctx->wqh.lock);
11420 -               cpu_relax();
11421 +               if (isalarm(ctx))
11422 +                       hrtimer_wait_for_timer(&ctx->t.alarm.timer);
11423 +               else
11424 +                       hrtimer_wait_for_timer(&ctx->t.tmr);
11425         }
11427         /*
11428 diff -Nur linux-4.4.46.orig/include/acpi/platform/aclinux.h linux-4.4.46/include/acpi/platform/aclinux.h
11429 --- linux-4.4.46.orig/include/acpi/platform/aclinux.h   2017-02-01 08:31:11.000000000 +0100
11430 +++ linux-4.4.46/include/acpi/platform/aclinux.h        2017-02-03 17:18:10.899617976 +0100
11431 @@ -127,6 +127,7 @@
11433  #define acpi_cache_t                        struct kmem_cache
11434  #define acpi_spinlock                       spinlock_t *
11435 +#define acpi_raw_spinlock              raw_spinlock_t *
11436  #define acpi_cpu_flags                      unsigned long
11438  /* Use native linux version of acpi_os_allocate_zeroed */
11439 @@ -145,6 +146,20 @@
11440  #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
11441  #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
11443 +#define acpi_os_create_raw_lock(__handle)                      \
11444 +({                                                             \
11445 +        raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock));   \
11446 +                                                               \
11447 +        if (lock) {                                            \
11448 +               *(__handle) = lock;                             \
11449 +               raw_spin_lock_init(*(__handle));                \
11450 +        }                                                      \
11451 +        lock ? AE_OK : AE_NO_MEMORY;                           \
11452 + })
11454 +#define acpi_os_delete_raw_lock(__handle)      kfree(__handle)
11457  /*
11458   * OSL interfaces used by debugger/disassembler
11459   */
11460 diff -Nur linux-4.4.46.orig/include/asm-generic/bug.h linux-4.4.46/include/asm-generic/bug.h
11461 --- linux-4.4.46.orig/include/asm-generic/bug.h 2017-02-01 08:31:11.000000000 +0100
11462 +++ linux-4.4.46/include/asm-generic/bug.h      2017-02-03 17:18:10.903618130 +0100
11463 @@ -206,6 +206,20 @@
11464  # define WARN_ON_SMP(x)                        ({0;})
11465  #endif
11467 +#ifdef CONFIG_PREEMPT_RT_BASE
11468 +# define BUG_ON_RT(c)                  BUG_ON(c)
11469 +# define BUG_ON_NONRT(c)               do { } while (0)
11470 +# define WARN_ON_RT(condition)         WARN_ON(condition)
11471 +# define WARN_ON_NONRT(condition)      do { } while (0)
11472 +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
11473 +#else
11474 +# define BUG_ON_RT(c)                  do { } while (0)
11475 +# define BUG_ON_NONRT(c)               BUG_ON(c)
11476 +# define WARN_ON_RT(condition)         do { } while (0)
11477 +# define WARN_ON_NONRT(condition)      WARN_ON(condition)
11478 +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
11479 +#endif
11481  #endif /* __ASSEMBLY__ */
11483  #endif
11484 diff -Nur linux-4.4.46.orig/include/asm-generic/preempt.h linux-4.4.46/include/asm-generic/preempt.h
11485 --- linux-4.4.46.orig/include/asm-generic/preempt.h     2017-02-01 08:31:11.000000000 +0100
11486 +++ linux-4.4.46/include/asm-generic/preempt.h  2017-02-03 17:18:10.903618130 +0100
11487 @@ -7,10 +7,10 @@
11489  static __always_inline int preempt_count(void)
11491 -       return current_thread_info()->preempt_count;
11492 +       return READ_ONCE(current_thread_info()->preempt_count);
11495 -static __always_inline int *preempt_count_ptr(void)
11496 +static __always_inline volatile int *preempt_count_ptr(void)
11498         return &current_thread_info()->preempt_count;
11500 diff -Nur linux-4.4.46.orig/include/linux/blkdev.h linux-4.4.46/include/linux/blkdev.h
11501 --- linux-4.4.46.orig/include/linux/blkdev.h    2017-02-01 08:31:11.000000000 +0100
11502 +++ linux-4.4.46/include/linux/blkdev.h 2017-02-03 17:18:10.903618130 +0100
11503 @@ -89,6 +89,7 @@
11504         struct list_head queuelist;
11505         union {
11506                 struct call_single_data csd;
11507 +               struct work_struct work;
11508                 unsigned long fifo_time;
11509         };
11511 @@ -455,7 +456,7 @@
11512         struct throtl_data *td;
11513  #endif
11514         struct rcu_head         rcu_head;
11515 -       wait_queue_head_t       mq_freeze_wq;
11516 +       struct swait_queue_head mq_freeze_wq;
11517         struct percpu_ref       q_usage_counter;
11518         struct list_head        all_q_node;
11520 diff -Nur linux-4.4.46.orig/include/linux/blk-mq.h linux-4.4.46/include/linux/blk-mq.h
11521 --- linux-4.4.46.orig/include/linux/blk-mq.h    2017-02-01 08:31:11.000000000 +0100
11522 +++ linux-4.4.46/include/linux/blk-mq.h 2017-02-03 17:18:10.903618130 +0100
11523 @@ -212,6 +212,7 @@
11525  struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
11526  struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
11527 +void __blk_mq_complete_request_remote_work(struct work_struct *work);
11529  int blk_mq_request_started(struct request *rq);
11530  void blk_mq_start_request(struct request *rq);
11531 diff -Nur linux-4.4.46.orig/include/linux/bottom_half.h linux-4.4.46/include/linux/bottom_half.h
11532 --- linux-4.4.46.orig/include/linux/bottom_half.h       2017-02-01 08:31:11.000000000 +0100
11533 +++ linux-4.4.46/include/linux/bottom_half.h    2017-02-03 17:18:10.903618130 +0100
11534 @@ -3,6 +3,39 @@
11536  #include <linux/preempt.h>
11538 +#ifdef CONFIG_PREEMPT_RT_FULL
11540 +extern void __local_bh_disable(void);
11541 +extern void _local_bh_enable(void);
11542 +extern void __local_bh_enable(void);
11544 +static inline void local_bh_disable(void)
11546 +       __local_bh_disable();
11549 +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
11551 +       __local_bh_disable();
11554 +static inline void local_bh_enable(void)
11556 +       __local_bh_enable();
11559 +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
11561 +       __local_bh_enable();
11564 +static inline void local_bh_enable_ip(unsigned long ip)
11566 +       __local_bh_enable();
11569 +#else
11571  #ifdef CONFIG_TRACE_IRQFLAGS
11572  extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
11573  #else
11574 @@ -30,5 +63,6 @@
11576         __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
11578 +#endif
11580  #endif /* _LINUX_BH_H */
11581 diff -Nur linux-4.4.46.orig/include/linux/buffer_head.h linux-4.4.46/include/linux/buffer_head.h
11582 --- linux-4.4.46.orig/include/linux/buffer_head.h       2017-02-01 08:31:11.000000000 +0100
11583 +++ linux-4.4.46/include/linux/buffer_head.h    2017-02-03 17:18:10.903618130 +0100
11584 @@ -75,8 +75,50 @@
11585         struct address_space *b_assoc_map;      /* mapping this buffer is
11586                                                    associated with */
11587         atomic_t b_count;               /* users using this buffer_head */
11588 +#ifdef CONFIG_PREEMPT_RT_BASE
11589 +       spinlock_t b_uptodate_lock;
11590 +#if IS_ENABLED(CONFIG_JBD2)
11591 +       spinlock_t b_state_lock;
11592 +       spinlock_t b_journal_head_lock;
11593 +#endif
11594 +#endif
11595  };
11597 +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
11599 +       unsigned long flags;
11601 +#ifndef CONFIG_PREEMPT_RT_BASE
11602 +       local_irq_save(flags);
11603 +       bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
11604 +#else
11605 +       spin_lock_irqsave(&bh->b_uptodate_lock, flags);
11606 +#endif
11607 +       return flags;
11610 +static inline void
11611 +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
11613 +#ifndef CONFIG_PREEMPT_RT_BASE
11614 +       bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
11615 +       local_irq_restore(flags);
11616 +#else
11617 +       spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
11618 +#endif
11621 +static inline void buffer_head_init_locks(struct buffer_head *bh)
11623 +#ifdef CONFIG_PREEMPT_RT_BASE
11624 +       spin_lock_init(&bh->b_uptodate_lock);
11625 +#if IS_ENABLED(CONFIG_JBD2)
11626 +       spin_lock_init(&bh->b_state_lock);
11627 +       spin_lock_init(&bh->b_journal_head_lock);
11628 +#endif
11629 +#endif
11632  /*
11633   * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
11634   * and buffer_foo() functions.
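The buffer_head hunk above wraps the BH_Uptodate_Lock bit spinlock behind helpers so RT can substitute the per-buffer b_uptodate_lock spinlock. A minimal sketch of a caller; demo_end_buffer_io() is illustrative and not part of the patch:

#include <linux/buffer_head.h>

/* Same call sequence on !RT (bit_spin_lock(BH_Uptodate_Lock) with IRQs off)
 * and on RT (the new per-buffer b_uptodate_lock spinlock). */
static void demo_end_buffer_io(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;

	flags = bh_uptodate_lock_irqsave(bh);
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	bh_uptodate_unlock_irqrestore(bh, flags);
}
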
11635 diff -Nur linux-4.4.46.orig/include/linux/cgroup-defs.h linux-4.4.46/include/linux/cgroup-defs.h
11636 --- linux-4.4.46.orig/include/linux/cgroup-defs.h       2017-02-01 08:31:11.000000000 +0100
11637 +++ linux-4.4.46/include/linux/cgroup-defs.h    2017-02-03 17:18:10.903618130 +0100
11638 @@ -16,6 +16,7 @@
11639  #include <linux/percpu-refcount.h>
11640  #include <linux/percpu-rwsem.h>
11641  #include <linux/workqueue.h>
11642 +#include <linux/swork.h>
11644  #ifdef CONFIG_CGROUPS
11646 @@ -142,6 +143,7 @@
11647         /* percpu_ref killing and RCU release */
11648         struct rcu_head rcu_head;
11649         struct work_struct destroy_work;
11650 +       struct swork_event destroy_swork;
11651  };
11653  /*
11654 diff -Nur linux-4.4.46.orig/include/linux/clk/at91_pmc.h linux-4.4.46/include/linux/clk/at91_pmc.h
11655 --- linux-4.4.46.orig/include/linux/clk/at91_pmc.h      2017-02-01 08:31:11.000000000 +0100
11656 +++ linux-4.4.46/include/linux/clk/at91_pmc.h   2017-02-03 17:18:10.903618130 +0100
11657 @@ -16,18 +16,6 @@
11658  #ifndef AT91_PMC_H
11659  #define AT91_PMC_H
11661 -#ifndef __ASSEMBLY__
11662 -extern void __iomem *at91_pmc_base;
11664 -#define at91_pmc_read(field) \
11665 -       readl_relaxed(at91_pmc_base + field)
11667 -#define at91_pmc_write(field, value) \
11668 -       writel_relaxed(value, at91_pmc_base + field)
11669 -#else
11670 -.extern at91_pmc_base
11671 -#endif
11673  #define        AT91_PMC_SCER           0x00                    /* System Clock Enable Register */
11674  #define        AT91_PMC_SCDR           0x04                    /* System Clock Disable Register */
11676 diff -Nur linux-4.4.46.orig/include/linux/completion.h linux-4.4.46/include/linux/completion.h
11677 --- linux-4.4.46.orig/include/linux/completion.h        2017-02-01 08:31:11.000000000 +0100
11678 +++ linux-4.4.46/include/linux/completion.h     2017-02-03 17:18:10.903618130 +0100
11679 @@ -7,8 +7,7 @@
11680   * Atomic wait-for-completion handler data structures.
11681   * See kernel/sched/completion.c for details.
11682   */
11684 -#include <linux/wait.h>
11685 +#include <linux/swait.h>
11687  /*
11688   * struct completion - structure used to maintain state for a "completion"
11689 @@ -24,11 +23,11 @@
11690   */
11691  struct completion {
11692         unsigned int done;
11693 -       wait_queue_head_t wait;
11694 +       struct swait_queue_head wait;
11695  };
11697  #define COMPLETION_INITIALIZER(work) \
11698 -       { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
11699 +       { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
11701  #define COMPLETION_INITIALIZER_ONSTACK(work) \
11702         ({ init_completion(&work); work; })
11703 @@ -73,7 +72,7 @@
11704  static inline void init_completion(struct completion *x)
11706         x->done = 0;
11707 -       init_waitqueue_head(&x->wait);
11708 +       init_swait_queue_head(&x->wait);
11711  /**
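The completion conversion above only swaps the internal waitqueue for a simple wait queue (swait); the wait_for_completion()/complete() API is unchanged. A minimal sketch of that unchanged usage, with demo_done and the demo_* functions purely illustrative:

#include <linux/completion.h>

static DECLARE_COMPLETION(demo_done);		/* illustrative completion */

static void demo_producer(void)
{
	/* ... finish some work ... */
	complete(&demo_done);			/* wakes a waiter on the swait queue */
}

static void demo_consumer(void)
{
	wait_for_completion(&demo_done);	/* sleeps on the swait queue */
}
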
11712 diff -Nur linux-4.4.46.orig/include/linux/cpu.h linux-4.4.46/include/linux/cpu.h
11713 --- linux-4.4.46.orig/include/linux/cpu.h       2017-02-01 08:31:11.000000000 +0100
11714 +++ linux-4.4.46/include/linux/cpu.h    2017-02-03 17:18:10.903618130 +0100
11715 @@ -224,6 +224,8 @@
11716  extern void put_online_cpus(void);
11717  extern void cpu_hotplug_disable(void);
11718  extern void cpu_hotplug_enable(void);
11719 +extern void pin_current_cpu(void);
11720 +extern void unpin_current_cpu(void);
11721  #define hotcpu_notifier(fn, pri)       cpu_notifier(fn, pri)
11722  #define __hotcpu_notifier(fn, pri)     __cpu_notifier(fn, pri)
11723  #define register_hotcpu_notifier(nb)   register_cpu_notifier(nb)
11724 @@ -241,6 +243,8 @@
11725  #define put_online_cpus()      do { } while (0)
11726  #define cpu_hotplug_disable()  do { } while (0)
11727  #define cpu_hotplug_enable()   do { } while (0)
11728 +static inline void pin_current_cpu(void) { }
11729 +static inline void unpin_current_cpu(void) { }
11730  #define hotcpu_notifier(fn, pri)       do { (void)(fn); } while (0)
11731  #define __hotcpu_notifier(fn, pri)     do { (void)(fn); } while (0)
11732  /* These aren't inline functions due to a GCC bug. */
11733 diff -Nur linux-4.4.46.orig/include/linux/delay.h linux-4.4.46/include/linux/delay.h
11734 --- linux-4.4.46.orig/include/linux/delay.h     2017-02-01 08:31:11.000000000 +0100
11735 +++ linux-4.4.46/include/linux/delay.h  2017-02-03 17:18:10.903618130 +0100
11736 @@ -52,4 +52,10 @@
11737         msleep(seconds * 1000);
11740 +#ifdef CONFIG_PREEMPT_RT_FULL
11741 +extern void cpu_chill(void);
11742 +#else
11743 +# define cpu_chill()   cpu_relax()
11744 +#endif
11746  #endif /* defined(_LINUX_DELAY_H) */
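cpu_chill() above gives RT retry loops a short sleep instead of a busy cpu_relax() spin against a possibly preempted owner. A minimal sketch of the intended pattern; demo_try_get() is an illustrative stand-in for any trylock-style operation:

#include <linux/types.h>
#include <linux/delay.h>

extern bool demo_try_get(void);	/* illustrative, stands for any trylock */

static void demo_wait_for_resource(void)
{
	/* !RT: cpu_chill() is cpu_relax(); RT: a short sleep so the task
	 * currently holding the resource can run and release it. */
	while (!demo_try_get())
		cpu_chill();
}
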
11747 diff -Nur linux-4.4.46.orig/include/linux/ftrace.h linux-4.4.46/include/linux/ftrace.h
11748 --- linux-4.4.46.orig/include/linux/ftrace.h    2017-02-01 08:31:11.000000000 +0100
11749 +++ linux-4.4.46/include/linux/ftrace.h 2017-02-03 17:18:10.903618130 +0100
11750 @@ -694,6 +694,18 @@
11751  #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
11752  #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
11754 +static inline unsigned long get_lock_parent_ip(void)
11756 +       unsigned long addr = CALLER_ADDR0;
11758 +       if (!in_lock_functions(addr))
11759 +               return addr;
11760 +       addr = CALLER_ADDR1;
11761 +       if (!in_lock_functions(addr))
11762 +               return addr;
11763 +       return CALLER_ADDR2;
11766  #ifdef CONFIG_IRQSOFF_TRACER
11767    extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
11768    extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
11769 diff -Nur linux-4.4.46.orig/include/linux/highmem.h linux-4.4.46/include/linux/highmem.h
11770 --- linux-4.4.46.orig/include/linux/highmem.h   2017-02-01 08:31:11.000000000 +0100
11771 +++ linux-4.4.46/include/linux/highmem.h        2017-02-03 17:18:10.903618130 +0100
11772 @@ -7,6 +7,7 @@
11773  #include <linux/mm.h>
11774  #include <linux/uaccess.h>
11775  #include <linux/hardirq.h>
11776 +#include <linux/sched.h>
11778  #include <asm/cacheflush.h>
11780 @@ -65,7 +66,7 @@
11782  static inline void *kmap_atomic(struct page *page)
11784 -       preempt_disable();
11785 +       preempt_disable_nort();
11786         pagefault_disable();
11787         return page_address(page);
11789 @@ -74,7 +75,7 @@
11790  static inline void __kunmap_atomic(void *addr)
11792         pagefault_enable();
11793 -       preempt_enable();
11794 +       preempt_enable_nort();
11797  #define kmap_atomic_pfn(pfn)   kmap_atomic(pfn_to_page(pfn))
11798 @@ -86,32 +87,51 @@
11800  #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
11802 +#ifndef CONFIG_PREEMPT_RT_FULL
11803  DECLARE_PER_CPU(int, __kmap_atomic_idx);
11804 +#endif
11806  static inline int kmap_atomic_idx_push(void)
11808 +#ifndef CONFIG_PREEMPT_RT_FULL
11809         int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
11811 -#ifdef CONFIG_DEBUG_HIGHMEM
11812 +# ifdef CONFIG_DEBUG_HIGHMEM
11813         WARN_ON_ONCE(in_irq() && !irqs_disabled());
11814         BUG_ON(idx >= KM_TYPE_NR);
11815 -#endif
11816 +# endif
11817         return idx;
11818 +#else
11819 +       current->kmap_idx++;
11820 +       BUG_ON(current->kmap_idx > KM_TYPE_NR);
11821 +       return current->kmap_idx - 1;
11822 +#endif
11825  static inline int kmap_atomic_idx(void)
11827 +#ifndef CONFIG_PREEMPT_RT_FULL
11828         return __this_cpu_read(__kmap_atomic_idx) - 1;
11829 +#else
11830 +       return current->kmap_idx - 1;
11831 +#endif
11834  static inline void kmap_atomic_idx_pop(void)
11836 -#ifdef CONFIG_DEBUG_HIGHMEM
11837 +#ifndef CONFIG_PREEMPT_RT_FULL
11838 +# ifdef CONFIG_DEBUG_HIGHMEM
11839         int idx = __this_cpu_dec_return(__kmap_atomic_idx);
11841         BUG_ON(idx < 0);
11842 -#else
11843 +# else
11844         __this_cpu_dec(__kmap_atomic_idx);
11845 +# endif
11846 +#else
11847 +       current->kmap_idx--;
11848 +# ifdef CONFIG_DEBUG_HIGHMEM
11849 +       BUG_ON(current->kmap_idx < 0);
11850 +# endif
11851  #endif
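The highmem changes above keep kmap_atomic() usable on RT by tracking the atomic-map index per task (current->kmap_idx) rather than per CPU and by using preempt_disable_nort(). Callers are unchanged; a minimal sketch with demo_zero_page() as an illustrative helper:

#include <linux/highmem.h>
#include <linux/string.h>

static void demo_zero_page(struct page *page)
{
	void *addr = kmap_atomic(page);	/* preempt_disable_nort() + per-task idx */

	memset(addr, 0, PAGE_SIZE);
	kunmap_atomic(addr);		/* pops the index again */
}
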
11854 diff -Nur linux-4.4.46.orig/include/linux/hrtimer.h linux-4.4.46/include/linux/hrtimer.h
11855 --- linux-4.4.46.orig/include/linux/hrtimer.h   2017-02-01 08:31:11.000000000 +0100
11856 +++ linux-4.4.46/include/linux/hrtimer.h        2017-02-03 17:18:10.903618130 +0100
11857 @@ -87,6 +87,9 @@
11858   * @function:  timer expiry callback function
11859   * @base:      pointer to the timer base (per cpu and per clock)
11860   * @state:     state information (See bit values above)
11861 + * @cb_entry:  list entry to defer timers from hardirq context
11862 + * @irqsafe:   timer can run in hardirq context
11863 + * @praecox:   timer expiry time if expired at the time of programming
11864   * @is_rel:    Set if the timer was armed relative
11865   * @start_pid:  timer statistics field to store the pid of the task which
11866   *             started the timer
11867 @@ -103,6 +106,11 @@
11868         enum hrtimer_restart            (*function)(struct hrtimer *);
11869         struct hrtimer_clock_base       *base;
11870         u8                              state;
11871 +       struct list_head                cb_entry;
11872 +       int                             irqsafe;
11873 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
11874 +       ktime_t                         praecox;
11875 +#endif
11876         u8                              is_rel;
11877  #ifdef CONFIG_TIMER_STATS
11878         int                             start_pid;
11879 @@ -123,11 +131,7 @@
11880         struct task_struct *task;
11881  };
11883 -#ifdef CONFIG_64BIT
11884  # define HRTIMER_CLOCK_BASE_ALIGN      64
11885 -#else
11886 -# define HRTIMER_CLOCK_BASE_ALIGN      32
11887 -#endif
11889  /**
11890   * struct hrtimer_clock_base - the timer base for a specific clock
11891 @@ -136,6 +140,7 @@
11892   *                     timer to a base on another cpu.
11893   * @clockid:           clock id for per_cpu support
11894   * @active:            red black tree root node for the active timers
11895 + * @expired:           list head for deferred timers.
11896   * @get_time:          function to retrieve the current time of the clock
11897   * @offset:            offset of this clock to the monotonic base
11898   */
11899 @@ -144,6 +149,7 @@
11900         int                     index;
11901         clockid_t               clockid;
11902         struct timerqueue_head  active;
11903 +       struct list_head        expired;
11904         ktime_t                 (*get_time)(void);
11905         ktime_t                 offset;
11906  } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
11907 @@ -187,6 +193,7 @@
11908         raw_spinlock_t                  lock;
11909         seqcount_t                      seq;
11910         struct hrtimer                  *running;
11911 +       struct hrtimer                  *running_soft;
11912         unsigned int                    cpu;
11913         unsigned int                    active_bases;
11914         unsigned int                    clock_was_set_seq;
11915 @@ -203,6 +210,9 @@
11916         unsigned int                    nr_hangs;
11917         unsigned int                    max_hang_time;
11918  #endif
11919 +#ifdef CONFIG_PREEMPT_RT_BASE
11920 +       wait_queue_head_t               wait;
11921 +#endif
11922         struct hrtimer_clock_base       clock_base[HRTIMER_MAX_CLOCK_BASES];
11923  } ____cacheline_aligned;
11925 @@ -412,6 +422,13 @@
11926         hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
11929 +/* Softirq preemption could deadlock timer removal */
11930 +#ifdef CONFIG_PREEMPT_RT_BASE
11931 +  extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
11932 +#else
11933 +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
11934 +#endif
11936  /* Query timers: */
11937  extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
11939 @@ -436,7 +453,7 @@
11940   * Helper function to check, whether the timer is running the callback
11941   * function
11942   */
11943 -static inline int hrtimer_callback_running(struct hrtimer *timer)
11944 +static inline int hrtimer_callback_running(const struct hrtimer *timer)
11946         return timer->base->cpu_base->running == timer;
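The cb_entry/irqsafe fields added above let RT defer hrtimer callbacks to softirq context unless a timer is explicitly marked irqsafe. A minimal sketch of setting the new field; all demo_* names are illustrative:

#include <linux/hrtimer.h>

static struct hrtimer demo_timer;

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

static void demo_timer_setup(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	demo_timer.irqsafe = 1;	/* field added above: run in hardirq even on RT */
	hrtimer_start(&demo_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
}
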
11948 diff -Nur linux-4.4.46.orig/include/linux/idr.h linux-4.4.46/include/linux/idr.h
11949 --- linux-4.4.46.orig/include/linux/idr.h       2017-02-01 08:31:11.000000000 +0100
11950 +++ linux-4.4.46/include/linux/idr.h    2017-02-03 17:18:10.903618130 +0100
11951 @@ -95,10 +95,14 @@
11952   * Each idr_preload() should be matched with an invocation of this
11953   * function.  See idr_preload() for details.
11954   */
11955 +#ifdef CONFIG_PREEMPT_RT_FULL
11956 +void idr_preload_end(void);
11957 +#else
11958  static inline void idr_preload_end(void)
11960         preempt_enable();
11962 +#endif
11964  /**
11965   * idr_find - return pointer for given id
11966 diff -Nur linux-4.4.46.orig/include/linux/init_task.h linux-4.4.46/include/linux/init_task.h
11967 --- linux-4.4.46.orig/include/linux/init_task.h 2017-02-01 08:31:11.000000000 +0100
11968 +++ linux-4.4.46/include/linux/init_task.h      2017-02-03 17:18:10.903618130 +0100
11969 @@ -148,9 +148,15 @@
11970  # define INIT_PERF_EVENTS(tsk)
11971  #endif
11973 +#ifdef CONFIG_PREEMPT_RT_BASE
11974 +# define INIT_TIMER_LIST               .posix_timer_list = NULL,
11975 +#else
11976 +# define INIT_TIMER_LIST
11977 +#endif
11979  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
11980  # define INIT_VTIME(tsk)                                               \
11981 -       .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
11982 +       .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount),      \
11983         .vtime_snap = 0,                                \
11984         .vtime_snap_whence = VTIME_SYS,
11985  #else
11986 @@ -239,6 +245,7 @@
11987         .cpu_timers     = INIT_CPU_TIMERS(tsk.cpu_timers),              \
11988         .pi_lock        = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),        \
11989         .timer_slack_ns = 50000, /* 50 usec default slack */            \
11990 +       INIT_TIMER_LIST                                                 \
11991         .pids = {                                                       \
11992                 [PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),            \
11993                 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),           \
11994 diff -Nur linux-4.4.46.orig/include/linux/interrupt.h linux-4.4.46/include/linux/interrupt.h
11995 --- linux-4.4.46.orig/include/linux/interrupt.h 2017-02-01 08:31:11.000000000 +0100
11996 +++ linux-4.4.46/include/linux/interrupt.h      2017-02-03 17:18:10.903618130 +0100
11997 @@ -61,6 +61,7 @@
11998   *                interrupt handler after suspending interrupts. For system
11999   *                wakeup devices users need to implement wakeup detection in
12000   *                their interrupt handlers.
12001 + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
12002   */
12003  #define IRQF_SHARED            0x00000080
12004  #define IRQF_PROBE_SHARED      0x00000100
12005 @@ -74,6 +75,7 @@
12006  #define IRQF_NO_THREAD         0x00010000
12007  #define IRQF_EARLY_RESUME      0x00020000
12008  #define IRQF_COND_SUSPEND      0x00040000
12009 +#define IRQF_NO_SOFTIRQ_CALL   0x00080000
12011  #define IRQF_TIMER             (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
12013 @@ -186,7 +188,7 @@
12014  #ifdef CONFIG_LOCKDEP
12015  # define local_irq_enable_in_hardirq() do { } while (0)
12016  #else
12017 -# define local_irq_enable_in_hardirq() local_irq_enable()
12018 +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
12019  #endif
12021  extern void disable_irq_nosync(unsigned int irq);
12022 @@ -206,6 +208,7 @@
12023   * @irq:               Interrupt to which notification applies
12024   * @kref:              Reference count, for internal use
12025   * @work:              Work item, for internal use
12026 + * @list:              List item for deferred callbacks
12027   * @notify:            Function to be called on change.  This will be
12028   *                     called in process context.
12029   * @release:           Function to be called on release.  This will be
12030 @@ -217,6 +220,7 @@
12031         unsigned int irq;
12032         struct kref kref;
12033         struct work_struct work;
12034 +       struct list_head list;
12035         void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
12036         void (*release)(struct kref *ref);
12037  };
12038 @@ -379,9 +383,13 @@
12039                                  bool state);
12041  #ifdef CONFIG_IRQ_FORCED_THREADING
12042 +# ifndef CONFIG_PREEMPT_RT_BASE
12043  extern bool force_irqthreads;
12044 +# else
12045 +#  define force_irqthreads     (true)
12046 +# endif
12047  #else
12048 -#define force_irqthreads       (0)
12049 +#define force_irqthreads       (false)
12050  #endif
12052  #ifndef __ARCH_SET_SOFTIRQ_PENDING
12053 @@ -438,9 +446,10 @@
12054         void    (*action)(struct softirq_action *);
12055  };
12057 +#ifndef CONFIG_PREEMPT_RT_FULL
12058  asmlinkage void do_softirq(void);
12059  asmlinkage void __do_softirq(void);
12061 +static inline void thread_do_softirq(void) { do_softirq(); }
12062  #ifdef __ARCH_HAS_DO_SOFTIRQ
12063  void do_softirq_own_stack(void);
12064  #else
12065 @@ -449,13 +458,25 @@
12066         __do_softirq();
12068  #endif
12069 +#else
12070 +extern void thread_do_softirq(void);
12071 +#endif
12073  extern void open_softirq(int nr, void (*action)(struct softirq_action *));
12074  extern void softirq_init(void);
12075  extern void __raise_softirq_irqoff(unsigned int nr);
12076 +#ifdef CONFIG_PREEMPT_RT_FULL
12077 +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
12078 +#else
12079 +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
12081 +       __raise_softirq_irqoff(nr);
12083 +#endif
12085  extern void raise_softirq_irqoff(unsigned int nr);
12086  extern void raise_softirq(unsigned int nr);
12087 +extern void softirq_check_pending_idle(void);
12089  DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
12091 @@ -477,8 +498,9 @@
12092       to be executed on some cpu at least once after this.
12093     * If the tasklet is already scheduled, but its execution is still not
12094       started, it will be executed only once.
12095 -   * If this tasklet is already running on another CPU (or schedule is called
12096 -     from tasklet itself), it is rescheduled for later.
12097 +   * If this tasklet is already running on another CPU, it is rescheduled
12098 +     for later.
12099 +   * Schedule must not be called from the tasklet itself (a lockup occurs)
12100     * Tasklet is strictly serialized wrt itself, but not
12101       wrt another tasklets. If client needs some intertask synchronization,
12102       he makes it with spinlocks.
12103 @@ -503,27 +525,36 @@
12104  enum
12106         TASKLET_STATE_SCHED,    /* Tasklet is scheduled for execution */
12107 -       TASKLET_STATE_RUN       /* Tasklet is running (SMP only) */
12108 +       TASKLET_STATE_RUN,      /* Tasklet is running (SMP only) */
12109 +       TASKLET_STATE_PENDING   /* Tasklet is pending */
12110  };
12112 -#ifdef CONFIG_SMP
12113 +#define TASKLET_STATEF_SCHED   (1 << TASKLET_STATE_SCHED)
12114 +#define TASKLET_STATEF_RUN     (1 << TASKLET_STATE_RUN)
12115 +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
12117 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
12118  static inline int tasklet_trylock(struct tasklet_struct *t)
12120         return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
12123 +static inline int tasklet_tryunlock(struct tasklet_struct *t)
12125 +       return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
12128  static inline void tasklet_unlock(struct tasklet_struct *t)
12130         smp_mb__before_atomic();
12131         clear_bit(TASKLET_STATE_RUN, &(t)->state);
12134 -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
12136 -       while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
12138 +extern void tasklet_unlock_wait(struct tasklet_struct *t);
12140  #else
12141  #define tasklet_trylock(t) 1
12142 +#define tasklet_tryunlock(t)   1
12143  #define tasklet_unlock_wait(t) do { } while (0)
12144  #define tasklet_unlock(t) do { } while (0)
12145  #endif
12146 @@ -572,12 +603,7 @@
12147         smp_mb();
12150 -static inline void tasklet_enable(struct tasklet_struct *t)
12152 -       smp_mb__before_atomic();
12153 -       atomic_dec(&t->count);
12156 +extern void tasklet_enable(struct tasklet_struct *t);
12157  extern void tasklet_kill(struct tasklet_struct *t);
12158  extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
12159  extern void tasklet_init(struct tasklet_struct *t,
12160 @@ -608,6 +634,12 @@
12161         tasklet_kill(&ttimer->tasklet);
12164 +#ifdef CONFIG_PREEMPT_RT_FULL
12165 +extern void softirq_early_init(void);
12166 +#else
12167 +static inline void softirq_early_init(void) { }
12168 +#endif
12170  /*
12171   * Autoprobing for irqs:
12172   *
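IRQF_NO_SOFTIRQ_CALL, added above, asks that pending softirqs not be processed in the irq thread context on RT. A hedged minimal sketch of passing the flag to request_irq(); the demo_* names are illustrative and whether a given driver wants this is situational:

#include <linux/interrupt.h>

static irqreturn_t demo_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int demo_request(unsigned int irq, void *dev)
{
	/* RT: do not run pending softirqs from this interrupt's thread. */
	return request_irq(irq, demo_handler, IRQF_NO_SOFTIRQ_CALL,
			   "demo", dev);
}
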
12173 diff -Nur linux-4.4.46.orig/include/linux/irqdesc.h linux-4.4.46/include/linux/irqdesc.h
12174 --- linux-4.4.46.orig/include/linux/irqdesc.h   2017-02-01 08:31:11.000000000 +0100
12175 +++ linux-4.4.46/include/linux/irqdesc.h        2017-02-03 17:18:10.903618130 +0100
12176 @@ -61,6 +61,7 @@
12177         unsigned int            irqs_unhandled;
12178         atomic_t                threads_handled;
12179         int                     threads_handled_last;
12180 +       u64                     random_ip;
12181         raw_spinlock_t          lock;
12182         struct cpumask          *percpu_enabled;
12183  #ifdef CONFIG_SMP
12184 diff -Nur linux-4.4.46.orig/include/linux/irqflags.h linux-4.4.46/include/linux/irqflags.h
12185 --- linux-4.4.46.orig/include/linux/irqflags.h  2017-02-01 08:31:11.000000000 +0100
12186 +++ linux-4.4.46/include/linux/irqflags.h       2017-02-03 17:18:10.903618130 +0100
12187 @@ -25,8 +25,6 @@
12188  # define trace_softirqs_enabled(p)     ((p)->softirqs_enabled)
12189  # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
12190  # define trace_hardirq_exit()  do { current->hardirq_context--; } while (0)
12191 -# define lockdep_softirq_enter()       do { current->softirq_context++; } while (0)
12192 -# define lockdep_softirq_exit()        do { current->softirq_context--; } while (0)
12193  # define INIT_TRACE_IRQFLAGS   .softirqs_enabled = 1,
12194  #else
12195  # define trace_hardirqs_on()           do { } while (0)
12196 @@ -39,9 +37,15 @@
12197  # define trace_softirqs_enabled(p)     0
12198  # define trace_hardirq_enter()         do { } while (0)
12199  # define trace_hardirq_exit()          do { } while (0)
12200 +# define INIT_TRACE_IRQFLAGS
12201 +#endif
12203 +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
12204 +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
12205 +# define lockdep_softirq_exit()         do { current->softirq_context--; } while (0)
12206 +#else
12207  # define lockdep_softirq_enter()       do { } while (0)
12208  # define lockdep_softirq_exit()                do { } while (0)
12209 -# define INIT_TRACE_IRQFLAGS
12210  #endif
12212  #if defined(CONFIG_IRQSOFF_TRACER) || \
12213 @@ -148,4 +152,23 @@
12215  #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
12218 + * local_irq* variants depending on RT/!RT
12219 + */
12220 +#ifdef CONFIG_PREEMPT_RT_FULL
12221 +# define local_irq_disable_nort()      do { } while (0)
12222 +# define local_irq_enable_nort()       do { } while (0)
12223 +# define local_irq_save_nort(flags)    local_save_flags(flags)
12224 +# define local_irq_restore_nort(flags) (void)(flags)
12225 +# define local_irq_disable_rt()                local_irq_disable()
12226 +# define local_irq_enable_rt()         local_irq_enable()
12227 +#else
12228 +# define local_irq_disable_nort()      local_irq_disable()
12229 +# define local_irq_enable_nort()       local_irq_enable()
12230 +# define local_irq_save_nort(flags)    local_irq_save(flags)
12231 +# define local_irq_restore_nort(flags) local_irq_restore(flags)
12232 +# define local_irq_disable_rt()                do { } while (0)
12233 +# define local_irq_enable_rt()         do { } while (0)
12234 +#endif
12236  #endif
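The local_irq_*_nort()/_rt() macros above let code that only needs hardirq protection on !RT keep interrupts enabled on RT, where a sleeping lock is expected to provide the real serialization. A minimal sketch of the substitution; demo_state and demo_update() are illustrative:

#include <linux/irqflags.h>

static unsigned long demo_state;	/* on RT, assumed covered by a sleeping lock */

static void demo_update(void)
{
	unsigned long flags;

	/* !RT: real local_irq_save(); RT: flags saved, IRQs stay enabled. */
	local_irq_save_nort(flags);
	demo_state++;
	local_irq_restore_nort(flags);
}
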
12237 diff -Nur linux-4.4.46.orig/include/linux/irq.h linux-4.4.46/include/linux/irq.h
12238 --- linux-4.4.46.orig/include/linux/irq.h       2017-02-01 08:31:11.000000000 +0100
12239 +++ linux-4.4.46/include/linux/irq.h    2017-02-03 17:18:10.903618130 +0100
12240 @@ -72,6 +72,7 @@
12241   * IRQ_IS_POLLED               - Always polled by another interrupt. Exclude
12242   *                               it from the spurious interrupt detection
12243   *                               mechanism and from core side polling.
12244 + * IRQ_NO_SOFTIRQ_CALL         - No softirq processing in the irq thread context (RT)
12245   * IRQ_DISABLE_UNLAZY          - Disable lazy irq disable
12246   */
12247  enum {
12248 @@ -99,13 +100,14 @@
12249         IRQ_PER_CPU_DEVID       = (1 << 17),
12250         IRQ_IS_POLLED           = (1 << 18),
12251         IRQ_DISABLE_UNLAZY      = (1 << 19),
12252 +       IRQ_NO_SOFTIRQ_CALL     = (1 << 20),
12253  };
12255  #define IRQF_MODIFY_MASK       \
12256         (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
12257          IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
12258          IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
12259 -        IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY)
12260 +        IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL)
12262  #define IRQ_NO_BALANCING_MASK  (IRQ_PER_CPU | IRQ_NO_BALANCING)
12264 diff -Nur linux-4.4.46.orig/include/linux/irq_work.h linux-4.4.46/include/linux/irq_work.h
12265 --- linux-4.4.46.orig/include/linux/irq_work.h  2017-02-01 08:31:11.000000000 +0100
12266 +++ linux-4.4.46/include/linux/irq_work.h       2017-02-03 17:18:10.903618130 +0100
12267 @@ -16,6 +16,7 @@
12268  #define IRQ_WORK_BUSY          2UL
12269  #define IRQ_WORK_FLAGS         3UL
12270  #define IRQ_WORK_LAZY          4UL /* Doesn't want IPI, wait for tick */
12271 +#define IRQ_WORK_HARD_IRQ      8UL /* Run hard IRQ context, even on RT */
12273  struct irq_work {
12274         unsigned long flags;
12275 @@ -51,4 +52,10 @@
12276  static inline void irq_work_run(void) { }
12277  #endif
12279 +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
12280 +void irq_work_tick_soft(void);
12281 +#else
12282 +static inline void irq_work_tick_soft(void) { }
12283 +#endif
12285  #endif /* _LINUX_IRQ_WORK_H */
12286 diff -Nur linux-4.4.46.orig/include/linux/jbd2.h linux-4.4.46/include/linux/jbd2.h
12287 --- linux-4.4.46.orig/include/linux/jbd2.h      2017-02-01 08:31:11.000000000 +0100
12288 +++ linux-4.4.46/include/linux/jbd2.h   2017-02-03 17:18:10.907618284 +0100
12289 @@ -352,32 +352,56 @@
12291  static inline void jbd_lock_bh_state(struct buffer_head *bh)
12293 +#ifndef CONFIG_PREEMPT_RT_BASE
12294         bit_spin_lock(BH_State, &bh->b_state);
12295 +#else
12296 +       spin_lock(&bh->b_state_lock);
12297 +#endif
12300  static inline int jbd_trylock_bh_state(struct buffer_head *bh)
12302 +#ifndef CONFIG_PREEMPT_RT_BASE
12303         return bit_spin_trylock(BH_State, &bh->b_state);
12304 +#else
12305 +       return spin_trylock(&bh->b_state_lock);
12306 +#endif
12309  static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
12311 +#ifndef CONFIG_PREEMPT_RT_BASE
12312         return bit_spin_is_locked(BH_State, &bh->b_state);
12313 +#else
12314 +       return spin_is_locked(&bh->b_state_lock);
12315 +#endif
12318  static inline void jbd_unlock_bh_state(struct buffer_head *bh)
12320 +#ifndef CONFIG_PREEMPT_RT_BASE
12321         bit_spin_unlock(BH_State, &bh->b_state);
12322 +#else
12323 +       spin_unlock(&bh->b_state_lock);
12324 +#endif
12327  static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
12329 +#ifndef CONFIG_PREEMPT_RT_BASE
12330         bit_spin_lock(BH_JournalHead, &bh->b_state);
12331 +#else
12332 +       spin_lock(&bh->b_journal_head_lock);
12333 +#endif
12336  static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
12338 +#ifndef CONFIG_PREEMPT_RT_BASE
12339         bit_spin_unlock(BH_JournalHead, &bh->b_state);
12340 +#else
12341 +       spin_unlock(&bh->b_journal_head_lock);
12342 +#endif
12345  #define J_ASSERT(assert)       BUG_ON(!(assert))
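As with buffer_head, the jbd_*_bh_state()/journal_head wrappers above hide whether a bit spinlock or a per-buffer spinlock is used. A minimal sketch of a caller; demo_peek_journal_head() is illustrative only:

#include <linux/buffer_head.h>
#include <linux/jbd2.h>

static void *demo_peek_journal_head(struct buffer_head *bh)
{
	void *jh;

	jbd_lock_bh_journal_head(bh);	/* bit spinlock or b_journal_head_lock */
	jh = bh->b_private;		/* where jbd2 keeps the journal_head */
	jbd_unlock_bh_journal_head(bh);

	return jh;
}
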
12346 diff -Nur linux-4.4.46.orig/include/linux/kdb.h linux-4.4.46/include/linux/kdb.h
12347 --- linux-4.4.46.orig/include/linux/kdb.h       2017-02-01 08:31:11.000000000 +0100
12348 +++ linux-4.4.46/include/linux/kdb.h    2017-02-03 17:18:10.907618284 +0100
12349 @@ -167,6 +167,7 @@
12350  extern __printf(1, 2) int kdb_printf(const char *, ...);
12351  typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
12353 +#define in_kdb_printk()        (kdb_trap_printk)
12354  extern void kdb_init(int level);
12356  /* Access to kdb specific polling devices */
12357 @@ -201,6 +202,7 @@
12358  extern int kdb_unregister(char *);
12359  #else /* ! CONFIG_KGDB_KDB */
12360  static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
12361 +#define in_kdb_printk() (0)
12362  static inline void kdb_init(int level) {}
12363  static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
12364                                char *help, short minlen) { return 0; }
12365 diff -Nur linux-4.4.46.orig/include/linux/kernel.h linux-4.4.46/include/linux/kernel.h
12366 --- linux-4.4.46.orig/include/linux/kernel.h    2017-02-01 08:31:11.000000000 +0100
12367 +++ linux-4.4.46/include/linux/kernel.h 2017-02-03 17:18:10.907618284 +0100
12368 @@ -188,6 +188,9 @@
12369   */
12370  # define might_sleep() \
12371         do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
12373 +# define might_sleep_no_state_check() \
12374 +       do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
12375  # define sched_annotate_sleep()        (current->task_state_change = 0)
12376  #else
12377    static inline void ___might_sleep(const char *file, int line,
12378 @@ -195,6 +198,7 @@
12379    static inline void __might_sleep(const char *file, int line,
12380                                    int preempt_offset) { }
12381  # define might_sleep() do { might_resched(); } while (0)
12382 +# define might_sleep_no_state_check() do { might_resched(); } while (0)
12383  # define sched_annotate_sleep() do { } while (0)
12384  #endif
12386 @@ -255,6 +259,7 @@
12387  __printf(1, 2)
12388  void panic(const char *fmt, ...)
12389         __noreturn __cold;
12390 +void nmi_panic(struct pt_regs *regs, const char *msg);
12391  extern void oops_enter(void);
12392  extern void oops_exit(void);
12393  void print_oops_end_marker(void);
12394 @@ -448,6 +453,14 @@
12395  extern bool crash_kexec_post_notifiers;
12397  /*
12398 + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
12399 + * holds a CPU number which is executing panic() currently. A value of
12400 + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
12401 + */
12402 +extern atomic_t panic_cpu;
12403 +#define PANIC_CPU_INVALID      -1
12406   * Only to be used by arch init code. If the user over-wrote the default
12407   * CONFIG_PANIC_TIMEOUT, honor it.
12408   */
12409 @@ -475,6 +488,7 @@
12410         SYSTEM_HALT,
12411         SYSTEM_POWER_OFF,
12412         SYSTEM_RESTART,
12413 +       SYSTEM_SUSPEND,
12414  } system_state;
12416  #define TAINT_PROPRIETARY_MODULE       0
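panic_cpu and nmi_panic(), declared above, serialize panic() and crash_kexec() so a CPU taking an NMI cannot start a second, competing panic. A minimal sketch of an NMI-context caller; demo_nmi_watchdog() is illustrative:

#include <linux/kernel.h>
#include <linux/ptrace.h>

static void demo_nmi_watchdog(struct pt_regs *regs)
{
	/* The panic_cpu atomic above arbitrates so that only one CPU
	 * actually runs panic()/crash_kexec(). */
	nmi_panic(regs, "demo: hard lockup detected");
}
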
12417 diff -Nur linux-4.4.46.orig/include/linux/kvm_host.h linux-4.4.46/include/linux/kvm_host.h
12418 --- linux-4.4.46.orig/include/linux/kvm_host.h  2017-02-01 08:31:11.000000000 +0100
12419 +++ linux-4.4.46/include/linux/kvm_host.h       2017-02-03 17:18:10.907618284 +0100
12420 @@ -25,6 +25,7 @@
12421  #include <linux/irqflags.h>
12422  #include <linux/context_tracking.h>
12423  #include <linux/irqbypass.h>
12424 +#include <linux/swait.h>
12425  #include <asm/signal.h>
12427  #include <linux/kvm.h>
12428 @@ -243,7 +244,7 @@
12429         int fpu_active;
12430         int guest_fpu_loaded, guest_xcr0_loaded;
12431         unsigned char fpu_counter;
12432 -       wait_queue_head_t wq;
12433 +       struct swait_queue_head wq;
12434         struct pid *pid;
12435         int sigset_active;
12436         sigset_t sigset;
12437 @@ -794,7 +795,7 @@
12439  #endif
12441 -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
12442 +static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
12444  #ifdef __KVM_HAVE_ARCH_WQP
12445         return vcpu->arch.wqp;
12446 diff -Nur linux-4.4.46.orig/include/linux/lglock.h linux-4.4.46/include/linux/lglock.h
12447 --- linux-4.4.46.orig/include/linux/lglock.h    2017-02-01 08:31:11.000000000 +0100
12448 +++ linux-4.4.46/include/linux/lglock.h 2017-02-03 17:18:10.907618284 +0100
12449 @@ -34,13 +34,30 @@
12450  #endif
12452  struct lglock {
12453 +#ifdef CONFIG_PREEMPT_RT_FULL
12454 +       struct rt_mutex __percpu *lock;
12455 +#else
12456         arch_spinlock_t __percpu *lock;
12457 +#endif
12458  #ifdef CONFIG_DEBUG_LOCK_ALLOC
12459         struct lock_class_key lock_key;
12460         struct lockdep_map    lock_dep_map;
12461  #endif
12462  };
12464 +#ifdef CONFIG_PREEMPT_RT_FULL
12465 +# define DEFINE_LGLOCK(name)                                           \
12466 +       static DEFINE_PER_CPU(struct rt_mutex, name ## _lock)           \
12467 +       = __RT_MUTEX_INITIALIZER( name ## _lock);                       \
12468 +       struct lglock name = { .lock = &name ## _lock }
12470 +# define DEFINE_STATIC_LGLOCK(name)                                    \
12471 +       static DEFINE_PER_CPU(struct rt_mutex, name ## _lock)           \
12472 +       = __RT_MUTEX_INITIALIZER( name ## _lock);                       \
12473 +       static struct lglock name = { .lock = &name ## _lock }
12475 +#else
12477  #define DEFINE_LGLOCK(name)                                            \
12478         static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)           \
12479         = __ARCH_SPIN_LOCK_UNLOCKED;                                    \
12480 @@ -50,6 +67,7 @@
12481         static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)           \
12482         = __ARCH_SPIN_LOCK_UNLOCKED;                                    \
12483         static struct lglock name = { .lock = &name ## _lock }
12484 +#endif
12486  void lg_lock_init(struct lglock *lg, char *name);
12488 @@ -64,6 +82,12 @@
12489  void lg_global_lock(struct lglock *lg);
12490  void lg_global_unlock(struct lglock *lg);
12492 +#ifndef CONFIG_PREEMPT_RT_FULL
12493 +#define lg_global_trylock_relax(name)  lg_global_lock(name)
12494 +#else
12495 +void lg_global_trylock_relax(struct lglock *lg);
12496 +#endif
12498  #else
12499  /* When !CONFIG_SMP, map lglock to spinlock */
12500  #define lglock spinlock
12501 diff -Nur linux-4.4.46.orig/include/linux/list_bl.h linux-4.4.46/include/linux/list_bl.h
12502 --- linux-4.4.46.orig/include/linux/list_bl.h   2017-02-01 08:31:11.000000000 +0100
12503 +++ linux-4.4.46/include/linux/list_bl.h        2017-02-03 17:18:10.907618284 +0100
12504 @@ -2,6 +2,7 @@
12505  #define _LINUX_LIST_BL_H
12507  #include <linux/list.h>
12508 +#include <linux/spinlock.h>
12509  #include <linux/bit_spinlock.h>
12511  /*
12512 @@ -32,13 +33,24 @@
12514  struct hlist_bl_head {
12515         struct hlist_bl_node *first;
12516 +#ifdef CONFIG_PREEMPT_RT_BASE
12517 +       raw_spinlock_t lock;
12518 +#endif
12519  };
12521  struct hlist_bl_node {
12522         struct hlist_bl_node *next, **pprev;
12523  };
12524 -#define INIT_HLIST_BL_HEAD(ptr) \
12525 -       ((ptr)->first = NULL)
12527 +#ifdef CONFIG_PREEMPT_RT_BASE
12528 +#define INIT_HLIST_BL_HEAD(h)          \
12529 +do {                                   \
12530 +       (h)->first = NULL;              \
12531 +       raw_spin_lock_init(&(h)->lock); \
12532 +} while (0)
12533 +#else
12534 +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
12535 +#endif
12537  static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
12539 @@ -118,12 +130,26 @@
12541  static inline void hlist_bl_lock(struct hlist_bl_head *b)
12543 +#ifndef CONFIG_PREEMPT_RT_BASE
12544         bit_spin_lock(0, (unsigned long *)b);
12545 +#else
12546 +       raw_spin_lock(&b->lock);
12547 +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
12548 +       __set_bit(0, (unsigned long *)b);
12549 +#endif
12550 +#endif
12553  static inline void hlist_bl_unlock(struct hlist_bl_head *b)
12555 +#ifndef CONFIG_PREEMPT_RT_BASE
12556         __bit_spin_unlock(0, (unsigned long *)b);
12557 +#else
12558 +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
12559 +       __clear_bit(0, (unsigned long *)b);
12560 +#endif
12561 +       raw_spin_unlock(&b->lock);
12562 +#endif
12565  static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
12566 diff -Nur linux-4.4.46.orig/include/linux/locallock.h linux-4.4.46/include/linux/locallock.h
12567 --- linux-4.4.46.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100
12568 +++ linux-4.4.46/include/linux/locallock.h      2017-02-03 17:18:10.907618284 +0100
12569 @@ -0,0 +1,276 @@
12570 +#ifndef _LINUX_LOCALLOCK_H
12571 +#define _LINUX_LOCALLOCK_H
12573 +#include <linux/percpu.h>
12574 +#include <linux/spinlock.h>
12576 +#ifdef CONFIG_PREEMPT_RT_BASE
12578 +#ifdef CONFIG_DEBUG_SPINLOCK
12579 +# define LL_WARN(cond) WARN_ON(cond)
12580 +#else
12581 +# define LL_WARN(cond) do { } while (0)
12582 +#endif
12585 + * per cpu lock based substitute for local_irq_*()
12586 + */
12587 +struct local_irq_lock {
12588 +       spinlock_t              lock;
12589 +       struct task_struct      *owner;
12590 +       int                     nestcnt;
12591 +       unsigned long           flags;
12594 +#define DEFINE_LOCAL_IRQ_LOCK(lvar)                                    \
12595 +       DEFINE_PER_CPU(struct local_irq_lock, lvar) = {                 \
12596 +               .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
12598 +#define DECLARE_LOCAL_IRQ_LOCK(lvar)                                   \
12599 +       DECLARE_PER_CPU(struct local_irq_lock, lvar)
12601 +#define local_irq_lock_init(lvar)                                      \
12602 +       do {                                                            \
12603 +               int __cpu;                                              \
12604 +               for_each_possible_cpu(__cpu)                            \
12605 +                       spin_lock_init(&per_cpu(lvar, __cpu).lock);     \
12606 +       } while (0)
12609 + * spin_lock|trylock|unlock_local flavour that does not migrate disable
12610 + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
12611 + * already takes care of the migrate_disable/enable
12612 + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
12613 + */
12614 +#ifdef CONFIG_PREEMPT_RT_FULL
12615 +# define spin_lock_local(lock)                 rt_spin_lock__no_mg(lock)
12616 +# define spin_trylock_local(lock)              rt_spin_trylock__no_mg(lock)
12617 +# define spin_unlock_local(lock)               rt_spin_unlock__no_mg(lock)
12618 +#else
12619 +# define spin_lock_local(lock)                 spin_lock(lock)
12620 +# define spin_trylock_local(lock)              spin_trylock(lock)
12621 +# define spin_unlock_local(lock)               spin_unlock(lock)
12622 +#endif
12624 +static inline void __local_lock(struct local_irq_lock *lv)
12626 +       if (lv->owner != current) {
12627 +               spin_lock_local(&lv->lock);
12628 +               LL_WARN(lv->owner);
12629 +               LL_WARN(lv->nestcnt);
12630 +               lv->owner = current;
12631 +       }
12632 +       lv->nestcnt++;
12635 +#define local_lock(lvar)                                       \
12636 +       do { __local_lock(&get_local_var(lvar)); } while (0)
12638 +#define local_lock_on(lvar, cpu)                               \
12639 +       do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
12641 +static inline int __local_trylock(struct local_irq_lock *lv)
12643 +       if (lv->owner != current && spin_trylock_local(&lv->lock)) {
12644 +               LL_WARN(lv->owner);
12645 +               LL_WARN(lv->nestcnt);
12646 +               lv->owner = current;
12647 +               lv->nestcnt = 1;
12648 +               return 1;
12649 +       }
12650 +       return 0;
12653 +#define local_trylock(lvar)                                            \
12654 +       ({                                                              \
12655 +               int __locked;                                           \
12656 +               __locked = __local_trylock(&get_local_var(lvar));       \
12657 +               if (!__locked)                                          \
12658 +                       put_local_var(lvar);                            \
12659 +               __locked;                                               \
12660 +       })
12662 +static inline void __local_unlock(struct local_irq_lock *lv)
12664 +       LL_WARN(lv->nestcnt == 0);
12665 +       LL_WARN(lv->owner != current);
12666 +       if (--lv->nestcnt)
12667 +               return;
12669 +       lv->owner = NULL;
12670 +       spin_unlock_local(&lv->lock);
12673 +#define local_unlock(lvar)                                     \
12674 +       do {                                                    \
12675 +               __local_unlock(this_cpu_ptr(&lvar));            \
12676 +               put_local_var(lvar);                            \
12677 +       } while (0)
12679 +#define local_unlock_on(lvar, cpu)                       \
12680 +       do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
12682 +static inline void __local_lock_irq(struct local_irq_lock *lv)
12684 +       spin_lock_irqsave(&lv->lock, lv->flags);
12685 +       LL_WARN(lv->owner);
12686 +       LL_WARN(lv->nestcnt);
12687 +       lv->owner = current;
12688 +       lv->nestcnt = 1;
12691 +#define local_lock_irq(lvar)                                           \
12692 +       do { __local_lock_irq(&get_local_var(lvar)); } while (0)
12694 +#define local_lock_irq_on(lvar, cpu)                                   \
12695 +       do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
12697 +static inline void __local_unlock_irq(struct local_irq_lock *lv)
12699 +       LL_WARN(!lv->nestcnt);
12700 +       LL_WARN(lv->owner != current);
12701 +       lv->owner = NULL;
12702 +       lv->nestcnt = 0;
12703 +       spin_unlock_irq(&lv->lock);
12706 +#define local_unlock_irq(lvar)                                         \
12707 +       do {                                                            \
12708 +               __local_unlock_irq(this_cpu_ptr(&lvar));                \
12709 +               put_local_var(lvar);                                    \
12710 +       } while (0)
12712 +#define local_unlock_irq_on(lvar, cpu)                                 \
12713 +       do {                                                            \
12714 +               __local_unlock_irq(&per_cpu(lvar, cpu));                \
12715 +       } while (0)
12717 +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
12719 +       if (lv->owner != current) {
12720 +               __local_lock_irq(lv);
12721 +               return 0;
12722 +       } else {
12723 +               lv->nestcnt++;
12724 +               return 1;
12725 +       }
12728 +#define local_lock_irqsave(lvar, _flags)                               \
12729 +       do {                                                            \
12730 +               if (__local_lock_irqsave(&get_local_var(lvar)))         \
12731 +                       put_local_var(lvar);                            \
12732 +               _flags = __this_cpu_read(lvar.flags);                   \
12733 +       } while (0)
12735 +#define local_lock_irqsave_on(lvar, _flags, cpu)                       \
12736 +       do {                                                            \
12737 +               __local_lock_irqsave(&per_cpu(lvar, cpu));              \
12738 +               _flags = per_cpu(lvar, cpu).flags;                      \
12739 +       } while (0)
12741 +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
12742 +                                           unsigned long flags)
12744 +       LL_WARN(!lv->nestcnt);
12745 +       LL_WARN(lv->owner != current);
12746 +       if (--lv->nestcnt)
12747 +               return 0;
12749 +       lv->owner = NULL;
12750 +       spin_unlock_irqrestore(&lv->lock, lv->flags);
12751 +       return 1;
12754 +#define local_unlock_irqrestore(lvar, flags)                           \
12755 +       do {                                                            \
12756 +               if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
12757 +                       put_local_var(lvar);                            \
12758 +       } while (0)
12760 +#define local_unlock_irqrestore_on(lvar, flags, cpu)                   \
12761 +       do {                                                            \
12762 +               __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags);  \
12763 +       } while (0)
12765 +#define local_spin_trylock_irq(lvar, lock)                             \
12766 +       ({                                                              \
12767 +               int __locked;                                           \
12768 +               local_lock_irq(lvar);                                   \
12769 +               __locked = spin_trylock(lock);                          \
12770 +               if (!__locked)                                          \
12771 +                       local_unlock_irq(lvar);                         \
12772 +               __locked;                                               \
12773 +       })
12775 +#define local_spin_lock_irq(lvar, lock)                                        \
12776 +       do {                                                            \
12777 +               local_lock_irq(lvar);                                   \
12778 +               spin_lock(lock);                                        \
12779 +       } while (0)
12781 +#define local_spin_unlock_irq(lvar, lock)                              \
12782 +       do {                                                            \
12783 +               spin_unlock(lock);                                      \
12784 +               local_unlock_irq(lvar);                                 \
12785 +       } while (0)
12787 +#define local_spin_lock_irqsave(lvar, lock, flags)                     \
12788 +       do {                                                            \
12789 +               local_lock_irqsave(lvar, flags);                        \
12790 +               spin_lock(lock);                                        \
12791 +       } while (0)
12793 +#define local_spin_unlock_irqrestore(lvar, lock, flags)                        \
12794 +       do {                                                            \
12795 +               spin_unlock(lock);                                      \
12796 +               local_unlock_irqrestore(lvar, flags);                   \
12797 +       } while (0)
12799 +#define get_locked_var(lvar, var)                                      \
12800 +       (*({                                                            \
12801 +               local_lock(lvar);                                       \
12802 +               this_cpu_ptr(&var);                                     \
12803 +       }))
12805 +#define put_locked_var(lvar, var)      local_unlock(lvar);
12807 +#define local_lock_cpu(lvar)                                           \
12808 +       ({                                                              \
12809 +               local_lock(lvar);                                       \
12810 +               smp_processor_id();                                     \
12811 +       })
12813 +#define local_unlock_cpu(lvar)                 local_unlock(lvar)
12815 +#else /* PREEMPT_RT_BASE */
12817 +#define DEFINE_LOCAL_IRQ_LOCK(lvar)            __typeof__(const int) lvar
12818 +#define DECLARE_LOCAL_IRQ_LOCK(lvar)           extern __typeof__(const int) lvar
12820 +static inline void local_irq_lock_init(int lvar) { }
12822 +#define local_lock(lvar)                       preempt_disable()
12823 +#define local_unlock(lvar)                     preempt_enable()
12824 +#define local_lock_irq(lvar)                   local_irq_disable()
12825 +#define local_unlock_irq(lvar)                 local_irq_enable()
12826 +#define local_lock_irqsave(lvar, flags)                local_irq_save(flags)
12827 +#define local_unlock_irqrestore(lvar, flags)   local_irq_restore(flags)
12829 +#define local_spin_trylock_irq(lvar, lock)     spin_trylock_irq(lock)
12830 +#define local_spin_lock_irq(lvar, lock)                spin_lock_irq(lock)
12831 +#define local_spin_unlock_irq(lvar, lock)      spin_unlock_irq(lock)
12832 +#define local_spin_lock_irqsave(lvar, lock, flags)     \
12833 +       spin_lock_irqsave(lock, flags)
12834 +#define local_spin_unlock_irqrestore(lvar, lock, flags)        \
12835 +       spin_unlock_irqrestore(lock, flags)
12837 +#define get_locked_var(lvar, var)              get_cpu_var(var)
12838 +#define put_locked_var(lvar, var)              put_cpu_var(var)
12840 +#define local_lock_cpu(lvar)                   get_cpu()
12841 +#define local_unlock_cpu(lvar)                 put_cpu()
12843 +#endif
12845 +#endif
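locallock.h above is new: on RT a local lock is a per-CPU spinlock with owner and nesting tracking, while on !RT the macros collapse to preempt/irq disabling. A minimal usage sketch protecting per-CPU data; the demo_* names are illustrative and the per-CPU list heads are assumed to be initialised elsewhere:

#include <linux/locallock.h>
#include <linux/list.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(demo_lock);		/* illustrative local lock */
static DEFINE_PER_CPU(struct list_head, demo_list);	/* heads initialised elsewhere */

static void demo_add(struct list_head *item)
{
	struct list_head *head;

	/* RT: per-CPU spinlock + migrate_disable; !RT: preempt_disable(). */
	head = &get_locked_var(demo_lock, demo_list);
	list_add(item, head);
	put_locked_var(demo_lock, demo_list);
}
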
12846 diff -Nur linux-4.4.46.orig/include/linux/mm_types.h linux-4.4.46/include/linux/mm_types.h
12847 --- linux-4.4.46.orig/include/linux/mm_types.h  2017-02-01 08:31:11.000000000 +0100
12848 +++ linux-4.4.46/include/linux/mm_types.h       2017-02-03 17:18:10.907618284 +0100
12849 @@ -11,6 +11,7 @@
12850  #include <linux/completion.h>
12851  #include <linux/cpumask.h>
12852  #include <linux/uprobes.h>
12853 +#include <linux/rcupdate.h>
12854  #include <linux/page-flags-layout.h>
12855  #include <asm/page.h>
12856  #include <asm/mmu.h>
12857 @@ -505,6 +506,9 @@
12858         bool tlb_flush_pending;
12859  #endif
12860         struct uprobes_state uprobes_state;
12861 +#ifdef CONFIG_PREEMPT_RT_BASE
12862 +       struct rcu_head delayed_drop;
12863 +#endif
12864  #ifdef CONFIG_X86_INTEL_MPX
12865         /* address of the bounds directory */
12866         void __user *bd_addr;
12867 diff -Nur linux-4.4.46.orig/include/linux/mutex.h linux-4.4.46/include/linux/mutex.h
12868 --- linux-4.4.46.orig/include/linux/mutex.h     2017-02-01 08:31:11.000000000 +0100
12869 +++ linux-4.4.46/include/linux/mutex.h  2017-02-03 17:18:10.907618284 +0100
12870 @@ -19,6 +19,17 @@
12871  #include <asm/processor.h>
12872  #include <linux/osq_lock.h>
12874 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
12875 +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
12876 +       , .dep_map = { .name = #lockname }
12877 +#else
12878 +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
12879 +#endif
12881 +#ifdef CONFIG_PREEMPT_RT_FULL
12882 +# include <linux/mutex_rt.h>
12883 +#else
12885  /*
12886   * Simple, straightforward mutexes with strict semantics:
12887   *
12888 @@ -99,13 +110,6 @@
12889  static inline void mutex_destroy(struct mutex *lock) {}
12890  #endif
12892 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
12893 -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
12894 -               , .dep_map = { .name = #lockname }
12895 -#else
12896 -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
12897 -#endif
12899  #define __MUTEX_INITIALIZER(lockname) \
12900                 { .count = ATOMIC_INIT(1) \
12901                 , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
12902 @@ -173,6 +177,8 @@
12903  extern int mutex_trylock(struct mutex *lock);
12904  extern void mutex_unlock(struct mutex *lock);
12906 +#endif /* !PREEMPT_RT_FULL */
12908  extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
12910  #endif /* __LINUX_MUTEX_H */
12911 diff -Nur linux-4.4.46.orig/include/linux/mutex_rt.h linux-4.4.46/include/linux/mutex_rt.h
12912 --- linux-4.4.46.orig/include/linux/mutex_rt.h  1970-01-01 01:00:00.000000000 +0100
12913 +++ linux-4.4.46/include/linux/mutex_rt.h       2017-02-03 17:18:10.907618284 +0100
12914 @@ -0,0 +1,84 @@
12915 +#ifndef __LINUX_MUTEX_RT_H
12916 +#define __LINUX_MUTEX_RT_H
12918 +#ifndef __LINUX_MUTEX_H
12919 +#error "Please include mutex.h"
12920 +#endif
12922 +#include <linux/rtmutex.h>
12924 +/* FIXME: Just for __lockfunc */
12925 +#include <linux/spinlock.h>
12927 +struct mutex {
12928 +       struct rt_mutex         lock;
12929 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
12930 +       struct lockdep_map      dep_map;
12931 +#endif
12934 +#define __MUTEX_INITIALIZER(mutexname)                                 \
12935 +       {                                                               \
12936 +               .lock = __RT_MUTEX_INITIALIZER(mutexname.lock)          \
12937 +               __DEP_MAP_MUTEX_INITIALIZER(mutexname)                  \
12938 +       }
12940 +#define DEFINE_MUTEX(mutexname)                                                \
12941 +       struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
12943 +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
12944 +extern void __lockfunc _mutex_lock(struct mutex *lock);
12945 +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
12946 +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
12947 +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
12948 +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
12949 +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
12950 +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
12951 +extern int __lockfunc _mutex_trylock(struct mutex *lock);
12952 +extern void __lockfunc _mutex_unlock(struct mutex *lock);
12954 +#define mutex_is_locked(l)             rt_mutex_is_locked(&(l)->lock)
12955 +#define mutex_lock(l)                  _mutex_lock(l)
12956 +#define mutex_lock_interruptible(l)    _mutex_lock_interruptible(l)
12957 +#define mutex_lock_killable(l)         _mutex_lock_killable(l)
12958 +#define mutex_trylock(l)               _mutex_trylock(l)
12959 +#define mutex_unlock(l)                        _mutex_unlock(l)
12960 +#define mutex_destroy(l)               rt_mutex_destroy(&(l)->lock)
12962 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
12963 +# define mutex_lock_nested(l, s)       _mutex_lock_nested(l, s)
12964 +# define mutex_lock_interruptible_nested(l, s) \
12965 +                                       _mutex_lock_interruptible_nested(l, s)
12966 +# define mutex_lock_killable_nested(l, s) \
12967 +                                       _mutex_lock_killable_nested(l, s)
12969 +# define mutex_lock_nest_lock(lock, nest_lock)                         \
12970 +do {                                                                   \
12971 +       typecheck(struct lockdep_map *, &(nest_lock)->dep_map);         \
12972 +       _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);             \
12973 +} while (0)
12975 +#else
12976 +# define mutex_lock_nested(l, s)       _mutex_lock(l)
12977 +# define mutex_lock_interruptible_nested(l, s) \
12978 +                                       _mutex_lock_interruptible(l)
12979 +# define mutex_lock_killable_nested(l, s) \
12980 +                                       _mutex_lock_killable(l)
12981 +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
12982 +#endif
12984 +# define mutex_init(mutex)                             \
12985 +do {                                                   \
12986 +       static struct lock_class_key __key;             \
12987 +                                                       \
12988 +       rt_mutex_init(&(mutex)->lock);                  \
12989 +       __mutex_do_init((mutex), #mutex, &__key);       \
12990 +} while (0)
12992 +# define __mutex_init(mutex, name, key)                        \
12993 +do {                                                   \
12994 +       rt_mutex_init(&(mutex)->lock);                  \
12995 +       __mutex_do_init((mutex), name, key);            \
12996 +} while (0)
12998 +#endif
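
The mutex_rt.h header above replaces the ordinary kernel mutex with a thin wrapper around rt_mutex, so that a lock owner is priority-boosted by higher-priority waiters. A rough userspace analogue of that idea, using the POSIX priority-inheritance protocol on a pthread mutex, is sketched below. It only illustrates the concept; the program layout, the worker function and the single critical section are invented for the example, and it is not kernel code.

/* Userspace sketch: a priority-inheritance mutex, analogous in spirit to
 * the rt_mutex-backed struct mutex above. Build: gcc -O2 pi_mutex.c -lpthread */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock;

static void *worker(void *arg)
{
        pthread_mutex_lock(&lock);      /* a low-priority holder is boosted   */
        /* ... critical section ... */  /* while a higher-priority thread     */
        pthread_mutex_unlock(&lock);    /* blocks on &lock                    */
        return NULL;
}

int main(void)
{
        pthread_mutexattr_t attr;
        pthread_t t;

        pthread_mutexattr_init(&attr);
        /* PTHREAD_PRIO_INHERIT: the owner inherits the priority of the
         * highest-priority waiter, which is what rt_mutex provides in-kernel. */
        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
        pthread_mutex_init(&lock, &attr);

        pthread_create(&t, NULL, worker, NULL);
        pthread_join(t, NULL);
        pthread_mutex_destroy(&lock);
        pthread_mutexattr_destroy(&attr);
        return 0;
}
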
12999 diff -Nur linux-4.4.46.orig/include/linux/netdevice.h linux-4.4.46/include/linux/netdevice.h
13000 --- linux-4.4.46.orig/include/linux/netdevice.h 2017-02-01 08:31:11.000000000 +0100
13001 +++ linux-4.4.46/include/linux/netdevice.h      2017-02-03 17:18:10.907618284 +0100
13002 @@ -2286,11 +2286,20 @@
13003  void synchronize_net(void);
13004  int init_dummy_netdev(struct net_device *dev);
13006 +#ifdef CONFIG_PREEMPT_RT_FULL
13007 +static inline int dev_recursion_level(void)
13008 +{
13009 +       return current->xmit_recursion;
13010 +}
13011 +
13012 +#else
13013 +
13014  DECLARE_PER_CPU(int, xmit_recursion);
13015  static inline int dev_recursion_level(void)
13016  {
13017         return this_cpu_read(xmit_recursion);
13018  }
13019 +#endif
13020
13021  struct net_device *dev_get_by_index(struct net *net, int ifindex);
13022  struct net_device *__dev_get_by_index(struct net *net, int ifindex);
13023 @@ -2606,6 +2615,7 @@
13024         unsigned int            dropped;
13025         struct sk_buff_head     input_pkt_queue;
13026         struct napi_struct      backlog;
13027 +       struct sk_buff_head     tofree_queue;
13029  };
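
The netdevice.h hunks above move the transmit recursion counter from a per-CPU variable into task_struct (current->xmit_recursion), so the count follows the task even if it is preempted or migrated in the middle of the transmit path, which can happen once that path becomes preemptible on PREEMPT_RT. A minimal userspace sketch of the same pattern, keeping the depth per thread rather than per CPU, might look like this; XMIT_RECURSION_LIMIT and the function names are illustrative only.

/* Sketch: per-task (here: per-thread) recursion accounting, mirroring the
 * idea of current->xmit_recursion above. Build: gcc -O2 xmit_depth.c */
#include <stdio.h>

#define XMIT_RECURSION_LIMIT 8                  /* illustrative, not the kernel's */

static _Thread_local int xmit_recursion;        /* "per task" state */

static int dev_recursion_level(void)
{
        return xmit_recursion;
}

static int xmit_one(int depth)
{
        if (dev_recursion_level() >= XMIT_RECURSION_LIMIT)
                return -1;                      /* would drop the packet here */

        xmit_recursion++;
        /* a tunnel device re-entering the transmit path recurses here */
        if (depth > 0)
                xmit_one(depth - 1);
        xmit_recursion--;
        return 0;
}

int main(void)
{
        printf("xmit_one() -> %d, final level %d\n",
               xmit_one(3), dev_recursion_level());
        return 0;
}
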
13031 diff -Nur linux-4.4.46.orig/include/linux/netfilter/x_tables.h linux-4.4.46/include/linux/netfilter/x_tables.h
13032 --- linux-4.4.46.orig/include/linux/netfilter/x_tables.h        2017-02-01 08:31:11.000000000 +0100
13033 +++ linux-4.4.46/include/linux/netfilter/x_tables.h     2017-02-03 17:18:10.907618284 +0100
13034 @@ -4,6 +4,7 @@
13036  #include <linux/netdevice.h>
13037  #include <linux/static_key.h>
13038 +#include <linux/locallock.h>
13039  #include <uapi/linux/netfilter/x_tables.h>
13041  /**
13042 @@ -289,6 +290,8 @@
13043   */
13044  DECLARE_PER_CPU(seqcount_t, xt_recseq);
13046 +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
13048  /* xt_tee_enabled - true if x_tables needs to handle reentrancy
13049   *
13050   * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
13051 @@ -309,6 +312,9 @@
13053         unsigned int addend;
13055 +       /* RT protection */
13056 +       local_lock(xt_write_lock);
13058         /*
13059          * Low order bit of sequence is set if we already
13060          * called xt_write_recseq_begin().
13061 @@ -339,6 +345,7 @@
13062         /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
13063         smp_wmb();
13064         __this_cpu_add(xt_recseq.sequence, addend);
13065 +       local_unlock(xt_write_lock);
13068  /*
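
The x_tables.h change above wraps the per-CPU xt_recseq sequence counter with a local lock, since on PREEMPT_RT the write side can no longer rely on disabled preemption alone to keep the per-CPU sequence consistent. The sketch below loosely models that pattern in userspace: a small array of "per-CPU" mutexes taken around the counter update. The NR_CPUS value, the explicit cpu parameter and the helper shapes are all invented for illustration and do not match the kernel interfaces exactly.

/* Sketch: a "local lock" per CPU slot protecting a sequence counter,
 * loosely modelling DECLARE_LOCAL_IRQ_LOCK(xt_write_lock) above.
 * Build: gcc -O2 local_lock_model.c -lpthread */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4                               /* illustrative */

static pthread_mutex_t xt_write_lock[NR_CPUS] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static unsigned int xt_recseq[NR_CPUS];

static unsigned int xt_write_recseq_begin(int cpu)
{
        pthread_mutex_lock(&xt_write_lock[cpu]);        /* "local_lock()" */
        xt_recseq[cpu]++;                               /* odd: write in progress */
        return 1;                                       /* addend to close with */
}

static void xt_write_recseq_end(int cpu, unsigned int addend)
{
        xt_recseq[cpu] += addend;                       /* even again */
        pthread_mutex_unlock(&xt_write_lock[cpu]);      /* "local_unlock()" */
}

int main(void)
{
        unsigned int addend = xt_write_recseq_begin(0);
        /* ... update the ruleset counters ... */
        xt_write_recseq_end(0, addend);
        printf("cpu0 sequence: %u\n", xt_recseq[0]);
        return 0;
}
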
13069 diff -Nur linux-4.4.46.orig/include/linux/notifier.h linux-4.4.46/include/linux/notifier.h
13070 --- linux-4.4.46.orig/include/linux/notifier.h  2017-02-01 08:31:11.000000000 +0100
13071 +++ linux-4.4.46/include/linux/notifier.h       2017-02-03 17:18:10.907618284 +0100
13072 @@ -6,7 +6,7 @@
13073   *
13074   *                             Alan Cox <Alan.Cox@linux.org>
13075   */
13078  #ifndef _LINUX_NOTIFIER_H
13079  #define _LINUX_NOTIFIER_H
13080  #include <linux/errno.h>
13081 @@ -42,9 +42,7 @@
13082   * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
13083   * As compensation, srcu_notifier_chain_unregister() is rather expensive.
13084   * SRCU notifier chains should be used when the chain will be called very
13085 - * often but notifier_blocks will seldom be removed.  Also, SRCU notifier
13086 - * chains are slightly more difficult to use because they require special
13087 - * runtime initialization.
13088 + * often but notifier_blocks will seldom be removed.
13089   */
13091  typedef        int (*notifier_fn_t)(struct notifier_block *nb,
13092 @@ -88,7 +86,7 @@
13093                 (name)->head = NULL;            \
13094         } while (0)
13096 -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
13097 +/* srcu_notifier_heads must be cleaned up dynamically */
13098  extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
13099  #define srcu_cleanup_notifier_head(name)       \
13100                 cleanup_srcu_struct(&(name)->srcu);
13101 @@ -101,7 +99,13 @@
13102                 .head = NULL }
13103  #define RAW_NOTIFIER_INIT(name)        {                               \
13104                 .head = NULL }
13105 -/* srcu_notifier_heads cannot be initialized statically */
13107 +#define SRCU_NOTIFIER_INIT(name, pcpu)                         \
13108 +       {                                                       \
13109 +               .mutex = __MUTEX_INITIALIZER(name.mutex),       \
13110 +               .head = NULL,                                   \
13111 +               .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu),    \
13112 +       }
13114  #define ATOMIC_NOTIFIER_HEAD(name)                             \
13115         struct atomic_notifier_head name =                      \
13116 @@ -113,6 +117,18 @@
13117         struct raw_notifier_head name =                         \
13118                 RAW_NOTIFIER_INIT(name)
13120 +#define _SRCU_NOTIFIER_HEAD(name, mod)                         \
13121 +       static DEFINE_PER_CPU(struct srcu_struct_array,         \
13122 +                       name##_head_srcu_array);                \
13123 +       mod struct srcu_notifier_head name =                    \
13124 +                       SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
13126 +#define SRCU_NOTIFIER_HEAD(name)                               \
13127 +       _SRCU_NOTIFIER_HEAD(name, )
13129 +#define SRCU_NOTIFIER_HEAD_STATIC(name)                                \
13130 +       _SRCU_NOTIFIER_HEAD(name, static)
13132  #ifdef __KERNEL__
13134  extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
13135 @@ -182,12 +198,12 @@
13137  /*
13138   *     Declared notifiers so far. I can imagine quite a few more chains
13139 - *     over time (eg laptop power reset chains, reboot chain (to clean 
13140 + *     over time (eg laptop power reset chains, reboot chain (to clean
13141   *     device units up), device [un]mount chain, module load/unload chain,
13142 - *     low memory chain, screenblank chain (for plug in modular screenblankers) 
13143 + *     low memory chain, screenblank chain (for plug in modular screenblankers)
13144   *     VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
13145   */
13148  /* CPU notfiers are defined in include/linux/cpu.h. */
13150  /* netdevice notifiers are defined in include/linux/netdevice.h */
13151 diff -Nur linux-4.4.46.orig/include/linux/percpu.h linux-4.4.46/include/linux/percpu.h
13152 --- linux-4.4.46.orig/include/linux/percpu.h    2017-02-01 08:31:11.000000000 +0100
13153 +++ linux-4.4.46/include/linux/percpu.h 2017-02-03 17:18:10.907618284 +0100
13154 @@ -24,6 +24,35 @@
13155          PERCPU_MODULE_RESERVE)
13156  #endif
13158 +#ifdef CONFIG_PREEMPT_RT_FULL
13160 +#define get_local_var(var) (*({                \
13161 +              migrate_disable();       \
13162 +              this_cpu_ptr(&var);      }))
13164 +#define put_local_var(var) do {        \
13165 +       (void)&(var);           \
13166 +       migrate_enable();       \
13167 +} while (0)
13169 +# define get_local_ptr(var) ({         \
13170 +               migrate_disable();      \
13171 +               this_cpu_ptr(var);      })
13173 +# define put_local_ptr(var) do {       \
13174 +       (void)(var);                    \
13175 +       migrate_enable();               \
13176 +} while (0)
13178 +#else
13180 +#define get_local_var(var)     get_cpu_var(var)
13181 +#define put_local_var(var)     put_cpu_var(var)
13182 +#define get_local_ptr(var)     get_cpu_ptr(var)
13183 +#define put_local_ptr(var)     put_cpu_ptr(var)
13185 +#endif
13187  /* minimum unit size, also is the maximum supported allocation size */
13188  #define PCPU_MIN_UNIT_SIZE             PFN_ALIGN(32 << 10)
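
get_local_var()/put_local_var() above replace "disable preemption, touch per-CPU data" with "disable migration, touch per-CPU data": the section stays preemptible on RT, but the task cannot move to another CPU while it holds the reference. A rough userspace analogue is to pin the current thread to the CPU it is running on for the duration of the access. The helpers below are only a sketch of that idea; get_local_cpu()/put_local_cpu() are invented names, the example assumes a Linux/glibc environment (sched_getcpu, pthread_setaffinity_np) and fewer than 64 CPUs.

/* Sketch: "migrate_disable" emulated by temporarily pinning the thread to
 * its current CPU, in the spirit of get_local_var()/put_local_var() above.
 * Build: gcc -O2 local_var_model.c -lpthread */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static long per_cpu_counter[64];        /* illustrative "per-CPU" data */

static cpu_set_t saved_mask;

static int get_local_cpu(void)          /* ~ migrate_disable() + smp_processor_id() */
{
        int cpu = sched_getcpu();
        cpu_set_t pin;

        pthread_getaffinity_np(pthread_self(), sizeof(saved_mask), &saved_mask);
        CPU_ZERO(&pin);
        CPU_SET(cpu, &pin);
        pthread_setaffinity_np(pthread_self(), sizeof(pin), &pin);
        return cpu;                     /* still preemptible, but won't migrate */
}

static void put_local_cpu(void)         /* ~ migrate_enable() */
{
        pthread_setaffinity_np(pthread_self(), sizeof(saved_mask), &saved_mask);
}

int main(void)
{
        int cpu = get_local_cpu();

        per_cpu_counter[cpu]++;         /* safe: we cannot move off this CPU */
        put_local_cpu();
        printf("updated slot %d\n", cpu);
        return 0;
}
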
13190 diff -Nur linux-4.4.46.orig/include/linux/pid.h linux-4.4.46/include/linux/pid.h
13191 --- linux-4.4.46.orig/include/linux/pid.h       2017-02-01 08:31:11.000000000 +0100
13192 +++ linux-4.4.46/include/linux/pid.h    2017-02-03 17:18:10.907618284 +0100
13193 @@ -2,6 +2,7 @@
13194  #define _LINUX_PID_H
13196  #include <linux/rcupdate.h>
13197 +#include <linux/atomic.h>
13199  enum pid_type
13201 diff -Nur linux-4.4.46.orig/include/linux/preempt.h linux-4.4.46/include/linux/preempt.h
13202 --- linux-4.4.46.orig/include/linux/preempt.h   2017-02-01 08:31:11.000000000 +0100
13203 +++ linux-4.4.46/include/linux/preempt.h        2017-02-03 17:18:10.907618284 +0100
13204 @@ -50,7 +50,11 @@
13205  #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
13206  #define NMI_OFFSET     (1UL << NMI_SHIFT)
13208 -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
13209 +#ifndef CONFIG_PREEMPT_RT_FULL
13210 +# define SOFTIRQ_DISABLE_OFFSET                (2 * SOFTIRQ_OFFSET)
13211 +#else
13212 +# define SOFTIRQ_DISABLE_OFFSET                (0)
13213 +#endif
13215  /* We use the MSB mostly because its available */
13216  #define PREEMPT_NEED_RESCHED   0x80000000
13217 @@ -59,9 +63,15 @@
13218  #include <asm/preempt.h>
13220  #define hardirq_count()        (preempt_count() & HARDIRQ_MASK)
13221 -#define softirq_count()        (preempt_count() & SOFTIRQ_MASK)
13222  #define irq_count()    (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
13223                                  | NMI_MASK))
13224 +#ifndef CONFIG_PREEMPT_RT_FULL
13225 +# define softirq_count()       (preempt_count() & SOFTIRQ_MASK)
13226 +# define in_serving_softirq()  (softirq_count() & SOFTIRQ_OFFSET)
13227 +#else
13228 +# define softirq_count()       (0UL)
13229 +extern int in_serving_softirq(void);
13230 +#endif
13232  /*
13233   * Are we doing bottom half or hardware interrupt processing?
13234 @@ -72,7 +82,6 @@
13235  #define in_irq()               (hardirq_count())
13236  #define in_softirq()           (softirq_count())
13237  #define in_interrupt()         (irq_count())
13238 -#define in_serving_softirq()   (softirq_count() & SOFTIRQ_OFFSET)
13240  /*
13241   * Are we in NMI context?
13242 @@ -91,7 +100,11 @@
13243  /*
13244   * The preempt_count offset after spin_lock()
13245   */
13246 +#if !defined(CONFIG_PREEMPT_RT_FULL)
13247  #define PREEMPT_LOCK_OFFSET    PREEMPT_DISABLE_OFFSET
13248 +#else
13249 +#define PREEMPT_LOCK_OFFSET    0
13250 +#endif
13252  /*
13253   * The preempt_count offset needed for things like:
13254 @@ -140,6 +153,20 @@
13255  #define preempt_count_inc() preempt_count_add(1)
13256  #define preempt_count_dec() preempt_count_sub(1)
13258 +#ifdef CONFIG_PREEMPT_LAZY
13259 +#define add_preempt_lazy_count(val)    do { preempt_lazy_count() += (val); } while (0)
13260 +#define sub_preempt_lazy_count(val)    do { preempt_lazy_count() -= (val); } while (0)
13261 +#define inc_preempt_lazy_count()       add_preempt_lazy_count(1)
13262 +#define dec_preempt_lazy_count()       sub_preempt_lazy_count(1)
13263 +#define preempt_lazy_count()           (current_thread_info()->preempt_lazy_count)
13264 +#else
13265 +#define add_preempt_lazy_count(val)    do { } while (0)
13266 +#define sub_preempt_lazy_count(val)    do { } while (0)
13267 +#define inc_preempt_lazy_count()       do { } while (0)
13268 +#define dec_preempt_lazy_count()       do { } while (0)
13269 +#define preempt_lazy_count()           (0)
13270 +#endif
13272  #ifdef CONFIG_PREEMPT_COUNT
13274  #define preempt_disable() \
13275 @@ -148,13 +175,25 @@
13276         barrier(); \
13277  } while (0)
13279 +#define preempt_lazy_disable() \
13280 +do { \
13281 +       inc_preempt_lazy_count(); \
13282 +       barrier(); \
13283 +} while (0)
13285  #define sched_preempt_enable_no_resched() \
13286  do { \
13287         barrier(); \
13288         preempt_count_dec(); \
13289  } while (0)
13291 -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
13292 +#ifdef CONFIG_PREEMPT_RT_BASE
13293 +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
13294 +# define preempt_check_resched_rt() preempt_check_resched()
13295 +#else
13296 +# define preempt_enable_no_resched() preempt_enable()
13297 +# define preempt_check_resched_rt() barrier();
13298 +#endif
13300  #define preemptible()  (preempt_count() == 0 && !irqs_disabled())
13302 @@ -179,6 +218,13 @@
13303                 __preempt_schedule(); \
13304  } while (0)
13306 +#define preempt_lazy_enable() \
13307 +do { \
13308 +       dec_preempt_lazy_count(); \
13309 +       barrier(); \
13310 +       preempt_check_resched(); \
13311 +} while (0)
13313  #else /* !CONFIG_PREEMPT */
13314  #define preempt_enable() \
13315  do { \
13316 @@ -224,6 +270,7 @@
13317  #define preempt_disable_notrace()              barrier()
13318  #define preempt_enable_no_resched_notrace()    barrier()
13319  #define preempt_enable_notrace()               barrier()
13320 +#define preempt_check_resched_rt()             barrier()
13321  #define preemptible()                          0
13323  #endif /* CONFIG_PREEMPT_COUNT */
13324 @@ -244,10 +291,31 @@
13325  } while (0)
13326  #define preempt_fold_need_resched() \
13327  do { \
13328 -       if (tif_need_resched()) \
13329 +       if (tif_need_resched_now()) \
13330                 set_preempt_need_resched(); \
13331  } while (0)
13333 +#ifdef CONFIG_PREEMPT_RT_FULL
13334 +# define preempt_disable_rt()          preempt_disable()
13335 +# define preempt_enable_rt()           preempt_enable()
13336 +# define preempt_disable_nort()                barrier()
13337 +# define preempt_enable_nort()         barrier()
13338 +# ifdef CONFIG_SMP
13339 +   extern void migrate_disable(void);
13340 +   extern void migrate_enable(void);
13341 +# else /* CONFIG_SMP */
13342 +#  define migrate_disable()            barrier()
13343 +#  define migrate_enable()             barrier()
13344 +# endif /* CONFIG_SMP */
13345 +#else
13346 +# define preempt_disable_rt()          barrier()
13347 +# define preempt_enable_rt()           barrier()
13348 +# define preempt_disable_nort()                preempt_disable()
13349 +# define preempt_enable_nort()         preempt_enable()
13350 +# define migrate_disable()             preempt_disable()
13351 +# define migrate_enable()              preempt_enable()
13352 +#endif
13354  #ifdef CONFIG_PREEMPT_NOTIFIERS
13356  struct preempt_notifier;
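
The preempt.h changes above add a second, "lazy" preempt count next to the normal one: preempt_lazy_disable()/preempt_lazy_enable() nest like preempt_disable()/preempt_enable(), and a pending reschedule can only actually happen once the count has dropped back to zero. The toy model below shows just that nesting-counter discipline with a deferred "need resched" flag; the names and the printout are illustrative and are not kernel interfaces.

/* Toy model of a nesting preempt counter with a deferred reschedule check,
 * mirroring the inc/dec + "act when the count reaches zero" pattern above.
 * Build: gcc -O2 preempt_model.c */
#include <assert.h>
#include <stdio.h>

static _Thread_local int preempt_lazy_count;
static _Thread_local int need_resched_lazy;

static void preempt_lazy_disable(void)
{
        preempt_lazy_count++;
}

static void preempt_lazy_enable(void)
{
        assert(preempt_lazy_count > 0);
        if (--preempt_lazy_count == 0 && need_resched_lazy) {
                need_resched_lazy = 0;
                printf("outermost enable: would schedule() here\n");
        }
}

int main(void)
{
        preempt_lazy_disable();
        preempt_lazy_disable();         /* nested section */
        need_resched_lazy = 1;          /* a wakeup happened meanwhile */
        preempt_lazy_enable();          /* still nested: no reschedule yet */
        preempt_lazy_enable();          /* count hits zero: reschedule now */
        return 0;
}
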
13357 diff -Nur linux-4.4.46.orig/include/linux/printk.h linux-4.4.46/include/linux/printk.h
13358 --- linux-4.4.46.orig/include/linux/printk.h    2017-02-01 08:31:11.000000000 +0100
13359 +++ linux-4.4.46/include/linux/printk.h 2017-02-03 17:18:10.907618284 +0100
13360 @@ -117,9 +117,11 @@
13361  #ifdef CONFIG_EARLY_PRINTK
13362  extern asmlinkage __printf(1, 2)
13363  void early_printk(const char *fmt, ...);
13364 +extern void printk_kill(void);
13365  #else
13366  static inline __printf(1, 2) __cold
13367  void early_printk(const char *s, ...) { }
13368 +static inline void printk_kill(void) { }
13369  #endif
13371  typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
13372 diff -Nur linux-4.4.46.orig/include/linux/radix-tree.h linux-4.4.46/include/linux/radix-tree.h
13373 --- linux-4.4.46.orig/include/linux/radix-tree.h        2017-02-01 08:31:11.000000000 +0100
13374 +++ linux-4.4.46/include/linux/radix-tree.h     2017-02-03 17:18:10.907618284 +0100
13375 @@ -277,8 +277,13 @@
13376  unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
13377                         void ***results, unsigned long *indices,
13378                         unsigned long first_index, unsigned int max_items);
13379 +#ifndef CONFIG_PREEMPT_RT_FULL
13380  int radix_tree_preload(gfp_t gfp_mask);
13381  int radix_tree_maybe_preload(gfp_t gfp_mask);
13382 +#else
13383 +static inline int radix_tree_preload(gfp_t gm) { return 0; }
13384 +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; }
13385 +#endif
13386  void radix_tree_init(void);
13387  void *radix_tree_tag_set(struct radix_tree_root *root,
13388                         unsigned long index, unsigned int tag);
13389 @@ -303,7 +308,7 @@
13391  static inline void radix_tree_preload_end(void)
13393 -       preempt_enable();
13394 +       preempt_enable_nort();
13397  /**
13398 diff -Nur linux-4.4.46.orig/include/linux/random.h linux-4.4.46/include/linux/random.h
13399 --- linux-4.4.46.orig/include/linux/random.h    2017-02-01 08:31:11.000000000 +0100
13400 +++ linux-4.4.46/include/linux/random.h 2017-02-03 17:18:10.907618284 +0100
13401 @@ -20,7 +20,7 @@
13402  extern void add_device_randomness(const void *, unsigned int);
13403  extern void add_input_randomness(unsigned int type, unsigned int code,
13404                                  unsigned int value);
13405 -extern void add_interrupt_randomness(int irq, int irq_flags);
13406 +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
13408  extern void get_random_bytes(void *buf, int nbytes);
13409  extern int add_random_ready_callback(struct random_ready_callback *rdy);
13410 diff -Nur linux-4.4.46.orig/include/linux/rbtree.h linux-4.4.46/include/linux/rbtree.h
13411 --- linux-4.4.46.orig/include/linux/rbtree.h    2017-02-01 08:31:11.000000000 +0100
13412 +++ linux-4.4.46/include/linux/rbtree.h 2017-02-03 17:18:10.907618284 +0100
13413 @@ -31,7 +31,6 @@
13415  #include <linux/kernel.h>
13416  #include <linux/stddef.h>
13417 -#include <linux/rcupdate.h>
13419  struct rb_node {
13420         unsigned long  __rb_parent_color;
13421 @@ -86,14 +85,8 @@
13422         *rb_link = node;
13425 -static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
13426 -                                   struct rb_node **rb_link)
13428 -       node->__rb_parent_color = (unsigned long)parent;
13429 -       node->rb_left = node->rb_right = NULL;
13431 -       rcu_assign_pointer(*rb_link, node);
13433 +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
13434 +                     struct rb_node **rb_link);
13436  #define rb_entry_safe(ptr, type, member) \
13437         ({ typeof(ptr) ____ptr = (ptr); \
13438 diff -Nur linux-4.4.46.orig/include/linux/rcupdate.h linux-4.4.46/include/linux/rcupdate.h
13439 --- linux-4.4.46.orig/include/linux/rcupdate.h  2017-02-01 08:31:11.000000000 +0100
13440 +++ linux-4.4.46/include/linux/rcupdate.h       2017-02-03 17:18:10.907618284 +0100
13441 @@ -169,6 +169,9 @@
13443  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
13445 +#ifdef CONFIG_PREEMPT_RT_FULL
13446 +#define call_rcu_bh    call_rcu
13447 +#else
13448  /**
13449   * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
13450   * @head: structure to be used for queueing the RCU updates.
13451 @@ -192,6 +195,7 @@
13452   */
13453  void call_rcu_bh(struct rcu_head *head,
13454                  rcu_callback_t func);
13455 +#endif
13457  /**
13458   * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
13459 @@ -292,6 +296,11 @@
13460   * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
13461   */
13462  #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
13463 +#ifndef CONFIG_PREEMPT_RT_FULL
13464 +#define sched_rcu_preempt_depth()      rcu_preempt_depth()
13465 +#else
13466 +static inline int sched_rcu_preempt_depth(void) { return 0; }
13467 +#endif
13469  #else /* #ifdef CONFIG_PREEMPT_RCU */
13471 @@ -317,6 +326,8 @@
13472         return 0;
13475 +#define sched_rcu_preempt_depth()      rcu_preempt_depth()
13477  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
13479  /* Internal to kernel */
13480 @@ -489,7 +500,14 @@
13481  int debug_lockdep_rcu_enabled(void);
13483  int rcu_read_lock_held(void);
13484 +#ifdef CONFIG_PREEMPT_RT_FULL
13485 +static inline int rcu_read_lock_bh_held(void)
13487 +       return rcu_read_lock_held();
13489 +#else
13490  int rcu_read_lock_bh_held(void);
13491 +#endif
13493  /**
13494   * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
13495 @@ -937,10 +955,14 @@
13496  static inline void rcu_read_lock_bh(void)
13498         local_bh_disable();
13499 +#ifdef CONFIG_PREEMPT_RT_FULL
13500 +       rcu_read_lock();
13501 +#else
13502         __acquire(RCU_BH);
13503         rcu_lock_acquire(&rcu_bh_lock_map);
13504         RCU_LOCKDEP_WARN(!rcu_is_watching(),
13505                          "rcu_read_lock_bh() used illegally while idle");
13506 +#endif
13509  /*
13510 @@ -950,10 +972,14 @@
13511   */
13512  static inline void rcu_read_unlock_bh(void)
13514 +#ifdef CONFIG_PREEMPT_RT_FULL
13515 +       rcu_read_unlock();
13516 +#else
13517         RCU_LOCKDEP_WARN(!rcu_is_watching(),
13518                          "rcu_read_unlock_bh() used illegally while idle");
13519         rcu_lock_release(&rcu_bh_lock_map);
13520         __release(RCU_BH);
13521 +#endif
13522         local_bh_enable();
13525 diff -Nur linux-4.4.46.orig/include/linux/rcutree.h linux-4.4.46/include/linux/rcutree.h
13526 --- linux-4.4.46.orig/include/linux/rcutree.h   2017-02-01 08:31:11.000000000 +0100
13527 +++ linux-4.4.46/include/linux/rcutree.h        2017-02-03 17:18:10.907618284 +0100
13528 @@ -44,7 +44,11 @@
13529         rcu_note_context_switch();
13532 +#ifdef CONFIG_PREEMPT_RT_FULL
13533 +# define synchronize_rcu_bh    synchronize_rcu
13534 +#else
13535  void synchronize_rcu_bh(void);
13536 +#endif
13537  void synchronize_sched_expedited(void);
13538  void synchronize_rcu_expedited(void);
13540 @@ -72,7 +76,11 @@
13543  void rcu_barrier(void);
13544 +#ifdef CONFIG_PREEMPT_RT_FULL
13545 +# define rcu_barrier_bh                rcu_barrier
13546 +#else
13547  void rcu_barrier_bh(void);
13548 +#endif
13549  void rcu_barrier_sched(void);
13550  unsigned long get_state_synchronize_rcu(void);
13551  void cond_synchronize_rcu(unsigned long oldstate);
13552 @@ -85,12 +93,10 @@
13553  unsigned long rcu_batches_started_bh(void);
13554  unsigned long rcu_batches_started_sched(void);
13555  unsigned long rcu_batches_completed(void);
13556 -unsigned long rcu_batches_completed_bh(void);
13557  unsigned long rcu_batches_completed_sched(void);
13558  void show_rcu_gp_kthreads(void);
13560  void rcu_force_quiescent_state(void);
13561 -void rcu_bh_force_quiescent_state(void);
13562  void rcu_sched_force_quiescent_state(void);
13564  void rcu_idle_enter(void);
13565 @@ -105,6 +111,14 @@
13567  bool rcu_is_watching(void);
13569 +#ifndef CONFIG_PREEMPT_RT_FULL
13570 +void rcu_bh_force_quiescent_state(void);
13571 +unsigned long rcu_batches_completed_bh(void);
13572 +#else
13573 +# define rcu_bh_force_quiescent_state  rcu_force_quiescent_state
13574 +# define rcu_batches_completed_bh      rcu_batches_completed
13575 +#endif
13577  void rcu_all_qs(void);
13579  #endif /* __LINUX_RCUTREE_H */
13580 diff -Nur linux-4.4.46.orig/include/linux/rtmutex.h linux-4.4.46/include/linux/rtmutex.h
13581 --- linux-4.4.46.orig/include/linux/rtmutex.h   2017-02-01 08:31:11.000000000 +0100
13582 +++ linux-4.4.46/include/linux/rtmutex.h        2017-02-03 17:18:10.911618440 +0100
13583 @@ -13,11 +13,15 @@
13584  #define __LINUX_RT_MUTEX_H
13586  #include <linux/linkage.h>
13587 +#include <linux/spinlock_types_raw.h>
13588  #include <linux/rbtree.h>
13589 -#include <linux/spinlock_types.h>
13591  extern int max_lock_depth; /* for sysctl */
13593 +#ifdef CONFIG_DEBUG_MUTEXES
13594 +#include <linux/debug_locks.h>
13595 +#endif
13597  /**
13598   * The rt_mutex structure
13599   *
13600 @@ -31,8 +35,8 @@
13601         struct rb_root          waiters;
13602         struct rb_node          *waiters_leftmost;
13603         struct task_struct      *owner;
13604 -#ifdef CONFIG_DEBUG_RT_MUTEXES
13605         int                     save_state;
13606 +#ifdef CONFIG_DEBUG_RT_MUTEXES
13607         const char              *name, *file;
13608         int                     line;
13609         void                    *magic;
13610 @@ -55,22 +59,33 @@
13611  # define rt_mutex_debug_check_no_locks_held(task)      do { } while (0)
13612  #endif
13614 +# define rt_mutex_init(mutex)                                  \
13615 +       do {                                                    \
13616 +               raw_spin_lock_init(&(mutex)->wait_lock);        \
13617 +               __rt_mutex_init(mutex, #mutex);                 \
13618 +       } while (0)
13620  #ifdef CONFIG_DEBUG_RT_MUTEXES
13621  # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
13622         , .name = #mutexname, .file = __FILE__, .line = __LINE__
13623 -# define rt_mutex_init(mutex)                  __rt_mutex_init(mutex, __func__)
13624   extern void rt_mutex_debug_task_free(struct task_struct *tsk);
13625  #else
13626  # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
13627 -# define rt_mutex_init(mutex)                  __rt_mutex_init(mutex, NULL)
13628  # define rt_mutex_debug_task_free(t)                   do { } while (0)
13629  #endif
13631 -#define __RT_MUTEX_INITIALIZER(mutexname) \
13632 -       { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
13633 +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
13634 +        .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
13635         , .waiters = RB_ROOT \
13636         , .owner = NULL \
13637 -       __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
13638 +       __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
13640 +#define __RT_MUTEX_INITIALIZER(mutexname) \
13641 +       { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
13643 +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
13644 +       { __RT_MUTEX_INITIALIZER_PLAIN(mutexname)    \
13645 +       , .save_state = 1 }
13647  #define DEFINE_RT_MUTEX(mutexname) \
13648         struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
13649 @@ -91,6 +106,7 @@
13651  extern void rt_mutex_lock(struct rt_mutex *lock);
13652  extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
13653 +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
13654  extern int rt_mutex_timed_lock(struct rt_mutex *lock,
13655                                struct hrtimer_sleeper *timeout);
13657 diff -Nur linux-4.4.46.orig/include/linux/rwlock_rt.h linux-4.4.46/include/linux/rwlock_rt.h
13658 --- linux-4.4.46.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100
13659 +++ linux-4.4.46/include/linux/rwlock_rt.h      2017-02-03 17:18:10.911618440 +0100
13660 @@ -0,0 +1,99 @@
13661 +#ifndef __LINUX_RWLOCK_RT_H
13662 +#define __LINUX_RWLOCK_RT_H
13664 +#ifndef __LINUX_SPINLOCK_H
13665 +#error Do not include directly. Use spinlock.h
13666 +#endif
13668 +#define rwlock_init(rwl)                               \
13669 +do {                                                   \
13670 +       static struct lock_class_key __key;             \
13671 +                                                       \
13672 +       rt_mutex_init(&(rwl)->lock);                    \
13673 +       __rt_rwlock_init(rwl, #rwl, &__key);            \
13674 +} while (0)
13676 +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
13677 +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
13678 +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
13679 +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
13680 +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
13681 +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
13682 +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
13683 +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
13684 +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
13685 +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
13687 +#define read_trylock(lock)     __cond_lock(lock, rt_read_trylock(lock))
13688 +#define write_trylock(lock)    __cond_lock(lock, rt_write_trylock(lock))
13690 +#define write_trylock_irqsave(lock, flags)     \
13691 +       __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
13693 +#define read_lock_irqsave(lock, flags)                 \
13694 +       do {                                            \
13695 +               typecheck(unsigned long, flags);        \
13696 +               flags = rt_read_lock_irqsave(lock);     \
13697 +       } while (0)
13699 +#define write_lock_irqsave(lock, flags)                        \
13700 +       do {                                            \
13701 +               typecheck(unsigned long, flags);        \
13702 +               flags = rt_write_lock_irqsave(lock);    \
13703 +       } while (0)
13705 +#define read_lock(lock)                rt_read_lock(lock)
13707 +#define read_lock_bh(lock)                             \
13708 +       do {                                            \
13709 +               local_bh_disable();                     \
13710 +               rt_read_lock(lock);                     \
13711 +       } while (0)
13713 +#define read_lock_irq(lock)    read_lock(lock)
13715 +#define write_lock(lock)       rt_write_lock(lock)
13717 +#define write_lock_bh(lock)                            \
13718 +       do {                                            \
13719 +               local_bh_disable();                     \
13720 +               rt_write_lock(lock);                    \
13721 +       } while (0)
13723 +#define write_lock_irq(lock)   write_lock(lock)
13725 +#define read_unlock(lock)      rt_read_unlock(lock)
13727 +#define read_unlock_bh(lock)                           \
13728 +       do {                                            \
13729 +               rt_read_unlock(lock);                   \
13730 +               local_bh_enable();                      \
13731 +       } while (0)
13733 +#define read_unlock_irq(lock)  read_unlock(lock)
13735 +#define write_unlock(lock)     rt_write_unlock(lock)
13737 +#define write_unlock_bh(lock)                          \
13738 +       do {                                            \
13739 +               rt_write_unlock(lock);                  \
13740 +               local_bh_enable();                      \
13741 +       } while (0)
13743 +#define write_unlock_irq(lock) write_unlock(lock)
13745 +#define read_unlock_irqrestore(lock, flags)            \
13746 +       do {                                            \
13747 +               typecheck(unsigned long, flags);        \
13748 +               (void) flags;                           \
13749 +               rt_read_unlock(lock);                   \
13750 +       } while (0)
13752 +#define write_unlock_irqrestore(lock, flags) \
13753 +       do {                                            \
13754 +               typecheck(unsigned long, flags);        \
13755 +               (void) flags;                           \
13756 +               rt_write_unlock(lock);                  \
13757 +       } while (0)
13759 +#endif
13760 diff -Nur linux-4.4.46.orig/include/linux/rwlock_types.h linux-4.4.46/include/linux/rwlock_types.h
13761 --- linux-4.4.46.orig/include/linux/rwlock_types.h      2017-02-01 08:31:11.000000000 +0100
13762 +++ linux-4.4.46/include/linux/rwlock_types.h   2017-02-03 17:18:10.911618440 +0100
13763 @@ -1,6 +1,10 @@
13764  #ifndef __LINUX_RWLOCK_TYPES_H
13765  #define __LINUX_RWLOCK_TYPES_H
13767 +#if !defined(__LINUX_SPINLOCK_TYPES_H)
13768 +# error "Do not include directly, include spinlock_types.h"
13769 +#endif
13771  /*
13772   * include/linux/rwlock_types.h - generic rwlock type definitions
13773   *                               and initializers
13774 @@ -43,6 +47,7 @@
13775                                 RW_DEP_MAP_INIT(lockname) }
13776  #endif
13778 -#define DEFINE_RWLOCK(x)       rwlock_t x = __RW_LOCK_UNLOCKED(x)
13779 +#define DEFINE_RWLOCK(name) \
13780 +       rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
13782  #endif /* __LINUX_RWLOCK_TYPES_H */
13783 diff -Nur linux-4.4.46.orig/include/linux/rwlock_types_rt.h linux-4.4.46/include/linux/rwlock_types_rt.h
13784 --- linux-4.4.46.orig/include/linux/rwlock_types_rt.h   1970-01-01 01:00:00.000000000 +0100
13785 +++ linux-4.4.46/include/linux/rwlock_types_rt.h        2017-02-03 17:18:10.911618440 +0100
13786 @@ -0,0 +1,33 @@
13787 +#ifndef __LINUX_RWLOCK_TYPES_RT_H
13788 +#define __LINUX_RWLOCK_TYPES_RT_H
13790 +#ifndef __LINUX_SPINLOCK_TYPES_H
13791 +#error "Do not include directly. Include spinlock_types.h instead"
13792 +#endif
13795 + * rwlocks - rtmutex which allows single reader recursion
13796 + */
13797 +typedef struct {
13798 +       struct rt_mutex         lock;
13799 +       int                     read_depth;
13800 +       unsigned int            break_lock;
13801 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13802 +       struct lockdep_map      dep_map;
13803 +#endif
13804 +} rwlock_t;
13806 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13807 +# define RW_DEP_MAP_INIT(lockname)     .dep_map = { .name = #lockname }
13808 +#else
13809 +# define RW_DEP_MAP_INIT(lockname)
13810 +#endif
13812 +#define __RW_LOCK_UNLOCKED(name) \
13813 +       { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
13814 +         RW_DEP_MAP_INIT(name) }
13816 +#define DEFINE_RWLOCK(name) \
13817 +       rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
13819 +#endif
13820 diff -Nur linux-4.4.46.orig/include/linux/rwsem.h linux-4.4.46/include/linux/rwsem.h
13821 --- linux-4.4.46.orig/include/linux/rwsem.h     2017-02-01 08:31:11.000000000 +0100
13822 +++ linux-4.4.46/include/linux/rwsem.h  2017-02-03 17:18:10.911618440 +0100
13823 @@ -18,6 +18,10 @@
13824  #include <linux/osq_lock.h>
13825  #endif
13827 +#ifdef CONFIG_PREEMPT_RT_FULL
13828 +#include <linux/rwsem_rt.h>
13829 +#else /* PREEMPT_RT_FULL */
13831  struct rw_semaphore;
13833  #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
13834 @@ -177,4 +181,6 @@
13835  # define up_read_non_owner(sem)                        up_read(sem)
13836  #endif
13838 +#endif /* !PREEMPT_RT_FULL */
13840  #endif /* _LINUX_RWSEM_H */
13841 diff -Nur linux-4.4.46.orig/include/linux/rwsem_rt.h linux-4.4.46/include/linux/rwsem_rt.h
13842 --- linux-4.4.46.orig/include/linux/rwsem_rt.h  1970-01-01 01:00:00.000000000 +0100
13843 +++ linux-4.4.46/include/linux/rwsem_rt.h       2017-02-03 17:18:10.911618440 +0100
13844 @@ -0,0 +1,152 @@
13845 +#ifndef _LINUX_RWSEM_RT_H
13846 +#define _LINUX_RWSEM_RT_H
13848 +#ifndef _LINUX_RWSEM_H
13849 +#error "Include rwsem.h"
13850 +#endif
13853 + * RW-semaphores are a spinlock plus a reader-depth count.
13854 + *
13855 + * Note that the semantics are different from the usual
13856 + * Linux rw-sems, in PREEMPT_RT mode we do not allow
13857 + * multiple readers to hold the lock at once, we only allow
13858 + * a read-lock owner to read-lock recursively. This is
13859 + * better for latency, makes the implementation inherently
13860 + * fair and makes it simpler as well.
13861 + */
13863 +#include <linux/rtmutex.h>
13865 +struct rw_semaphore {
13866 +       struct rt_mutex         lock;
13867 +       int                     read_depth;
13868 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13869 +       struct lockdep_map      dep_map;
13870 +#endif
13871 +};
13872 +
13873 +#define __RWSEM_INITIALIZER(name) \
13874 +       { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
13875 +         RW_DEP_MAP_INIT(name) }
13877 +#define DECLARE_RWSEM(lockname) \
13878 +       struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
13880 +extern void  __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
13881 +                                    struct lock_class_key *key);
13883 +#define __rt_init_rwsem(sem, name, key)                        \
13884 +       do {                                            \
13885 +               rt_mutex_init(&(sem)->lock);            \
13886 +               __rt_rwsem_init((sem), (name), (key));\
13887 +       } while (0)
13889 +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
13891 +# define rt_init_rwsem(sem)                            \
13892 +do {                                                   \
13893 +       static struct lock_class_key __key;             \
13894 +                                                       \
13895 +       __rt_init_rwsem((sem), #sem, &__key);           \
13896 +} while (0)
13898 +extern void rt_down_write(struct rw_semaphore *rwsem);
13899 +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
13900 +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
13901 +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
13902 +                                     struct lockdep_map *nest);
13903 +extern void rt__down_read(struct rw_semaphore *rwsem);
13904 +extern void rt_down_read(struct rw_semaphore *rwsem);
13905 +extern int  rt_down_write_trylock(struct rw_semaphore *rwsem);
13906 +extern int  rt__down_read_trylock(struct rw_semaphore *rwsem);
13907 +extern int  rt_down_read_trylock(struct rw_semaphore *rwsem);
13908 +extern void __rt_up_read(struct rw_semaphore *rwsem);
13909 +extern void rt_up_read(struct rw_semaphore *rwsem);
13910 +extern void rt_up_write(struct rw_semaphore *rwsem);
13911 +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
13913 +#define init_rwsem(sem)                rt_init_rwsem(sem)
13914 +#define rwsem_is_locked(s)     rt_mutex_is_locked(&(s)->lock)
13916 +static inline int rwsem_is_contended(struct rw_semaphore *sem)
13918 +       /* rt_mutex_has_waiters() */
13919 +       return !RB_EMPTY_ROOT(&sem->lock.waiters);
13922 +static inline void __down_read(struct rw_semaphore *sem)
13924 +       rt__down_read(sem);
13927 +static inline void down_read(struct rw_semaphore *sem)
13929 +       rt_down_read(sem);
13932 +static inline int __down_read_trylock(struct rw_semaphore *sem)
13934 +       return rt__down_read_trylock(sem);
13937 +static inline int down_read_trylock(struct rw_semaphore *sem)
13939 +       return rt_down_read_trylock(sem);
13942 +static inline void down_write(struct rw_semaphore *sem)
13944 +       rt_down_write(sem);
13947 +static inline int down_write_trylock(struct rw_semaphore *sem)
13949 +       return rt_down_write_trylock(sem);
13952 +static inline void __up_read(struct rw_semaphore *sem)
13954 +       __rt_up_read(sem);
13957 +static inline void up_read(struct rw_semaphore *sem)
13959 +       rt_up_read(sem);
13962 +static inline void up_write(struct rw_semaphore *sem)
13964 +       rt_up_write(sem);
13967 +static inline void downgrade_write(struct rw_semaphore *sem)
13969 +       rt_downgrade_write(sem);
13972 +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
13974 +       return rt_down_read_nested(sem, subclass);
13977 +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
13979 +       rt_down_write_nested(sem, subclass);
13981 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
13982 +static inline void down_write_nest_lock(struct rw_semaphore *sem,
13983 +               struct rw_semaphore *nest_lock)
13985 +       rt_down_write_nested_lock(sem, &nest_lock->dep_map);
13988 +#else
13990 +static inline void down_write_nest_lock(struct rw_semaphore *sem,
13991 +               struct rw_semaphore *nest_lock)
13993 +       rt_down_write_nested_lock(sem, NULL);
13995 +#endif
13996 +#endif
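
The comment at the top of rwsem_rt.h spells out the key semantic change: on PREEMPT_RT an rw-semaphore is an rt_mutex plus a read_depth, so there is never more than one reader at a time, but that single reader may take the read side recursively. The pthread sketch below implements that "one recursive reader" rule in userspace to make the behaviour concrete. struct rt_rwsem_model and the model_* helpers are invented names; this is a model of the semantics only, with no priority inheritance and no writer fairness, and the unlocked owner check is safe only because a non-owner can never match pthread_self().

/* Sketch: a lock with rwsem_rt-like semantics: writers and the single
 * reader exclude each other, and the owning thread may re-take the read
 * side recursively. Build: gcc -O2 rt_rwsem_model.c -lpthread */
#include <pthread.h>
#include <stdio.h>

struct rt_rwsem_model {
        pthread_mutex_t lock;           /* stands in for the rt_mutex */
        pthread_t       reader;         /* current read-side owner */
        int             read_depth;     /* recursion depth of that owner */
};

static void model_down_read(struct rt_rwsem_model *sem)
{
        /* Only the current owner can ever satisfy this check, so reading
         * the fields without the lock is harmless in this model. */
        if (sem->read_depth && pthread_equal(sem->reader, pthread_self())) {
                sem->read_depth++;      /* recursive read by the owner */
                return;
        }
        pthread_mutex_lock(&sem->lock); /* first read acquires the mutex */
        sem->reader = pthread_self();
        sem->read_depth = 1;
}

static void model_up_read(struct rt_rwsem_model *sem)
{
        if (--sem->read_depth == 0)
                pthread_mutex_unlock(&sem->lock);
}

static void model_down_write(struct rt_rwsem_model *sem)
{
        pthread_mutex_lock(&sem->lock);
}

static void model_up_write(struct rt_rwsem_model *sem)
{
        pthread_mutex_unlock(&sem->lock);
}

int main(void)
{
        struct rt_rwsem_model sem = { .lock = PTHREAD_MUTEX_INITIALIZER };

        model_down_read(&sem);
        model_down_read(&sem);          /* recursion by the same thread: ok */
        model_up_read(&sem);
        model_up_read(&sem);
        model_down_write(&sem);         /* now exclusive */
        model_up_write(&sem);
        printf("done\n");
        return 0;
}
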
13997 diff -Nur linux-4.4.46.orig/include/linux/sched.h linux-4.4.46/include/linux/sched.h
13998 --- linux-4.4.46.orig/include/linux/sched.h     2017-02-01 08:31:11.000000000 +0100
13999 +++ linux-4.4.46/include/linux/sched.h  2017-02-03 17:18:10.911618440 +0100
14000 @@ -26,6 +26,7 @@
14001  #include <linux/nodemask.h>
14002  #include <linux/mm_types.h>
14003  #include <linux/preempt.h>
14004 +#include <asm/kmap_types.h>
14006  #include <asm/page.h>
14007  #include <asm/ptrace.h>
14008 @@ -182,8 +183,6 @@
14009  static inline void update_cpu_load_nohz(void) { }
14010  #endif
14012 -extern unsigned long get_parent_ip(unsigned long addr);
14014  extern void dump_cpu_task(int cpu);
14016  struct seq_file;
14017 @@ -242,10 +241,7 @@
14018                                  TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
14019                                  __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
14021 -#define task_is_traced(task)   ((task->state & __TASK_TRACED) != 0)
14022  #define task_is_stopped(task)  ((task->state & __TASK_STOPPED) != 0)
14023 -#define task_is_stopped_or_traced(task)        \
14024 -                       ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
14025  #define task_contributes_to_load(task) \
14026                                 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
14027                                  (task->flags & PF_FROZEN) == 0 && \
14028 @@ -311,6 +307,11 @@
14030  #endif
14032 +#define __set_current_state_no_track(state_value)      \
14033 +       do { current->state = (state_value); } while (0)
14034 +#define set_current_state_no_track(state_value)                \
14035 +       set_mb(current->state, (state_value))
14037  /* Task command name length */
14038  #define TASK_COMM_LEN 16
14040 @@ -970,8 +971,18 @@
14041         struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
14043  extern void wake_q_add(struct wake_q_head *head,
14044 -                      struct task_struct *task);
14045 -extern void wake_up_q(struct wake_q_head *head);
14046 +                             struct task_struct *task);
14047 +extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
14049 +static inline void wake_up_q(struct wake_q_head *head)
14051 +       __wake_up_q(head, false);
14054 +static inline void wake_up_q_sleeper(struct wake_q_head *head)
14056 +       __wake_up_q(head, true);
14059  /*
14060   * sched-domains (multiprocessor balancing) declarations:
14061 @@ -1379,6 +1390,7 @@
14063  struct task_struct {
14064         volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
14065 +       volatile long saved_state;      /* saved state for "spinlock sleepers" */
14066         void *stack;
14067         atomic_t usage;
14068         unsigned int flags;     /* per process flags, defined below */
14069 @@ -1415,6 +1427,12 @@
14070  #endif
14072         unsigned int policy;
14073 +#ifdef CONFIG_PREEMPT_RT_FULL
14074 +       int migrate_disable;
14075 +# ifdef CONFIG_SCHED_DEBUG
14076 +       int migrate_disable_atomic;
14077 +# endif
14078 +#endif
14079         int nr_cpus_allowed;
14080         cpumask_t cpus_allowed;
14082 @@ -1522,11 +1540,14 @@
14083         cputime_t gtime;
14084         struct prev_cputime prev_cputime;
14085  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
14086 -       seqlock_t vtime_seqlock;
14087 +       seqcount_t vtime_seqcount;
14088         unsigned long long vtime_snap;
14089         enum {
14090 -               VTIME_SLEEPING = 0,
14091 +               /* Task is sleeping or running in a CPU with VTIME inactive */
14092 +               VTIME_INACTIVE = 0,
14093 +               /* Task runs in userspace in a CPU with VTIME active */
14094                 VTIME_USER,
14095 +               /* Task runs in kernelspace in a CPU with VTIME active */
14096                 VTIME_SYS,
14097         } vtime_snap_whence;
14098  #endif
14099 @@ -1538,6 +1559,9 @@
14101         struct task_cputime cputime_expires;
14102         struct list_head cpu_timers[3];
14103 +#ifdef CONFIG_PREEMPT_RT_BASE
14104 +       struct task_struct *posix_timer_list;
14105 +#endif
14107  /* process credentials */
14108         const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
14109 @@ -1569,10 +1593,15 @@
14110  /* signal handlers */
14111         struct signal_struct *signal;
14112         struct sighand_struct *sighand;
14113 +       struct sigqueue *sigqueue_cache;
14115         sigset_t blocked, real_blocked;
14116         sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
14117         struct sigpending pending;
14118 +#ifdef CONFIG_PREEMPT_RT_FULL
14119 +       /* TODO: move me into ->restart_block ? */
14120 +       struct siginfo forced_info;
14121 +#endif
14123         unsigned long sas_ss_sp;
14124         size_t sas_ss_size;
14125 @@ -1796,6 +1825,12 @@
14126         unsigned long trace;
14127         /* bitmask and counter of trace recursion */
14128         unsigned long trace_recursion;
14129 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
14130 +       u64 preempt_timestamp_hist;
14131 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
14132 +       long timer_offset;
14133 +#endif
14134 +#endif
14135  #endif /* CONFIG_TRACING */
14136  #ifdef CONFIG_MEMCG
14137         struct mem_cgroup *memcg_in_oom;
14138 @@ -1812,9 +1847,23 @@
14139         unsigned int    sequential_io;
14140         unsigned int    sequential_io_avg;
14141  #endif
14142 +#ifdef CONFIG_PREEMPT_RT_BASE
14143 +       struct rcu_head put_rcu;
14144 +       int softirq_nestcnt;
14145 +       unsigned int softirqs_raised;
14146 +#endif
14147 +#ifdef CONFIG_PREEMPT_RT_FULL
14148 +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
14149 +       int kmap_idx;
14150 +       pte_t kmap_pte[KM_TYPE_NR];
14151 +# endif
14152 +#endif
14153  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
14154         unsigned long   task_state_change;
14155  #endif
14156 +#ifdef CONFIG_PREEMPT_RT_FULL
14157 +       int xmit_recursion;
14158 +#endif
14159         int pagefault_disabled;
14160  /* CPU-specific state of this task */
14161         struct thread_struct thread;
14162 @@ -1832,9 +1881,6 @@
14163  # define arch_task_struct_size (sizeof(struct task_struct))
14164  #endif
14166 -/* Future-safe accessor for struct task_struct's cpus_allowed. */
14167 -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
14169  #define TNF_MIGRATED   0x01
14170  #define TNF_NO_GROUP   0x02
14171  #define TNF_SHARED     0x04
14172 @@ -2024,6 +2070,15 @@
14173  extern void free_task(struct task_struct *tsk);
14174  #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
14176 +#ifdef CONFIG_PREEMPT_RT_BASE
14177 +extern void __put_task_struct_cb(struct rcu_head *rhp);
14179 +static inline void put_task_struct(struct task_struct *t)
14181 +       if (atomic_dec_and_test(&t->usage))
14182 +               call_rcu(&t->put_rcu, __put_task_struct_cb);
14184 +#else
14185  extern void __put_task_struct(struct task_struct *t);
14187  static inline void put_task_struct(struct task_struct *t)
14188 @@ -2031,6 +2086,7 @@
14189         if (atomic_dec_and_test(&t->usage))
14190                 __put_task_struct(t);
14192 +#endif
14194  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
14195  extern void task_cputime(struct task_struct *t,
14196 @@ -2069,6 +2125,7 @@
14197  /*
14198   * Per process flags
14199   */
14200 +#define PF_IN_SOFTIRQ  0x00000001      /* Task is serving softirq */
14201  #define PF_EXITING     0x00000004      /* getting shut down */
14202  #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
14203  #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
14204 @@ -2233,6 +2290,10 @@
14206  extern int set_cpus_allowed_ptr(struct task_struct *p,
14207                                 const struct cpumask *new_mask);
14208 +int migrate_me(void);
14209 +void tell_sched_cpu_down_begin(int cpu);
14210 +void tell_sched_cpu_down_done(int cpu);
14212  #else
14213  static inline void do_set_cpus_allowed(struct task_struct *p,
14214                                       const struct cpumask *new_mask)
14215 @@ -2245,6 +2306,9 @@
14216                 return -EINVAL;
14217         return 0;
14219 +static inline int migrate_me(void) { return 0; }
14220 +static inline void tell_sched_cpu_down_begin(int cpu) { }
14221 +static inline void tell_sched_cpu_down_done(int cpu) { }
14222  #endif
14224  #ifdef CONFIG_NO_HZ_COMMON
14225 @@ -2454,6 +2518,7 @@
14227  extern int wake_up_state(struct task_struct *tsk, unsigned int state);
14228  extern int wake_up_process(struct task_struct *tsk);
14229 +extern int wake_up_lock_sleeper(struct task_struct * tsk);
14230  extern void wake_up_new_task(struct task_struct *tsk);
14231  #ifdef CONFIG_SMP
14232   extern void kick_process(struct task_struct *tsk);
14233 @@ -2577,12 +2642,24 @@
14235  /* mmdrop drops the mm and the page tables */
14236  extern void __mmdrop(struct mm_struct *);
14238  static inline void mmdrop(struct mm_struct * mm)
14240         if (unlikely(atomic_dec_and_test(&mm->mm_count)))
14241                 __mmdrop(mm);
14244 +#ifdef CONFIG_PREEMPT_RT_BASE
14245 +extern void __mmdrop_delayed(struct rcu_head *rhp);
14246 +static inline void mmdrop_delayed(struct mm_struct *mm)
14248 +       if (atomic_dec_and_test(&mm->mm_count))
14249 +               call_rcu(&mm->delayed_drop, __mmdrop_delayed);
14251 +#else
14252 +# define mmdrop_delayed(mm)    mmdrop(mm)
14253 +#endif
14255  /* mmput gets rid of the mappings and all user-space */
14256  extern void mmput(struct mm_struct *);
14257  /* Grab a reference to a task's mm, if it is not already going away */
14258 @@ -2892,6 +2969,43 @@
14259         return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
14262 +#ifdef CONFIG_PREEMPT_LAZY
14263 +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
14265 +       set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
14268 +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
14270 +       clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
14273 +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
14275 +       return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
14278 +static inline int need_resched_lazy(void)
14280 +       return test_thread_flag(TIF_NEED_RESCHED_LAZY);
14283 +static inline int need_resched_now(void)
14285 +       return test_thread_flag(TIF_NEED_RESCHED);
14288 +#else
14289 +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
14290 +static inline int need_resched_lazy(void) { return 0; }
14292 +static inline int need_resched_now(void)
14294 +       return test_thread_flag(TIF_NEED_RESCHED);
14297 +#endif
14299  static inline int restart_syscall(void)
14301         set_tsk_thread_flag(current, TIF_SIGPENDING);
14302 @@ -2923,6 +3037,51 @@
14303         return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
14306 +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
14308 +       if (task->state & (__TASK_STOPPED | __TASK_TRACED))
14309 +               return true;
14310 +#ifdef CONFIG_PREEMPT_RT_FULL
14311 +       if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
14312 +               return true;
14313 +#endif
14314 +       return false;
14317 +static inline bool task_is_stopped_or_traced(struct task_struct *task)
14319 +       bool traced_stopped;
14321 +#ifdef CONFIG_PREEMPT_RT_FULL
14322 +       unsigned long flags;
14324 +       raw_spin_lock_irqsave(&task->pi_lock, flags);
14325 +       traced_stopped = __task_is_stopped_or_traced(task);
14326 +       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
14327 +#else
14328 +       traced_stopped = __task_is_stopped_or_traced(task);
14329 +#endif
14330 +       return traced_stopped;
14333 +static inline bool task_is_traced(struct task_struct *task)
14335 +       bool traced = false;
14337 +       if (task->state & __TASK_TRACED)
14338 +               return true;
14339 +#ifdef CONFIG_PREEMPT_RT_FULL
14340 +       /* in case the task is sleeping on tasklist_lock */
14341 +       raw_spin_lock_irq(&task->pi_lock);
14342 +       if (task->state & __TASK_TRACED)
14343 +               traced = true;
14344 +       else if (task->saved_state & __TASK_TRACED)
14345 +               traced = true;
14346 +       raw_spin_unlock_irq(&task->pi_lock);
14347 +#endif
14348 +       return traced;
14351  /*
14352   * cond_resched() and cond_resched_lock(): latency reduction via
14353   * explicit rescheduling in places that are safe. The return
14354 @@ -2944,12 +3103,16 @@
14355         __cond_resched_lock(lock);                              \
14356  })
14358 +#ifndef CONFIG_PREEMPT_RT_FULL
14359  extern int __cond_resched_softirq(void);
14361  #define cond_resched_softirq() ({                                      \
14362         ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);     \
14363         __cond_resched_softirq();                                       \
14364  })
14365 +#else
14366 +# define cond_resched_softirq()                cond_resched()
14367 +#endif
14369  static inline void cond_resched_rcu(void)
14371 @@ -3111,6 +3274,31 @@
14373  #endif /* CONFIG_SMP */
14375 +static inline int __migrate_disabled(struct task_struct *p)
14377 +#ifdef CONFIG_PREEMPT_RT_FULL
14378 +       return p->migrate_disable;
14379 +#else
14380 +       return 0;
14381 +#endif
14384 +/* Future-safe accessor for struct task_struct's cpus_allowed. */
14385 +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
14387 +       if (__migrate_disabled(p))
14388 +               return cpumask_of(task_cpu(p));
14390 +       return &p->cpus_allowed;
14393 +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
14395 +       if (__migrate_disabled(p))
14396 +               return 1;
14397 +       return p->nr_cpus_allowed;
14400  extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
14401  extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
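
Among the sched.h changes above, tsk_cpus_allowed() stops being a plain field accessor: while a task has migration disabled, the scheduler must treat it as runnable only on the CPU it currently occupies, so the helper reports a single-CPU mask and tsk_nr_cpus_allowed() reports 1. The stand-alone sketch below reproduces that decision logic with a mock task structure; struct mock_task, its bitmask representation and the field names are invented so the example compiles outside the kernel.

/* Sketch of the tsk_cpus_allowed()/tsk_nr_cpus_allowed() logic above,
 * using a mock task so it compiles outside the kernel.
 * Build: gcc -O2 cpus_allowed_model.c */
#include <stdio.h>

struct mock_task {
        int migrate_disable;            /* > 0 while migration is disabled */
        int cpu;                        /* CPU the task currently runs on */
        unsigned long cpus_allowed;     /* affinity mask, one bit per CPU */
        int nr_cpus_allowed;
};

static unsigned long tsk_cpus_allowed(const struct mock_task *p)
{
        if (p->migrate_disable)
                return 1UL << p->cpu;   /* only the current CPU counts */
        return p->cpus_allowed;
}

static int tsk_nr_cpus_allowed(const struct mock_task *p)
{
        return p->migrate_disable ? 1 : p->nr_cpus_allowed;
}

int main(void)
{
        struct mock_task t = { .cpu = 2, .cpus_allowed = 0xf, .nr_cpus_allowed = 4 };

        printf("mask %#lx, nr %d\n", tsk_cpus_allowed(&t), tsk_nr_cpus_allowed(&t));
        t.migrate_disable = 1;
        printf("mask %#lx, nr %d\n", tsk_cpus_allowed(&t), tsk_nr_cpus_allowed(&t));
        return 0;
}
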
14403 diff -Nur linux-4.4.46.orig/include/linux/seqlock.h linux-4.4.46/include/linux/seqlock.h
14404 --- linux-4.4.46.orig/include/linux/seqlock.h   2017-02-01 08:31:11.000000000 +0100
14405 +++ linux-4.4.46/include/linux/seqlock.h        2017-02-03 17:18:10.911618440 +0100
14406 @@ -220,20 +220,30 @@
14407         return __read_seqcount_retry(s, start);
14412 -static inline void raw_write_seqcount_begin(seqcount_t *s)
14413 +static inline void __raw_write_seqcount_begin(seqcount_t *s)
14415         s->sequence++;
14416         smp_wmb();
14419 -static inline void raw_write_seqcount_end(seqcount_t *s)
14420 +static inline void raw_write_seqcount_begin(seqcount_t *s)
14422 +       preempt_disable_rt();
14423 +       __raw_write_seqcount_begin(s);
14426 +static inline void __raw_write_seqcount_end(seqcount_t *s)
14428         smp_wmb();
14429         s->sequence++;
14432 +static inline void raw_write_seqcount_end(seqcount_t *s)
14434 +       __raw_write_seqcount_end(s);
14435 +       preempt_enable_rt();
14438  /**
14439   * raw_write_seqcount_barrier - do a seq write barrier
14440   * @s: pointer to seqcount_t
14441 @@ -425,10 +435,32 @@
14442  /*
14443   * Read side functions for starting and finalizing a read side section.
14444   */
14445 +#ifndef CONFIG_PREEMPT_RT_FULL
14446  static inline unsigned read_seqbegin(const seqlock_t *sl)
14447  {
14448         return read_seqcount_begin(&sl->seqcount);
14449  }
14450 +#else
14451 +/*
14452 + * Starvation safe read side for RT
14453 + */
14454 +static inline unsigned read_seqbegin(seqlock_t *sl)
14455 +{
14456 +       unsigned ret;
14457 +
14458 +repeat:
14459 +       ret = ACCESS_ONCE(sl->seqcount.sequence);
14460 +       if (unlikely(ret & 1)) {
14461 +               /*
14462 +                * Take the lock and let the writer proceed (i.e. evtl
14463 +                * boost it), otherwise we could loop here forever.
14464 +                */
14465 +               spin_unlock_wait(&sl->lock);
14466 +               goto repeat;
14467 +       }
14468 +       return ret;
14469 +}
14470 +#endif
14472  static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
14474 @@ -443,36 +475,36 @@
14475  static inline void write_seqlock(seqlock_t *sl)
14477         spin_lock(&sl->lock);
14478 -       write_seqcount_begin(&sl->seqcount);
14479 +       __raw_write_seqcount_begin(&sl->seqcount);
14482  static inline void write_sequnlock(seqlock_t *sl)
14484 -       write_seqcount_end(&sl->seqcount);
14485 +       __raw_write_seqcount_end(&sl->seqcount);
14486         spin_unlock(&sl->lock);
14489  static inline void write_seqlock_bh(seqlock_t *sl)
14491         spin_lock_bh(&sl->lock);
14492 -       write_seqcount_begin(&sl->seqcount);
14493 +       __raw_write_seqcount_begin(&sl->seqcount);
14496  static inline void write_sequnlock_bh(seqlock_t *sl)
14498 -       write_seqcount_end(&sl->seqcount);
14499 +       __raw_write_seqcount_end(&sl->seqcount);
14500         spin_unlock_bh(&sl->lock);
14503  static inline void write_seqlock_irq(seqlock_t *sl)
14505         spin_lock_irq(&sl->lock);
14506 -       write_seqcount_begin(&sl->seqcount);
14507 +       __raw_write_seqcount_begin(&sl->seqcount);
14510  static inline void write_sequnlock_irq(seqlock_t *sl)
14512 -       write_seqcount_end(&sl->seqcount);
14513 +       __raw_write_seqcount_end(&sl->seqcount);
14514         spin_unlock_irq(&sl->lock);
14517 @@ -481,7 +513,7 @@
14518         unsigned long flags;
14520         spin_lock_irqsave(&sl->lock, flags);
14521 -       write_seqcount_begin(&sl->seqcount);
14522 +       __raw_write_seqcount_begin(&sl->seqcount);
14523         return flags;
14526 @@ -491,7 +523,7 @@
14527  static inline void
14528  write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
14530 -       write_seqcount_end(&sl->seqcount);
14531 +       __raw_write_seqcount_end(&sl->seqcount);
14532         spin_unlock_irqrestore(&sl->lock, flags);
14535 diff -Nur linux-4.4.46.orig/include/linux/signal.h linux-4.4.46/include/linux/signal.h
14536 --- linux-4.4.46.orig/include/linux/signal.h    2017-02-01 08:31:11.000000000 +0100
14537 +++ linux-4.4.46/include/linux/signal.h 2017-02-03 17:18:10.911618440 +0100
14538 @@ -233,6 +233,7 @@
14541  extern void flush_sigqueue(struct sigpending *queue);
14542 +extern void flush_task_sigqueue(struct task_struct *tsk);
14544  /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
14545  static inline int valid_signal(unsigned long sig)
14546 diff -Nur linux-4.4.46.orig/include/linux/skbuff.h linux-4.4.46/include/linux/skbuff.h
14547 --- linux-4.4.46.orig/include/linux/skbuff.h    2017-02-01 08:31:11.000000000 +0100
14548 +++ linux-4.4.46/include/linux/skbuff.h 2017-02-03 17:18:10.911618440 +0100
14549 @@ -203,6 +203,7 @@
14551         __u32           qlen;
14552         spinlock_t      lock;
14553 +       raw_spinlock_t  raw_lock;
14554  };
14556  struct sk_buff;
14557 @@ -1465,6 +1466,12 @@
14558         __skb_queue_head_init(list);
14561 +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
14563 +       raw_spin_lock_init(&list->raw_lock);
14564 +       __skb_queue_head_init(list);
14567  static inline void skb_queue_head_init_class(struct sk_buff_head *list,
14568                 struct lock_class_key *class)
14570 diff -Nur linux-4.4.46.orig/include/linux/smp.h linux-4.4.46/include/linux/smp.h
14571 --- linux-4.4.46.orig/include/linux/smp.h       2017-02-01 08:31:11.000000000 +0100
14572 +++ linux-4.4.46/include/linux/smp.h    2017-02-03 17:18:10.911618440 +0100
14573 @@ -185,6 +185,9 @@
14574  #define get_cpu()              ({ preempt_disable(); smp_processor_id(); })
14575  #define put_cpu()              preempt_enable()
14577 +#define get_cpu_light()                ({ migrate_disable(); smp_processor_id(); })
14578 +#define put_cpu_light()                migrate_enable()
14580  /*
14581   * Callback to arch code if there's nosmp or maxcpus=0 on the
14582   * boot command line:
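
A short sketch, not part of the patch, of how the *_light helpers introduced above are meant to be used: they pin the task to its current CPU with migrate_disable() while leaving it preemptible, in contrast to get_cpu()/put_cpu(). The function name is hypothetical.

#include <linux/smp.h>
#include <linux/printk.h>

static void sample_cpu_probe(void)
{
        int cpu;

        cpu = get_cpu_light();  /* migrate_disable() + smp_processor_id() */
        /*
         * Per-CPU work for "cpu" goes here; the task may still be preempted,
         * but it cannot migrate to another CPU until put_cpu_light().
         */
        pr_debug("running on cpu %d\n", cpu);
        put_cpu_light();        /* migrate_enable() */
}
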
14583 diff -Nur linux-4.4.46.orig/include/linux/spinlock_api_smp.h linux-4.4.46/include/linux/spinlock_api_smp.h
14584 --- linux-4.4.46.orig/include/linux/spinlock_api_smp.h  2017-02-01 08:31:11.000000000 +0100
14585 +++ linux-4.4.46/include/linux/spinlock_api_smp.h       2017-02-03 17:18:10.911618440 +0100
14586 @@ -189,6 +189,8 @@
14587         return 0;
14590 -#include <linux/rwlock_api_smp.h>
14591 +#ifndef CONFIG_PREEMPT_RT_FULL
14592 +# include <linux/rwlock_api_smp.h>
14593 +#endif
14595  #endif /* __LINUX_SPINLOCK_API_SMP_H */
14596 diff -Nur linux-4.4.46.orig/include/linux/spinlock.h linux-4.4.46/include/linux/spinlock.h
14597 --- linux-4.4.46.orig/include/linux/spinlock.h  2017-02-01 08:31:11.000000000 +0100
14598 +++ linux-4.4.46/include/linux/spinlock.h       2017-02-03 17:18:10.911618440 +0100
14599 @@ -271,7 +271,11 @@
14600  #define raw_spin_can_lock(lock)        (!raw_spin_is_locked(lock))
14602  /* Include rwlock functions */
14603 -#include <linux/rwlock.h>
14604 +#ifdef CONFIG_PREEMPT_RT_FULL
14605 +# include <linux/rwlock_rt.h>
14606 +#else
14607 +# include <linux/rwlock.h>
14608 +#endif
14610  /*
14611   * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
14612 @@ -282,6 +286,10 @@
14613  # include <linux/spinlock_api_up.h>
14614  #endif
14616 +#ifdef CONFIG_PREEMPT_RT_FULL
14617 +# include <linux/spinlock_rt.h>
14618 +#else /* PREEMPT_RT_FULL */
14620  /*
14621   * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
14622   */
14623 @@ -347,6 +355,12 @@
14624         raw_spin_unlock(&lock->rlock);
14627 +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock)
14629 +       raw_spin_unlock(&lock->rlock);
14630 +       return 0;
14633  static __always_inline void spin_unlock_bh(spinlock_t *lock)
14635         raw_spin_unlock_bh(&lock->rlock);
14636 @@ -416,4 +430,6 @@
14637  #define atomic_dec_and_lock(atomic, lock) \
14638                 __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
14640 +#endif /* !PREEMPT_RT_FULL */
14642  #endif /* __LINUX_SPINLOCK_H */
14643 diff -Nur linux-4.4.46.orig/include/linux/spinlock_rt.h linux-4.4.46/include/linux/spinlock_rt.h
14644 --- linux-4.4.46.orig/include/linux/spinlock_rt.h       1970-01-01 01:00:00.000000000 +0100
14645 +++ linux-4.4.46/include/linux/spinlock_rt.h    2017-02-03 17:18:10.911618440 +0100
14646 @@ -0,0 +1,165 @@
14647 +#ifndef __LINUX_SPINLOCK_RT_H
14648 +#define __LINUX_SPINLOCK_RT_H
14650 +#ifndef __LINUX_SPINLOCK_H
14651 +#error Do not include directly. Use spinlock.h
14652 +#endif
14654 +#include <linux/bug.h>
14656 +extern void
14657 +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
14659 +#define spin_lock_init(slock)                          \
14660 +do {                                                   \
14661 +       static struct lock_class_key __key;             \
14662 +                                                       \
14663 +       rt_mutex_init(&(slock)->lock);                  \
14664 +       __rt_spin_lock_init(slock, #slock, &__key);     \
14665 +} while (0)
14667 +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
14668 +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
14669 +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
14671 +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
14672 +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
14673 +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
14674 +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
14675 +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock);
14676 +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
14677 +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
14678 +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
14679 +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
14680 +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
14683 + * lockdep-less calls, for derived types like rwlock:
14684 + * (for trylock they can use rt_mutex_trylock() directly).
14685 + */
14686 +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
14687 +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
14688 +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
14689 +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
14691 +#define spin_lock(lock)                        rt_spin_lock(lock)
14693 +#define spin_lock_bh(lock)                     \
14694 +       do {                                    \
14695 +               local_bh_disable();             \
14696 +               rt_spin_lock(lock);             \
14697 +       } while (0)
14699 +#define spin_lock_irq(lock)            spin_lock(lock)
14701 +#define spin_do_trylock(lock)          __cond_lock(lock, rt_spin_trylock(lock))
14703 +#define spin_trylock(lock)                     \
14704 +({                                             \
14705 +       int __locked;                           \
14706 +       __locked = spin_do_trylock(lock);       \
14707 +       __locked;                               \
14710 +#ifdef CONFIG_LOCKDEP
14711 +# define spin_lock_nested(lock, subclass)              \
14712 +       do {                                            \
14713 +               rt_spin_lock_nested(lock, subclass);    \
14714 +       } while (0)
14716 +#define spin_lock_bh_nested(lock, subclass)            \
14717 +       do {                                            \
14718 +               local_bh_disable();                     \
14719 +               rt_spin_lock_nested(lock, subclass);    \
14720 +       } while (0)
14722 +# define spin_lock_irqsave_nested(lock, flags, subclass) \
14723 +       do {                                             \
14724 +               typecheck(unsigned long, flags);         \
14725 +               flags = 0;                               \
14726 +               rt_spin_lock_nested(lock, subclass);     \
14727 +       } while (0)
14728 +#else
14729 +# define spin_lock_nested(lock, subclass)      spin_lock(lock)
14730 +# define spin_lock_bh_nested(lock, subclass)   spin_lock_bh(lock)
14732 +# define spin_lock_irqsave_nested(lock, flags, subclass) \
14733 +       do {                                             \
14734 +               typecheck(unsigned long, flags);         \
14735 +               flags = 0;                               \
14736 +               spin_lock(lock);                         \
14737 +       } while (0)
14738 +#endif
14740 +#define spin_lock_irqsave(lock, flags)                  \
14741 +       do {                                             \
14742 +               typecheck(unsigned long, flags);         \
14743 +               flags = 0;                               \
14744 +               spin_lock(lock);                         \
14745 +       } while (0)
14747 +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
14749 +       unsigned long flags = 0;
14750 +#ifdef CONFIG_TRACE_IRQFLAGS
14751 +       flags = rt_spin_lock_trace_flags(lock);
14752 +#else
14753 +       spin_lock(lock); /* lock_local */
14754 +#endif
14755 +       return flags;
14758 +/* FIXME: we need rt_spin_lock_nest_lock */
14759 +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
14761 +#define spin_unlock(lock)                      rt_spin_unlock(lock)
14762 +#define spin_unlock_no_deboost(lock)           rt_spin_unlock_no_deboost(lock)
14764 +#define spin_unlock_bh(lock)                           \
14765 +       do {                                            \
14766 +               rt_spin_unlock(lock);                   \
14767 +               local_bh_enable();                      \
14768 +       } while (0)
14770 +#define spin_unlock_irq(lock)          spin_unlock(lock)
14772 +#define spin_unlock_irqrestore(lock, flags)            \
14773 +       do {                                            \
14774 +               typecheck(unsigned long, flags);        \
14775 +               (void) flags;                           \
14776 +               spin_unlock(lock);                      \
14777 +       } while (0)
14779 +#define spin_trylock_bh(lock)  __cond_lock(lock, rt_spin_trylock_bh(lock))
14780 +#define spin_trylock_irq(lock) spin_trylock(lock)
14782 +#define spin_trylock_irqsave(lock, flags)      \
14783 +       rt_spin_trylock_irqsave(lock, &(flags))
14785 +#define spin_unlock_wait(lock)         rt_spin_unlock_wait(lock)
14787 +#ifdef CONFIG_GENERIC_LOCKBREAK
14788 +# define spin_is_contended(lock)       ((lock)->break_lock)
14789 +#else
14790 +# define spin_is_contended(lock)       (((void)(lock), 0))
14791 +#endif
14793 +static inline int spin_can_lock(spinlock_t *lock)
14795 +       return !rt_mutex_is_locked(&lock->lock);
14798 +static inline int spin_is_locked(spinlock_t *lock)
14800 +       return rt_mutex_is_locked(&lock->lock);
14803 +static inline void assert_spin_locked(spinlock_t *lock)
14805 +       BUG_ON(!spin_is_locked(lock));
14808 +#define atomic_dec_and_lock(atomic, lock) \
14809 +       atomic_dec_and_spin_lock(atomic, lock)
14811 +#endif
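
A brief sketch, not part of the patch, of what the mapping above means for ordinary spinlock users on PREEMPT_RT_FULL: spin_lock() becomes rt_spin_lock(), a sleeping, priority-inheriting lock, and spin_lock_irqsave() no longer disables interrupts; the flags argument is only typechecked and set to 0. Names below are hypothetical.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(sample_lock);
static int sample_state;

static void sample_set_state(int new_state)
{
        unsigned long flags;

        /* Expands to rt_spin_lock() on RT; may sleep on contention. */
        spin_lock_irqsave(&sample_lock, flags);
        sample_state = new_state;
        spin_unlock_irqrestore(&sample_lock, flags);
}
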
14812 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types.h linux-4.4.46/include/linux/spinlock_types.h
14813 --- linux-4.4.46.orig/include/linux/spinlock_types.h    2017-02-01 08:31:11.000000000 +0100
14814 +++ linux-4.4.46/include/linux/spinlock_types.h 2017-02-03 17:18:10.911618440 +0100
14815 @@ -9,80 +9,15 @@
14816   * Released under the General Public License (GPL).
14817   */
14819 -#if defined(CONFIG_SMP)
14820 -# include <asm/spinlock_types.h>
14821 -#else
14822 -# include <linux/spinlock_types_up.h>
14823 -#endif
14825 -#include <linux/lockdep.h>
14827 -typedef struct raw_spinlock {
14828 -       arch_spinlock_t raw_lock;
14829 -#ifdef CONFIG_GENERIC_LOCKBREAK
14830 -       unsigned int break_lock;
14831 -#endif
14832 -#ifdef CONFIG_DEBUG_SPINLOCK
14833 -       unsigned int magic, owner_cpu;
14834 -       void *owner;
14835 -#endif
14836 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
14837 -       struct lockdep_map dep_map;
14838 -#endif
14839 -} raw_spinlock_t;
14841 -#define SPINLOCK_MAGIC         0xdead4ead
14843 -#define SPINLOCK_OWNER_INIT    ((void *)-1L)
14845 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
14846 -# define SPIN_DEP_MAP_INIT(lockname)   .dep_map = { .name = #lockname }
14847 -#else
14848 -# define SPIN_DEP_MAP_INIT(lockname)
14849 -#endif
14850 +#include <linux/spinlock_types_raw.h>
14852 -#ifdef CONFIG_DEBUG_SPINLOCK
14853 -# define SPIN_DEBUG_INIT(lockname)             \
14854 -       .magic = SPINLOCK_MAGIC,                \
14855 -       .owner_cpu = -1,                        \
14856 -       .owner = SPINLOCK_OWNER_INIT,
14857 +#ifndef CONFIG_PREEMPT_RT_FULL
14858 +# include <linux/spinlock_types_nort.h>
14859 +# include <linux/rwlock_types.h>
14860  #else
14861 -# define SPIN_DEBUG_INIT(lockname)
14862 +# include <linux/rtmutex.h>
14863 +# include <linux/spinlock_types_rt.h>
14864 +# include <linux/rwlock_types_rt.h>
14865  #endif
14867 -#define __RAW_SPIN_LOCK_INITIALIZER(lockname)  \
14868 -       {                                       \
14869 -       .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,  \
14870 -       SPIN_DEBUG_INIT(lockname)               \
14871 -       SPIN_DEP_MAP_INIT(lockname) }
14873 -#define __RAW_SPIN_LOCK_UNLOCKED(lockname)     \
14874 -       (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
14876 -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
14878 -typedef struct spinlock {
14879 -       union {
14880 -               struct raw_spinlock rlock;
14882 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
14883 -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
14884 -               struct {
14885 -                       u8 __padding[LOCK_PADSIZE];
14886 -                       struct lockdep_map dep_map;
14887 -               };
14888 -#endif
14889 -       };
14890 -} spinlock_t;
14892 -#define __SPIN_LOCK_INITIALIZER(lockname) \
14893 -       { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
14895 -#define __SPIN_LOCK_UNLOCKED(lockname) \
14896 -       (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
14898 -#define DEFINE_SPINLOCK(x)     spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
14900 -#include <linux/rwlock_types.h>
14902  #endif /* __LINUX_SPINLOCK_TYPES_H */
14903 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types_nort.h linux-4.4.46/include/linux/spinlock_types_nort.h
14904 --- linux-4.4.46.orig/include/linux/spinlock_types_nort.h       1970-01-01 01:00:00.000000000 +0100
14905 +++ linux-4.4.46/include/linux/spinlock_types_nort.h    2017-02-03 17:18:10.911618440 +0100
14906 @@ -0,0 +1,33 @@
14907 +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
14908 +#define __LINUX_SPINLOCK_TYPES_NORT_H
14910 +#ifndef __LINUX_SPINLOCK_TYPES_H
14911 +#error "Do not include directly. Include spinlock_types.h instead"
14912 +#endif
14915 + * The non-RT version maps spinlocks to raw_spinlocks
14916 + */
14917 +typedef struct spinlock {
14918 +       union {
14919 +               struct raw_spinlock rlock;
14921 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
14922 +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
14923 +               struct {
14924 +                       u8 __padding[LOCK_PADSIZE];
14925 +                       struct lockdep_map dep_map;
14926 +               };
14927 +#endif
14928 +       };
14929 +} spinlock_t;
14931 +#define __SPIN_LOCK_INITIALIZER(lockname) \
14932 +       { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
14934 +#define __SPIN_LOCK_UNLOCKED(lockname) \
14935 +       (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
14937 +#define DEFINE_SPINLOCK(x)     spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
14939 +#endif
14940 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types_raw.h linux-4.4.46/include/linux/spinlock_types_raw.h
14941 --- linux-4.4.46.orig/include/linux/spinlock_types_raw.h        1970-01-01 01:00:00.000000000 +0100
14942 +++ linux-4.4.46/include/linux/spinlock_types_raw.h     2017-02-03 17:18:10.911618440 +0100
14943 @@ -0,0 +1,56 @@
14944 +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
14945 +#define __LINUX_SPINLOCK_TYPES_RAW_H
14947 +#if defined(CONFIG_SMP)
14948 +# include <asm/spinlock_types.h>
14949 +#else
14950 +# include <linux/spinlock_types_up.h>
14951 +#endif
14953 +#include <linux/lockdep.h>
14955 +typedef struct raw_spinlock {
14956 +       arch_spinlock_t raw_lock;
14957 +#ifdef CONFIG_GENERIC_LOCKBREAK
14958 +       unsigned int break_lock;
14959 +#endif
14960 +#ifdef CONFIG_DEBUG_SPINLOCK
14961 +       unsigned int magic, owner_cpu;
14962 +       void *owner;
14963 +#endif
14964 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
14965 +       struct lockdep_map dep_map;
14966 +#endif
14967 +} raw_spinlock_t;
14969 +#define SPINLOCK_MAGIC         0xdead4ead
14971 +#define SPINLOCK_OWNER_INIT    ((void *)-1L)
14973 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
14974 +# define SPIN_DEP_MAP_INIT(lockname)   .dep_map = { .name = #lockname }
14975 +#else
14976 +# define SPIN_DEP_MAP_INIT(lockname)
14977 +#endif
14979 +#ifdef CONFIG_DEBUG_SPINLOCK
14980 +# define SPIN_DEBUG_INIT(lockname)             \
14981 +       .magic = SPINLOCK_MAGIC,                \
14982 +       .owner_cpu = -1,                        \
14983 +       .owner = SPINLOCK_OWNER_INIT,
14984 +#else
14985 +# define SPIN_DEBUG_INIT(lockname)
14986 +#endif
14988 +#define __RAW_SPIN_LOCK_INITIALIZER(lockname)  \
14989 +       {                                       \
14990 +       .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,  \
14991 +       SPIN_DEBUG_INIT(lockname)               \
14992 +       SPIN_DEP_MAP_INIT(lockname) }
14994 +#define __RAW_SPIN_LOCK_UNLOCKED(lockname)     \
14995 +       (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
14997 +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
14999 +#endif
15000 diff -Nur linux-4.4.46.orig/include/linux/spinlock_types_rt.h linux-4.4.46/include/linux/spinlock_types_rt.h
15001 --- linux-4.4.46.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
15002 +++ linux-4.4.46/include/linux/spinlock_types_rt.h      2017-02-03 17:18:10.911618440 +0100
15003 @@ -0,0 +1,51 @@
15004 +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
15005 +#define __LINUX_SPINLOCK_TYPES_RT_H
15007 +#ifndef __LINUX_SPINLOCK_TYPES_H
15008 +#error "Do not include directly. Include spinlock_types.h instead"
15009 +#endif
15011 +#include <linux/cache.h>
15014 + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
15015 + */
15016 +typedef struct spinlock {
15017 +       struct rt_mutex         lock;
15018 +       unsigned int            break_lock;
15019 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
15020 +       struct lockdep_map      dep_map;
15021 +#endif
15022 +} spinlock_t;
15024 +#ifdef CONFIG_DEBUG_RT_MUTEXES
15025 +# define __RT_SPIN_INITIALIZER(name) \
15026 +       { \
15027 +       .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
15028 +       .save_state = 1, \
15029 +       .file = __FILE__, \
15030 +       .line = __LINE__ , \
15031 +       }
15032 +#else
15033 +# define __RT_SPIN_INITIALIZER(name) \
15034 +       {                                                               \
15035 +       .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),          \
15036 +       .save_state = 1, \
15037 +       }
15038 +#endif
15041 +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
15044 +#define __SPIN_LOCK_UNLOCKED(name)                     \
15045 +       { .lock = __RT_SPIN_INITIALIZER(name.lock),             \
15046 +         SPIN_DEP_MAP_INIT(name) }
15048 +#define __DEFINE_SPINLOCK(name) \
15049 +       spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
15051 +#define DEFINE_SPINLOCK(name) \
15052 +       spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
15054 +#endif
15055 diff -Nur linux-4.4.46.orig/include/linux/srcu.h linux-4.4.46/include/linux/srcu.h
15056 --- linux-4.4.46.orig/include/linux/srcu.h      2017-02-01 08:31:11.000000000 +0100
15057 +++ linux-4.4.46/include/linux/srcu.h   2017-02-03 17:18:10.911618440 +0100
15058 @@ -84,10 +84,10 @@
15060  void process_srcu(struct work_struct *work);
15062 -#define __SRCU_STRUCT_INIT(name)                                       \
15063 +#define __SRCU_STRUCT_INIT(name, pcpu_name)                            \
15064         {                                                               \
15065                 .completed = -300,                                      \
15066 -               .per_cpu_ref = &name##_srcu_array,                      \
15067 +               .per_cpu_ref = &pcpu_name,                              \
15068                 .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),    \
15069                 .running = false,                                       \
15070                 .batch_queue = RCU_BATCH_INIT(name.batch_queue),        \
15071 @@ -104,7 +104,7 @@
15072   */
15073  #define __DEFINE_SRCU(name, is_static)                                 \
15074         static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
15075 -       is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
15076 +       is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
15077  #define DEFINE_SRCU(name)              __DEFINE_SRCU(name, /* not static */)
15078  #define DEFINE_STATIC_SRCU(name)       __DEFINE_SRCU(name, static)
15080 diff -Nur linux-4.4.46.orig/include/linux/suspend.h linux-4.4.46/include/linux/suspend.h
15081 --- linux-4.4.46.orig/include/linux/suspend.h   2017-02-01 08:31:11.000000000 +0100
15082 +++ linux-4.4.46/include/linux/suspend.h        2017-02-03 17:18:10.911618440 +0100
15083 @@ -194,6 +194,12 @@
15084         void (*end)(void);
15085  };
15087 +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
15088 +extern bool pm_in_action;
15089 +#else
15090 +# define pm_in_action false
15091 +#endif
15093  #ifdef CONFIG_SUSPEND
15094  /**
15095   * suspend_set_ops - set platform dependent suspend operations
15096 diff -Nur linux-4.4.46.orig/include/linux/swait.h linux-4.4.46/include/linux/swait.h
15097 --- linux-4.4.46.orig/include/linux/swait.h     1970-01-01 01:00:00.000000000 +0100
15098 +++ linux-4.4.46/include/linux/swait.h  2017-02-03 17:18:10.915618595 +0100
15099 @@ -0,0 +1,173 @@
15100 +#ifndef _LINUX_SWAIT_H
15101 +#define _LINUX_SWAIT_H
15103 +#include <linux/list.h>
15104 +#include <linux/stddef.h>
15105 +#include <linux/spinlock.h>
15106 +#include <asm/current.h>
15109 + * Simple wait queues
15110 + *
15111 + * While these are very similar to the other/complex wait queues (wait.h) the
15112 + * most important difference is that the simple waitqueue allows for
15113 + * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
15114 + * times.
15115 + *
15116 + * In order to make this so, we had to drop a fair number of features of the
15117 + * other waitqueue code; notably:
15118 + *
15119 + *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
15120 + *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
15121 + *    sleeper state.
15122 + *
15123 + *  - the exclusive mode; because this requires preserving the list order
15124 + *    and this is hard.
15125 + *
15126 + *  - custom wake functions; because you cannot give any guarantees about
15127 + *    random code.
15128 + *
15129 + * As a side effect of this; the data structures are slimmer.
15130 + *
15131 + * One would recommend using this wait queue where possible.
15132 + */
15134 +struct task_struct;
15136 +struct swait_queue_head {
15137 +       raw_spinlock_t          lock;
15138 +       struct list_head        task_list;
15141 +struct swait_queue {
15142 +       struct task_struct      *task;
15143 +       struct list_head        task_list;
15146 +#define __SWAITQUEUE_INITIALIZER(name) {                               \
15147 +       .task           = current,                                      \
15148 +       .task_list      = LIST_HEAD_INIT((name).task_list),             \
15151 +#define DECLARE_SWAITQUEUE(name)                                       \
15152 +       struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
15154 +#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {                         \
15155 +       .lock           = __RAW_SPIN_LOCK_UNLOCKED(name.lock),          \
15156 +       .task_list      = LIST_HEAD_INIT((name).task_list),             \
15159 +#define DECLARE_SWAIT_QUEUE_HEAD(name)                                 \
15160 +       struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
15162 +extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
15163 +                                   struct lock_class_key *key);
15165 +#define init_swait_queue_head(q)                               \
15166 +       do {                                                    \
15167 +               static struct lock_class_key __key;             \
15168 +               __init_swait_queue_head((q), #q, &__key);       \
15169 +       } while (0)
15171 +#ifdef CONFIG_LOCKDEP
15172 +# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)                 \
15173 +       ({ init_swait_queue_head(&name); name; })
15174 +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)                        \
15175 +       struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
15176 +#else
15177 +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)                        \
15178 +       DECLARE_SWAIT_QUEUE_HEAD(name)
15179 +#endif
15181 +static inline int swait_active(struct swait_queue_head *q)
15183 +       return !list_empty(&q->task_list);
15186 +extern void swake_up(struct swait_queue_head *q);
15187 +extern void swake_up_all(struct swait_queue_head *q);
15188 +extern void swake_up_locked(struct swait_queue_head *q);
15189 +extern void swake_up_all_locked(struct swait_queue_head *q);
15191 +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
15192 +extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
15193 +extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
15195 +extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
15196 +extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
15198 +/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
15199 +#define ___swait_event(wq, condition, state, ret, cmd)                 \
15200 +({                                                                     \
15201 +       struct swait_queue __wait;                                      \
15202 +       long __ret = ret;                                               \
15203 +                                                                       \
15204 +       INIT_LIST_HEAD(&__wait.task_list);                              \
15205 +       for (;;) {                                                      \
15206 +               long __int = prepare_to_swait_event(&wq, &__wait, state);\
15207 +                                                                       \
15208 +               if (condition)                                          \
15209 +                       break;                                          \
15210 +                                                                       \
15211 +               if (___wait_is_interruptible(state) && __int) {         \
15212 +                       __ret = __int;                                  \
15213 +                       break;                                          \
15214 +               }                                                       \
15215 +                                                                       \
15216 +               cmd;                                                    \
15217 +       }                                                               \
15218 +       finish_swait(&wq, &__wait);                                     \
15219 +       __ret;                                                          \
15222 +#define __swait_event(wq, condition)                                   \
15223 +       (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,    \
15224 +                           schedule())
15226 +#define swait_event(wq, condition)                                     \
15227 +do {                                                                   \
15228 +       if (condition)                                                  \
15229 +               break;                                                  \
15230 +       __swait_event(wq, condition);                                   \
15231 +} while (0)
15233 +#define __swait_event_timeout(wq, condition, timeout)                  \
15234 +       ___swait_event(wq, ___wait_cond_timeout(condition),             \
15235 +                     TASK_UNINTERRUPTIBLE, timeout,                    \
15236 +                     __ret = schedule_timeout(__ret))
15238 +#define swait_event_timeout(wq, condition, timeout)                    \
15239 +({                                                                     \
15240 +       long __ret = timeout;                                           \
15241 +       if (!___wait_cond_timeout(condition))                           \
15242 +               __ret = __swait_event_timeout(wq, condition, timeout);  \
15243 +       __ret;                                                          \
15246 +#define __swait_event_interruptible(wq, condition)                     \
15247 +       ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,            \
15248 +                     schedule())
15250 +#define swait_event_interruptible(wq, condition)                       \
15251 +({                                                                     \
15252 +       int __ret = 0;                                                  \
15253 +       if (!(condition))                                               \
15254 +               __ret = __swait_event_interruptible(wq, condition);     \
15255 +       __ret;                                                          \
15258 +#define __swait_event_interruptible_timeout(wq, condition, timeout)    \
15259 +       ___swait_event(wq, ___wait_cond_timeout(condition),             \
15260 +                     TASK_INTERRUPTIBLE, timeout,                      \
15261 +                     __ret = schedule_timeout(__ret))
15263 +#define swait_event_interruptible_timeout(wq, condition, timeout)      \
15264 +({                                                                     \
15265 +       long __ret = timeout;                                           \
15266 +       if (!___wait_cond_timeout(condition))                           \
15267 +               __ret = __swait_event_interruptible_timeout(wq,         \
15268 +                                               condition, timeout);    \
15269 +       __ret;                                                          \
15272 +#endif /* _LINUX_SWAIT_H */
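
A minimal sketch, not part of the patch, pairing one waiter with one waker through the simple waitqueue API declared above; the queue, flag and function names are hypothetical.

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(sample_wq);
static bool sample_ready;

static int sample_waiter(void *arg)
{
        /* Sleeps interruptibly until sample_ready is observed true. */
        return swait_event_interruptible(sample_wq, sample_ready);
}

static void sample_waker(void)
{
        sample_ready = true;
        /* Wakes a single waiter; all swait wakeups are TASK_NORMAL. */
        swake_up(&sample_wq);
}
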
15273 diff -Nur linux-4.4.46.orig/include/linux/swap.h linux-4.4.46/include/linux/swap.h
15274 --- linux-4.4.46.orig/include/linux/swap.h      2017-02-01 08:31:11.000000000 +0100
15275 +++ linux-4.4.46/include/linux/swap.h   2017-02-03 17:18:10.915618595 +0100
15276 @@ -11,6 +11,7 @@
15277  #include <linux/fs.h>
15278  #include <linux/atomic.h>
15279  #include <linux/page-flags.h>
15280 +#include <linux/locallock.h>
15281  #include <asm/page.h>
15283  struct notifier_block;
15284 @@ -252,7 +253,8 @@
15285  void *workingset_eviction(struct address_space *mapping, struct page *page);
15286  bool workingset_refault(void *shadow);
15287  void workingset_activation(struct page *page);
15288 -extern struct list_lru workingset_shadow_nodes;
15289 +extern struct list_lru __workingset_shadow_nodes;
15290 +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
15292  static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
15294 @@ -298,6 +300,7 @@
15297  /* linux/mm/swap.c */
15298 +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
15299  extern void lru_cache_add(struct page *);
15300  extern void lru_cache_add_anon(struct page *page);
15301  extern void lru_cache_add_file(struct page *page);
15302 diff -Nur linux-4.4.46.orig/include/linux/swork.h linux-4.4.46/include/linux/swork.h
15303 --- linux-4.4.46.orig/include/linux/swork.h     1970-01-01 01:00:00.000000000 +0100
15304 +++ linux-4.4.46/include/linux/swork.h  2017-02-03 17:18:10.915618595 +0100
15305 @@ -0,0 +1,24 @@
15306 +#ifndef _LINUX_SWORK_H
15307 +#define _LINUX_SWORK_H
15309 +#include <linux/list.h>
15311 +struct swork_event {
15312 +       struct list_head item;
15313 +       unsigned long flags;
15314 +       void (*func)(struct swork_event *);
15317 +static inline void INIT_SWORK(struct swork_event *event,
15318 +                             void (*func)(struct swork_event *))
15320 +       event->flags = 0;
15321 +       event->func = func;
15324 +bool swork_queue(struct swork_event *sev);
15326 +int swork_get(void);
15327 +void swork_put(void);
15329 +#endif /* _LINUX_SWORK_H */
15330 diff -Nur linux-4.4.46.orig/include/linux/thread_info.h linux-4.4.46/include/linux/thread_info.h
15331 --- linux-4.4.46.orig/include/linux/thread_info.h       2017-02-01 08:31:11.000000000 +0100
15332 +++ linux-4.4.46/include/linux/thread_info.h    2017-02-03 17:18:10.915618595 +0100
15333 @@ -102,7 +102,17 @@
15334  #define test_thread_flag(flag) \
15335         test_ti_thread_flag(current_thread_info(), flag)
15337 -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
15338 +#ifdef CONFIG_PREEMPT_LAZY
15339 +#define tif_need_resched()     (test_thread_flag(TIF_NEED_RESCHED) || \
15340 +                                test_thread_flag(TIF_NEED_RESCHED_LAZY))
15341 +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
15342 +#define tif_need_resched_lazy()        (test_thread_flag(TIF_NEED_RESCHED_LAZY))
15344 +#else
15345 +#define tif_need_resched()     test_thread_flag(TIF_NEED_RESCHED)
15346 +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
15347 +#define tif_need_resched_lazy()        0
15348 +#endif
15350  #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
15351  /*
15352 diff -Nur linux-4.4.46.orig/include/linux/timer.h linux-4.4.46/include/linux/timer.h
15353 --- linux-4.4.46.orig/include/linux/timer.h     2017-02-01 08:31:11.000000000 +0100
15354 +++ linux-4.4.46/include/linux/timer.h  2017-02-03 17:18:10.915618595 +0100
15355 @@ -225,7 +225,7 @@
15357  extern int try_to_del_timer_sync(struct timer_list *timer);
15359 -#ifdef CONFIG_SMP
15360 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
15361    extern int del_timer_sync(struct timer_list *timer);
15362  #else
15363  # define del_timer_sync(t)             del_timer(t)
15364 diff -Nur linux-4.4.46.orig/include/linux/trace_events.h linux-4.4.46/include/linux/trace_events.h
15365 --- linux-4.4.46.orig/include/linux/trace_events.h      2017-02-01 08:31:11.000000000 +0100
15366 +++ linux-4.4.46/include/linux/trace_events.h   2017-02-03 17:18:10.915618595 +0100
15367 @@ -66,6 +66,9 @@
15368         unsigned char           flags;
15369         unsigned char           preempt_count;
15370         int                     pid;
15371 +       unsigned short          migrate_disable;
15372 +       unsigned short          padding;
15373 +       unsigned char           preempt_lazy_count;
15374  };
15376  #define TRACE_EVENT_TYPE_MAX                                           \
15377 diff -Nur linux-4.4.46.orig/include/linux/uaccess.h linux-4.4.46/include/linux/uaccess.h
15378 --- linux-4.4.46.orig/include/linux/uaccess.h   2017-02-01 08:31:11.000000000 +0100
15379 +++ linux-4.4.46/include/linux/uaccess.h        2017-02-03 17:18:10.915618595 +0100
15380 @@ -24,6 +24,7 @@
15381   */
15382  static inline void pagefault_disable(void)
15384 +       migrate_disable();
15385         pagefault_disabled_inc();
15386         /*
15387          * make sure to have issued the store before a pagefault
15388 @@ -40,6 +41,7 @@
15389          */
15390         barrier();
15391         pagefault_disabled_dec();
15392 +       migrate_enable();
15395  /*
15396 diff -Nur linux-4.4.46.orig/include/linux/uprobes.h linux-4.4.46/include/linux/uprobes.h
15397 --- linux-4.4.46.orig/include/linux/uprobes.h   2017-02-01 08:31:11.000000000 +0100
15398 +++ linux-4.4.46/include/linux/uprobes.h        2017-02-03 17:18:10.915618595 +0100
15399 @@ -27,6 +27,7 @@
15400  #include <linux/errno.h>
15401  #include <linux/rbtree.h>
15402  #include <linux/types.h>
15403 +#include <linux/wait.h>
15405  struct vm_area_struct;
15406  struct mm_struct;
15407 diff -Nur linux-4.4.46.orig/include/linux/vmstat.h linux-4.4.46/include/linux/vmstat.h
15408 --- linux-4.4.46.orig/include/linux/vmstat.h    2017-02-01 08:31:11.000000000 +0100
15409 +++ linux-4.4.46/include/linux/vmstat.h 2017-02-03 17:18:10.915618595 +0100
15410 @@ -33,7 +33,9 @@
15411   */
15412  static inline void __count_vm_event(enum vm_event_item item)
15414 +       preempt_disable_rt();
15415         raw_cpu_inc(vm_event_states.event[item]);
15416 +       preempt_enable_rt();
15419  static inline void count_vm_event(enum vm_event_item item)
15420 @@ -43,7 +45,9 @@
15422  static inline void __count_vm_events(enum vm_event_item item, long delta)
15424 +       preempt_disable_rt();
15425         raw_cpu_add(vm_event_states.event[item], delta);
15426 +       preempt_enable_rt();
15429  static inline void count_vm_events(enum vm_event_item item, long delta)
15430 diff -Nur linux-4.4.46.orig/include/linux/wait.h linux-4.4.46/include/linux/wait.h
15431 --- linux-4.4.46.orig/include/linux/wait.h      2017-02-01 08:31:11.000000000 +0100
15432 +++ linux-4.4.46/include/linux/wait.h   2017-02-03 17:18:10.915618595 +0100
15433 @@ -8,6 +8,7 @@
15434  #include <linux/spinlock.h>
15435  #include <asm/current.h>
15436  #include <uapi/linux/wait.h>
15437 +#include <linux/atomic.h>
15439  typedef struct __wait_queue wait_queue_t;
15440  typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
15441 diff -Nur linux-4.4.46.orig/include/net/dst.h linux-4.4.46/include/net/dst.h
15442 --- linux-4.4.46.orig/include/net/dst.h 2017-02-01 08:31:11.000000000 +0100
15443 +++ linux-4.4.46/include/net/dst.h      2017-02-03 17:18:10.915618595 +0100
15444 @@ -437,7 +437,7 @@
15445  static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
15446                                    struct sk_buff *skb)
15448 -       const struct hh_cache *hh;
15449 +       struct hh_cache *hh;
15451         if (dst->pending_confirm) {
15452                 unsigned long now = jiffies;
15453 diff -Nur linux-4.4.46.orig/include/net/neighbour.h linux-4.4.46/include/net/neighbour.h
15454 --- linux-4.4.46.orig/include/net/neighbour.h   2017-02-01 08:31:11.000000000 +0100
15455 +++ linux-4.4.46/include/net/neighbour.h        2017-02-03 17:18:10.915618595 +0100
15456 @@ -446,7 +446,7 @@
15458  #endif
15460 -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
15461 +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
15463         unsigned int seq;
15464         int hh_len;
15465 @@ -501,7 +501,7 @@
15467  #define NEIGH_CB(skb)  ((struct neighbour_cb *)(skb)->cb)
15469 -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
15470 +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
15471                                      const struct net_device *dev)
15473         unsigned int seq;
15474 diff -Nur linux-4.4.46.orig/include/net/netns/ipv4.h linux-4.4.46/include/net/netns/ipv4.h
15475 --- linux-4.4.46.orig/include/net/netns/ipv4.h  2017-02-01 08:31:11.000000000 +0100
15476 +++ linux-4.4.46/include/net/netns/ipv4.h       2017-02-03 17:18:10.915618595 +0100
15477 @@ -70,6 +70,7 @@
15479         int sysctl_icmp_echo_ignore_all;
15480         int sysctl_icmp_echo_ignore_broadcasts;
15481 +       int sysctl_icmp_echo_sysrq;
15482         int sysctl_icmp_ignore_bogus_error_responses;
15483         int sysctl_icmp_ratelimit;
15484         int sysctl_icmp_ratemask;
15485 diff -Nur linux-4.4.46.orig/include/trace/events/hist.h linux-4.4.46/include/trace/events/hist.h
15486 --- linux-4.4.46.orig/include/trace/events/hist.h       1970-01-01 01:00:00.000000000 +0100
15487 +++ linux-4.4.46/include/trace/events/hist.h    2017-02-03 17:18:10.915618595 +0100
15488 @@ -0,0 +1,73 @@
15489 +#undef TRACE_SYSTEM
15490 +#define TRACE_SYSTEM hist
15492 +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
15493 +#define _TRACE_HIST_H
15495 +#include "latency_hist.h"
15496 +#include <linux/tracepoint.h>
15498 +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
15499 +#define trace_preemptirqsoff_hist(a, b)
15500 +#define trace_preemptirqsoff_hist_rcuidle(a, b)
15501 +#else
15502 +TRACE_EVENT(preemptirqsoff_hist,
15504 +       TP_PROTO(int reason, int starthist),
15506 +       TP_ARGS(reason, starthist),
15508 +       TP_STRUCT__entry(
15509 +               __field(int,    reason)
15510 +               __field(int,    starthist)
15511 +       ),
15513 +       TP_fast_assign(
15514 +               __entry->reason         = reason;
15515 +               __entry->starthist      = starthist;
15516 +       ),
15518 +       TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
15519 +                 __entry->starthist ? "start" : "stop")
15521 +#endif
15523 +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
15524 +#define trace_hrtimer_interrupt(a, b, c, d)
15525 +#else
15526 +TRACE_EVENT(hrtimer_interrupt,
15528 +       TP_PROTO(int cpu, long long offset, struct task_struct *curr,
15529 +               struct task_struct *task),
15531 +       TP_ARGS(cpu, offset, curr, task),
15533 +       TP_STRUCT__entry(
15534 +               __field(int,            cpu)
15535 +               __field(long long,      offset)
15536 +               __array(char,           ccomm,  TASK_COMM_LEN)
15537 +               __field(int,            cprio)
15538 +               __array(char,           tcomm,  TASK_COMM_LEN)
15539 +               __field(int,            tprio)
15540 +       ),
15542 +       TP_fast_assign(
15543 +               __entry->cpu    = cpu;
15544 +               __entry->offset = offset;
15545 +               memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
15546 +               __entry->cprio  = curr->prio;
15547 +               memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
15548 +                       task != NULL ? TASK_COMM_LEN : 7);
15549 +               __entry->tprio  = task != NULL ? task->prio : -1;
15550 +       ),
15552 +       TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
15553 +               __entry->cpu, __entry->offset, __entry->ccomm,
15554 +               __entry->cprio, __entry->tcomm, __entry->tprio)
15556 +#endif
15558 +#endif /* _TRACE_HIST_H */
15560 +/* This part must be outside protection */
15561 +#include <trace/define_trace.h>
15562 diff -Nur linux-4.4.46.orig/include/trace/events/latency_hist.h linux-4.4.46/include/trace/events/latency_hist.h
15563 --- linux-4.4.46.orig/include/trace/events/latency_hist.h       1970-01-01 01:00:00.000000000 +0100
15564 +++ linux-4.4.46/include/trace/events/latency_hist.h    2017-02-03 17:18:10.915618595 +0100
15565 @@ -0,0 +1,29 @@
15566 +#ifndef _LATENCY_HIST_H
15567 +#define _LATENCY_HIST_H
15569 +enum hist_action {
15570 +       IRQS_ON,
15571 +       PREEMPT_ON,
15572 +       TRACE_STOP,
15573 +       IRQS_OFF,
15574 +       PREEMPT_OFF,
15575 +       TRACE_START,
15578 +static char *actions[] = {
15579 +       "IRQS_ON",
15580 +       "PREEMPT_ON",
15581 +       "TRACE_STOP",
15582 +       "IRQS_OFF",
15583 +       "PREEMPT_OFF",
15584 +       "TRACE_START",
15587 +static inline char *getaction(int action)
15589 +       if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
15590 +               return actions[action];
15591 +       return "unknown";
15594 +#endif /* _LATENCY_HIST_H */
15595 diff -Nur linux-4.4.46.orig/include/trace/events/writeback.h linux-4.4.46/include/trace/events/writeback.h
15596 --- linux-4.4.46.orig/include/trace/events/writeback.h  2017-02-01 08:31:11.000000000 +0100
15597 +++ linux-4.4.46/include/trace/events/writeback.h       2017-02-03 17:18:10.915618595 +0100
15598 @@ -134,58 +134,28 @@
15599  #ifdef CREATE_TRACE_POINTS
15600  #ifdef CONFIG_CGROUP_WRITEBACK
15602 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
15603 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
15605 -       return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
15606 +       return wb->memcg_css->cgroup->kn->ino;
15609 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
15611 -       struct cgroup *cgrp = wb->memcg_css->cgroup;
15612 -       char *path;
15614 -       path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
15615 -       WARN_ON_ONCE(path != buf);
15618 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
15620 -       if (wbc->wb)
15621 -               return __trace_wb_cgroup_size(wbc->wb);
15622 -       else
15623 -               return 2;
15626 -static inline void __trace_wbc_assign_cgroup(char *buf,
15627 -                                            struct writeback_control *wbc)
15628 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
15630         if (wbc->wb)
15631 -               __trace_wb_assign_cgroup(buf, wbc->wb);
15632 +               return __trace_wb_assign_cgroup(wbc->wb);
15633         else
15634 -               strcpy(buf, "/");
15635 +               return -1U;
15638  #else  /* CONFIG_CGROUP_WRITEBACK */
15640 -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
15642 -       return 2;
15645 -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
15647 -       strcpy(buf, "/");
15650 -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
15651 +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
15653 -       return 2;
15654 +       return -1U;
15657 -static inline void __trace_wbc_assign_cgroup(char *buf,
15658 -                                            struct writeback_control *wbc)
15659 +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
15661 -       strcpy(buf, "/");
15662 +       return -1U;
15665  #endif /* CONFIG_CGROUP_WRITEBACK */
15666 @@ -201,7 +171,7 @@
15667                 __array(char, name, 32)
15668                 __field(unsigned long, ino)
15669                 __field(int, sync_mode)
15670 -               __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
15671 +               __field(unsigned int, cgroup_ino)
15672         ),
15674         TP_fast_assign(
15675 @@ -209,14 +179,14 @@
15676                         dev_name(inode_to_bdi(inode)->dev), 32);
15677                 __entry->ino            = inode->i_ino;
15678                 __entry->sync_mode      = wbc->sync_mode;
15679 -               __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
15680 +               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
15681         ),
15683 -       TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
15684 +       TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
15685                 __entry->name,
15686                 __entry->ino,
15687                 __entry->sync_mode,
15688 -               __get_str(cgroup)
15689 +               __entry->cgroup_ino
15690         )
15691  );
15693 @@ -246,7 +216,7 @@
15694                 __field(int, range_cyclic)
15695                 __field(int, for_background)
15696                 __field(int, reason)
15697 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15698 +               __field(unsigned int, cgroup_ino)
15699         ),
15700         TP_fast_assign(
15701                 strncpy(__entry->name,
15702 @@ -258,10 +228,10 @@
15703                 __entry->range_cyclic = work->range_cyclic;
15704                 __entry->for_background = work->for_background;
15705                 __entry->reason = work->reason;
15706 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15707 +               __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15708         ),
15709         TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
15710 -                 "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
15711 +                 "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
15712                   __entry->name,
15713                   MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
15714                   __entry->nr_pages,
15715 @@ -270,7 +240,7 @@
15716                   __entry->range_cyclic,
15717                   __entry->for_background,
15718                   __print_symbolic(__entry->reason, WB_WORK_REASON),
15719 -                 __get_str(cgroup)
15720 +                 __entry->cgroup_ino
15721         )
15722  );
15723  #define DEFINE_WRITEBACK_WORK_EVENT(name) \
15724 @@ -300,15 +270,15 @@
15725         TP_ARGS(wb),
15726         TP_STRUCT__entry(
15727                 __array(char, name, 32)
15728 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15729 +               __field(unsigned int, cgroup_ino)
15730         ),
15731         TP_fast_assign(
15732                 strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
15733 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15734 +               __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
15735         ),
15736 -       TP_printk("bdi %s: cgroup=%s",
15737 +       TP_printk("bdi %s: cgroup_ino=%u",
15738                   __entry->name,
15739 -                 __get_str(cgroup)
15740 +                 __entry->cgroup_ino
15741         )
15742  );
15743  #define DEFINE_WRITEBACK_EVENT(name) \
15744 @@ -347,7 +317,7 @@
15745                 __field(int, range_cyclic)
15746                 __field(long, range_start)
15747                 __field(long, range_end)
15748 -               __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
15749 +               __field(unsigned int, cgroup_ino)
15750         ),
15752         TP_fast_assign(
15753 @@ -361,12 +331,12 @@
15754                 __entry->range_cyclic   = wbc->range_cyclic;
15755                 __entry->range_start    = (long)wbc->range_start;
15756                 __entry->range_end      = (long)wbc->range_end;
15757 -               __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
15758 +               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
15759         ),
15761         TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
15762                 "bgrd=%d reclm=%d cyclic=%d "
15763 -               "start=0x%lx end=0x%lx cgroup=%s",
15764 +               "start=0x%lx end=0x%lx cgroup_ino=%u",
15765                 __entry->name,
15766                 __entry->nr_to_write,
15767                 __entry->pages_skipped,
15768 @@ -377,7 +347,7 @@
15769                 __entry->range_cyclic,
15770                 __entry->range_start,
15771                 __entry->range_end,
15772 -               __get_str(cgroup)
15773 +               __entry->cgroup_ino
15774         )
15777 @@ -398,7 +368,7 @@
15778                 __field(long,           age)
15779                 __field(int,            moved)
15780                 __field(int,            reason)
15781 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15782 +               __field(unsigned int,   cgroup_ino)
15783         ),
15784         TP_fast_assign(
15785                 unsigned long *older_than_this = work->older_than_this;
15786 @@ -408,15 +378,15 @@
15787                                   (jiffies - *older_than_this) * 1000 / HZ : -1;
15788                 __entry->moved  = moved;
15789                 __entry->reason = work->reason;
15790 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15791 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
15792         ),
15793 -       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
15794 +       TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
15795                 __entry->name,
15796                 __entry->older, /* older_than_this in jiffies */
15797                 __entry->age,   /* older_than_this in relative milliseconds */
15798                 __entry->moved,
15799                 __print_symbolic(__entry->reason, WB_WORK_REASON),
15800 -               __get_str(cgroup)
15801 +               __entry->cgroup_ino
15802         )
15803  );
15805 @@ -484,7 +454,7 @@
15806                 __field(unsigned long,  dirty_ratelimit)
15807                 __field(unsigned long,  task_ratelimit)
15808                 __field(unsigned long,  balanced_dirty_ratelimit)
15809 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15810 +               __field(unsigned int,   cgroup_ino)
15811         ),
15813         TP_fast_assign(
15814 @@ -496,13 +466,13 @@
15815                 __entry->task_ratelimit = KBps(task_ratelimit);
15816                 __entry->balanced_dirty_ratelimit =
15817                                         KBps(wb->balanced_dirty_ratelimit);
15818 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15819 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
15820         ),
15822         TP_printk("bdi %s: "
15823                   "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
15824                   "dirty_ratelimit=%lu task_ratelimit=%lu "
15825 -                 "balanced_dirty_ratelimit=%lu cgroup=%s",
15826 +                 "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
15827                   __entry->bdi,
15828                   __entry->write_bw,            /* write bandwidth */
15829                   __entry->avg_write_bw,        /* avg write bandwidth */
15830 @@ -510,7 +480,7 @@
15831                   __entry->dirty_ratelimit,     /* base ratelimit */
15832                   __entry->task_ratelimit, /* ratelimit with position control */
15833                   __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
15834 -                 __get_str(cgroup)
15835 +                 __entry->cgroup_ino
15836         )
15837  );
15839 @@ -548,7 +518,7 @@
15840                 __field(         long,  pause)
15841                 __field(unsigned long,  period)
15842                 __field(         long,  think)
15843 -               __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
15844 +               __field(unsigned int,   cgroup_ino)
15845         ),
15847         TP_fast_assign(
15848 @@ -571,7 +541,7 @@
15849                 __entry->period         = period * 1000 / HZ;
15850                 __entry->pause          = pause * 1000 / HZ;
15851                 __entry->paused         = (jiffies - start_time) * 1000 / HZ;
15852 -               __trace_wb_assign_cgroup(__get_str(cgroup), wb);
15853 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
15854         ),
15857 @@ -580,7 +550,7 @@
15858                   "bdi_setpoint=%lu bdi_dirty=%lu "
15859                   "dirty_ratelimit=%lu task_ratelimit=%lu "
15860                   "dirtied=%u dirtied_pause=%u "
15861 -                 "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
15862 +                 "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
15863                   __entry->bdi,
15864                   __entry->limit,
15865                   __entry->setpoint,
15866 @@ -595,7 +565,7 @@
15867                   __entry->pause,       /* ms */
15868                   __entry->period,      /* ms */
15869                   __entry->think,       /* ms */
15870 -                 __get_str(cgroup)
15871 +                 __entry->cgroup_ino
15872           )
15873  );
15875 @@ -609,8 +579,7 @@
15876                 __field(unsigned long, ino)
15877                 __field(unsigned long, state)
15878                 __field(unsigned long, dirtied_when)
15879 -               __dynamic_array(char, cgroup,
15880 -                               __trace_wb_cgroup_size(inode_to_wb(inode)))
15881 +               __field(unsigned int, cgroup_ino)
15882         ),
15884         TP_fast_assign(
15885 @@ -619,16 +588,16 @@
15886                 __entry->ino            = inode->i_ino;
15887                 __entry->state          = inode->i_state;
15888                 __entry->dirtied_when   = inode->dirtied_when;
15889 -               __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
15890 +               __entry->cgroup_ino     = __trace_wb_assign_cgroup(inode_to_wb(inode));
15891         ),
15893 -       TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
15894 +       TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
15895                   __entry->name,
15896                   __entry->ino,
15897                   show_inode_state(__entry->state),
15898                   __entry->dirtied_when,
15899                   (jiffies - __entry->dirtied_when) / HZ,
15900 -                 __get_str(cgroup)
15901 +                 __entry->cgroup_ino
15902         )
15903  );
15905 @@ -684,7 +653,7 @@
15906                 __field(unsigned long, writeback_index)
15907                 __field(long, nr_to_write)
15908                 __field(unsigned long, wrote)
15909 -               __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
15910 +               __field(unsigned int, cgroup_ino)
15911         ),
15913         TP_fast_assign(
15914 @@ -696,11 +665,11 @@
15915                 __entry->writeback_index = inode->i_mapping->writeback_index;
15916                 __entry->nr_to_write    = nr_to_write;
15917                 __entry->wrote          = nr_to_write - wbc->nr_to_write;
15918 -               __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
15919 +               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
15920         ),
15922         TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
15923 -                 "index=%lu to_write=%ld wrote=%lu cgroup=%s",
15924 +                 "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
15925                   __entry->name,
15926                   __entry->ino,
15927                   show_inode_state(__entry->state),
15928 @@ -709,7 +678,7 @@
15929                   __entry->writeback_index,
15930                   __entry->nr_to_write,
15931                   __entry->wrote,
15932 -                 __get_str(cgroup)
15933 +                 __entry->cgroup_ino
15934         )
15935  );
15937 diff -Nur linux-4.4.46.orig/init/Kconfig linux-4.4.46/init/Kconfig
15938 --- linux-4.4.46.orig/init/Kconfig      2017-02-01 08:31:11.000000000 +0100
15939 +++ linux-4.4.46/init/Kconfig   2017-02-03 17:18:10.915618595 +0100
15940 @@ -498,7 +498,7 @@
15942  config RCU_EXPERT
15943         bool "Make expert-level adjustments to RCU configuration"
15944 -       default n
15945 +       default y if PREEMPT_RT_FULL
15946         help
15947           This option needs to be enabled if you wish to make
15948           expert-level adjustments to RCU configuration.  By default,
15949 @@ -614,7 +614,7 @@
15951  config RCU_FAST_NO_HZ
15952         bool "Accelerate last non-dyntick-idle CPU's grace periods"
15953 -       depends on NO_HZ_COMMON && SMP && RCU_EXPERT
15954 +       depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
15955         default n
15956         help
15957           This option permits CPUs to enter dynticks-idle state even if
15958 @@ -641,7 +641,7 @@
15959  config RCU_BOOST
15960         bool "Enable RCU priority boosting"
15961         depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
15962 -       default n
15963 +       default y if PREEMPT_RT_FULL
15964         help
15965           This option boosts the priority of preempted RCU readers that
15966           block the current preemptible RCU grace period for too long.
15967 @@ -1106,6 +1106,7 @@
15968  config RT_GROUP_SCHED
15969         bool "Group scheduling for SCHED_RR/FIFO"
15970         depends on CGROUP_SCHED
15971 +       depends on !PREEMPT_RT_FULL
15972         default n
15973         help
15974           This feature lets you explicitly allocate real CPU bandwidth
15975 @@ -1719,6 +1720,7 @@
15977  config SLAB
15978         bool "SLAB"
15979 +       depends on !PREEMPT_RT_FULL
15980         help
15981           The regular slab allocator that is established and known to work
15982           well in all environments. It organizes cache hot objects in
15983 @@ -1737,6 +1739,7 @@
15984  config SLOB
15985         depends on EXPERT
15986         bool "SLOB (Simple Allocator)"
15987 +       depends on !PREEMPT_RT_FULL
15988         help
15989            SLOB replaces the stock allocator with a drastically simpler
15990            allocator. SLOB is generally more space efficient but
15991 @@ -1746,7 +1749,7 @@
15993  config SLUB_CPU_PARTIAL
15994         default y
15995 -       depends on SLUB && SMP
15996 +       depends on SLUB && SMP && !PREEMPT_RT_FULL
15997         bool "SLUB per cpu partial cache"
15998         help
15999           Per cpu partial caches accellerate objects allocation and freeing
16000 diff -Nur linux-4.4.46.orig/init/main.c linux-4.4.46/init/main.c
16001 --- linux-4.4.46.orig/init/main.c       2017-02-01 08:31:11.000000000 +0100
16002 +++ linux-4.4.46/init/main.c    2017-02-03 17:18:10.915618595 +0100
16003 @@ -530,6 +530,7 @@
16004         setup_command_line(command_line);
16005         setup_nr_cpu_ids();
16006         setup_per_cpu_areas();
16007 +       softirq_early_init();
16008         smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
16010         build_all_zonelists(NULL, NULL);
16011 diff -Nur linux-4.4.46.orig/init/Makefile linux-4.4.46/init/Makefile
16012 --- linux-4.4.46.orig/init/Makefile     2017-02-01 08:31:11.000000000 +0100
16013 +++ linux-4.4.46/init/Makefile  2017-02-03 17:18:10.915618595 +0100
16014 @@ -33,4 +33,4 @@
16015  include/generated/compile.h: FORCE
16016         @$($(quiet)chk_compile.h)
16017         $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
16018 -       "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
16019 +       "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
16020 diff -Nur linux-4.4.46.orig/ipc/msg.c linux-4.4.46/ipc/msg.c
16021 --- linux-4.4.46.orig/ipc/msg.c 2017-02-01 08:31:11.000000000 +0100
16022 +++ linux-4.4.46/ipc/msg.c      2017-02-03 17:18:10.919618749 +0100
16023 @@ -183,20 +183,14 @@
16024         }
16027 -static void expunge_all(struct msg_queue *msq, int res)
16028 +static void expunge_all(struct msg_queue *msq, int res,
16029 +                       struct wake_q_head *wake_q)
16031         struct msg_receiver *msr, *t;
16033         list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
16034 -               msr->r_msg = NULL; /* initialize expunge ordering */
16035 -               wake_up_process(msr->r_tsk);
16036 -               /*
16037 -                * Ensure that the wakeup is visible before setting r_msg as
16038 -                * the receiving end depends on it: either spinning on a nil,
16039 -                * or dealing with -EAGAIN cases. See lockless receive part 1
16040 -                * and 2 in do_msgrcv().
16041 -                */
16042 -               smp_wmb(); /* barrier (B) */
16044 +               wake_q_add(wake_q, msr->r_tsk);
16045                 msr->r_msg = ERR_PTR(res);
16046         }
16048 @@ -213,11 +207,13 @@
16050         struct msg_msg *msg, *t;
16051         struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
16052 +       WAKE_Q(wake_q);
16054 -       expunge_all(msq, -EIDRM);
16055 +       expunge_all(msq, -EIDRM, &wake_q);
16056         ss_wakeup(&msq->q_senders, 1);
16057         msg_rmid(ns, msq);
16058         ipc_unlock_object(&msq->q_perm);
16059 +       wake_up_q(&wake_q);
16060         rcu_read_unlock();
16062         list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
16063 @@ -342,6 +338,7 @@
16064         struct kern_ipc_perm *ipcp;
16065         struct msqid64_ds uninitialized_var(msqid64);
16066         struct msg_queue *msq;
16067 +       WAKE_Q(wake_q);
16068         int err;
16070         if (cmd == IPC_SET) {
16071 @@ -389,7 +386,7 @@
16072                 /* sleeping receivers might be excluded by
16073                  * stricter permissions.
16074                  */
16075 -               expunge_all(msq, -EAGAIN);
16076 +               expunge_all(msq, -EAGAIN, &wake_q);
16077                 /* sleeping senders might be able to send
16078                  * due to a larger queue size.
16079                  */
16080 @@ -402,6 +399,7 @@
16082  out_unlock0:
16083         ipc_unlock_object(&msq->q_perm);
16084 +       wake_up_q(&wake_q);
16085  out_unlock1:
16086         rcu_read_unlock();
16087  out_up:
16088 @@ -566,7 +564,8 @@
16089         return 0;
16092 -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
16093 +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
16094 +                                struct wake_q_head *wake_q)
16096         struct msg_receiver *msr, *t;
16098 @@ -577,27 +576,13 @@
16100                         list_del(&msr->r_list);
16101                         if (msr->r_maxsize < msg->m_ts) {
16102 -                               /* initialize pipelined send ordering */
16103 -                               msr->r_msg = NULL;
16104 -                               wake_up_process(msr->r_tsk);
16105 -                               /* barrier (B) see barrier comment below */
16106 -                               smp_wmb();
16107 +                               wake_q_add(wake_q, msr->r_tsk);
16108                                 msr->r_msg = ERR_PTR(-E2BIG);
16109                         } else {
16110 -                               msr->r_msg = NULL;
16111                                 msq->q_lrpid = task_pid_vnr(msr->r_tsk);
16112                                 msq->q_rtime = get_seconds();
16113 -                               wake_up_process(msr->r_tsk);
16114 -                               /*
16115 -                                * Ensure that the wakeup is visible before
16116 -                                * setting r_msg, as the receiving can otherwise
16117 -                                * exit - once r_msg is set, the receiver can
16118 -                                * continue. See lockless receive part 1 and 2
16119 -                                * in do_msgrcv(). Barrier (B).
16120 -                                */
16121 -                               smp_wmb();
16122 +                               wake_q_add(wake_q, msr->r_tsk);
16123                                 msr->r_msg = msg;
16125                                 return 1;
16126                         }
16127                 }
16128 @@ -613,6 +598,7 @@
16129         struct msg_msg *msg;
16130         int err;
16131         struct ipc_namespace *ns;
16132 +       WAKE_Q(wake_q);
16134         ns = current->nsproxy->ipc_ns;
16136 @@ -698,7 +684,7 @@
16137         msq->q_lspid = task_tgid_vnr(current);
16138         msq->q_stime = get_seconds();
16140 -       if (!pipelined_send(msq, msg)) {
16141 +       if (!pipelined_send(msq, msg, &wake_q)) {
16142                 /* no one is waiting for this message, enqueue it */
16143                 list_add_tail(&msg->m_list, &msq->q_messages);
16144                 msq->q_cbytes += msgsz;
16145 @@ -712,6 +698,7 @@
16147  out_unlock0:
16148         ipc_unlock_object(&msq->q_perm);
16149 +       wake_up_q(&wake_q);
16150  out_unlock1:
16151         rcu_read_unlock();
16152         if (msg != NULL)
16153 @@ -932,57 +919,25 @@
16154                 rcu_read_lock();
16156                 /* Lockless receive, part 2:
16157 -                * Wait until pipelined_send or expunge_all are outside of
16158 -                * wake_up_process(). There is a race with exit(), see
16159 -                * ipc/mqueue.c for the details. The correct serialization
16160 -                * ensures that a receiver cannot continue without the wakeup
16161 -                * being visibible _before_ setting r_msg:
16162 +                * The work in pipelined_send() and expunge_all():
16163 +                * - Set pointer to message
16164 +                * - Queue the receiver task for later wakeup
16165 +                * - Wake up the process after the lock is dropped.
16166                  *
16167 -                * CPU 0                             CPU 1
16168 -                * <loop receiver>
16169 -                *   smp_rmb(); (A) <-- pair -.      <waker thread>
16170 -                *   <load ->r_msg>           |        msr->r_msg = NULL;
16171 -                *                            |        wake_up_process();
16172 -                * <continue>                 `------> smp_wmb(); (B)
16173 -                *                                     msr->r_msg = msg;
16174 -                *
16175 -                * Where (A) orders the message value read and where (B) orders
16176 -                * the write to the r_msg -- done in both pipelined_send and
16177 -                * expunge_all.
16178 +                * Should the process wake up before this wakeup (due to a
16179 +                * signal) it will either see the message and continue …
16180                  */
16181 -               for (;;) {
16182 -                       /*
16183 -                        * Pairs with writer barrier in pipelined_send
16184 -                        * or expunge_all.
16185 -                        */
16186 -                       smp_rmb(); /* barrier (A) */
16187 -                       msg = (struct msg_msg *)msr_d.r_msg;
16188 -                       if (msg)
16189 -                               break;
16191 -                       /*
16192 -                        * The cpu_relax() call is a compiler barrier
16193 -                        * which forces everything in this loop to be
16194 -                        * re-loaded.
16195 -                        */
16196 -                       cpu_relax();
16197 -               }
16199 -               /* Lockless receive, part 3:
16200 -                * If there is a message or an error then accept it without
16201 -                * locking.
16202 -                */
16203 +               msg = (struct msg_msg *)msr_d.r_msg;
16204                 if (msg != ERR_PTR(-EAGAIN))
16205                         goto out_unlock1;
16207 -               /* Lockless receive, part 3:
16208 -                * Acquire the queue spinlock.
16209 -                */
16210 +                /*
16211 +                 * … or see -EAGAIN, acquire the lock to check the message
16212 +                 * again.
16213 +                 */
16214                 ipc_lock_object(&msq->q_perm);
16216 -               /* Lockless receive, part 4:
16217 -                * Repeat test after acquiring the spinlock.
16218 -                */
16219                 msg = (struct msg_msg *)msr_d.r_msg;
16220                 if (msg != ERR_PTR(-EAGAIN))
16221                         goto out_unlock0;
16222 diff -Nur linux-4.4.46.orig/ipc/sem.c linux-4.4.46/ipc/sem.c
16223 --- linux-4.4.46.orig/ipc/sem.c 2017-02-01 08:31:11.000000000 +0100
16224 +++ linux-4.4.46/ipc/sem.c      2017-02-03 17:18:10.919618749 +0100
16225 @@ -708,6 +708,13 @@
16226  static void wake_up_sem_queue_prepare(struct list_head *pt,
16227                                 struct sem_queue *q, int error)
16229 +#ifdef CONFIG_PREEMPT_RT_BASE
16230 +       struct task_struct *p = q->sleeper;
16231 +       get_task_struct(p);
16232 +       q->status = error;
16233 +       wake_up_process(p);
16234 +       put_task_struct(p);
16235 +#else
16236         if (list_empty(pt)) {
16237                 /*
16238                  * Hold preempt off so that we don't get preempted and have the
16239 @@ -719,6 +726,7 @@
16240         q->pid = error;
16242         list_add_tail(&q->list, pt);
16243 +#endif
16246  /**
16247 @@ -732,6 +740,7 @@
16248   */
16249  static void wake_up_sem_queue_do(struct list_head *pt)
16251 +#ifndef CONFIG_PREEMPT_RT_BASE
16252         struct sem_queue *q, *t;
16253         int did_something;
16255 @@ -744,6 +753,7 @@
16256         }
16257         if (did_something)
16258                 preempt_enable();
16259 +#endif
16262  static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
16263 diff -Nur linux-4.4.46.orig/kernel/cgroup.c linux-4.4.46/kernel/cgroup.c
16264 --- linux-4.4.46.orig/kernel/cgroup.c   2017-02-01 08:31:11.000000000 +0100
16265 +++ linux-4.4.46/kernel/cgroup.c        2017-02-03 17:18:10.919618749 +0100
16266 @@ -4740,10 +4740,10 @@
16267         queue_work(cgroup_destroy_wq, &css->destroy_work);
16270 -static void css_release_work_fn(struct work_struct *work)
16271 +static void css_release_work_fn(struct swork_event *sev)
16273         struct cgroup_subsys_state *css =
16274 -               container_of(work, struct cgroup_subsys_state, destroy_work);
16275 +               container_of(sev, struct cgroup_subsys_state, destroy_swork);
16276         struct cgroup_subsys *ss = css->ss;
16277         struct cgroup *cgrp = css->cgroup;
16279 @@ -4782,8 +4782,8 @@
16280         struct cgroup_subsys_state *css =
16281                 container_of(ref, struct cgroup_subsys_state, refcnt);
16283 -       INIT_WORK(&css->destroy_work, css_release_work_fn);
16284 -       queue_work(cgroup_destroy_wq, &css->destroy_work);
16285 +       INIT_SWORK(&css->destroy_swork, css_release_work_fn);
16286 +       swork_queue(&css->destroy_swork);
16289  static void init_and_link_css(struct cgroup_subsys_state *css,
16290 @@ -5400,6 +5400,7 @@
16291          */
16292         cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
16293         BUG_ON(!cgroup_destroy_wq);
16294 +       BUG_ON(swork_get());
16296         /*
16297          * Used to destroy pidlists and separate to serve as flush domain.
16298 diff -Nur linux-4.4.46.orig/kernel/cpu.c linux-4.4.46/kernel/cpu.c
16299 --- linux-4.4.46.orig/kernel/cpu.c      2017-02-01 08:31:11.000000000 +0100
16300 +++ linux-4.4.46/kernel/cpu.c   2017-02-03 17:18:10.919618749 +0100
16301 @@ -75,8 +75,8 @@
16302  #endif
16303  } cpu_hotplug = {
16304         .active_writer = NULL,
16305 -       .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
16306         .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
16307 +       .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
16308  #ifdef CONFIG_DEBUG_LOCK_ALLOC
16309         .dep_map = {.name = "cpu_hotplug.lock" },
16310  #endif
16311 @@ -89,6 +89,289 @@
16312  #define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
16313  #define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
16315 +/**
16316 + * hotplug_pcp - per cpu hotplug descriptor
16317 + * @unplug:    set when pin_current_cpu() needs to sync tasks
16318 + * @sync_tsk:  the task that waits for tasks to finish pinned sections
16319 + * @refcount:  counter of tasks in pinned sections
16320 + * @grab_lock: set when the tasks entering pinned sections should wait
16321 + * @synced:    notifier for @sync_tsk to tell cpu_down it's finished
16322 + * @mutex:     the mutex to make tasks wait (used when @grab_lock is true)
16323 + * @mutex_init:        zero if the mutex hasn't been initialized yet.
16324 + *
16325 + * Although @unplug and @sync_tsk may point to the same task, the @unplug
16326 + * is used as a flag and still exists after @sync_tsk has exited and
16327 + * @sync_tsk set to NULL.
16328 + */
16329 +struct hotplug_pcp {
16330 +       struct task_struct *unplug;
16331 +       struct task_struct *sync_tsk;
16332 +       int refcount;
16333 +       int grab_lock;
16334 +       struct completion synced;
16335 +       struct completion unplug_wait;
16336 +#ifdef CONFIG_PREEMPT_RT_FULL
16337 +       /*
16338 +        * Note, on PREEMPT_RT, the hotplug lock must save the state of
16339 +        * the task, otherwise the mutex will cause the task to fail
16340 +        * to sleep when required. (Because it's called from migrate_disable())
16341 +        *
16342 +        * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
16343 +        * state.
16344 +        */
16345 +       spinlock_t lock;
16346 +#else
16347 +       struct mutex mutex;
16348 +#endif
16349 +       int mutex_init;
16352 +#ifdef CONFIG_PREEMPT_RT_FULL
16353 +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
16354 +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
16355 +#else
16356 +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
16357 +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
16358 +#endif
16360 +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
16362 +/**
16363 + * pin_current_cpu - Prevent the current cpu from being unplugged
16364 + *
16365 + * Lightweight version of get_online_cpus() to prevent cpu from being
16366 + * unplugged when code runs in a migration disabled region.
16367 + *
16368 + * Must be called with preemption disabled (preempt_count = 1)!
16369 + */
16370 +void pin_current_cpu(void)
16372 +       struct hotplug_pcp *hp;
16373 +       int force = 0;
16375 +retry:
16376 +       hp = this_cpu_ptr(&hotplug_pcp);
16378 +       if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
16379 +           hp->unplug == current) {
16380 +               hp->refcount++;
16381 +               return;
16382 +       }
16383 +       if (hp->grab_lock) {
16384 +               preempt_enable();
16385 +               hotplug_lock(hp);
16386 +               hotplug_unlock(hp);
16387 +       } else {
16388 +               preempt_enable();
16389 +               /*
16390 +                * Try to push this task off of this CPU.
16391 +                */
16392 +               if (!migrate_me()) {
16393 +                       preempt_disable();
16394 +                       hp = this_cpu_ptr(&hotplug_pcp);
16395 +                       if (!hp->grab_lock) {
16396 +                               /*
16397 +                                * Just let it continue, it's already pinned
16398 +                                * or about to sleep.
16399 +                                */
16400 +                               force = 1;
16401 +                               goto retry;
16402 +                       }
16403 +                       preempt_enable();
16404 +               }
16405 +       }
16406 +       preempt_disable();
16407 +       goto retry;
16410 +/**
16411 + * unpin_current_cpu - Allow unplug of current cpu
16412 + *
16413 + * Must be called with preemption or interrupts disabled!
16414 + */
16415 +void unpin_current_cpu(void)
16417 +       struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
16419 +       WARN_ON(hp->refcount <= 0);
16421 +       /* This is safe. sync_unplug_thread is pinned to this cpu */
16422 +       if (!--hp->refcount && hp->unplug && hp->unplug != current)
16423 +               wake_up_process(hp->unplug);
16426 +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
16428 +       set_current_state(TASK_UNINTERRUPTIBLE);
16429 +       while (hp->refcount) {
16430 +               schedule_preempt_disabled();
16431 +               set_current_state(TASK_UNINTERRUPTIBLE);
16432 +       }
16435 +static int sync_unplug_thread(void *data)
16437 +       struct hotplug_pcp *hp = data;
16439 +       wait_for_completion(&hp->unplug_wait);
16440 +       preempt_disable();
16441 +       hp->unplug = current;
16442 +       wait_for_pinned_cpus(hp);
16444 +       /*
16445 +        * This thread will synchronize the cpu_down() with threads
16446 +        * that have pinned the CPU. When the pinned CPU count reaches
16447 +        * zero, we inform the cpu_down code to continue to the next step.
16448 +        */
16449 +       set_current_state(TASK_UNINTERRUPTIBLE);
16450 +       preempt_enable();
16451 +       complete(&hp->synced);
16453 +       /*
16454 +        * If all succeeds, the next step will need tasks to wait till
16455 +        * the CPU is offline before continuing. To do this, the grab_lock
16456 +        * is set and tasks going into pin_current_cpu() will block on the
16457 +        * mutex. But we still need to wait for those that are already in
16458 +        * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
16459 +        * will kick this thread out.
16460 +        */
16461 +       while (!hp->grab_lock && !kthread_should_stop()) {
16462 +               schedule();
16463 +               set_current_state(TASK_UNINTERRUPTIBLE);
16464 +       }
16466 +       /* Make sure grab_lock is seen before we see a stale completion */
16467 +       smp_mb();
16469 +       /*
16470 +        * Now just before cpu_down() enters stop machine, we need to make
16471 +        * sure all tasks that are in pinned CPU sections are out, and new
16472 +        * tasks will now grab the lock, keeping them from entering pinned
16473 +        * CPU sections.
16474 +        */
16475 +       if (!kthread_should_stop()) {
16476 +               preempt_disable();
16477 +               wait_for_pinned_cpus(hp);
16478 +               preempt_enable();
16479 +               complete(&hp->synced);
16480 +       }
16482 +       set_current_state(TASK_UNINTERRUPTIBLE);
16483 +       while (!kthread_should_stop()) {
16484 +               schedule();
16485 +               set_current_state(TASK_UNINTERRUPTIBLE);
16486 +       }
16487 +       set_current_state(TASK_RUNNING);
16489 +       /*
16490 +        * Force this thread off this CPU as it's going down and
16491 +        * we don't want any more work on this CPU.
16492 +        */
16493 +       current->flags &= ~PF_NO_SETAFFINITY;
16494 +       set_cpus_allowed_ptr(current, cpu_present_mask);
16495 +       migrate_me();
16496 +       return 0;
16499 +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
16501 +       wake_up_process(hp->sync_tsk);
16502 +       wait_for_completion(&hp->synced);
16505 +static void __cpu_unplug_wait(unsigned int cpu)
16507 +       struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16509 +       complete(&hp->unplug_wait);
16510 +       wait_for_completion(&hp->synced);
16514 + * Start the sync_unplug_thread on the target cpu and wait for it to
16515 + * complete.
16516 + */
16517 +static int cpu_unplug_begin(unsigned int cpu)
16519 +       struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16520 +       int err;
16522 +       /* Protected by cpu_hotplug.lock */
16523 +       if (!hp->mutex_init) {
16524 +#ifdef CONFIG_PREEMPT_RT_FULL
16525 +               spin_lock_init(&hp->lock);
16526 +#else
16527 +               mutex_init(&hp->mutex);
16528 +#endif
16529 +               hp->mutex_init = 1;
16530 +       }
16532 +       /* Inform the scheduler to migrate tasks off this CPU */
16533 +       tell_sched_cpu_down_begin(cpu);
16535 +       init_completion(&hp->synced);
16536 +       init_completion(&hp->unplug_wait);
16538 +       hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
16539 +       if (IS_ERR(hp->sync_tsk)) {
16540 +               err = PTR_ERR(hp->sync_tsk);
16541 +               hp->sync_tsk = NULL;
16542 +               return err;
16543 +       }
16544 +       kthread_bind(hp->sync_tsk, cpu);
16546 +       /*
16547 +        * Wait for tasks to get out of the pinned sections,
16548 +        * it's still OK if new tasks enter. Some CPU notifiers will
16549 +        * wait for tasks that are going to enter these sections and
16550 +        * we must not have them block.
16551 +        */
16552 +       wake_up_process(hp->sync_tsk);
16553 +       return 0;
16556 +static void cpu_unplug_sync(unsigned int cpu)
16558 +       struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16560 +       init_completion(&hp->synced);
16561 +       /* The completion needs to be initialized before setting grab_lock */
16562 +       smp_wmb();
16564 +       /* Grab the mutex before setting grab_lock */
16565 +       hotplug_lock(hp);
16566 +       hp->grab_lock = 1;
16568 +       /*
16569 +        * The CPU notifiers have been completed.
16570 +        * Wait for tasks to get out of pinned CPU sections and have new
16571 +        * tasks block until the CPU is completely down.
16572 +        */
16573 +       __cpu_unplug_sync(hp);
16575 +       /* All done with the sync thread */
16576 +       kthread_stop(hp->sync_tsk);
16577 +       hp->sync_tsk = NULL;
16580 +static void cpu_unplug_done(unsigned int cpu)
16582 +       struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
16584 +       hp->unplug = NULL;
16585 +       /* Let all tasks know cpu unplug is finished before cleaning up */
16586 +       smp_wmb();
16588 +       if (hp->sync_tsk)
16589 +               kthread_stop(hp->sync_tsk);
16591 +       if (hp->grab_lock) {
16592 +               hotplug_unlock(hp);
16593 +               /* protected by cpu_hotplug.lock */
16594 +               hp->grab_lock = 0;
16595 +       }
16596 +       tell_sched_cpu_down_done(cpu);
16599  void get_online_cpus(void)
16601 @@ -338,13 +621,15 @@
16602  /* Requires cpu_add_remove_lock to be held */
16603  static int _cpu_down(unsigned int cpu, int tasks_frozen)
16605 -       int err, nr_calls = 0;
16606 +       int mycpu, err, nr_calls = 0;
16607         void *hcpu = (void *)(long)cpu;
16608         unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
16609         struct take_cpu_down_param tcd_param = {
16610                 .mod = mod,
16611                 .hcpu = hcpu,
16612         };
16613 +       cpumask_var_t cpumask;
16614 +       cpumask_var_t cpumask_org;
16616         if (num_online_cpus() == 1)
16617                 return -EBUSY;
16618 @@ -352,7 +637,34 @@
16619         if (!cpu_online(cpu))
16620                 return -EINVAL;
16622 +       /* Move the downtaker off the unplug cpu */
16623 +       if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
16624 +               return -ENOMEM;
16625 +       if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL))  {
16626 +               free_cpumask_var(cpumask);
16627 +               return -ENOMEM;
16628 +       }
16630 +       cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
16631 +       cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
16632 +       set_cpus_allowed_ptr(current, cpumask);
16633 +       free_cpumask_var(cpumask);
16634 +       migrate_disable();
16635 +       mycpu = smp_processor_id();
16636 +       if (mycpu == cpu) {
16637 +               printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
16638 +               migrate_enable();
16639 +               err = -EBUSY;
16640 +               goto restore_cpus;
16641 +       }
16642 +       migrate_enable();
16644         cpu_hotplug_begin();
16645 +       err = cpu_unplug_begin(cpu);
16646 +       if (err) {
16647 +               printk("cpu_unplug_begin(%d) failed\n", cpu);
16648 +               goto out_cancel;
16649 +       }
16651         err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
16652         if (err) {
16653 @@ -378,8 +690,12 @@
16654         else
16655                 synchronize_rcu();
16657 +       __cpu_unplug_wait(cpu);
16658         smpboot_park_threads(cpu);
16660 +       /* Notifiers are done. Don't let any more tasks pin this CPU. */
16661 +       cpu_unplug_sync(cpu);
16663         /*
16664          * Prevent irq alloc/free while the dying cpu reorganizes the
16665          * interrupt affinities.
16666 @@ -424,9 +740,14 @@
16667         check_for_tasks(cpu);
16669  out_release:
16670 +       cpu_unplug_done(cpu);
16671 +out_cancel:
16672         cpu_hotplug_done();
16673         if (!err)
16674                 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
16675 +restore_cpus:
16676 +       set_cpus_allowed_ptr(current, cpumask_org);
16677 +       free_cpumask_var(cpumask_org);
16678         return err;
16681 diff -Nur linux-4.4.46.orig/kernel/debug/kdb/kdb_io.c linux-4.4.46/kernel/debug/kdb/kdb_io.c
16682 --- linux-4.4.46.orig/kernel/debug/kdb/kdb_io.c 2017-02-01 08:31:11.000000000 +0100
16683 +++ linux-4.4.46/kernel/debug/kdb/kdb_io.c      2017-02-03 17:18:10.919618749 +0100
16684 @@ -554,7 +554,6 @@
16685         int linecount;
16686         int colcount;
16687         int logging, saved_loglevel = 0;
16688 -       int saved_trap_printk;
16689         int got_printf_lock = 0;
16690         int retlen = 0;
16691         int fnd, len;
16692 @@ -565,8 +564,6 @@
16693         unsigned long uninitialized_var(flags);
16695         preempt_disable();
16696 -       saved_trap_printk = kdb_trap_printk;
16697 -       kdb_trap_printk = 0;
16699         /* Serialize kdb_printf if multiple cpus try to write at once.
16700          * But if any cpu goes recursive in kdb, just print the output,
16701 @@ -855,7 +852,6 @@
16702         } else {
16703                 __release(kdb_printf_lock);
16704         }
16705 -       kdb_trap_printk = saved_trap_printk;
16706         preempt_enable();
16707         return retlen;
16709 @@ -865,9 +861,11 @@
16710         va_list ap;
16711         int r;
16713 +       kdb_trap_printk++;
16714         va_start(ap, fmt);
16715         r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
16716         va_end(ap);
16717 +       kdb_trap_printk--;
16719         return r;
16721 diff -Nur linux-4.4.46.orig/kernel/events/core.c linux-4.4.46/kernel/events/core.c
16722 --- linux-4.4.46.orig/kernel/events/core.c      2017-02-01 08:31:11.000000000 +0100
16723 +++ linux-4.4.46/kernel/events/core.c   2017-02-03 17:18:10.919618749 +0100
16724 @@ -802,6 +802,7 @@
16725         raw_spin_lock_init(&cpuctx->hrtimer_lock);
16726         hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
16727         timer->function = perf_mux_hrtimer_handler;
16728 +       timer->irqsafe = 1;
16731  static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
16732 @@ -7240,6 +7241,7 @@
16734         hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
16735         hwc->hrtimer.function = perf_swevent_hrtimer;
16736 +       hwc->hrtimer.irqsafe = 1;
16738         /*
16739          * Since hrtimers have a fixed rate, we can do a static freq->period
16740 diff -Nur linux-4.4.46.orig/kernel/exit.c linux-4.4.46/kernel/exit.c
16741 --- linux-4.4.46.orig/kernel/exit.c     2017-02-01 08:31:11.000000000 +0100
16742 +++ linux-4.4.46/kernel/exit.c  2017-02-03 17:18:10.923618903 +0100
16743 @@ -144,7 +144,7 @@
16744          * Do this under ->siglock, we can race with another thread
16745          * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
16746          */
16747 -       flush_sigqueue(&tsk->pending);
16748 +       flush_task_sigqueue(tsk);
16749         tsk->sighand = NULL;
16750         spin_unlock(&sighand->siglock);
16752 diff -Nur linux-4.4.46.orig/kernel/fork.c linux-4.4.46/kernel/fork.c
16753 --- linux-4.4.46.orig/kernel/fork.c     2017-02-01 08:31:11.000000000 +0100
16754 +++ linux-4.4.46/kernel/fork.c  2017-02-03 17:18:10.923618903 +0100
16755 @@ -108,7 +108,7 @@
16757  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
16759 -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
16760 +DEFINE_RWLOCK(tasklist_lock);  /* outer */
16762  #ifdef CONFIG_PROVE_RCU
16763  int lockdep_tasklist_lock_is_held(void)
16764 @@ -244,7 +244,9 @@
16765         if (atomic_dec_and_test(&sig->sigcnt))
16766                 free_signal_struct(sig);
16769 +#ifdef CONFIG_PREEMPT_RT_BASE
16770 +static
16771 +#endif
16772  void __put_task_struct(struct task_struct *tsk)
16774         WARN_ON(!tsk->exit_state);
16775 @@ -261,7 +263,18 @@
16776         if (!profile_handoff_task(tsk))
16777                 free_task(tsk);
16779 +#ifndef CONFIG_PREEMPT_RT_BASE
16780  EXPORT_SYMBOL_GPL(__put_task_struct);
16781 +#else
16782 +void __put_task_struct_cb(struct rcu_head *rhp)
16784 +       struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
16786 +       __put_task_struct(tsk);
16789 +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
16790 +#endif
16792  void __init __weak arch_task_cache_init(void) { }
16794 @@ -692,6 +705,19 @@
16796  EXPORT_SYMBOL_GPL(__mmdrop);
16798 +#ifdef CONFIG_PREEMPT_RT_BASE
16800 + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
16801 + * want another facility to make this work.
16802 + */
16803 +void __mmdrop_delayed(struct rcu_head *rhp)
16805 +       struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
16807 +       __mmdrop(mm);
16809 +#endif
16811  /*
16812   * Decrement the use count and release all resources for an mm.
16813   */
16814 @@ -1242,6 +1268,9 @@
16815   */
16816  static void posix_cpu_timers_init(struct task_struct *tsk)
16818 +#ifdef CONFIG_PREEMPT_RT_BASE
16819 +       tsk->posix_timer_list = NULL;
16820 +#endif
16821         tsk->cputime_expires.prof_exp = 0;
16822         tsk->cputime_expires.virt_exp = 0;
16823         tsk->cputime_expires.sched_exp = 0;
16824 @@ -1367,15 +1396,16 @@
16825         spin_lock_init(&p->alloc_lock);
16827         init_sigpending(&p->pending);
16828 +       p->sigqueue_cache = NULL;
16830         p->utime = p->stime = p->gtime = 0;
16831         p->utimescaled = p->stimescaled = 0;
16832         prev_cputime_init(&p->prev_cputime);
16834  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
16835 -       seqlock_init(&p->vtime_seqlock);
16836 +       seqcount_init(&p->vtime_seqcount);
16837         p->vtime_snap = 0;
16838 -       p->vtime_snap_whence = VTIME_SLEEPING;
16839 +       p->vtime_snap_whence = VTIME_INACTIVE;
16840  #endif
16842  #if defined(SPLIT_RSS_COUNTING)
16843 diff -Nur linux-4.4.46.orig/kernel/futex.c linux-4.4.46/kernel/futex.c
16844 --- linux-4.4.46.orig/kernel/futex.c    2017-02-01 08:31:11.000000000 +0100
16845 +++ linux-4.4.46/kernel/futex.c 2017-02-03 17:18:10.923618903 +0100
16846 @@ -815,7 +815,9 @@
16847                  * task still owns the PI-state:
16848                  */
16849                 if (head->next != next) {
16850 +                       raw_spin_unlock_irq(&curr->pi_lock);
16851                         spin_unlock(&hb->lock);
16852 +                       raw_spin_lock_irq(&curr->pi_lock);
16853                         continue;
16854                 }
16856 @@ -1210,6 +1212,7 @@
16857         struct futex_pi_state *pi_state = this->pi_state;
16858         u32 uninitialized_var(curval), newval;
16859         WAKE_Q(wake_q);
16860 +       WAKE_Q(wake_sleeper_q);
16861         bool deboost;
16862         int ret = 0;
16864 @@ -1223,7 +1226,7 @@
16865         if (pi_state->owner != current)
16866                 return -EINVAL;
16868 -       raw_spin_lock(&pi_state->pi_mutex.wait_lock);
16869 +       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
16870         new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
16872         /*
16873 @@ -1259,24 +1262,25 @@
16874                         ret = -EINVAL;
16875         }
16876         if (ret) {
16877 -               raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
16878 +               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
16879                 return ret;
16880         }
16882 -       raw_spin_lock_irq(&pi_state->owner->pi_lock);
16883 +       raw_spin_lock(&pi_state->owner->pi_lock);
16884         WARN_ON(list_empty(&pi_state->list));
16885         list_del_init(&pi_state->list);
16886 -       raw_spin_unlock_irq(&pi_state->owner->pi_lock);
16887 +       raw_spin_unlock(&pi_state->owner->pi_lock);
16889 -       raw_spin_lock_irq(&new_owner->pi_lock);
16890 +       raw_spin_lock(&new_owner->pi_lock);
16891         WARN_ON(!list_empty(&pi_state->list));
16892         list_add(&pi_state->list, &new_owner->pi_state_list);
16893         pi_state->owner = new_owner;
16894 -       raw_spin_unlock_irq(&new_owner->pi_lock);
16895 +       raw_spin_unlock(&new_owner->pi_lock);
16897 -       raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
16898 +       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
16900 -       deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
16901 +       deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
16902 +                                       &wake_sleeper_q);
16904         /*
16905          * First unlock HB so the waiter does not spin on it once he got woken
16906 @@ -1284,8 +1288,9 @@
16907          * deboost first (and lose our higher priority), then the task might get
16908          * scheduled away before the wake up can take place.
16909          */
16910 -       spin_unlock(&hb->lock);
16911 +       deboost |= spin_unlock_no_deboost(&hb->lock);
16912         wake_up_q(&wake_q);
16913 +       wake_up_q_sleeper(&wake_sleeper_q);
16914         if (deboost)
16915                 rt_mutex_adjust_prio(current);
16917 @@ -1822,6 +1827,16 @@
16918                                 requeue_pi_wake_futex(this, &key2, hb2);
16919                                 drop_count++;
16920                                 continue;
16921 +                       } else if (ret == -EAGAIN) {
16922 +                               /*
16923 +                                * Waiter was woken by timeout or
16924 +                                * signal and has set pi_blocked_on to
16925 +                                * PI_WAKEUP_INPROGRESS before we
16926 +                                * tried to enqueue it on the rtmutex.
16927 +                                */
16928 +                               this->pi_state = NULL;
16929 +                               free_pi_state(pi_state);
16930 +                               continue;
16931                         } else if (ret) {
16932                                 /* -EDEADLK */
16933                                 this->pi_state = NULL;
16934 @@ -2139,11 +2154,11 @@
16935                  * we returned due to timeout or signal without taking the
16936                  * rt_mutex. Too late.
16937                  */
16938 -               raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
16939 +               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
16940                 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
16941                 if (!owner)
16942                         owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
16943 -               raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
16944 +               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
16945                 ret = fixup_pi_state_owner(uaddr, q, owner);
16946                 goto out;
16947         }
16948 @@ -2691,7 +2706,7 @@
16949         struct hrtimer_sleeper timeout, *to = NULL;
16950         struct rt_mutex_waiter rt_waiter;
16951         struct rt_mutex *pi_mutex = NULL;
16952 -       struct futex_hash_bucket *hb;
16953 +       struct futex_hash_bucket *hb, *hb2;
16954         union futex_key key2 = FUTEX_KEY_INIT;
16955         struct futex_q q = futex_q_init;
16956         int res, ret;
16957 @@ -2716,10 +2731,7 @@
16958          * The waiter is allocated on our stack, manipulated by the requeue
16959          * code while we sleep on uaddr.
16960          */
16961 -       debug_rt_mutex_init_waiter(&rt_waiter);
16962 -       RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
16963 -       RB_CLEAR_NODE(&rt_waiter.tree_entry);
16964 -       rt_waiter.task = NULL;
16965 +       rt_mutex_init_waiter(&rt_waiter, false);
16967         ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
16968         if (unlikely(ret != 0))
16969 @@ -2750,20 +2762,55 @@
16970         /* Queue the futex_q, drop the hb lock, wait for wakeup. */
16971         futex_wait_queue_me(hb, &q, to);
16973 -       spin_lock(&hb->lock);
16974 -       ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
16975 -       spin_unlock(&hb->lock);
16976 -       if (ret)
16977 -               goto out_put_keys;
16978 +       /*
16979 +        * On RT we must avoid races with requeue and trying to block
16980 +        * on two mutexes (hb->lock and uaddr2's rtmutex) by
16981 +        * serializing access to pi_blocked_on with pi_lock.
16982 +        */
16983 +       raw_spin_lock_irq(&current->pi_lock);
16984 +       if (current->pi_blocked_on) {
16985 +               /*
16986 +                * We have been requeued or are in the process of
16987 +                * being requeued.
16988 +                */
16989 +               raw_spin_unlock_irq(&current->pi_lock);
16990 +       } else {
16991 +               /*
16992 +                * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
16993 +                * prevents a concurrent requeue from moving us to the
16994 +                * uaddr2 rtmutex. After that we can safely acquire
16995 +                * (and possibly block on) hb->lock.
16996 +                */
16997 +               current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
16998 +               raw_spin_unlock_irq(&current->pi_lock);
17000 +               spin_lock(&hb->lock);
17002 +               /*
17003 +                * Clean up pi_blocked_on. We might leak it otherwise
17004 +                * when we succeeded with the hb->lock in the fast
17005 +                * path.
17006 +                */
17007 +               raw_spin_lock_irq(&current->pi_lock);
17008 +               current->pi_blocked_on = NULL;
17009 +               raw_spin_unlock_irq(&current->pi_lock);
17011 +               ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
17012 +               spin_unlock(&hb->lock);
17013 +               if (ret)
17014 +                       goto out_put_keys;
17015 +       }
17017         /*
17018 -        * In order for us to be here, we know our q.key == key2, and since
17019 -        * we took the hb->lock above, we also know that futex_requeue() has
17020 -        * completed and we no longer have to concern ourselves with a wakeup
17021 -        * race with the atomic proxy lock acquisition by the requeue code. The
17022 -        * futex_requeue dropped our key1 reference and incremented our key2
17023 -        * reference count.
17024 +        * In order to be here, we have either been requeued, are in
17025 +        * the process of being requeued, or requeue successfully
17026 +        * acquired uaddr2 on our behalf.  If pi_blocked_on was
17027 +        * non-null above, we may be racing with a requeue.  Do not
17028 +        * rely on q->lock_ptr to be hb2->lock until after blocking on
17029 +        * hb->lock or hb2->lock. The futex_requeue dropped our key1
17030 +        * reference and incremented our key2 reference count.
17031          */
17032 +       hb2 = hash_futex(&key2);
17034         /* Check if the requeue code acquired the second futex for us. */
17035         if (!q.rt_waiter) {
17036 @@ -2772,14 +2819,15 @@
17037                  * did a lock-steal - fix up the PI-state in that case.
17038                  */
17039                 if (q.pi_state && (q.pi_state->owner != current)) {
17040 -                       spin_lock(q.lock_ptr);
17041 +                       spin_lock(&hb2->lock);
17042 +                       BUG_ON(&hb2->lock != q.lock_ptr);
17043                         ret = fixup_pi_state_owner(uaddr2, &q, current);
17044                         /*
17045                          * Drop the reference to the pi state which
17046                          * the requeue_pi() code acquired for us.
17047                          */
17048                         free_pi_state(q.pi_state);
17049 -                       spin_unlock(q.lock_ptr);
17050 +                       spin_unlock(&hb2->lock);
17051                 }
17052         } else {
17053                 /*
17054 @@ -2792,7 +2840,8 @@
17055                 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
17056                 debug_rt_mutex_free_waiter(&rt_waiter);
17058 -               spin_lock(q.lock_ptr);
17059 +               spin_lock(&hb2->lock);
17060 +               BUG_ON(&hb2->lock != q.lock_ptr);
17061                 /*
17062                  * Fixup the pi_state owner and possibly acquire the lock if we
17063                  * haven't already.
17064 diff -Nur linux-4.4.46.orig/kernel/irq/handle.c linux-4.4.46/kernel/irq/handle.c
17065 --- linux-4.4.46.orig/kernel/irq/handle.c       2017-02-01 08:31:11.000000000 +0100
17066 +++ linux-4.4.46/kernel/irq/handle.c    2017-02-03 17:18:10.923618903 +0100
17067 @@ -134,6 +134,8 @@
17069  irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
17071 +       struct pt_regs *regs = get_irq_regs();
17072 +       u64 ip = regs ? instruction_pointer(regs) : 0;
17073         irqreturn_t retval = IRQ_NONE;
17074         unsigned int flags = 0, irq = desc->irq_data.irq;
17075         struct irqaction *action = desc->action;
17076 @@ -176,7 +178,11 @@
17077                 action = action->next;
17078         }
17080 -       add_interrupt_randomness(irq, flags);
17081 +#ifdef CONFIG_PREEMPT_RT_FULL
17082 +       desc->random_ip = ip;
17083 +#else
17084 +       add_interrupt_randomness(irq, flags, ip);
17085 +#endif
17087         if (!noirqdebug)
17088                 note_interrupt(desc, retval);
17089 diff -Nur linux-4.4.46.orig/kernel/irq/irqdesc.c linux-4.4.46/kernel/irq/irqdesc.c
17090 --- linux-4.4.46.orig/kernel/irq/irqdesc.c      2017-02-01 08:31:11.000000000 +0100
17091 +++ linux-4.4.46/kernel/irq/irqdesc.c   2017-02-03 17:18:10.923618903 +0100
17092 @@ -24,10 +24,27 @@
17093  static struct lock_class_key irq_desc_lock_class;
17095  #if defined(CONFIG_SMP)
17096 +static int __init irq_affinity_setup(char *str)
17098 +       zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
17099 +       cpulist_parse(str, irq_default_affinity);
17100 +       /*
17101 +        * Set at least the boot cpu. We don't want to end up with
17102 +        * bugreports caused by random comandline masks
17103 +        */
17104 +       cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
17105 +       return 1;
17107 +__setup("irqaffinity=", irq_affinity_setup);
17109  static void __init init_irq_default_affinity(void)
17111 -       alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
17112 -       cpumask_setall(irq_default_affinity);
17113 +#ifdef CONFIG_CPUMASK_OFFSTACK
17114 +       if (!irq_default_affinity)
17115 +               zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
17116 +#endif
17117 +       if (cpumask_empty(irq_default_affinity))
17118 +               cpumask_setall(irq_default_affinity);
17120  #else
17121  static void __init init_irq_default_affinity(void)
17122 diff -Nur linux-4.4.46.orig/kernel/irq/manage.c linux-4.4.46/kernel/irq/manage.c
17123 --- linux-4.4.46.orig/kernel/irq/manage.c       2017-02-01 08:31:11.000000000 +0100
17124 +++ linux-4.4.46/kernel/irq/manage.c    2017-02-03 17:18:10.923618903 +0100
17125 @@ -22,6 +22,7 @@
17126  #include "internals.h"
17128  #ifdef CONFIG_IRQ_FORCED_THREADING
17129 +# ifndef CONFIG_PREEMPT_RT_BASE
17130  __read_mostly bool force_irqthreads;
17132  static int __init setup_forced_irqthreads(char *arg)
17133 @@ -30,6 +31,7 @@
17134         return 0;
17136  early_param("threadirqs", setup_forced_irqthreads);
17137 +# endif
17138  #endif
17140  static void __synchronize_hardirq(struct irq_desc *desc)
17141 @@ -181,6 +183,62 @@
17142  irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
17143  #endif
17145 +#ifdef CONFIG_PREEMPT_RT_FULL
17146 +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
17147 +static struct task_struct *set_affinity_helper;
17148 +static LIST_HEAD(affinity_list);
17149 +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
17151 +static int set_affinity_thread(void *unused)
17153 +       while (1) {
17154 +               struct irq_affinity_notify *notify;
17155 +               int empty;
17157 +               set_current_state(TASK_INTERRUPTIBLE);
17159 +               raw_spin_lock_irq(&affinity_list_lock);
17160 +               empty = list_empty(&affinity_list);
17161 +               raw_spin_unlock_irq(&affinity_list_lock);
17163 +               if (empty)
17164 +                       schedule();
17165 +               if (kthread_should_stop())
17166 +                       break;
17167 +               set_current_state(TASK_RUNNING);
17168 +try_next:
17169 +               notify = NULL;
17171 +               raw_spin_lock_irq(&affinity_list_lock);
17172 +               if (!list_empty(&affinity_list)) {
17173 +                       notify = list_first_entry(&affinity_list,
17174 +                                       struct irq_affinity_notify, list);
17175 +                       list_del_init(&notify->list);
17176 +               }
17177 +               raw_spin_unlock_irq(&affinity_list_lock);
17179 +               if (!notify)
17180 +                       continue;
17181 +               _irq_affinity_notify(notify);
17182 +               goto try_next;
17183 +       }
17184 +       return 0;
17187 +static void init_helper_thread(void)
17189 +       if (set_affinity_helper)
17190 +               return;
17191 +       set_affinity_helper = kthread_run(set_affinity_thread, NULL,
17192 +                       "affinity-cb");
17193 +       WARN_ON(IS_ERR(set_affinity_helper));
17195 +#else
17197 +static inline void init_helper_thread(void) { }
17199 +#endif
17201  int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
17202                         bool force)
17204 @@ -220,7 +278,17 @@
17206         if (desc->affinity_notify) {
17207                 kref_get(&desc->affinity_notify->kref);
17209 +#ifdef CONFIG_PREEMPT_RT_FULL
17210 +               raw_spin_lock(&affinity_list_lock);
17211 +               if (list_empty(&desc->affinity_notify->list))
17212 +                       list_add_tail(&affinity_list,
17213 +                                       &desc->affinity_notify->list);
17214 +               raw_spin_unlock(&affinity_list_lock);
17215 +               wake_up_process(set_affinity_helper);
17216 +#else
17217                 schedule_work(&desc->affinity_notify->work);
17218 +#endif
17219         }
17220         irqd_set(data, IRQD_AFFINITY_SET);
17222 @@ -258,10 +326,8 @@
17224  EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
17226 -static void irq_affinity_notify(struct work_struct *work)
17227 +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
17229 -       struct irq_affinity_notify *notify =
17230 -               container_of(work, struct irq_affinity_notify, work);
17231         struct irq_desc *desc = irq_to_desc(notify->irq);
17232         cpumask_var_t cpumask;
17233         unsigned long flags;
17234 @@ -283,6 +349,13 @@
17235         kref_put(&notify->kref, notify->release);
17238 +static void irq_affinity_notify(struct work_struct *work)
17240 +       struct irq_affinity_notify *notify =
17241 +               container_of(work, struct irq_affinity_notify, work);
17242 +       _irq_affinity_notify(notify);
17245  /**
17246   *     irq_set_affinity_notifier - control notification of IRQ affinity changes
17247   *     @irq:           Interrupt for which to enable/disable notification
17248 @@ -312,6 +385,8 @@
17249                 notify->irq = irq;
17250                 kref_init(&notify->kref);
17251                 INIT_WORK(&notify->work, irq_affinity_notify);
17252 +               INIT_LIST_HEAD(&notify->list);
17253 +               init_helper_thread();
17254         }
17256         raw_spin_lock_irqsave(&desc->lock, flags);
17257 @@ -865,7 +940,15 @@
17258         local_bh_disable();
17259         ret = action->thread_fn(action->irq, action->dev_id);
17260         irq_finalize_oneshot(desc, action);
17261 -       local_bh_enable();
17262 +       /*
17263 +        * Interrupts which have real time requirements can be set up
17264 +        * to avoid softirq processing in the thread handler. This is
17265 +        * safe as these interrupts do not raise soft interrupts.
17266 +        */
17267 +       if (irq_settings_no_softirq_call(desc))
17268 +               _local_bh_enable();
17269 +       else
17270 +               local_bh_enable();
17271         return ret;
17274 @@ -962,6 +1045,12 @@
17275                 if (action_ret == IRQ_WAKE_THREAD)
17276                         irq_wake_secondary(desc, action);
17278 +#ifdef CONFIG_PREEMPT_RT_FULL
17279 +               migrate_disable();
17280 +               add_interrupt_randomness(action->irq, 0,
17281 +                                desc->random_ip ^ (unsigned long) action);
17282 +               migrate_enable();
17283 +#endif
17284                 wake_threads_waitq(desc);
17285         }
17287 @@ -1315,6 +1404,9 @@
17288                         irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
17289                 }
17291 +               if (new->flags & IRQF_NO_SOFTIRQ_CALL)
17292 +                       irq_settings_set_no_softirq_call(desc);
17294                 /* Set default affinity mask once everything is setup */
17295                 setup_affinity(desc, mask);
17297 @@ -1968,7 +2060,7 @@
17298   *     This call sets the internal irqchip state of an interrupt,
17299   *     depending on the value of @which.
17300   *
17301 - *     This function should be called with preemption disabled if the
17302 + *     This function should be called with migration disabled if the
17303   *     interrupt controller has per-cpu registers.
17304   */
17305  int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
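For illustration only (not part of the patch): with the kernel/irq/manage.c hunks above applied, a latency-critical driver can opt its threaded handler out of softirq processing by passing the IRQF_NO_SOFTIRQ_CALL flag this patch introduces. The irq, device and handler names below are hypothetical; a minimal sketch:

    #include <linux/interrupt.h>

    static irqreturn_t my_hardirq(int irq, void *dev_id)
    {
            return IRQ_WAKE_THREAD;         /* defer the real work to the irq thread */
    }

    static irqreturn_t my_thread_fn(int irq, void *dev_id)
    {
            /* Runs in the irq thread; with IRQF_NO_SOFTIRQ_CALL the forced-thread
             * path patched above uses _local_bh_enable() instead of
             * local_bh_enable(), so no softirqs run here. The handler must
             * therefore not raise softirqs itself. */
            return IRQ_HANDLED;
    }

    static int my_request_irq(unsigned int my_irq, void *dev)
    {
            return request_threaded_irq(my_irq, my_hardirq, my_thread_fn,
                                        IRQF_NO_SOFTIRQ_CALL, "my-dev", dev);
    }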
17306 diff -Nur linux-4.4.46.orig/kernel/irq/settings.h linux-4.4.46/kernel/irq/settings.h
17307 --- linux-4.4.46.orig/kernel/irq/settings.h     2017-02-01 08:31:11.000000000 +0100
17308 +++ linux-4.4.46/kernel/irq/settings.h  2017-02-03 17:18:10.923618903 +0100
17309 @@ -16,6 +16,7 @@
17310         _IRQ_PER_CPU_DEVID      = IRQ_PER_CPU_DEVID,
17311         _IRQ_IS_POLLED          = IRQ_IS_POLLED,
17312         _IRQ_DISABLE_UNLAZY     = IRQ_DISABLE_UNLAZY,
17313 +       _IRQ_NO_SOFTIRQ_CALL    = IRQ_NO_SOFTIRQ_CALL,
17314         _IRQF_MODIFY_MASK       = IRQF_MODIFY_MASK,
17315  };
17317 @@ -30,6 +31,7 @@
17318  #define IRQ_PER_CPU_DEVID      GOT_YOU_MORON
17319  #define IRQ_IS_POLLED          GOT_YOU_MORON
17320  #define IRQ_DISABLE_UNLAZY     GOT_YOU_MORON
17321 +#define IRQ_NO_SOFTIRQ_CALL    GOT_YOU_MORON
17322  #undef IRQF_MODIFY_MASK
17323  #define IRQF_MODIFY_MASK       GOT_YOU_MORON
17325 @@ -40,6 +42,16 @@
17326         desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
17329 +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
17331 +       return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
17334 +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
17336 +       desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
17339  static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
17341         return desc->status_use_accessors & _IRQ_PER_CPU;
17342 diff -Nur linux-4.4.46.orig/kernel/irq/spurious.c linux-4.4.46/kernel/irq/spurious.c
17343 --- linux-4.4.46.orig/kernel/irq/spurious.c     2017-02-01 08:31:11.000000000 +0100
17344 +++ linux-4.4.46/kernel/irq/spurious.c  2017-02-03 17:18:10.923618903 +0100
17345 @@ -444,6 +444,10 @@
17347  static int __init irqfixup_setup(char *str)
17349 +#ifdef CONFIG_PREEMPT_RT_BASE
17350 +       pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
17351 +       return 1;
17352 +#endif
17353         irqfixup = 1;
17354         printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
17355         printk(KERN_WARNING "This may impact system performance.\n");
17356 @@ -456,6 +460,10 @@
17358  static int __init irqpoll_setup(char *str)
17360 +#ifdef CONFIG_PREEMPT_RT_BASE
17361 +       pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
17362 +       return 1;
17363 +#endif
17364         irqfixup = 2;
17365         printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
17366                                 "enabled\n");
17367 diff -Nur linux-4.4.46.orig/kernel/irq_work.c linux-4.4.46/kernel/irq_work.c
17368 --- linux-4.4.46.orig/kernel/irq_work.c 2017-02-01 08:31:11.000000000 +0100
17369 +++ linux-4.4.46/kernel/irq_work.c      2017-02-03 17:18:10.923618903 +0100
17370 @@ -17,6 +17,7 @@
17371  #include <linux/cpu.h>
17372  #include <linux/notifier.h>
17373  #include <linux/smp.h>
17374 +#include <linux/interrupt.h>
17375  #include <asm/processor.h>
17378 @@ -65,6 +66,8 @@
17379   */
17380  bool irq_work_queue_on(struct irq_work *work, int cpu)
17382 +       struct llist_head *list;
17384         /* All work should have been flushed before going offline */
17385         WARN_ON_ONCE(cpu_is_offline(cpu));
17387 @@ -75,7 +78,12 @@
17388         if (!irq_work_claim(work))
17389                 return false;
17391 -       if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
17392 +       if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
17393 +               list = &per_cpu(lazy_list, cpu);
17394 +       else
17395 +               list = &per_cpu(raised_list, cpu);
17397 +       if (llist_add(&work->llnode, list))
17398                 arch_send_call_function_single_ipi(cpu);
17400         return true;
17401 @@ -86,6 +94,9 @@
17402  /* Enqueue the irq work @work on the current CPU */
17403  bool irq_work_queue(struct irq_work *work)
17405 +       struct llist_head *list;
17406 +       bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
17408         /* Only queue if not already pending */
17409         if (!irq_work_claim(work))
17410                 return false;
17411 @@ -93,13 +104,15 @@
17412         /* Queue the entry and raise the IPI if needed. */
17413         preempt_disable();
17415 -       /* If the work is "lazy", handle it from next tick if any */
17416 -       if (work->flags & IRQ_WORK_LAZY) {
17417 -               if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
17418 -                   tick_nohz_tick_stopped())
17419 -                       arch_irq_work_raise();
17420 -       } else {
17421 -               if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
17422 +       lazy_work = work->flags & IRQ_WORK_LAZY;
17424 +       if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
17425 +               list = this_cpu_ptr(&lazy_list);
17426 +       else
17427 +               list = this_cpu_ptr(&raised_list);
17429 +       if (llist_add(&work->llnode, list)) {
17430 +               if (!lazy_work || tick_nohz_tick_stopped())
17431                         arch_irq_work_raise();
17432         }
17434 @@ -116,9 +129,8 @@
17435         raised = this_cpu_ptr(&raised_list);
17436         lazy = this_cpu_ptr(&lazy_list);
17438 -       if (llist_empty(raised) || arch_irq_work_has_interrupt())
17439 -               if (llist_empty(lazy))
17440 -                       return false;
17441 +       if (llist_empty(raised) && llist_empty(lazy))
17442 +               return false;
17444         /* All work should have been flushed before going offline */
17445         WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
17446 @@ -132,7 +144,7 @@
17447         struct irq_work *work;
17448         struct llist_node *llnode;
17450 -       BUG_ON(!irqs_disabled());
17451 +       BUG_ON_NONRT(!irqs_disabled());
17453         if (llist_empty(list))
17454                 return;
17455 @@ -169,7 +181,16 @@
17456  void irq_work_run(void)
17458         irq_work_run_list(this_cpu_ptr(&raised_list));
17459 -       irq_work_run_list(this_cpu_ptr(&lazy_list));
17460 +       if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
17461 +               /*
17462 +                * NOTE: we raise softirq via IPI for safety,
17463 +                * and execute in irq_work_tick() to move the
17464 +                * overhead from hard to soft irq context.
17465 +                */
17466 +               if (!llist_empty(this_cpu_ptr(&lazy_list)))
17467 +                       raise_softirq(TIMER_SOFTIRQ);
17468 +       } else
17469 +               irq_work_run_list(this_cpu_ptr(&lazy_list));
17471  EXPORT_SYMBOL_GPL(irq_work_run);
17473 @@ -179,8 +200,17 @@
17475         if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
17476                 irq_work_run_list(raised);
17478 +       if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
17479 +               irq_work_run_list(this_cpu_ptr(&lazy_list));
17482 +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
17483 +void irq_work_tick_soft(void)
17485         irq_work_run_list(this_cpu_ptr(&lazy_list));
17487 +#endif
17489  /*
17490   * Synchronize against the irq_work @entry, ensures the entry is not
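A hedged usage sketch (not part of the patch): under CONFIG_PREEMPT_RT_FULL the irq_work hunks above push every item that lacks IRQ_WORK_HARD_IRQ onto the lazy list, which now runs from the timer softirq via irq_work_tick_soft(). Work that genuinely must execute from the hard interrupt has to carry the IRQ_WORK_HARD_IRQ flag. The my_work/my_work_fn names below are hypothetical:

    #include <linux/irq_work.h>

    static void my_work_fn(struct irq_work *work)
    {
            /* On RT this stays in hard interrupt context only because the
             * IRQ_WORK_HARD_IRQ flag keeps the item on raised_list (see above). */
    }

    static struct irq_work my_work = {
            .flags = IRQ_WORK_HARD_IRQ,
            .func  = my_work_fn,
    };

    /* called from interrupt or task context */
    static void kick_my_work(void)
    {
            irq_work_queue(&my_work);
    }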
17491 diff -Nur linux-4.4.46.orig/kernel/Kconfig.locks linux-4.4.46/kernel/Kconfig.locks
17492 --- linux-4.4.46.orig/kernel/Kconfig.locks      2017-02-01 08:31:11.000000000 +0100
17493 +++ linux-4.4.46/kernel/Kconfig.locks   2017-02-03 17:18:10.919618749 +0100
17494 @@ -225,11 +225,11 @@
17496  config MUTEX_SPIN_ON_OWNER
17497         def_bool y
17498 -       depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
17499 +       depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
17501  config RWSEM_SPIN_ON_OWNER
17502         def_bool y
17503 -       depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
17504 +       depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
17506  config LOCK_SPIN_ON_OWNER
17507         def_bool y
17508 diff -Nur linux-4.4.46.orig/kernel/Kconfig.preempt linux-4.4.46/kernel/Kconfig.preempt
17509 --- linux-4.4.46.orig/kernel/Kconfig.preempt    2017-02-01 08:31:11.000000000 +0100
17510 +++ linux-4.4.46/kernel/Kconfig.preempt 2017-02-03 17:18:10.919618749 +0100
17511 @@ -1,3 +1,16 @@
17512 +config PREEMPT
17513 +       bool
17514 +       select PREEMPT_COUNT
17516 +config PREEMPT_RT_BASE
17517 +       bool
17518 +       select PREEMPT
17520 +config HAVE_PREEMPT_LAZY
17521 +       bool
17523 +config PREEMPT_LAZY
17524 +       def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
17526  choice
17527         prompt "Preemption Model"
17528 @@ -33,9 +46,9 @@
17530           Select this if you are building a kernel for a desktop system.
17532 -config PREEMPT
17533 +config PREEMPT__LL
17534         bool "Preemptible Kernel (Low-Latency Desktop)"
17535 -       select PREEMPT_COUNT
17536 +       select PREEMPT
17537         select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
17538         help
17539           This option reduces the latency of the kernel by making
17540 @@ -52,6 +65,22 @@
17541           embedded system with latency requirements in the milliseconds
17542           range.
17544 +config PREEMPT_RTB
17545 +       bool "Preemptible Kernel (Basic RT)"
17546 +       select PREEMPT_RT_BASE
17547 +       help
17548 +         This option is basically the same as (Low-Latency Desktop) but
17549 +         enables changes which are preliminary for the full preemptible
17550 +         RT kernel.
17552 +config PREEMPT_RT_FULL
17553 +       bool "Fully Preemptible Kernel (RT)"
17554 +       depends on IRQ_FORCED_THREADING
17555 +       select PREEMPT_RT_BASE
17556 +       select PREEMPT_RCU
17557 +       help
17557 +       help
17558 +         Select this if you want a fully preemptible (real-time) kernel.
17560  endchoice
17562  config PREEMPT_COUNT
17563 diff -Nur linux-4.4.46.orig/kernel/ksysfs.c linux-4.4.46/kernel/ksysfs.c
17564 --- linux-4.4.46.orig/kernel/ksysfs.c   2017-02-01 08:31:11.000000000 +0100
17565 +++ linux-4.4.46/kernel/ksysfs.c        2017-02-03 17:18:10.923618903 +0100
17566 @@ -136,6 +136,15 @@
17568  #endif /* CONFIG_KEXEC_CORE */
17570 +#if defined(CONFIG_PREEMPT_RT_FULL)
17571 +static ssize_t  realtime_show(struct kobject *kobj,
17572 +                             struct kobj_attribute *attr, char *buf)
17574 +       return sprintf(buf, "%d\n", 1);
17576 +KERNEL_ATTR_RO(realtime);
17577 +#endif
17579  /* whether file capabilities are enabled */
17580  static ssize_t fscaps_show(struct kobject *kobj,
17581                                   struct kobj_attribute *attr, char *buf)
17582 @@ -203,6 +212,9 @@
17583         &vmcoreinfo_attr.attr,
17584  #endif
17585         &rcu_expedited_attr.attr,
17586 +#ifdef CONFIG_PREEMPT_RT_FULL
17587 +       &realtime_attr.attr,
17588 +#endif
17589         NULL
17590  };
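For completeness, a small userspace sketch (not part of the patch): the realtime attribute added above is registered on kernel_kobj, so on a CONFIG_PREEMPT_RT_FULL kernel it should appear as /sys/kernel/realtime and read "1"; on other kernels the file is simply absent.

    #include <stdio.h>

    /* Returns 1 when running on a PREEMPT_RT_FULL kernel, 0 otherwise. */
    static int kernel_is_preempt_rt(void)
    {
            FILE *f = fopen("/sys/kernel/realtime", "r");
            int rt = 0;

            if (!f)
                    return 0;       /* attribute absent: not an RT kernel */
            if (fscanf(f, "%d", &rt) != 1)
                    rt = 0;
            fclose(f);
            return rt == 1;
    }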
17592 diff -Nur linux-4.4.46.orig/kernel/locking/lglock.c linux-4.4.46/kernel/locking/lglock.c
17593 --- linux-4.4.46.orig/kernel/locking/lglock.c   2017-02-01 08:31:11.000000000 +0100
17594 +++ linux-4.4.46/kernel/locking/lglock.c        2017-02-03 17:18:10.923618903 +0100
17595 @@ -4,6 +4,15 @@
17596  #include <linux/cpu.h>
17597  #include <linux/string.h>
17599 +#ifndef CONFIG_PREEMPT_RT_FULL
17600 +# define lg_lock_ptr           arch_spinlock_t
17601 +# define lg_do_lock(l)         arch_spin_lock(l)
17602 +# define lg_do_unlock(l)       arch_spin_unlock(l)
17603 +#else
17604 +# define lg_lock_ptr           struct rt_mutex
17605 +# define lg_do_lock(l)         __rt_spin_lock__no_mg(l)
17606 +# define lg_do_unlock(l)       __rt_spin_unlock(l)
17607 +#endif
17608  /*
17609   * Note there is no uninit, so lglocks cannot be defined in
17610   * modules (but it's fine to use them from there)
17611 @@ -12,51 +21,60 @@
17613  void lg_lock_init(struct lglock *lg, char *name)
17615 +#ifdef CONFIG_PREEMPT_RT_FULL
17616 +       int i;
17618 +       for_each_possible_cpu(i) {
17619 +               struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
17621 +               rt_mutex_init(lock);
17622 +       }
17623 +#endif
17624         LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
17626  EXPORT_SYMBOL(lg_lock_init);
17628  void lg_local_lock(struct lglock *lg)
17630 -       arch_spinlock_t *lock;
17631 +       lg_lock_ptr *lock;
17633 -       preempt_disable();
17634 +       migrate_disable();
17635         lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17636         lock = this_cpu_ptr(lg->lock);
17637 -       arch_spin_lock(lock);
17638 +       lg_do_lock(lock);
17640  EXPORT_SYMBOL(lg_local_lock);
17642  void lg_local_unlock(struct lglock *lg)
17644 -       arch_spinlock_t *lock;
17645 +       lg_lock_ptr *lock;
17647         lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17648         lock = this_cpu_ptr(lg->lock);
17649 -       arch_spin_unlock(lock);
17650 -       preempt_enable();
17651 +       lg_do_unlock(lock);
17652 +       migrate_enable();
17654  EXPORT_SYMBOL(lg_local_unlock);
17656  void lg_local_lock_cpu(struct lglock *lg, int cpu)
17658 -       arch_spinlock_t *lock;
17659 +       lg_lock_ptr *lock;
17661 -       preempt_disable();
17662 +       preempt_disable_nort();
17663         lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17664         lock = per_cpu_ptr(lg->lock, cpu);
17665 -       arch_spin_lock(lock);
17666 +       lg_do_lock(lock);
17668  EXPORT_SYMBOL(lg_local_lock_cpu);
17670  void lg_local_unlock_cpu(struct lglock *lg, int cpu)
17672 -       arch_spinlock_t *lock;
17673 +       lg_lock_ptr *lock;
17675         lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17676         lock = per_cpu_ptr(lg->lock, cpu);
17677 -       arch_spin_unlock(lock);
17678 -       preempt_enable();
17679 +       lg_do_unlock(lock);
17680 +       preempt_enable_nort();
17682  EXPORT_SYMBOL(lg_local_unlock_cpu);
17684 @@ -68,30 +86,30 @@
17685         if (cpu2 < cpu1)
17686                 swap(cpu1, cpu2);
17688 -       preempt_disable();
17689 +       preempt_disable_nort();
17690         lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17691 -       arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
17692 -       arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
17693 +       lg_do_lock(per_cpu_ptr(lg->lock, cpu1));
17694 +       lg_do_lock(per_cpu_ptr(lg->lock, cpu2));
17697  void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
17699         lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17700 -       arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
17701 -       arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
17702 -       preempt_enable();
17703 +       lg_do_unlock(per_cpu_ptr(lg->lock, cpu1));
17704 +       lg_do_unlock(per_cpu_ptr(lg->lock, cpu2));
17705 +       preempt_enable_nort();
17708  void lg_global_lock(struct lglock *lg)
17710         int i;
17712 -       preempt_disable();
17713 +       preempt_disable_nort();
17714         lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17715         for_each_possible_cpu(i) {
17716 -               arch_spinlock_t *lock;
17717 +               lg_lock_ptr *lock;
17718                 lock = per_cpu_ptr(lg->lock, i);
17719 -               arch_spin_lock(lock);
17720 +               lg_do_lock(lock);
17721         }
17723  EXPORT_SYMBOL(lg_global_lock);
17724 @@ -102,10 +120,35 @@
17726         lock_release(&lg->lock_dep_map, 1, _RET_IP_);
17727         for_each_possible_cpu(i) {
17728 -               arch_spinlock_t *lock;
17729 +               lg_lock_ptr *lock;
17730                 lock = per_cpu_ptr(lg->lock, i);
17731 -               arch_spin_unlock(lock);
17732 +               lg_do_unlock(lock);
17733         }
17734 -       preempt_enable();
17735 +       preempt_enable_nort();
17737  EXPORT_SYMBOL(lg_global_unlock);
17739 +#ifdef CONFIG_PREEMPT_RT_FULL
17741 + * HACK: If you use this, you get to keep the pieces.
17742 + * Used in queue_stop_cpus_work() when stop machinery
17743 + * is called from inactive CPU, so we can't schedule.
17744 + */
17745 +# define lg_do_trylock_relax(l)                        \
17746 +       do {                                    \
17747 +               while (!__rt_spin_trylock(l))   \
17748 +                       cpu_relax();            \
17749 +       } while (0)
17751 +void lg_global_trylock_relax(struct lglock *lg)
17753 +       int i;
17755 +       lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
17756 +       for_each_possible_cpu(i) {
17757 +               lg_lock_ptr *lock;
17758 +               lock = per_cpu_ptr(lg->lock, i);
17759 +               lg_do_trylock_relax(lock);
17760 +       }
17762 +#endif
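A brief, hedged usage sketch (not part of the patch): callers of the lglock API are unaffected by the conversion above; only the backing lock changes (arch_spinlock_t plus preempt_disable() on !RT, rt_mutex plus migrate_disable() on RT). The my_lglock name and the data it protects are hypothetical:

    #include <linux/lglock.h>

    DEFINE_LGLOCK(my_lglock);               /* one lock instance per CPU */

    static void my_lglock_setup(void)
    {
            lg_lock_init(&my_lglock, "my_lglock");
    }

    static void update_this_cpu(void)
    {
            lg_local_lock(&my_lglock);      /* cheap: current CPU's lock only */
            /* ... modify this CPU's share of the data ... */
            lg_local_unlock(&my_lglock);
    }

    static void scan_all_cpus(void)
    {
            lg_global_lock(&my_lglock);     /* expensive: every CPU's lock */
            /* ... walk all per-CPU data ... */
            lg_global_unlock(&my_lglock);
    }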
17763 diff -Nur linux-4.4.46.orig/kernel/locking/lockdep.c linux-4.4.46/kernel/locking/lockdep.c
17764 --- linux-4.4.46.orig/kernel/locking/lockdep.c  2017-02-01 08:31:11.000000000 +0100
17765 +++ linux-4.4.46/kernel/locking/lockdep.c       2017-02-03 17:18:10.923618903 +0100
17766 @@ -3525,6 +3525,7 @@
17767                 }
17768         }
17770 +#ifndef CONFIG_PREEMPT_RT_FULL
17771         /*
17772          * We dont accurately track softirq state in e.g.
17773          * hardirq contexts (such as on 4KSTACKS), so only
17774 @@ -3539,6 +3540,7 @@
17775                         DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
17776                 }
17777         }
17778 +#endif
17780         if (!debug_locks)
17781                 print_irqtrace_events(current);
17782 diff -Nur linux-4.4.46.orig/kernel/locking/locktorture.c linux-4.4.46/kernel/locking/locktorture.c
17783 --- linux-4.4.46.orig/kernel/locking/locktorture.c      2017-02-01 08:31:11.000000000 +0100
17784 +++ linux-4.4.46/kernel/locking/locktorture.c   2017-02-03 17:18:10.923618903 +0100
17785 @@ -26,7 +26,6 @@
17786  #include <linux/kthread.h>
17787  #include <linux/sched/rt.h>
17788  #include <linux/spinlock.h>
17789 -#include <linux/rwlock.h>
17790  #include <linux/mutex.h>
17791  #include <linux/rwsem.h>
17792  #include <linux/smp.h>
17793 diff -Nur linux-4.4.46.orig/kernel/locking/Makefile linux-4.4.46/kernel/locking/Makefile
17794 --- linux-4.4.46.orig/kernel/locking/Makefile   2017-02-01 08:31:11.000000000 +0100
17795 +++ linux-4.4.46/kernel/locking/Makefile        2017-02-03 17:18:10.923618903 +0100
17796 @@ -1,5 +1,5 @@
17798 -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
17799 +obj-y += semaphore.o percpu-rwsem.o
17801  ifdef CONFIG_FUNCTION_TRACER
17802  CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
17803 @@ -8,7 +8,11 @@
17804  CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
17805  endif
17807 +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
17808 +obj-y += mutex.o
17809  obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
17810 +obj-y += rwsem.o
17811 +endif
17812  obj-$(CONFIG_LOCKDEP) += lockdep.o
17813  ifeq ($(CONFIG_PROC_FS),y)
17814  obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
17815 @@ -22,7 +26,10 @@
17816  obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
17817  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
17818  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
17819 +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
17820  obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
17821  obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
17822 +endif
17823 +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
17824  obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
17825  obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
17826 diff -Nur linux-4.4.46.orig/kernel/locking/rt.c linux-4.4.46/kernel/locking/rt.c
17827 --- linux-4.4.46.orig/kernel/locking/rt.c       1970-01-01 01:00:00.000000000 +0100
17828 +++ linux-4.4.46/kernel/locking/rt.c    2017-02-03 17:18:10.923618903 +0100
17829 @@ -0,0 +1,474 @@
17831 + * kernel/rt.c
17832 + *
17833 + * Real-Time Preemption Support
17834 + *
17835 + * started by Ingo Molnar:
17836 + *
17837 + *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
17838 + *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
17839 + *
17840 + * historic credit for proving that Linux spinlocks can be implemented via
17841 + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
17842 + * and others) who prototyped it on 2.4 and did lots of comparative
17843 + * research and analysis; TimeSys, for proving that you can implement a
17844 + * fully preemptible kernel via the use of IRQ threading and mutexes;
17845 + * Bill Huey for persuasively arguing on lkml that the mutex model is the
17846 + * right one; and to MontaVista, who ported pmutexes to 2.6.
17847 + *
17848 + * This code is a from-scratch implementation and is not based on pmutexes,
17849 + * but the idea of converting spinlocks to mutexes is used here too.
17850 + *
17851 + * lock debugging, locking tree, deadlock detection:
17852 + *
17853 + *  Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
17854 + *  Released under the General Public License (GPL).
17855 + *
17856 + * Includes portions of the generic R/W semaphore implementation from:
17857 + *
17858 + *  Copyright (c) 2001   David Howells (dhowells@redhat.com).
17859 + *  - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
17860 + *  - Derived also from comments by Linus
17861 + *
17862 + * Pending ownership of locks and ownership stealing:
17863 + *
17864 + *  Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
17865 + *
17866 + *   (also by Steven Rostedt)
17867 + *    - Converted single pi_lock to individual task locks.
17868 + *
17869 + * By Esben Nielsen:
17870 + *    Doing priority inheritance with help of the scheduler.
17871 + *
17872 + *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
17873 + *  - major rework based on Esben Nielsens initial patch
17874 + *  - replaced thread_info references by task_struct refs
17875 + *  - removed task->pending_owner dependency
17876 + *  - BKL drop/reacquire for semaphore style locks to avoid deadlocks
17877 + *    in the scheduler return path as discussed with Steven Rostedt
17878 + *
17879 + *  Copyright (C) 2006, Kihon Technologies Inc.
17880 + *    Steven Rostedt <rostedt@goodmis.org>
17881 + *  - debugged and patched Thomas Gleixner's rework.
17882 + *  - added back the cmpxchg to the rework.
17883 + *  - turned atomic require back on for SMP.
17884 + */
17886 +#include <linux/spinlock.h>
17887 +#include <linux/rtmutex.h>
17888 +#include <linux/sched.h>
17889 +#include <linux/delay.h>
17890 +#include <linux/module.h>
17891 +#include <linux/kallsyms.h>
17892 +#include <linux/syscalls.h>
17893 +#include <linux/interrupt.h>
17894 +#include <linux/plist.h>
17895 +#include <linux/fs.h>
17896 +#include <linux/futex.h>
17897 +#include <linux/hrtimer.h>
17899 +#include "rtmutex_common.h"
17902 + * struct mutex functions
17903 + */
17904 +void __mutex_do_init(struct mutex *mutex, const char *name,
17905 +                    struct lock_class_key *key)
17907 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
17908 +       /*
17909 +        * Make sure we are not reinitializing a held lock:
17910 +        */
17911 +       debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
17912 +       lockdep_init_map(&mutex->dep_map, name, key, 0);
17913 +#endif
17914 +       mutex->lock.save_state = 0;
17916 +EXPORT_SYMBOL(__mutex_do_init);
17918 +void __lockfunc _mutex_lock(struct mutex *lock)
17920 +       mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17921 +       rt_mutex_lock(&lock->lock);
17923 +EXPORT_SYMBOL(_mutex_lock);
17925 +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
17927 +       int ret;
17929 +       mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17930 +       ret = rt_mutex_lock_interruptible(&lock->lock);
17931 +       if (ret)
17932 +               mutex_release(&lock->dep_map, 1, _RET_IP_);
17933 +       return ret;
17935 +EXPORT_SYMBOL(_mutex_lock_interruptible);
17937 +int __lockfunc _mutex_lock_killable(struct mutex *lock)
17939 +       int ret;
17941 +       mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
17942 +       ret = rt_mutex_lock_killable(&lock->lock);
17943 +       if (ret)
17944 +               mutex_release(&lock->dep_map, 1, _RET_IP_);
17945 +       return ret;
17947 +EXPORT_SYMBOL(_mutex_lock_killable);
17949 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
17950 +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
17952 +       mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
17953 +       rt_mutex_lock(&lock->lock);
17955 +EXPORT_SYMBOL(_mutex_lock_nested);
17957 +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
17959 +       mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
17960 +       rt_mutex_lock(&lock->lock);
17962 +EXPORT_SYMBOL(_mutex_lock_nest_lock);
17964 +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
17966 +       int ret;
17968 +       mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
17969 +       ret = rt_mutex_lock_interruptible(&lock->lock);
17970 +       if (ret)
17971 +               mutex_release(&lock->dep_map, 1, _RET_IP_);
17972 +       return ret;
17974 +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
17976 +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
17978 +       int ret;
17980 +       mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
17981 +       ret = rt_mutex_lock_killable(&lock->lock);
17982 +       if (ret)
17983 +               mutex_release(&lock->dep_map, 1, _RET_IP_);
17984 +       return ret;
17986 +EXPORT_SYMBOL(_mutex_lock_killable_nested);
17987 +#endif
17989 +int __lockfunc _mutex_trylock(struct mutex *lock)
17991 +       int ret = rt_mutex_trylock(&lock->lock);
17993 +       if (ret)
17994 +               mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
17996 +       return ret;
17998 +EXPORT_SYMBOL(_mutex_trylock);
18000 +void __lockfunc _mutex_unlock(struct mutex *lock)
18002 +       mutex_release(&lock->dep_map, 1, _RET_IP_);
18003 +       rt_mutex_unlock(&lock->lock);
18005 +EXPORT_SYMBOL(_mutex_unlock);
18008 + * rwlock_t functions
18009 + */
18010 +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
18012 +       int ret;
18014 +       migrate_disable();
18015 +       ret = rt_mutex_trylock(&rwlock->lock);
18016 +       if (ret)
18017 +               rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
18018 +       else
18019 +               migrate_enable();
18021 +       return ret;
18023 +EXPORT_SYMBOL(rt_write_trylock);
18025 +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
18027 +       int ret;
18029 +       *flags = 0;
18030 +       ret = rt_write_trylock(rwlock);
18031 +       return ret;
18033 +EXPORT_SYMBOL(rt_write_trylock_irqsave);
18035 +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
18037 +       struct rt_mutex *lock = &rwlock->lock;
18038 +       int ret = 1;
18040 +       /*
18041 +        * recursive read locks succeed when current owns the lock,
18042 +        * but not when read_depth == 0 which means that the lock is
18043 +        * write locked.
18044 +        */
18045 +       if (rt_mutex_owner(lock) != current) {
18046 +               migrate_disable();
18047 +               ret = rt_mutex_trylock(lock);
18048 +               if (ret)
18049 +                       rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
18050 +               else
18051 +                       migrate_enable();
18053 +       } else if (!rwlock->read_depth) {
18054 +               ret = 0;
18055 +       }
18057 +       if (ret)
18058 +               rwlock->read_depth++;
18060 +       return ret;
18062 +EXPORT_SYMBOL(rt_read_trylock);
18064 +void __lockfunc rt_write_lock(rwlock_t *rwlock)
18066 +       rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
18067 +       __rt_spin_lock(&rwlock->lock);
18069 +EXPORT_SYMBOL(rt_write_lock);
18071 +void __lockfunc rt_read_lock(rwlock_t *rwlock)
18073 +       struct rt_mutex *lock = &rwlock->lock;
18076 +       /*
18077 +        * recursive read locks succeed when current owns the lock
18078 +        */
18079 +       if (rt_mutex_owner(lock) != current) {
18080 +               rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
18081 +               __rt_spin_lock(lock);
18082 +       }
18083 +       rwlock->read_depth++;
18086 +EXPORT_SYMBOL(rt_read_lock);
18088 +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
18090 +       /* NOTE: we always pass in '1' for nested, for simplicity */
18091 +       rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
18092 +       __rt_spin_unlock(&rwlock->lock);
18093 +       migrate_enable();
18095 +EXPORT_SYMBOL(rt_write_unlock);
18097 +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
18099 +       /* Release the lock only when read_depth is down to 0 */
18100 +       if (--rwlock->read_depth == 0) {
18101 +               rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
18102 +               __rt_spin_unlock(&rwlock->lock);
18103 +               migrate_enable();
18104 +       }
18106 +EXPORT_SYMBOL(rt_read_unlock);
18108 +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
18110 +       rt_write_lock(rwlock);
18112 +       return 0;
18114 +EXPORT_SYMBOL(rt_write_lock_irqsave);
18116 +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
18118 +       rt_read_lock(rwlock);
18120 +       return 0;
18122 +EXPORT_SYMBOL(rt_read_lock_irqsave);
18124 +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
18126 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
18127 +       /*
18128 +        * Make sure we are not reinitializing a held lock:
18129 +        */
18130 +       debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
18131 +       lockdep_init_map(&rwlock->dep_map, name, key, 0);
18132 +#endif
18133 +       rwlock->lock.save_state = 1;
18134 +       rwlock->read_depth = 0;
18136 +EXPORT_SYMBOL(__rt_rwlock_init);
18139 + * rw_semaphores
18140 + */
18142 +void  rt_up_write(struct rw_semaphore *rwsem)
18144 +       rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
18145 +       rt_mutex_unlock(&rwsem->lock);
18147 +EXPORT_SYMBOL(rt_up_write);
18149 +void __rt_up_read(struct rw_semaphore *rwsem)
18151 +       if (--rwsem->read_depth == 0)
18152 +               rt_mutex_unlock(&rwsem->lock);
18155 +void  rt_up_read(struct rw_semaphore *rwsem)
18157 +       rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
18158 +       __rt_up_read(rwsem);
18160 +EXPORT_SYMBOL(rt_up_read);
18163 + * downgrade a write lock into a read lock
18164 + * - just wake up any readers at the front of the queue
18165 + */
18166 +void  rt_downgrade_write(struct rw_semaphore *rwsem)
18168 +       BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
18169 +       rwsem->read_depth = 1;
18171 +EXPORT_SYMBOL(rt_downgrade_write);
18173 +int  rt_down_write_trylock(struct rw_semaphore *rwsem)
18175 +       int ret = rt_mutex_trylock(&rwsem->lock);
18177 +       if (ret)
18178 +               rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
18179 +       return ret;
18181 +EXPORT_SYMBOL(rt_down_write_trylock);
18183 +void  rt_down_write(struct rw_semaphore *rwsem)
18185 +       rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
18186 +       rt_mutex_lock(&rwsem->lock);
18188 +EXPORT_SYMBOL(rt_down_write);
18190 +void  rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
18192 +       rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
18193 +       rt_mutex_lock(&rwsem->lock);
18195 +EXPORT_SYMBOL(rt_down_write_nested);
18197 +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
18198 +                              struct lockdep_map *nest)
18200 +       rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
18201 +       rt_mutex_lock(&rwsem->lock);
18203 +EXPORT_SYMBOL(rt_down_write_nested_lock);
18205 +int rt__down_read_trylock(struct rw_semaphore *rwsem)
18207 +       struct rt_mutex *lock = &rwsem->lock;
18208 +       int ret = 1;
18210 +       /*
18211 +        * recursive read locks succeed when current owns the rwsem,
18212 +        * but not when read_depth == 0 which means that the rwsem is
18213 +        * write locked.
18214 +        */
18215 +       if (rt_mutex_owner(lock) != current)
18216 +               ret = rt_mutex_trylock(&rwsem->lock);
18217 +       else if (!rwsem->read_depth)
18218 +               ret = 0;
18220 +       if (ret)
18221 +               rwsem->read_depth++;
18222 +       return ret;
18226 +int  rt_down_read_trylock(struct rw_semaphore *rwsem)
18228 +       int ret;
18230 +       ret = rt__down_read_trylock(rwsem);
18231 +       if (ret)
18232 +               rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
18234 +       return ret;
18236 +EXPORT_SYMBOL(rt_down_read_trylock);
18238 +void rt__down_read(struct rw_semaphore *rwsem)
18240 +       struct rt_mutex *lock = &rwsem->lock;
18242 +       if (rt_mutex_owner(lock) != current)
18243 +               rt_mutex_lock(&rwsem->lock);
18244 +       rwsem->read_depth++;
18246 +EXPORT_SYMBOL(rt__down_read);
18248 +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
18250 +       rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
18251 +       rt__down_read(rwsem);
18254 +void  rt_down_read(struct rw_semaphore *rwsem)
18256 +       __rt_down_read(rwsem, 0);
18258 +EXPORT_SYMBOL(rt_down_read);
18260 +void  rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
18262 +       __rt_down_read(rwsem, subclass);
18264 +EXPORT_SYMBOL(rt_down_read_nested);
18266 +void  __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
18267 +                             struct lock_class_key *key)
18269 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
18270 +       /*
18271 +        * Make sure we are not reinitializing a held lock:
18272 +        */
18273 +       debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
18274 +       lockdep_init_map(&rwsem->dep_map, name, key, 0);
18275 +#endif
18276 +       rwsem->read_depth = 0;
18277 +       rwsem->lock.save_state = 0;
18279 +EXPORT_SYMBOL(__rt_rwsem_init);
18281 +/**
18282 + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
18283 + * @cnt: the atomic which we are to dec
18284 + * @lock: the mutex to return holding if we dec to 0
18285 + *
18286 + * return true and hold lock if we dec to 0, return false otherwise
18287 + */
18288 +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
18290 +       /* dec if we can't possibly hit 0 */
18291 +       if (atomic_add_unless(cnt, -1, 1))
18292 +               return 0;
18293 +       /* we might hit 0, so take the lock */
18294 +       mutex_lock(lock);
18295 +       if (!atomic_dec_and_test(cnt)) {
18296 +               /* when we actually did the dec, we didn't hit 0 */
18297 +               mutex_unlock(lock);
18298 +               return 0;
18299 +       }
18300 +       /* we hit 0, and we hold the lock */
18301 +       return 1;
18303 +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
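An illustrative caller for atomic_dec_and_mutex_lock() above (not part of the patch): the usual pattern of dropping a reference and taking the mutex only when the count can actually reach zero. The my_object, my_list and my_list_lock names are hypothetical:

    #include <linux/atomic.h>
    #include <linux/list.h>
    #include <linux/mutex.h>
    #include <linux/slab.h>

    static DEFINE_MUTEX(my_list_lock);      /* protects my_list */
    static LIST_HEAD(my_list);

    struct my_object {
            atomic_t                refcnt;
            struct list_head        node;
    };

    static void my_object_put(struct my_object *obj)
    {
            /* Fast path: pure atomic decrement while the count stays above 1. */
            if (!atomic_dec_and_mutex_lock(&obj->refcnt, &my_list_lock))
                    return;

            /* Count reached zero and my_list_lock is held. */
            list_del(&obj->node);
            mutex_unlock(&my_list_lock);
            kfree(obj);
    }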
18304 diff -Nur linux-4.4.46.orig/kernel/locking/rtmutex.c linux-4.4.46/kernel/locking/rtmutex.c
18305 --- linux-4.4.46.orig/kernel/locking/rtmutex.c  2017-02-01 08:31:11.000000000 +0100
18306 +++ linux-4.4.46/kernel/locking/rtmutex.c       2017-02-03 17:18:10.927619058 +0100
18307 @@ -7,6 +7,11 @@
18308   *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
18309   *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
18310   *  Copyright (C) 2006 Esben Nielsen
18311 + *  Adaptive Spinlocks:
18312 + *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
18313 + *                                  and Peter Morreale,
18314 + * Adaptive Spinlocks simplification:
18315 + *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
18316   *
18317   *  See Documentation/locking/rt-mutex-design.txt for details.
18318   */
18319 @@ -16,6 +21,7 @@
18320  #include <linux/sched/rt.h>
18321  #include <linux/sched/deadline.h>
18322  #include <linux/timer.h>
18323 +#include <linux/ww_mutex.h>
18325  #include "rtmutex_common.h"
18327 @@ -133,6 +139,12 @@
18328                 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
18331 +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
18333 +       return waiter && waiter != PI_WAKEUP_INPROGRESS &&
18334 +               waiter != PI_REQUEUE_INPROGRESS;
18337  /*
18338   * We can speed up the acquire/release, if there's no debugging state to be
18339   * set up.
18340 @@ -163,13 +175,14 @@
18341   * 2) Drop lock->wait_lock
18342   * 3) Try to unlock the lock with cmpxchg
18343   */
18344 -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
18345 +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
18346 +                                       unsigned long flags)
18347         __releases(lock->wait_lock)
18349         struct task_struct *owner = rt_mutex_owner(lock);
18351         clear_rt_mutex_waiters(lock);
18352 -       raw_spin_unlock(&lock->wait_lock);
18353 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18354         /*
18355          * If a new waiter comes in between the unlock and the cmpxchg
18356          * we have two situations:
18357 @@ -211,11 +224,12 @@
18358  /*
18359   * Simple slow path only version: lock->owner is protected by lock->wait_lock.
18360   */
18361 -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
18362 +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
18363 +                                       unsigned long flags)
18364         __releases(lock->wait_lock)
18366         lock->owner = NULL;
18367 -       raw_spin_unlock(&lock->wait_lock);
18368 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18369         return true;
18371  #endif
18372 @@ -412,6 +426,14 @@
18373         return debug_rt_mutex_detect_deadlock(waiter, chwalk);
18376 +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
18378 +       if (waiter->savestate)
18379 +               wake_up_lock_sleeper(waiter->task);
18380 +       else
18381 +               wake_up_process(waiter->task);
18384  /*
18385   * Max number of times we'll walk the boosting chain:
18386   */
18387 @@ -419,7 +441,8 @@
18389  static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
18391 -       return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
18392 +       return rt_mutex_real_waiter(p->pi_blocked_on) ?
18393 +               p->pi_blocked_on->lock : NULL;
18396  /*
18397 @@ -497,7 +520,6 @@
18398         int ret = 0, depth = 0;
18399         struct rt_mutex *lock;
18400         bool detect_deadlock;
18401 -       unsigned long flags;
18402         bool requeue = true;
18404         detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
18405 @@ -540,7 +562,7 @@
18406         /*
18407          * [1] Task cannot go away as we did a get_task() before !
18408          */
18409 -       raw_spin_lock_irqsave(&task->pi_lock, flags);
18410 +       raw_spin_lock_irq(&task->pi_lock);
18412         /*
18413          * [2] Get the waiter on which @task is blocked on.
18414 @@ -556,7 +578,7 @@
18415          * reached or the state of the chain has changed while we
18416          * dropped the locks.
18417          */
18418 -       if (!waiter)
18419 +       if (!rt_mutex_real_waiter(waiter))
18420                 goto out_unlock_pi;
18422         /*
18423 @@ -624,7 +646,7 @@
18424          * operations.
18425          */
18426         if (!raw_spin_trylock(&lock->wait_lock)) {
18427 -               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18428 +               raw_spin_unlock_irq(&task->pi_lock);
18429                 cpu_relax();
18430                 goto retry;
18431         }
18432 @@ -655,7 +677,7 @@
18433                 /*
18434                  * No requeue[7] here. Just release @task [8]
18435                  */
18436 -               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18437 +               raw_spin_unlock(&task->pi_lock);
18438                 put_task_struct(task);
18440                 /*
18441 @@ -663,14 +685,14 @@
18442                  * If there is no owner of the lock, end of chain.
18443                  */
18444                 if (!rt_mutex_owner(lock)) {
18445 -                       raw_spin_unlock(&lock->wait_lock);
18446 +                       raw_spin_unlock_irq(&lock->wait_lock);
18447                         return 0;
18448                 }
18450                 /* [10] Grab the next task, i.e. owner of @lock */
18451                 task = rt_mutex_owner(lock);
18452                 get_task_struct(task);
18453 -               raw_spin_lock_irqsave(&task->pi_lock, flags);
18454 +               raw_spin_lock(&task->pi_lock);
18456                 /*
18457                  * No requeue [11] here. We just do deadlock detection.
18458 @@ -685,8 +707,8 @@
18459                 top_waiter = rt_mutex_top_waiter(lock);
18461                 /* [13] Drop locks */
18462 -               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18463 -               raw_spin_unlock(&lock->wait_lock);
18464 +               raw_spin_unlock(&task->pi_lock);
18465 +               raw_spin_unlock_irq(&lock->wait_lock);
18467                 /* If owner is not blocked, end of chain. */
18468                 if (!next_lock)
18469 @@ -707,7 +729,7 @@
18470         rt_mutex_enqueue(lock, waiter);
18472         /* [8] Release the task */
18473 -       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18474 +       raw_spin_unlock(&task->pi_lock);
18475         put_task_struct(task);
18477         /*
18478 @@ -718,21 +740,24 @@
18479          * follow here. This is the end of the chain we are walking.
18480          */
18481         if (!rt_mutex_owner(lock)) {
18482 +               struct rt_mutex_waiter *lock_top_waiter;
18484                 /*
18485                  * If the requeue [7] above changed the top waiter,
18486                  * then we need to wake the new top waiter up to try
18487                  * to get the lock.
18488                  */
18489 -               if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
18490 -                       wake_up_process(rt_mutex_top_waiter(lock)->task);
18491 -               raw_spin_unlock(&lock->wait_lock);
18492 +               lock_top_waiter = rt_mutex_top_waiter(lock);
18493 +               if (prerequeue_top_waiter != lock_top_waiter)
18494 +                       rt_mutex_wake_waiter(lock_top_waiter);
18495 +               raw_spin_unlock_irq(&lock->wait_lock);
18496                 return 0;
18497         }
18499         /* [10] Grab the next task, i.e. the owner of @lock */
18500         task = rt_mutex_owner(lock);
18501         get_task_struct(task);
18502 -       raw_spin_lock_irqsave(&task->pi_lock, flags);
18503 +       raw_spin_lock(&task->pi_lock);
18505         /* [11] requeue the pi waiters if necessary */
18506         if (waiter == rt_mutex_top_waiter(lock)) {
18507 @@ -786,8 +811,8 @@
18508         top_waiter = rt_mutex_top_waiter(lock);
18510         /* [13] Drop the locks */
18511 -       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18512 -       raw_spin_unlock(&lock->wait_lock);
18513 +       raw_spin_unlock(&task->pi_lock);
18514 +       raw_spin_unlock_irq(&lock->wait_lock);
18516         /*
18517          * Make the actual exit decisions [12], based on the stored
18518 @@ -810,28 +835,46 @@
18519         goto again;
18521   out_unlock_pi:
18522 -       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18523 +       raw_spin_unlock_irq(&task->pi_lock);
18524   out_put_task:
18525         put_task_struct(task);
18527         return ret;
18531 +#define STEAL_NORMAL  0
18532 +#define STEAL_LATERAL 1
18535 + * Note that RT tasks are excluded from lateral-steals to prevent the
18536 + * introduction of an unbounded latency
18537 + */
18538 +static inline int lock_is_stealable(struct task_struct *task,
18539 +                                   struct task_struct *pendowner, int mode)
18541 +    if (mode == STEAL_NORMAL || rt_task(task)) {
18542 +           if (task->prio >= pendowner->prio)
18543 +                   return 0;
18544 +    } else if (task->prio > pendowner->prio)
18545 +           return 0;
18546 +    return 1;
18549  /*
18550   * Try to take an rt-mutex
18551   *
18552 - * Must be called with lock->wait_lock held.
18553 + * Must be called with lock->wait_lock held and interrupts disabled
18554   *
18555   * @lock:   The lock to be acquired.
18556   * @task:   The task which wants to acquire the lock
18557   * @waiter: The waiter that is queued to the lock's wait tree if the
18558   *         callsite called task_blocked_on_lock(), otherwise NULL
18559   */
18560 -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
18561 -                               struct rt_mutex_waiter *waiter)
18562 +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
18563 +                                 struct task_struct *task,
18564 +                                 struct rt_mutex_waiter *waiter, int mode)
18566 -       unsigned long flags;
18568         /*
18569          * Before testing whether we can acquire @lock, we set the
18570          * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
18571 @@ -867,8 +910,10 @@
18572                  * If waiter is not the highest priority waiter of
18573                  * @lock, give up.
18574                  */
18575 -               if (waiter != rt_mutex_top_waiter(lock))
18576 +               if (waiter != rt_mutex_top_waiter(lock)) {
18577 +                       /* XXX lock_is_stealable() ? */
18578                         return 0;
18579 +               }
18581                 /*
18582                  * We can acquire the lock. Remove the waiter from the
18583 @@ -886,14 +931,10 @@
18584                  * not need to be dequeued.
18585                  */
18586                 if (rt_mutex_has_waiters(lock)) {
18587 -                       /*
18588 -                        * If @task->prio is greater than or equal to
18589 -                        * the top waiter priority (kernel view),
18590 -                        * @task lost.
18591 -                        */
18592 -                       if (task->prio >= rt_mutex_top_waiter(lock)->prio)
18593 -                               return 0;
18594 +                       struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
18596 +                       if (task != pown && !lock_is_stealable(task, pown, mode))
18597 +                               return 0;
18598                         /*
18599                          * The current top waiter stays enqueued. We
18600                          * don't have to change anything in the lock
18601 @@ -916,7 +957,7 @@
18602          * case, but conditionals are more expensive than a redundant
18603          * store.
18604          */
18605 -       raw_spin_lock_irqsave(&task->pi_lock, flags);
18606 +       raw_spin_lock(&task->pi_lock);
18607         task->pi_blocked_on = NULL;
18608         /*
18609          * Finish the lock acquisition. @task is the new owner. If
18610 @@ -925,7 +966,7 @@
18611          */
18612         if (rt_mutex_has_waiters(lock))
18613                 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
18614 -       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
18615 +       raw_spin_unlock(&task->pi_lock);
18617  takeit:
18618         /* We got the lock. */
18619 @@ -942,12 +983,444 @@
18620         return 1;
18623 +#ifdef CONFIG_PREEMPT_RT_FULL
18625 + * preemptible spin_lock functions:
18626 + */
18627 +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
18628 +                                        void  (*slowfn)(struct rt_mutex *lock,
18629 +                                                        bool mg_off),
18630 +                                        bool do_mig_dis)
18632 +       might_sleep_no_state_check();
18634 +       if (do_mig_dis)
18635 +               migrate_disable();
18637 +       if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
18638 +               rt_mutex_deadlock_account_lock(lock, current);
18639 +       else
18640 +               slowfn(lock, do_mig_dis);
18643 +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
18644 +                                          int  (*slowfn)(struct rt_mutex *lock))
18646 +       if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
18647 +               rt_mutex_deadlock_account_unlock(current);
18648 +               return 0;
18649 +       }
18650 +       return slowfn(lock);
18652 +#ifdef CONFIG_SMP
18654 + * Note that owner is a speculative pointer and dereferencing relies
18655 + * on rcu_read_lock() and the check against the lock owner.
18656 + */
18657 +static int adaptive_wait(struct rt_mutex *lock,
18658 +                        struct task_struct *owner)
18660 +       int res = 0;
18662 +       rcu_read_lock();
18663 +       for (;;) {
18664 +               if (owner != rt_mutex_owner(lock))
18665 +                       break;
18666 +               /*
18667 +                * Ensure that owner->on_cpu is dereferenced _after_
18668 +                * checking the above to be valid.
18669 +                */
18670 +               barrier();
18671 +               if (!owner->on_cpu) {
18672 +                       res = 1;
18673 +                       break;
18674 +               }
18675 +               cpu_relax();
18676 +       }
18677 +       rcu_read_unlock();
18678 +       return res;
18680 +#else
18681 +static int adaptive_wait(struct rt_mutex *lock,
18682 +                        struct task_struct *orig_owner)
18684 +       return 1;
18686 +#endif
18688 +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
18689 +                                  struct rt_mutex_waiter *waiter,
18690 +                                  struct task_struct *task,
18691 +                                  enum rtmutex_chainwalk chwalk);
18693 + * Slow path lock function spin_lock style: this variant is very
18694 + * careful not to miss any non-lock wakeups.
18695 + *
18696 + * We store the current state under p->pi_lock in p->saved_state and
18697 + * the try_to_wake_up() code handles this accordingly.
18698 + */
18699 +static void  noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
18700 +                                                   bool mg_off)
18702 +       struct task_struct *lock_owner, *self = current;
18703 +       struct rt_mutex_waiter waiter, *top_waiter;
18704 +       unsigned long flags;
18705 +       int ret;
18707 +       rt_mutex_init_waiter(&waiter, true);
18709 +       raw_spin_lock_irqsave(&lock->wait_lock, flags);
18711 +       if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
18712 +               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18713 +               return;
18714 +       }
18716 +       BUG_ON(rt_mutex_owner(lock) == self);
18718 +       /*
18719 +        * We save whatever state the task is in and we'll restore it
18720 +        * after acquiring the lock taking real wakeups into account
18721 +        * as well. We are serialized via pi_lock against wakeups. See
18722 +        * try_to_wake_up().
18723 +        */
18724 +       raw_spin_lock(&self->pi_lock);
18725 +       self->saved_state = self->state;
18726 +       __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
18727 +       raw_spin_unlock(&self->pi_lock);
18729 +       ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
18730 +       BUG_ON(ret);
18732 +       for (;;) {
18733 +               /* Try to acquire the lock again. */
18734 +               if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
18735 +                       break;
18737 +               top_waiter = rt_mutex_top_waiter(lock);
18738 +               lock_owner = rt_mutex_owner(lock);
18740 +               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18742 +               debug_rt_mutex_print_deadlock(&waiter);
18744 +               if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
18745 +                       if (mg_off)
18746 +                               migrate_enable();
18747 +                       schedule();
18748 +                       if (mg_off)
18749 +                               migrate_disable();
18750 +               }
18752 +               raw_spin_lock_irqsave(&lock->wait_lock, flags);
18754 +               raw_spin_lock(&self->pi_lock);
18755 +               __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
18756 +               raw_spin_unlock(&self->pi_lock);
18757 +       }
18759 +       /*
18760 +        * Restore the task state to current->saved_state. We set it
18761 +        * to the original state above and the try_to_wake_up() code
18762 +        * has possibly updated it when a real (non-rtmutex) wakeup
18763 +        * happened while we were blocked. Clear saved_state so
18764 +        * try_to_wakeup() does not get confused.
18765 +        */
18766 +       raw_spin_lock(&self->pi_lock);
18767 +       __set_current_state_no_track(self->saved_state);
18768 +       self->saved_state = TASK_RUNNING;
18769 +       raw_spin_unlock(&self->pi_lock);
18771 +       /*
18772 +        * try_to_take_rt_mutex() sets the waiter bit
18773 +        * unconditionally. We might have to fix that up:
18774 +        */
18775 +       fixup_rt_mutex_waiters(lock);
18777 +       BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
18778 +       BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
18780 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18782 +       debug_rt_mutex_free_waiter(&waiter);
18785 +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
18786 +                                   struct wake_q_head *wake_sleeper_q,
18787 +                                   struct rt_mutex *lock);
18789 + * Slow path to release a rt_mutex spin_lock style
18790 + */
18791 +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
18793 +       unsigned long flags;
18794 +       WAKE_Q(wake_q);
18795 +       WAKE_Q(wake_sleeper_q);
18797 +       raw_spin_lock_irqsave(&lock->wait_lock, flags);
18799 +       debug_rt_mutex_unlock(lock);
18801 +       rt_mutex_deadlock_account_unlock(current);
18803 +       if (!rt_mutex_has_waiters(lock)) {
18804 +               lock->owner = NULL;
18805 +               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18806 +               return 0;
18807 +       }
18809 +       mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
18811 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18812 +       wake_up_q(&wake_q);
18813 +       wake_up_q_sleeper(&wake_sleeper_q);
18815 +       /* Undo pi boosting when necessary */
18816 +       rt_mutex_adjust_prio(current);
18817 +       return 0;
18820 +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
18822 +       unsigned long flags;
18823 +       WAKE_Q(wake_q);
18824 +       WAKE_Q(wake_sleeper_q);
18826 +       raw_spin_lock_irqsave(&lock->wait_lock, flags);
18828 +       debug_rt_mutex_unlock(lock);
18830 +       rt_mutex_deadlock_account_unlock(current);
18832 +       if (!rt_mutex_has_waiters(lock)) {
18833 +               lock->owner = NULL;
18834 +               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18835 +               return 0;
18836 +       }
18838 +       mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
18840 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18841 +       wake_up_q(&wake_q);
18842 +       wake_up_q_sleeper(&wake_sleeper_q);
18843 +       return 1;
18846 +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
18848 +       rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
18849 +       spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
18851 +EXPORT_SYMBOL(rt_spin_lock__no_mg);
18853 +void __lockfunc rt_spin_lock(spinlock_t *lock)
18855 +       rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
18856 +       spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
18858 +EXPORT_SYMBOL(rt_spin_lock);
18860 +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
18862 +       rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
18864 +EXPORT_SYMBOL(__rt_spin_lock);
18866 +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
18868 +       rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
18870 +EXPORT_SYMBOL(__rt_spin_lock__no_mg);
18872 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
18873 +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
18875 +       spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
18876 +       rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
18878 +EXPORT_SYMBOL(rt_spin_lock_nested);
18879 +#endif
18881 +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
18883 +       /* NOTE: we always pass in '1' for nested, for simplicity */
18884 +       spin_release(&lock->dep_map, 1, _RET_IP_);
18885 +       rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
18887 +EXPORT_SYMBOL(rt_spin_unlock__no_mg);
18889 +void __lockfunc rt_spin_unlock(spinlock_t *lock)
18891 +       /* NOTE: we always pass in '1' for nested, for simplicity */
18892 +       spin_release(&lock->dep_map, 1, _RET_IP_);
18893 +       rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
18894 +       migrate_enable();
18896 +EXPORT_SYMBOL(rt_spin_unlock);
18898 +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
18900 +       int ret;
18902 +       /* NOTE: we always pass in '1' for nested, for simplicity */
18903 +       spin_release(&lock->dep_map, 1, _RET_IP_);
18904 +       ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
18905 +       migrate_enable();
18906 +       return ret;
18909 +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
18911 +       rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
18913 +EXPORT_SYMBOL(__rt_spin_unlock);
18916 + * Wait for the lock to get unlocked: instead of polling for an unlock
18917 + * (like raw spinlocks do), we lock and unlock, to force the kernel to
18918 + * schedule if there's contention:
18919 + */
18920 +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
18922 +       spin_lock(lock);
18923 +       spin_unlock(lock);
18925 +EXPORT_SYMBOL(rt_spin_unlock_wait);
18927 +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
18929 +       return rt_mutex_trylock(lock);
18932 +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
18934 +       int ret;
18936 +       ret = rt_mutex_trylock(&lock->lock);
18937 +       if (ret)
18938 +               spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18939 +       return ret;
18941 +EXPORT_SYMBOL(rt_spin_trylock__no_mg);
18943 +int __lockfunc rt_spin_trylock(spinlock_t *lock)
18945 +       int ret;
18947 +       migrate_disable();
18948 +       ret = rt_mutex_trylock(&lock->lock);
18949 +       if (ret)
18950 +               spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18951 +       else
18952 +               migrate_enable();
18953 +       return ret;
18955 +EXPORT_SYMBOL(rt_spin_trylock);
18957 +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
18959 +       int ret;
18961 +       local_bh_disable();
18962 +       ret = rt_mutex_trylock(&lock->lock);
18963 +       if (ret) {
18964 +               migrate_disable();
18965 +               spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18966 +       } else
18967 +               local_bh_enable();
18968 +       return ret;
18970 +EXPORT_SYMBOL(rt_spin_trylock_bh);
18972 +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
18974 +       int ret;
18976 +       *flags = 0;
18977 +       ret = rt_mutex_trylock(&lock->lock);
18978 +       if (ret) {
18979 +               migrate_disable();
18980 +               spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
18981 +       }
18982 +       return ret;
18984 +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
18986 +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
18988 +       /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
18989 +       if (atomic_add_unless(atomic, -1, 1))
18990 +               return 0;
18991 +       rt_spin_lock(lock);
18992 +       if (atomic_dec_and_test(atomic))
18993 +               return 1;
18994 +       rt_spin_unlock(lock);
18995 +       return 0;
18997 +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
18999 +void
19000 +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
19002 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
19003 +       /*
19004 +        * Make sure we are not reinitializing a held lock:
19005 +        */
19006 +       debug_check_no_locks_freed((void *)lock, sizeof(*lock));
19007 +       lockdep_init_map(&lock->dep_map, name, key, 0);
19008 +#endif
19010 +EXPORT_SYMBOL(__rt_spin_lock_init);
19012 +#endif /* PREEMPT_RT_FULL */
19014 +#ifdef CONFIG_PREEMPT_RT_FULL
19015 +static inline int __sched
19016 +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
19018 +       struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
19019 +       struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
19021 +       if (!hold_ctx)
19022 +               return 0;
19024 +       if (unlikely(ctx == hold_ctx))
19025 +               return -EALREADY;
19027 +       if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
19028 +           (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
19029 +#ifdef CONFIG_DEBUG_MUTEXES
19030 +               DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
19031 +               ctx->contending_lock = ww;
19032 +#endif
19033 +               return -EDEADLK;
19034 +       }
19036 +       return 0;
19038 +#else
19039 +static inline int __sched
19040 +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
19042 +       BUG();
19043 +       return 0;
19046 +#endif
19048 +static inline int
19049 +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
19050 +                    struct rt_mutex_waiter *waiter)
19052 +       return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
19055  /*
19056   * Task blocks on lock.
19057   *
19058   * Prepare waiter and propagate pi chain
19059   *
19060 - * This must be called with lock->wait_lock held.
19061 + * This must be called with lock->wait_lock held and interrupts disabled
19062   */
19063  static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
19064                                    struct rt_mutex_waiter *waiter,
19065 @@ -958,7 +1431,6 @@
19066         struct rt_mutex_waiter *top_waiter = waiter;
19067         struct rt_mutex *next_lock;
19068         int chain_walk = 0, res;
19069 -       unsigned long flags;
19071         /*
19072          * Early deadlock detection. We really don't want the task to
19073 @@ -972,7 +1444,24 @@
19074         if (owner == task)
19075                 return -EDEADLK;
19077 -       raw_spin_lock_irqsave(&task->pi_lock, flags);
19078 +       raw_spin_lock(&task->pi_lock);
19080 +       /*
19081 +        * In the case of futex requeue PI, this will be a proxy
19082 +        * lock. The task will wake unaware that it is enqueued on
19083 +        * this lock. Avoid blocking on two locks and corrupting
19084 +        * pi_blocked_on via the PI_WAKEUP_INPROGRESS
19085 +        * flag. futex_wait_requeue_pi() sets this when it wakes up
19086 +        * before requeue (due to a signal or timeout). Do not enqueue
19087 +        * the task if PI_WAKEUP_INPROGRESS is set.
19088 +        */
19089 +       if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
19090 +               raw_spin_unlock(&task->pi_lock);
19091 +               return -EAGAIN;
19092 +       }
19094 +       BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
19096         __rt_mutex_adjust_prio(task);
19097         waiter->task = task;
19098         waiter->lock = lock;
19099 @@ -985,18 +1474,18 @@
19101         task->pi_blocked_on = waiter;
19103 -       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19104 +       raw_spin_unlock(&task->pi_lock);
19106         if (!owner)
19107                 return 0;
19109 -       raw_spin_lock_irqsave(&owner->pi_lock, flags);
19110 +       raw_spin_lock(&owner->pi_lock);
19111         if (waiter == rt_mutex_top_waiter(lock)) {
19112                 rt_mutex_dequeue_pi(owner, top_waiter);
19113                 rt_mutex_enqueue_pi(owner, waiter);
19115                 __rt_mutex_adjust_prio(owner);
19116 -               if (owner->pi_blocked_on)
19117 +               if (rt_mutex_real_waiter(owner->pi_blocked_on))
19118                         chain_walk = 1;
19119         } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
19120                 chain_walk = 1;
19121 @@ -1005,7 +1494,7 @@
19122         /* Store the lock on which owner is blocked or NULL */
19123         next_lock = task_blocked_on_lock(owner);
19125 -       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
19126 +       raw_spin_unlock(&owner->pi_lock);
19127         /*
19128          * Even if full deadlock detection is on, if the owner is not
19129          * blocked itself, we can avoid finding this out in the chain
19130 @@ -1021,12 +1510,12 @@
19131          */
19132         get_task_struct(owner);
19134 -       raw_spin_unlock(&lock->wait_lock);
19135 +       raw_spin_unlock_irq(&lock->wait_lock);
19137         res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
19138                                          next_lock, waiter, task);
19140 -       raw_spin_lock(&lock->wait_lock);
19141 +       raw_spin_lock_irq(&lock->wait_lock);
19143         return res;
19145 @@ -1035,15 +1524,15 @@
19146   * Remove the top waiter from the current tasks pi waiter tree and
19147   * queue it up.
19148   *
19149 - * Called with lock->wait_lock held.
19150 + * Called with lock->wait_lock held and interrupts disabled.
19151   */
19152  static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
19153 +                                   struct wake_q_head *wake_sleeper_q,
19154                                     struct rt_mutex *lock)
19156         struct rt_mutex_waiter *waiter;
19157 -       unsigned long flags;
19159 -       raw_spin_lock_irqsave(&current->pi_lock, flags);
19160 +       raw_spin_lock(&current->pi_lock);
19162         waiter = rt_mutex_top_waiter(lock);
19164 @@ -1065,15 +1554,18 @@
19165          */
19166         lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
19168 -       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
19169 +       raw_spin_unlock(&current->pi_lock);
19171 -       wake_q_add(wake_q, waiter->task);
19172 +       if (waiter->savestate)
19173 +               wake_q_add(wake_sleeper_q, waiter->task);
19174 +       else
19175 +               wake_q_add(wake_q, waiter->task);
19178  /*
19179   * Remove a waiter from a lock and give up
19180   *
19181 - * Must be called with lock->wait_lock held and
19182 + * Must be called with lock->wait_lock held and interrupts disabled. It must
19183   * have just failed to try_to_take_rt_mutex().
19184   */
19185  static void remove_waiter(struct rt_mutex *lock,
19186 @@ -1081,13 +1573,12 @@
19188         bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
19189         struct task_struct *owner = rt_mutex_owner(lock);
19190 -       struct rt_mutex *next_lock;
19191 -       unsigned long flags;
19192 +       struct rt_mutex *next_lock = NULL;
19194 -       raw_spin_lock_irqsave(&current->pi_lock, flags);
19195 +       raw_spin_lock(&current->pi_lock);
19196         rt_mutex_dequeue(lock, waiter);
19197         current->pi_blocked_on = NULL;
19198 -       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
19199 +       raw_spin_unlock(&current->pi_lock);
19201         /*
19202          * Only update priority if the waiter was the highest priority
19203 @@ -1096,7 +1587,7 @@
19204         if (!owner || !is_top_waiter)
19205                 return;
19207 -       raw_spin_lock_irqsave(&owner->pi_lock, flags);
19208 +       raw_spin_lock(&owner->pi_lock);
19210         rt_mutex_dequeue_pi(owner, waiter);
19212 @@ -1106,9 +1597,10 @@
19213         __rt_mutex_adjust_prio(owner);
19215         /* Store the lock on which owner is blocked or NULL */
19216 -       next_lock = task_blocked_on_lock(owner);
19217 +       if (rt_mutex_real_waiter(owner->pi_blocked_on))
19218 +               next_lock = task_blocked_on_lock(owner);
19220 -       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
19221 +       raw_spin_unlock(&owner->pi_lock);
19223         /*
19224          * Don't walk the chain, if the owner task is not blocked
19225 @@ -1120,12 +1612,12 @@
19226         /* gets dropped in rt_mutex_adjust_prio_chain()! */
19227         get_task_struct(owner);
19229 -       raw_spin_unlock(&lock->wait_lock);
19230 +       raw_spin_unlock_irq(&lock->wait_lock);
19232         rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
19233                                    next_lock, NULL, current);
19235 -       raw_spin_lock(&lock->wait_lock);
19236 +       raw_spin_lock_irq(&lock->wait_lock);
19239  /*
19240 @@ -1142,17 +1634,17 @@
19241         raw_spin_lock_irqsave(&task->pi_lock, flags);
19243         waiter = task->pi_blocked_on;
19244 -       if (!waiter || (waiter->prio == task->prio &&
19245 +       if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
19246                         !dl_prio(task->prio))) {
19247                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19248                 return;
19249         }
19250         next_lock = waiter->lock;
19251 -       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19253         /* gets dropped in rt_mutex_adjust_prio_chain()! */
19254         get_task_struct(task);
19256 +       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
19257         rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
19258                                    next_lock, NULL, task);
19260 @@ -1161,16 +1653,17 @@
19261   * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
19262   * @lock:               the rt_mutex to take
19263   * @state:              the state the task should block in (TASK_INTERRUPTIBLE
19264 - *                      or TASK_UNINTERRUPTIBLE)
19265 + *                      or TASK_UNINTERRUPTIBLE)
19266   * @timeout:            the pre-initialized and started timer, or NULL for none
19267   * @waiter:             the pre-initialized rt_mutex_waiter
19268   *
19269 - * lock->wait_lock must be held by the caller.
19270 + * Must be called with lock->wait_lock held and interrupts disabled
19271   */
19272  static int __sched
19273  __rt_mutex_slowlock(struct rt_mutex *lock, int state,
19274                     struct hrtimer_sleeper *timeout,
19275 -                   struct rt_mutex_waiter *waiter)
19276 +                   struct rt_mutex_waiter *waiter,
19277 +                   struct ww_acquire_ctx *ww_ctx)
19279         int ret = 0;
19281 @@ -1193,13 +1686,19 @@
19282                                 break;
19283                 }
19285 -               raw_spin_unlock(&lock->wait_lock);
19286 +               if (ww_ctx && ww_ctx->acquired > 0) {
19287 +                       ret = __mutex_lock_check_stamp(lock, ww_ctx);
19288 +                       if (ret)
19289 +                               break;
19290 +               }
19292 +               raw_spin_unlock_irq(&lock->wait_lock);
19294                 debug_rt_mutex_print_deadlock(waiter);
19296                 schedule();
19298 -               raw_spin_lock(&lock->wait_lock);
19299 +               raw_spin_lock_irq(&lock->wait_lock);
19300                 set_current_state(state);
19301         }
19303 @@ -1227,26 +1726,112 @@
19304         }
19307 +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
19308 +                                                  struct ww_acquire_ctx *ww_ctx)
19310 +#ifdef CONFIG_DEBUG_MUTEXES
19311 +       /*
19312 +        * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
19313 +        * but released with a normal mutex_unlock in this call.
19314 +        *
19315 +        * This should never happen, always use ww_mutex_unlock.
19316 +        */
19317 +       DEBUG_LOCKS_WARN_ON(ww->ctx);
19319 +       /*
19320 +        * Not quite done after calling ww_acquire_done() ?
19321 +        */
19322 +       DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
19324 +       if (ww_ctx->contending_lock) {
19325 +               /*
19326 +                * After -EDEADLK you tried to
19327 +                * acquire a different ww_mutex? Bad!
19328 +                */
19329 +               DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
19331 +               /*
19332 +                * You called ww_mutex_lock after receiving -EDEADLK,
19333 +                * but 'forgot' to unlock everything else first?
19334 +                */
19335 +               DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
19336 +               ww_ctx->contending_lock = NULL;
19337 +       }
19339 +       /*
19340 +        * Naughty, using a different class will lead to undefined behavior!
19341 +        */
19342 +       DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
19343 +#endif
19344 +       ww_ctx->acquired++;
19347 +#ifdef CONFIG_PREEMPT_RT_FULL
19348 +static void ww_mutex_account_lock(struct rt_mutex *lock,
19349 +                                 struct ww_acquire_ctx *ww_ctx)
19351 +       struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
19352 +       struct rt_mutex_waiter *waiter, *n;
19354 +       /*
19355 +        * This branch gets optimized out for the common case,
19356 +        * and is only important for ww_mutex_lock.
19357 +        */
19358 +       ww_mutex_lock_acquired(ww, ww_ctx);
19359 +       ww->ctx = ww_ctx;
19361 +       /*
19362 +        * Give any possible sleeping processes the chance to wake up,
19363 +        * so they can recheck if they have to back off.
19364 +        */
19365 +       rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
19366 +                                            tree_entry) {
19367 +               /* XXX debug rt mutex waiter wakeup */
19369 +               BUG_ON(waiter->lock != lock);
19370 +               rt_mutex_wake_waiter(waiter);
19371 +       }
19374 +#else
19376 +static void ww_mutex_account_lock(struct rt_mutex *lock,
19377 +                                 struct ww_acquire_ctx *ww_ctx)
19379 +       BUG();
19381 +#endif
19383  /*
19384   * Slow path lock function:
19385   */
19386  static int __sched
19387  rt_mutex_slowlock(struct rt_mutex *lock, int state,
19388                   struct hrtimer_sleeper *timeout,
19389 -                 enum rtmutex_chainwalk chwalk)
19390 +                 enum rtmutex_chainwalk chwalk,
19391 +                 struct ww_acquire_ctx *ww_ctx)
19393         struct rt_mutex_waiter waiter;
19394 +       unsigned long flags;
19395         int ret = 0;
19397 -       debug_rt_mutex_init_waiter(&waiter);
19398 -       RB_CLEAR_NODE(&waiter.pi_tree_entry);
19399 -       RB_CLEAR_NODE(&waiter.tree_entry);
19400 +       rt_mutex_init_waiter(&waiter, false);
19402 -       raw_spin_lock(&lock->wait_lock);
19403 +       /*
19404 +        * Technically we could use raw_spin_[un]lock_irq() here, but this can
19405 +        * be called in early boot if the cmpxchg() fast path is disabled
19406 +        * (debug, no architecture support). In this case we will acquire the
19407 +        * rtmutex with lock->wait_lock held. But we cannot unconditionally
19408 +        * enable interrupts in that early boot case. So we need to use the
19409 +        * irqsave/restore variants.
19410 +        */
19411 +       raw_spin_lock_irqsave(&lock->wait_lock, flags);
19413         /* Try to acquire the lock again: */
19414         if (try_to_take_rt_mutex(lock, current, NULL)) {
19415 -               raw_spin_unlock(&lock->wait_lock);
19416 +               if (ww_ctx)
19417 +                       ww_mutex_account_lock(lock, ww_ctx);
19418 +               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19419                 return 0;
19420         }
19422 @@ -1260,13 +1845,23 @@
19424         if (likely(!ret))
19425                 /* sleep on the mutex */
19426 -               ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
19427 +               ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
19428 +                                         ww_ctx);
19429 +       else if (ww_ctx) {
19430 +               /* ww_mutex received EDEADLK, let it become EALREADY */
19431 +               ret = __mutex_lock_check_stamp(lock, ww_ctx);
19432 +               BUG_ON(!ret);
19433 +       }
19435         if (unlikely(ret)) {
19436                 __set_current_state(TASK_RUNNING);
19437                 if (rt_mutex_has_waiters(lock))
19438                         remove_waiter(lock, &waiter);
19439 -               rt_mutex_handle_deadlock(ret, chwalk, &waiter);
19440 +               /* ww_mutex wants to report EDEADLK/EALREADY, let it */
19441 +               if (!ww_ctx)
19442 +                       rt_mutex_handle_deadlock(ret, chwalk, &waiter);
19443 +       } else if (ww_ctx) {
19444 +               ww_mutex_account_lock(lock, ww_ctx);
19445         }
19447         /*
19448 @@ -1275,7 +1870,7 @@
19449          */
19450         fixup_rt_mutex_waiters(lock);
19452 -       raw_spin_unlock(&lock->wait_lock);
19453 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19455         /* Remove pending timer: */
19456         if (unlikely(timeout))
19457 @@ -1291,6 +1886,7 @@
19458   */
19459  static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
19461 +       unsigned long flags;
19462         int ret;
19464         /*
19465 @@ -1302,10 +1898,10 @@
19466                 return 0;
19468         /*
19469 -        * The mutex has currently no owner. Lock the wait lock and
19470 -        * try to acquire the lock.
19471 +        * The mutex has currently no owner. Lock the wait lock and try to
19472 +        * acquire the lock. We use irqsave here to support early boot calls.
19473          */
19474 -       raw_spin_lock(&lock->wait_lock);
19475 +       raw_spin_lock_irqsave(&lock->wait_lock, flags);
19477         ret = try_to_take_rt_mutex(lock, current, NULL);
19479 @@ -1315,7 +1911,7 @@
19480          */
19481         fixup_rt_mutex_waiters(lock);
19483 -       raw_spin_unlock(&lock->wait_lock);
19484 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19486         return ret;
19488 @@ -1325,9 +1921,13 @@
19489   * Return whether the current task needs to undo a potential priority boosting.
19490   */
19491  static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
19492 -                                       struct wake_q_head *wake_q)
19493 +                                       struct wake_q_head *wake_q,
19494 +                                       struct wake_q_head *wake_sleeper_q)
19496 -       raw_spin_lock(&lock->wait_lock);
19497 +       unsigned long flags;
19499 +       /* irqsave required to support early boot calls */
19500 +       raw_spin_lock_irqsave(&lock->wait_lock, flags);
19502         debug_rt_mutex_unlock(lock);
19504 @@ -1366,10 +1966,10 @@
19505          */
19506         while (!rt_mutex_has_waiters(lock)) {
19507                 /* Drops lock->wait_lock ! */
19508 -               if (unlock_rt_mutex_safe(lock) == true)
19509 +               if (unlock_rt_mutex_safe(lock, flags) == true)
19510                         return false;
19511                 /* Relock the rtmutex and try again */
19512 -               raw_spin_lock(&lock->wait_lock);
19513 +               raw_spin_lock_irqsave(&lock->wait_lock, flags);
19514         }
19516         /*
19517 @@ -1378,9 +1978,9 @@
19518          *
19519          * Queue the next waiter for wakeup once we release the wait_lock.
19520          */
19521 -       mark_wakeup_next_waiter(wake_q, lock);
19522 +       mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
19524 -       raw_spin_unlock(&lock->wait_lock);
19525 +       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
19527         /* check PI boosting */
19528         return true;
19529 @@ -1394,31 +1994,36 @@
19530   */
19531  static inline int
19532  rt_mutex_fastlock(struct rt_mutex *lock, int state,
19533 +                 struct ww_acquire_ctx *ww_ctx,
19534                   int (*slowfn)(struct rt_mutex *lock, int state,
19535                                 struct hrtimer_sleeper *timeout,
19536 -                               enum rtmutex_chainwalk chwalk))
19537 +                               enum rtmutex_chainwalk chwalk,
19538 +                               struct ww_acquire_ctx *ww_ctx))
19540         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
19541                 rt_mutex_deadlock_account_lock(lock, current);
19542                 return 0;
19543         } else
19544 -               return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
19545 +               return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
19546 +                             ww_ctx);
19549  static inline int
19550  rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
19551                         struct hrtimer_sleeper *timeout,
19552                         enum rtmutex_chainwalk chwalk,
19553 +                       struct ww_acquire_ctx *ww_ctx,
19554                         int (*slowfn)(struct rt_mutex *lock, int state,
19555                                       struct hrtimer_sleeper *timeout,
19556 -                                     enum rtmutex_chainwalk chwalk))
19557 +                                     enum rtmutex_chainwalk chwalk,
19558 +                                     struct ww_acquire_ctx *ww_ctx))
19560         if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
19561             likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
19562                 rt_mutex_deadlock_account_lock(lock, current);
19563                 return 0;
19564         } else
19565 -               return slowfn(lock, state, timeout, chwalk);
19566 +               return slowfn(lock, state, timeout, chwalk, ww_ctx);
19569  static inline int
19570 @@ -1435,17 +2040,20 @@
19571  static inline void
19572  rt_mutex_fastunlock(struct rt_mutex *lock,
19573                     bool (*slowfn)(struct rt_mutex *lock,
19574 -                                  struct wake_q_head *wqh))
19575 +                                  struct wake_q_head *wqh,
19576 +                                  struct wake_q_head *wq_sleeper))
19578         WAKE_Q(wake_q);
19579 +       WAKE_Q(wake_sleeper_q);
19581         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
19582                 rt_mutex_deadlock_account_unlock(current);
19584         } else {
19585 -               bool deboost = slowfn(lock, &wake_q);
19586 +               bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
19588                 wake_up_q(&wake_q);
19589 +               wake_up_q_sleeper(&wake_sleeper_q);
19591                 /* Undo pi boosting if necessary: */
19592                 if (deboost)
19593 @@ -1462,7 +2070,7 @@
19595         might_sleep();
19597 -       rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
19598 +       rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
19600  EXPORT_SYMBOL_GPL(rt_mutex_lock);
19602 @@ -1479,7 +2087,7 @@
19604         might_sleep();
19606 -       return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
19607 +       return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
19609  EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
19611 @@ -1492,11 +2100,30 @@
19612         might_sleep();
19614         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
19615 -                                      RT_MUTEX_FULL_CHAINWALK,
19616 +                                      RT_MUTEX_FULL_CHAINWALK, NULL,
19617                                        rt_mutex_slowlock);
19620  /**
19621 + * rt_mutex_lock_killable - lock a rt_mutex killable
19622 + *
19623 + * @lock:              the rt_mutex to be locked
19624 + * @detect_deadlock:   deadlock detection on/off
19625 + *
19626 + * Returns:
19627 + *  0          on success
19628 + * -EINTR      when interrupted by a signal
19629 + * -EDEADLK    when the lock would deadlock (when deadlock detection is on)
19630 + */
19631 +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
19633 +       might_sleep();
19635 +       return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
19637 +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
19639 +/**
19640   * rt_mutex_timed_lock - lock a rt_mutex interruptible
19641   *                     the timeout structure is provided
19642   *                     by the caller
19643 @@ -1516,6 +2143,7 @@
19645         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
19646                                        RT_MUTEX_MIN_CHAINWALK,
19647 +                                      NULL,
19648                                        rt_mutex_slowlock);
19650  EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
19651 @@ -1533,7 +2161,11 @@
19652   */
19653  int __sched rt_mutex_trylock(struct rt_mutex *lock)
19655 +#ifdef CONFIG_PREEMPT_RT_FULL
19656 +       if (WARN_ON_ONCE(in_irq() || in_nmi()))
19657 +#else
19658         if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
19659 +#endif
19660                 return 0;
19662         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
19663 @@ -1559,13 +2191,14 @@
19664   * required or not.
19665   */
19666  bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
19667 -                                  struct wake_q_head *wqh)
19668 +                                  struct wake_q_head *wqh,
19669 +                                  struct wake_q_head *wq_sleeper)
19671         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
19672                 rt_mutex_deadlock_account_unlock(current);
19673                 return false;
19674         }
19675 -       return rt_mutex_slowunlock(lock, wqh);
19676 +       return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
19679  /**
19680 @@ -1598,13 +2231,12 @@
19681  void __rt_mutex_init(struct rt_mutex *lock, const char *name)
19683         lock->owner = NULL;
19684 -       raw_spin_lock_init(&lock->wait_lock);
19685         lock->waiters = RB_ROOT;
19686         lock->waiters_leftmost = NULL;
19688         debug_rt_mutex_init(lock, name);
19690 -EXPORT_SYMBOL_GPL(__rt_mutex_init);
19691 +EXPORT_SYMBOL(__rt_mutex_init);
19693  /**
19694   * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
19695 @@ -1619,7 +2251,7 @@
19696  void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
19697                                 struct task_struct *proxy_owner)
19699 -       __rt_mutex_init(lock, NULL);
19700 +       rt_mutex_init(lock);
19701         debug_rt_mutex_proxy_lock(lock, proxy_owner);
19702         rt_mutex_set_owner(lock, proxy_owner);
19703         rt_mutex_deadlock_account_lock(lock, proxy_owner);
19704 @@ -1660,13 +2292,42 @@
19706         int ret;
19708 -       raw_spin_lock(&lock->wait_lock);
19709 +       raw_spin_lock_irq(&lock->wait_lock);
19711         if (try_to_take_rt_mutex(lock, task, NULL)) {
19712 -               raw_spin_unlock(&lock->wait_lock);
19713 +               raw_spin_unlock_irq(&lock->wait_lock);
19714                 return 1;
19715         }
19717 +#ifdef CONFIG_PREEMPT_RT_FULL
19718 +       /*
19719 +        * In PREEMPT_RT there's an added race.
19720 +        * If the task that we are about to requeue times out,
19721 +        * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
19722 +        * to skip this task. But right after the task sets
19723 +        * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
19724 +        * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
19725 +        * This will replace the PI_WAKEUP_INPROGRESS with the actual
19726 +        * lock that it blocks on. We *must not* place this task
19727 +        * on this proxy lock in that case.
19728 +        *
19729 +        * To prevent this race, we first take the task's pi_lock
19730 +        * and check if it has updated its pi_blocked_on. If it has,
19731 +        * we assume that it woke up and we return -EAGAIN.
19732 +        * Otherwise, we set the task's pi_blocked_on to
19733 +        * PI_REQUEUE_INPROGRESS, so that if the task is waking up
19734 +        * it will know that we are in the process of requeuing it.
19735 +        */
19736 +       raw_spin_lock(&task->pi_lock);
19737 +       if (task->pi_blocked_on) {
19738 +               raw_spin_unlock(&task->pi_lock);
19739 +               raw_spin_unlock_irq(&lock->wait_lock);
19740 +               return -EAGAIN;
19741 +       }
19742 +       task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
19743 +       raw_spin_unlock(&task->pi_lock);
19744 +#endif
19746         /* We enforce deadlock detection for futexes */
19747         ret = task_blocks_on_rt_mutex(lock, waiter, task,
19748                                       RT_MUTEX_FULL_CHAINWALK);
19749 @@ -1681,10 +2342,10 @@
19750                 ret = 0;
19751         }
19753 -       if (unlikely(ret))
19754 +       if (ret && rt_mutex_has_waiters(lock))
19755                 remove_waiter(lock, waiter);
19757 -       raw_spin_unlock(&lock->wait_lock);
19758 +       raw_spin_unlock_irq(&lock->wait_lock);
19760         debug_rt_mutex_print_deadlock(waiter);
19762 @@ -1732,12 +2393,12 @@
19764         int ret;
19766 -       raw_spin_lock(&lock->wait_lock);
19767 +       raw_spin_lock_irq(&lock->wait_lock);
19769         set_current_state(TASK_INTERRUPTIBLE);
19771         /* sleep on the mutex */
19772 -       ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
19773 +       ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
19775         if (unlikely(ret))
19776                 remove_waiter(lock, waiter);
19777 @@ -1748,7 +2409,93 @@
19778          */
19779         fixup_rt_mutex_waiters(lock);
19781 -       raw_spin_unlock(&lock->wait_lock);
19782 +       raw_spin_unlock_irq(&lock->wait_lock);
19784 +       return ret;
19787 +static inline int
19788 +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
19790 +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
19791 +       unsigned tmp;
19793 +       if (ctx->deadlock_inject_countdown-- == 0) {
19794 +               tmp = ctx->deadlock_inject_interval;
19795 +               if (tmp > UINT_MAX/4)
19796 +                       tmp = UINT_MAX;
19797 +               else
19798 +                       tmp = tmp*2 + tmp + tmp/2;
19800 +               ctx->deadlock_inject_interval = tmp;
19801 +               ctx->deadlock_inject_countdown = tmp;
19802 +               ctx->contending_lock = lock;
19804 +               ww_mutex_unlock(lock);
19806 +               return -EDEADLK;
19807 +       }
19808 +#endif
19810 +       return 0;
19813 +#ifdef CONFIG_PREEMPT_RT_FULL
19814 +int __sched
19815 +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
19817 +       int ret;
19819 +       might_sleep();
19821 +       mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
19822 +       ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
19823 +       if (ret)
19824 +               mutex_release(&lock->base.dep_map, 1, _RET_IP_);
19825 +       else if (!ret && ww_ctx->acquired > 1)
19826 +               return ww_mutex_deadlock_injection(lock, ww_ctx);
19828         return ret;
19830 +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
19832 +int __sched
19833 +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
19835 +       int ret;
19837 +       might_sleep();
19839 +       mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
19840 +       ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
19841 +       if (ret)
19842 +               mutex_release(&lock->base.dep_map, 1, _RET_IP_);
19843 +       else if (!ret && ww_ctx->acquired > 1)
19844 +               return ww_mutex_deadlock_injection(lock, ww_ctx);
19846 +       return ret;
19848 +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
19850 +void __sched ww_mutex_unlock(struct ww_mutex *lock)
19852 +       int nest = !!lock->ctx;
19854 +       /*
19855 +        * The unlocking fastpath is the 0->1 transition from 'locked'
19856 +        * into 'unlocked' state:
19857 +        */
19858 +       if (nest) {
19859 +#ifdef CONFIG_DEBUG_MUTEXES
19860 +               DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
19861 +#endif
19862 +               if (lock->ctx->acquired > 0)
19863 +                       lock->ctx->acquired--;
19864 +               lock->ctx = NULL;
19865 +       }
19867 +       mutex_release(&lock->base.dep_map, nest, _RET_IP_);
19868 +       rt_mutex_unlock(&lock->base.lock);
19870 +EXPORT_SYMBOL(ww_mutex_unlock);
19871 +#endif
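For illustration: the atomic_dec_and_spin_lock() helper added in the rtmutex.c hunk above follows the classic dec-and-lock pattern, dropping a reference lock-free while the count stays above 1 and taking the lock only for the final decrement, returning 1 with the lock held. A rough user-space sketch of that pattern, using C11 atomics and a pthread mutex in place of the kernel's atomic_t and RT sleeping spinlock (all names below are illustrative stand-ins, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int refcount = 2;                 /* models atomic_t            */
static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Returns 1 with obj_lock held when the count dropped to zero,
 * 0 (lock not held) otherwise -- the same contract as the kernel helper.
 */
static int dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* Fast path: decrement without the lock unless we would hit zero. */
	while (old > 1) {
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return 0;
	}

	/* Slow path: the final reference is dropped under the lock. */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return 1;               /* caller tears down, then unlocks */
	pthread_mutex_unlock(lock);
	return 0;
}

int main(void)
{
	if (!dec_and_lock(&refcount, &obj_lock))
		printf("still referenced\n");
	if (dec_and_lock(&refcount, &obj_lock)) {
		printf("last reference dropped, lock held\n");
		pthread_mutex_unlock(&obj_lock);
	}
	return 0;
}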
19872 diff -Nur linux-4.4.46.orig/kernel/locking/rtmutex_common.h linux-4.4.46/kernel/locking/rtmutex_common.h
19873 --- linux-4.4.46.orig/kernel/locking/rtmutex_common.h   2017-02-01 08:31:11.000000000 +0100
19874 +++ linux-4.4.46/kernel/locking/rtmutex_common.h        2017-02-03 17:18:10.927619058 +0100
19875 @@ -27,6 +27,7 @@
19876         struct rb_node          pi_tree_entry;
19877         struct task_struct      *task;
19878         struct rt_mutex         *lock;
19879 +       bool                    savestate;
19880  #ifdef CONFIG_DEBUG_RT_MUTEXES
19881         unsigned long           ip;
19882         struct pid              *deadlock_task_pid;
19883 @@ -98,6 +99,9 @@
19884  /*
19885   * PI-futex support (proxy locking functions, etc.):
19886   */
19887 +#define PI_WAKEUP_INPROGRESS   ((struct rt_mutex_waiter *) 1)
19888 +#define PI_REQUEUE_INPROGRESS  ((struct rt_mutex_waiter *) 2)
19890  extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
19891  extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
19892                                        struct task_struct *proxy_owner);
19893 @@ -111,7 +115,8 @@
19894                                       struct rt_mutex_waiter *waiter);
19895  extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
19896  extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
19897 -                                 struct wake_q_head *wqh);
19898 +                                 struct wake_q_head *wqh,
19899 +                                 struct wake_q_head *wq_sleeper);
19900  extern void rt_mutex_adjust_prio(struct task_struct *task);
19902  #ifdef CONFIG_DEBUG_RT_MUTEXES
19903 @@ -120,4 +125,14 @@
19904  # include "rtmutex.h"
19905  #endif
19907 +static inline void
19908 +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
19910 +       debug_rt_mutex_init_waiter(waiter);
19911 +       waiter->task = NULL;
19912 +       waiter->savestate = savestate;
19913 +       RB_CLEAR_NODE(&waiter->pi_tree_entry);
19914 +       RB_CLEAR_NODE(&waiter->tree_entry);
19917  #endif
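For illustration: PI_WAKEUP_INPROGRESS and PI_REQUEUE_INPROGRESS above are sentinel pointer values stored in task->pi_blocked_on; they are only ever compared, never dereferenced, and let the futex-requeue paths in the rtmutex.c changes tell "blocked on a real waiter" apart from "a wakeup or requeue is in flight". The rt_mutex_real_waiter() check used throughout those hunks is defined elsewhere in this patch; a minimal stand-alone sketch of the sentinel-pointer idea (names below are stand-ins):

#include <stdio.h>

struct waiter { int prio; };

/* Sentinel "pointers" that are never dereferenced, only compared. */
#define WAKEUP_INPROGRESS  ((struct waiter *) 1)
#define REQUEUE_INPROGRESS ((struct waiter *) 2)

/* A waiter pointer is "real" only if it is non-NULL and not a sentinel. */
static int real_waiter(struct waiter *w)
{
	return w && w != WAKEUP_INPROGRESS && w != REQUEUE_INPROGRESS;
}

int main(void)
{
	struct waiter w = { .prio = 10 };

	printf("%d %d %d %d\n",
	       real_waiter(NULL),                /* 0: not blocked          */
	       real_waiter(WAKEUP_INPROGRESS),   /* 0: wakeup in progress   */
	       real_waiter(REQUEUE_INPROGRESS),  /* 0: requeue in progress  */
	       real_waiter(&w));                 /* 1: blocked on a lock    */
	return 0;
}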
19918 diff -Nur linux-4.4.46.orig/kernel/locking/spinlock.c linux-4.4.46/kernel/locking/spinlock.c
19919 --- linux-4.4.46.orig/kernel/locking/spinlock.c 2017-02-01 08:31:11.000000000 +0100
19920 +++ linux-4.4.46/kernel/locking/spinlock.c      2017-02-03 17:18:10.927619058 +0100
19921 @@ -124,8 +124,11 @@
19922   *         __[spin|read|write]_lock_bh()
19923   */
19924  BUILD_LOCK_OPS(spin, raw_spinlock);
19926 +#ifndef CONFIG_PREEMPT_RT_FULL
19927  BUILD_LOCK_OPS(read, rwlock);
19928  BUILD_LOCK_OPS(write, rwlock);
19929 +#endif
19931  #endif
19933 @@ -209,6 +212,8 @@
19934  EXPORT_SYMBOL(_raw_spin_unlock_bh);
19935  #endif
19937 +#ifndef CONFIG_PREEMPT_RT_FULL
19939  #ifndef CONFIG_INLINE_READ_TRYLOCK
19940  int __lockfunc _raw_read_trylock(rwlock_t *lock)
19942 @@ -353,6 +358,8 @@
19943  EXPORT_SYMBOL(_raw_write_unlock_bh);
19944  #endif
19946 +#endif /* !PREEMPT_RT_FULL */
19948  #ifdef CONFIG_DEBUG_LOCK_ALLOC
19950  void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
19951 diff -Nur linux-4.4.46.orig/kernel/locking/spinlock_debug.c linux-4.4.46/kernel/locking/spinlock_debug.c
19952 --- linux-4.4.46.orig/kernel/locking/spinlock_debug.c   2017-02-01 08:31:11.000000000 +0100
19953 +++ linux-4.4.46/kernel/locking/spinlock_debug.c        2017-02-03 17:18:10.927619058 +0100
19954 @@ -31,6 +31,7 @@
19956  EXPORT_SYMBOL(__raw_spin_lock_init);
19958 +#ifndef CONFIG_PREEMPT_RT_FULL
19959  void __rwlock_init(rwlock_t *lock, const char *name,
19960                    struct lock_class_key *key)
19962 @@ -48,6 +49,7 @@
19965  EXPORT_SYMBOL(__rwlock_init);
19966 +#endif
19968  static void spin_dump(raw_spinlock_t *lock, const char *msg)
19970 @@ -159,6 +161,7 @@
19971         arch_spin_unlock(&lock->raw_lock);
19974 +#ifndef CONFIG_PREEMPT_RT_FULL
19975  static void rwlock_bug(rwlock_t *lock, const char *msg)
19977         if (!debug_locks_off())
19978 @@ -300,3 +303,5 @@
19979         debug_write_unlock(lock);
19980         arch_write_unlock(&lock->raw_lock);
19983 +#endif
19984 diff -Nur linux-4.4.46.orig/kernel/panic.c linux-4.4.46/kernel/panic.c
19985 --- linux-4.4.46.orig/kernel/panic.c    2017-02-01 08:31:11.000000000 +0100
19986 +++ linux-4.4.46/kernel/panic.c 2017-02-03 17:18:10.927619058 +0100
19987 @@ -61,6 +61,37 @@
19988                 cpu_relax();
19992 + * Stop ourselves in NMI context if another CPU has already panicked. Arch code
19993 + * may override this to prepare for crash dumping, e.g. save regs info.
19994 + */
19995 +void __weak nmi_panic_self_stop(struct pt_regs *regs)
19997 +       panic_smp_self_stop();
20000 +atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
20003 + * A variant of panic() called from NMI context. We return if we've already
20004 + * panicked on this CPU. If another CPU already panicked, loop in
20005 + * nmi_panic_self_stop() which can provide architecture dependent code such
20006 + * as saving register state for crash dump.
20007 + */
20008 +void nmi_panic(struct pt_regs *regs, const char *msg)
20010 +       int old_cpu, cpu;
20012 +       cpu = raw_smp_processor_id();
20013 +       old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);
20015 +       if (old_cpu == PANIC_CPU_INVALID)
20016 +               panic("%s", msg);
20017 +       else if (old_cpu != cpu)
20018 +               nmi_panic_self_stop(regs);
20020 +EXPORT_SYMBOL(nmi_panic);
20022  /**
20023   *     panic - halt the system
20024   *     @fmt: The text string to print
20025 @@ -71,17 +102,17 @@
20026   */
20027  void panic(const char *fmt, ...)
20029 -       static DEFINE_SPINLOCK(panic_lock);
20030         static char buf[1024];
20031         va_list args;
20032         long i, i_next = 0;
20033         int state = 0;
20034 +       int old_cpu, this_cpu;
20036         /*
20037          * Disable local interrupts. This will prevent panic_smp_self_stop
20038          * from deadlocking the first cpu that invokes the panic, since
20039          * there is nothing to prevent an interrupt handler (that runs
20040 -        * after the panic_lock is acquired) from invoking panic again.
20041 +        * after setting panic_cpu) from invoking panic() again.
20042          */
20043         local_irq_disable();
20045 @@ -94,8 +125,16 @@
20046          * multiple parallel invocations of panic, all other CPUs either
20047          * stop themself or will wait until they are stopped by the 1st CPU
20048          * with smp_send_stop().
20049 +        *
20050 +        * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
20051 +        * comes here, so go ahead.
20052 +        * `old_cpu == this_cpu' means we came from nmi_panic() which sets
20053 +        * panic_cpu to this CPU.  In this case, this is also the 1st CPU.
20054          */
20055 -       if (!spin_trylock(&panic_lock))
20056 +       this_cpu = raw_smp_processor_id();
20057 +       old_cpu  = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
20059 +       if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
20060                 panic_smp_self_stop();
20062         console_verbose();
20063 @@ -400,9 +439,11 @@
20065  static int init_oops_id(void)
20067 +#ifndef CONFIG_PREEMPT_RT_FULL
20068         if (!oops_id)
20069                 get_random_bytes(&oops_id, sizeof(oops_id));
20070         else
20071 +#endif
20072                 oops_id++;
20074         return 0;
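For illustration: the panic() rework above replaces panic_lock with an atomic panic_cpu handshake. The first CPU to move panic_cpu from PANIC_CPU_INVALID to its own id runs the panic path, a CPU re-entering via nmi_panic() recognizes its own id and returns, and every other CPU stops itself. A small user-space model of that cmpxchg handshake with threads standing in for CPUs (illustrative only, not kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define CPU_INVALID -1

static atomic_int panic_cpu = CPU_INVALID;

static void *try_panic(void *arg)
{
	int cpu = (int)(long)arg;
	int old = CPU_INVALID;

	/* Only the CPU that wins the cmpxchg runs the panic path. */
	if (atomic_compare_exchange_strong(&panic_cpu, &old, cpu))
		printf("cpu %d: handling the panic\n", cpu);
	else if (old != cpu)
		printf("cpu %d: already handled by cpu %d, stopping\n", cpu, old);

	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, try_panic, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}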
20075 diff -Nur linux-4.4.46.orig/kernel/power/hibernate.c linux-4.4.46/kernel/power/hibernate.c
20076 --- linux-4.4.46.orig/kernel/power/hibernate.c  2017-02-01 08:31:11.000000000 +0100
20077 +++ linux-4.4.46/kernel/power/hibernate.c       2017-02-03 17:18:10.927619058 +0100
20078 @@ -285,6 +285,8 @@
20080         local_irq_disable();
20082 +       system_state = SYSTEM_SUSPEND;
20084         error = syscore_suspend();
20085         if (error) {
20086                 printk(KERN_ERR "PM: Some system devices failed to power down, "
20087 @@ -314,6 +316,7 @@
20088         syscore_resume();
20090   Enable_irqs:
20091 +       system_state = SYSTEM_RUNNING;
20092         local_irq_enable();
20094   Enable_cpus:
20095 @@ -438,6 +441,7 @@
20096                 goto Enable_cpus;
20098         local_irq_disable();
20099 +       system_state = SYSTEM_SUSPEND;
20101         error = syscore_suspend();
20102         if (error)
20103 @@ -471,6 +475,7 @@
20104         syscore_resume();
20106   Enable_irqs:
20107 +       system_state = SYSTEM_RUNNING;
20108         local_irq_enable();
20110   Enable_cpus:
20111 @@ -556,6 +561,7 @@
20112                 goto Enable_cpus;
20114         local_irq_disable();
20115 +       system_state = SYSTEM_SUSPEND;
20116         syscore_suspend();
20117         if (pm_wakeup_pending()) {
20118                 error = -EAGAIN;
20119 @@ -568,6 +574,7 @@
20121   Power_up:
20122         syscore_resume();
20123 +       system_state = SYSTEM_RUNNING;
20124         local_irq_enable();
20126   Enable_cpus:
20127 @@ -642,6 +649,10 @@
20128                 cpu_relax();
20131 +#ifndef CONFIG_SUSPEND
20132 +bool pm_in_action;
20133 +#endif
20135  /**
20136   * hibernate - Carry out system hibernation, including saving the image.
20137   */
20138 @@ -654,6 +665,8 @@
20139                 return -EPERM;
20140         }
20142 +       pm_in_action = true;
20144         lock_system_sleep();
20145         /* The snapshot device should not be opened while we're running */
20146         if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
20147 @@ -719,6 +732,7 @@
20148         atomic_inc(&snapshot_device_available);
20149   Unlock:
20150         unlock_system_sleep();
20151 +       pm_in_action = false;
20152         return error;
20155 diff -Nur linux-4.4.46.orig/kernel/power/suspend.c linux-4.4.46/kernel/power/suspend.c
20156 --- linux-4.4.46.orig/kernel/power/suspend.c    2017-02-01 08:31:11.000000000 +0100
20157 +++ linux-4.4.46/kernel/power/suspend.c 2017-02-03 17:18:10.927619058 +0100
20158 @@ -359,6 +359,8 @@
20159         arch_suspend_disable_irqs();
20160         BUG_ON(!irqs_disabled());
20162 +       system_state = SYSTEM_SUSPEND;
20164         error = syscore_suspend();
20165         if (!error) {
20166                 *wakeup = pm_wakeup_pending();
20167 @@ -375,6 +377,8 @@
20168                 syscore_resume();
20169         }
20171 +       system_state = SYSTEM_RUNNING;
20173         arch_suspend_enable_irqs();
20174         BUG_ON(irqs_disabled());
20176 @@ -518,6 +522,8 @@
20177         return error;
20180 +bool pm_in_action;
20182  /**
20183   * pm_suspend - Externally visible function for suspending the system.
20184   * @state: System sleep state to enter.
20185 @@ -532,6 +538,8 @@
20186         if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
20187                 return -EINVAL;
20189 +       pm_in_action = true;
20191         error = enter_state(state);
20192         if (error) {
20193                 suspend_stats.fail++;
20194 @@ -539,6 +547,7 @@
20195         } else {
20196                 suspend_stats.success++;
20197         }
20198 +       pm_in_action = false;
20199         return error;
20201  EXPORT_SYMBOL(pm_suspend);
20202 diff -Nur linux-4.4.46.orig/kernel/printk/printk.c linux-4.4.46/kernel/printk/printk.c
20203 --- linux-4.4.46.orig/kernel/printk/printk.c    2017-02-01 08:31:11.000000000 +0100
20204 +++ linux-4.4.46/kernel/printk/printk.c 2017-02-03 17:18:10.927619058 +0100
20205 @@ -241,6 +241,65 @@
20206   */
20207  static DEFINE_RAW_SPINLOCK(logbuf_lock);
20209 +#ifdef CONFIG_EARLY_PRINTK
20210 +struct console *early_console;
20212 +static void early_vprintk(const char *fmt, va_list ap)
20214 +       if (early_console) {
20215 +               char buf[512];
20216 +               int n = vscnprintf(buf, sizeof(buf), fmt, ap);
20218 +               early_console->write(early_console, buf, n);
20219 +       }
20222 +asmlinkage void early_printk(const char *fmt, ...)
20224 +       va_list ap;
20226 +       va_start(ap, fmt);
20227 +       early_vprintk(fmt, ap);
20228 +       va_end(ap);
20232 + * This is independent of any log levels - a global
20233 + * kill switch that turns off all of printk.
20234 + *
20235 + * Used by the NMI watchdog if early-printk is enabled.
20236 + */
20237 +static bool __read_mostly printk_killswitch;
20239 +static int __init force_early_printk_setup(char *str)
20241 +       printk_killswitch = true;
20242 +       return 0;
20244 +early_param("force_early_printk", force_early_printk_setup);
20246 +void printk_kill(void)
20248 +       printk_killswitch = true;
20251 +#ifdef CONFIG_PRINTK
20252 +static int forced_early_printk(const char *fmt, va_list ap)
20254 +       if (!printk_killswitch)
20255 +               return 0;
20256 +       early_vprintk(fmt, ap);
20257 +       return 1;
20259 +#endif
20261 +#else
20262 +static inline int forced_early_printk(const char *fmt, va_list ap)
20264 +       return 0;
20266 +#endif
20268  #ifdef CONFIG_PRINTK
20269  DECLARE_WAIT_QUEUE_HEAD(log_wait);
20270  /* the next printk record to read by syslog(READ) or /proc/kmsg */
20271 @@ -1203,6 +1262,7 @@
20273         char *text;
20274         int len = 0;
20275 +       int attempts = 0;
20277         text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
20278         if (!text)
20279 @@ -1214,7 +1274,14 @@
20280                 u64 seq;
20281                 u32 idx;
20282                 enum log_flags prev;
20284 +               int num_msg;
20285 +try_again:
20286 +               attempts++;
20287 +               if (attempts > 10) {
20288 +                       len = -EBUSY;
20289 +                       goto out;
20290 +               }
20291 +               num_msg = 0;
20292                 if (clear_seq < log_first_seq) {
20293                         /* messages are gone, move to first available one */
20294                         clear_seq = log_first_seq;
20295 @@ -1235,6 +1302,14 @@
20296                         prev = msg->flags;
20297                         idx = log_next(idx);
20298                         seq++;
20299 +                       num_msg++;
20300 +                       if (num_msg > 5) {
20301 +                               num_msg = 0;
20302 +                               raw_spin_unlock_irq(&logbuf_lock);
20303 +                               raw_spin_lock_irq(&logbuf_lock);
20304 +                               if (clear_seq < log_first_seq)
20305 +                                       goto try_again;
20306 +                       }
20307                 }
20309                 /* move first record forward until length fits into the buffer */
20310 @@ -1248,6 +1323,14 @@
20311                         prev = msg->flags;
20312                         idx = log_next(idx);
20313                         seq++;
20314 +                       num_msg++;
20315 +                       if (num_msg > 5) {
20316 +                               num_msg = 0;
20317 +                               raw_spin_unlock_irq(&logbuf_lock);
20318 +                               raw_spin_lock_irq(&logbuf_lock);
20319 +                               if (clear_seq < log_first_seq)
20320 +                                       goto try_again;
20321 +                       }
20322                 }
20324                 /* last message fitting into this dump */
20325 @@ -1288,6 +1371,7 @@
20326                 clear_seq = log_next_seq;
20327                 clear_idx = log_next_idx;
20328         }
20329 +out:
20330         raw_spin_unlock_irq(&logbuf_lock);
20332         kfree(text);
20333 @@ -1443,6 +1527,12 @@
20334         if (!console_drivers)
20335                 return;
20337 +       if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
20338 +               if (in_irq() || in_nmi())
20339 +                       return;
20340 +       }
20342 +       migrate_disable();
20343         for_each_console(con) {
20344                 if (exclusive_console && con != exclusive_console)
20345                         continue;
20346 @@ -1458,6 +1548,7 @@
20347                 else
20348                         con->write(con, text, len);
20349         }
20350 +       migrate_enable();
20353  /*
20354 @@ -1518,6 +1609,15 @@
20355  static int console_trylock_for_printk(void)
20357         unsigned int cpu = smp_processor_id();
20358 +#ifdef CONFIG_PREEMPT_RT_FULL
20359 +       int lock = !early_boot_irqs_disabled && (preempt_count() == 0) &&
20360 +               !irqs_disabled();
20361 +#else
20362 +       int lock = 1;
20363 +#endif
20365 +       if (!lock)
20366 +               return 0;
20368         if (!console_trylock())
20369                 return 0;
20370 @@ -1672,6 +1772,13 @@
20371         /* cpu currently holding logbuf_lock in this function */
20372         static unsigned int logbuf_cpu = UINT_MAX;
20374 +       /*
20375 +        * Fall back to early_printk if a debugging subsystem has
20376 +        * killed printk output
20377 +        */
20378 +       if (unlikely(forced_early_printk(fmt, args)))
20379 +               return 1;
20381         if (level == LOGLEVEL_SCHED) {
20382                 level = LOGLEVEL_DEFAULT;
20383                 in_sched = true;
20384 @@ -1813,8 +1920,7 @@
20385                  * console_sem which would prevent anyone from printing to
20386                  * console
20387                  */
20388 -               preempt_disable();
20390 +               migrate_disable();
20391                 /*
20392                  * Try to acquire and then immediately release the console
20393                  * semaphore.  The release will print out buffers and wake up
20394 @@ -1822,7 +1928,7 @@
20395                  */
20396                 if (console_trylock_for_printk())
20397                         console_unlock();
20398 -               preempt_enable();
20399 +               migrate_enable();
20400                 lockdep_on();
20401         }
20403 @@ -1961,26 +2067,6 @@
20405  #endif /* CONFIG_PRINTK */
20407 -#ifdef CONFIG_EARLY_PRINTK
20408 -struct console *early_console;
20410 -asmlinkage __visible void early_printk(const char *fmt, ...)
20412 -       va_list ap;
20413 -       char buf[512];
20414 -       int n;
20416 -       if (!early_console)
20417 -               return;
20419 -       va_start(ap, fmt);
20420 -       n = vscnprintf(buf, sizeof(buf), fmt, ap);
20421 -       va_end(ap);
20423 -       early_console->write(early_console, buf, n);
20425 -#endif
20427  static int __add_preferred_console(char *name, int idx, char *options,
20428                                    char *brl_options)
20430 @@ -2202,11 +2288,16 @@
20431                 goto out;
20433         len = cont_print_text(text, size);
20434 +#ifdef CONFIG_PREEMPT_RT_FULL
20435 +       raw_spin_unlock_irqrestore(&logbuf_lock, flags);
20436 +       call_console_drivers(cont.level, NULL, 0, text, len);
20437 +#else
20438         raw_spin_unlock(&logbuf_lock);
20439         stop_critical_timings();
20440         call_console_drivers(cont.level, NULL, 0, text, len);
20441         start_critical_timings();
20442         local_irq_restore(flags);
20443 +#endif
20444         return;
20445  out:
20446         raw_spin_unlock_irqrestore(&logbuf_lock, flags);
20447 @@ -2316,13 +2407,17 @@
20448                 console_idx = log_next(console_idx);
20449                 console_seq++;
20450                 console_prev = msg->flags;
20451 +#ifdef CONFIG_PREEMPT_RT_FULL
20452 +               raw_spin_unlock_irqrestore(&logbuf_lock, flags);
20453 +               call_console_drivers(level, ext_text, ext_len, text, len);
20454 +#else
20455                 raw_spin_unlock(&logbuf_lock);
20457                 stop_critical_timings();        /* don't trace print latency */
20458                 call_console_drivers(level, ext_text, ext_len, text, len);
20459                 start_critical_timings();
20460                 local_irq_restore(flags);
20462 +#endif
20463                 if (do_cond_resched)
20464                         cond_resched();
20465         }
20466 @@ -2374,6 +2469,11 @@
20468         struct console *c;
20470 +       if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
20471 +               if (in_irq() || in_nmi())
20472 +                       return;
20473 +       }
20475         /*
20476          * console_unblank can no longer be called in interrupt context unless
20477          * oops_in_progress is set to 1..
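
The printk.c hunks above replace preempt_disable()/preempt_enable() around console output with migrate_disable()/migrate_enable() and, when PREEMPT_RT_BASE is enabled, skip the console drivers entirely from hard-IRQ or NMI context, where RT's sleeping locks must not be taken. A minimal sketch of that guard pattern follows; flush_consoles() and its arguments are illustrative only and not part of the patch:

static void flush_consoles(const char *text, unsigned int len)
{
	struct console *con;

	/* On RT, console drivers may sleep; never call them from hard IRQ/NMI. */
	if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
		if (in_irq() || in_nmi())
			return;
	}

	/* Stay on this CPU while walking the list, but remain preemptible. */
	migrate_disable();
	for_each_console(con) {
		if ((con->flags & CON_ENABLED) && con->write)
			con->write(con, text, len);
	}
	migrate_enable();
}
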
20478 diff -Nur linux-4.4.46.orig/kernel/ptrace.c linux-4.4.46/kernel/ptrace.c
20479 --- linux-4.4.46.orig/kernel/ptrace.c   2017-02-01 08:31:11.000000000 +0100
20480 +++ linux-4.4.46/kernel/ptrace.c        2017-02-03 17:18:10.927619058 +0100
20481 @@ -136,7 +136,14 @@
20483         spin_lock_irq(&task->sighand->siglock);
20484         if (task_is_traced(task) && !__fatal_signal_pending(task)) {
20485 -               task->state = __TASK_TRACED;
20486 +               unsigned long flags;
20488 +               raw_spin_lock_irqsave(&task->pi_lock, flags);
20489 +               if (task->state & __TASK_TRACED)
20490 +                       task->state = __TASK_TRACED;
20491 +               else
20492 +                       task->saved_state = __TASK_TRACED;
20493 +               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
20494                 ret = true;
20495         }
20496         spin_unlock_irq(&task->sighand->siglock);
20497 diff -Nur linux-4.4.46.orig/kernel/rcu/rcutorture.c linux-4.4.46/kernel/rcu/rcutorture.c
20498 --- linux-4.4.46.orig/kernel/rcu/rcutorture.c   2017-02-01 08:31:11.000000000 +0100
20499 +++ linux-4.4.46/kernel/rcu/rcutorture.c        2017-02-03 17:18:10.927619058 +0100
20500 @@ -390,6 +390,7 @@
20501         .name           = "rcu"
20502  };
20504 +#ifndef CONFIG_PREEMPT_RT_FULL
20505  /*
20506   * Definitions for rcu_bh torture testing.
20507   */
20508 @@ -429,6 +430,12 @@
20509         .name           = "rcu_bh"
20510  };
20512 +#else
20513 +static struct rcu_torture_ops rcu_bh_ops = {
20514 +       .ttype          = INVALID_RCU_FLAVOR,
20516 +#endif
20518  /*
20519   * Don't even think about trying any of these in real life!!!
20520   * The names includes "busted", and they really means it!
20521 diff -Nur linux-4.4.46.orig/kernel/rcu/tree.c linux-4.4.46/kernel/rcu/tree.c
20522 --- linux-4.4.46.orig/kernel/rcu/tree.c 2017-02-01 08:31:11.000000000 +0100
20523 +++ linux-4.4.46/kernel/rcu/tree.c      2017-02-03 17:18:10.931619212 +0100
20524 @@ -56,6 +56,11 @@
20525  #include <linux/random.h>
20526  #include <linux/trace_events.h>
20527  #include <linux/suspend.h>
20528 +#include <linux/delay.h>
20529 +#include <linux/gfp.h>
20530 +#include <linux/oom.h>
20531 +#include <linux/smpboot.h>
20532 +#include "../time/tick-internal.h"
20534  #include "tree.h"
20535  #include "rcu.h"
20536 @@ -266,6 +271,19 @@
20537         }
20540 +#ifdef CONFIG_PREEMPT_RT_FULL
20541 +static void rcu_preempt_qs(void);
20543 +void rcu_bh_qs(void)
20545 +       unsigned long flags;
20547 +       /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
20548 +       local_irq_save(flags);
20549 +       rcu_preempt_qs();
20550 +       local_irq_restore(flags);
20552 +#else
20553  void rcu_bh_qs(void)
20555         if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
20556 @@ -275,6 +293,7 @@
20557                 __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
20558         }
20560 +#endif
20562  static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
20564 @@ -435,11 +454,13 @@
20565  /*
20566   * Return the number of RCU BH batches started thus far for debug & stats.
20567   */
20568 +#ifndef CONFIG_PREEMPT_RT_FULL
20569  unsigned long rcu_batches_started_bh(void)
20571         return rcu_bh_state.gpnum;
20573  EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
20574 +#endif
20576  /*
20577   * Return the number of RCU batches completed thus far for debug & stats.
20578 @@ -459,6 +480,7 @@
20580  EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
20582 +#ifndef CONFIG_PREEMPT_RT_FULL
20583  /*
20584   * Return the number of RCU BH batches completed thus far for debug & stats.
20585   */
20586 @@ -486,6 +508,13 @@
20588  EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
20590 +#else
20591 +void rcu_force_quiescent_state(void)
20594 +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
20595 +#endif
20597  /*
20598   * Force a quiescent state for RCU-sched.
20599   */
20600 @@ -536,9 +565,11 @@
20601         case RCU_FLAVOR:
20602                 rsp = rcu_state_p;
20603                 break;
20604 +#ifndef CONFIG_PREEMPT_RT_FULL
20605         case RCU_BH_FLAVOR:
20606                 rsp = &rcu_bh_state;
20607                 break;
20608 +#endif
20609         case RCU_SCHED_FLAVOR:
20610                 rsp = &rcu_sched_state;
20611                 break;
20612 @@ -1590,7 +1621,6 @@
20613         int needmore;
20614         struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
20616 -       rcu_nocb_gp_cleanup(rsp, rnp);
20617         rnp->need_future_gp[c & 0x1] = 0;
20618         needmore = rnp->need_future_gp[(c + 1) & 0x1];
20619         trace_rcu_future_gp(rnp, rdp, c,
20620 @@ -1611,7 +1641,7 @@
20621             !READ_ONCE(rsp->gp_flags) ||
20622             !rsp->gp_kthread)
20623                 return;
20624 -       wake_up(&rsp->gp_wq);
20625 +       swake_up(&rsp->gp_wq);
20628  /*
20629 @@ -1991,6 +2021,7 @@
20630         int nocb = 0;
20631         struct rcu_data *rdp;
20632         struct rcu_node *rnp = rcu_get_root(rsp);
20633 +       struct swait_queue_head *sq;
20635         WRITE_ONCE(rsp->gp_activity, jiffies);
20636         raw_spin_lock_irq(&rnp->lock);
20637 @@ -2029,7 +2060,9 @@
20638                         needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
20639                 /* smp_mb() provided by prior unlock-lock pair. */
20640                 nocb += rcu_future_gp_cleanup(rsp, rnp);
20641 +               sq = rcu_nocb_gp_get(rnp);
20642                 raw_spin_unlock_irq(&rnp->lock);
20643 +               rcu_nocb_gp_cleanup(sq);
20644                 cond_resched_rcu_qs();
20645                 WRITE_ONCE(rsp->gp_activity, jiffies);
20646                 rcu_gp_slow(rsp, gp_cleanup_delay);
20647 @@ -2076,7 +2109,7 @@
20648                                                READ_ONCE(rsp->gpnum),
20649                                                TPS("reqwait"));
20650                         rsp->gp_state = RCU_GP_WAIT_GPS;
20651 -                       wait_event_interruptible(rsp->gp_wq,
20652 +                       swait_event_interruptible(rsp->gp_wq,
20653                                                  READ_ONCE(rsp->gp_flags) &
20654                                                  RCU_GP_FLAG_INIT);
20655                         rsp->gp_state = RCU_GP_DONE_GPS;
20656 @@ -2106,7 +2139,7 @@
20657                                                READ_ONCE(rsp->gpnum),
20658                                                TPS("fqswait"));
20659                         rsp->gp_state = RCU_GP_WAIT_FQS;
20660 -                       ret = wait_event_interruptible_timeout(rsp->gp_wq,
20661 +                       ret = swait_event_interruptible_timeout(rsp->gp_wq,
20662                                         rcu_gp_fqs_check_wake(rsp, &gf), j);
20663                         rsp->gp_state = RCU_GP_DOING_FQS;
20664                         /* Locking provides needed memory barriers. */
20665 @@ -2230,7 +2263,7 @@
20666         WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
20667         WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
20668         raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
20669 -       rcu_gp_kthread_wake(rsp);
20670 +       swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
20673  /*
20674 @@ -2891,7 +2924,7 @@
20675         }
20676         WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
20677         raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
20678 -       rcu_gp_kthread_wake(rsp);
20679 +       swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
20682  /*
20683 @@ -2934,18 +2967,17 @@
20684  /*
20685   * Do RCU core processing for the current CPU.
20686   */
20687 -static void rcu_process_callbacks(struct softirq_action *unused)
20688 +static void rcu_process_callbacks(void)
20690         struct rcu_state *rsp;
20692         if (cpu_is_offline(smp_processor_id()))
20693                 return;
20694 -       trace_rcu_utilization(TPS("Start RCU core"));
20695         for_each_rcu_flavor(rsp)
20696                 __rcu_process_callbacks(rsp);
20697 -       trace_rcu_utilization(TPS("End RCU core"));
20700 +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
20701  /*
20702   * Schedule RCU callback invocation.  If the specified type of RCU
20703   * does not support RCU priority boosting, just do a direct call,
20704 @@ -2957,18 +2989,105 @@
20706         if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
20707                 return;
20708 -       if (likely(!rsp->boost)) {
20709 -               rcu_do_batch(rsp, rdp);
20710 +       rcu_do_batch(rsp, rdp);
20713 +static void rcu_wake_cond(struct task_struct *t, int status)
20715 +       /*
20716 +        * If the thread is yielding, only wake it when this
20717 +        * is invoked from idle
20718 +        */
20719 +       if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
20720 +               wake_up_process(t);
20724 + * Wake up this CPU's rcuc kthread to do RCU core processing.
20725 + */
20726 +static void invoke_rcu_core(void)
20728 +       unsigned long flags;
20729 +       struct task_struct *t;
20731 +       if (!cpu_online(smp_processor_id()))
20732                 return;
20733 +       local_irq_save(flags);
20734 +       __this_cpu_write(rcu_cpu_has_work, 1);
20735 +       t = __this_cpu_read(rcu_cpu_kthread_task);
20736 +       if (t != NULL && current != t)
20737 +               rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
20738 +       local_irq_restore(flags);
20741 +static void rcu_cpu_kthread_park(unsigned int cpu)
20743 +       per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
20746 +static int rcu_cpu_kthread_should_run(unsigned int cpu)
20748 +       return __this_cpu_read(rcu_cpu_has_work);
20752 + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
20753 + * RCU softirq used in flavors and configurations of RCU that do not
20754 + * support RCU priority boosting.
20755 + */
20756 +static void rcu_cpu_kthread(unsigned int cpu)
20758 +       unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
20759 +       char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
20760 +       int spincnt;
20762 +       for (spincnt = 0; spincnt < 10; spincnt++) {
20763 +               trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
20764 +               local_bh_disable();
20765 +               *statusp = RCU_KTHREAD_RUNNING;
20766 +               this_cpu_inc(rcu_cpu_kthread_loops);
20767 +               local_irq_disable();
20768 +               work = *workp;
20769 +               *workp = 0;
20770 +               local_irq_enable();
20771 +               if (work)
20772 +                       rcu_process_callbacks();
20773 +               local_bh_enable();
20774 +               if (*workp == 0) {
20775 +                       trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
20776 +                       *statusp = RCU_KTHREAD_WAITING;
20777 +                       return;
20778 +               }
20779         }
20780 -       invoke_rcu_callbacks_kthread();
20781 +       *statusp = RCU_KTHREAD_YIELDING;
20782 +       trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
20783 +       schedule_timeout_interruptible(2);
20784 +       trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
20785 +       *statusp = RCU_KTHREAD_WAITING;
20788 -static void invoke_rcu_core(void)
20789 +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
20790 +       .store                  = &rcu_cpu_kthread_task,
20791 +       .thread_should_run      = rcu_cpu_kthread_should_run,
20792 +       .thread_fn              = rcu_cpu_kthread,
20793 +       .thread_comm            = "rcuc/%u",
20794 +       .setup                  = rcu_cpu_kthread_setup,
20795 +       .park                   = rcu_cpu_kthread_park,
20799 + * Spawn per-CPU RCU core processing kthreads.
20800 + */
20801 +static int __init rcu_spawn_core_kthreads(void)
20803 -       if (cpu_online(smp_processor_id()))
20804 -               raise_softirq(RCU_SOFTIRQ);
20805 +       int cpu;
20807 +       for_each_possible_cpu(cpu)
20808 +               per_cpu(rcu_cpu_has_work, cpu) = 0;
20809 +       BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
20810 +       return 0;
20812 +early_initcall(rcu_spawn_core_kthreads);
20814  /*
20815   * Handle any core-RCU processing required by a call_rcu() invocation.
20816 @@ -3114,6 +3233,7 @@
20818  EXPORT_SYMBOL_GPL(call_rcu_sched);
20820 +#ifndef CONFIG_PREEMPT_RT_FULL
20821  /*
20822   * Queue an RCU callback for invocation after a quicker grace period.
20823   */
20824 @@ -3122,6 +3242,7 @@
20825         __call_rcu(head, func, &rcu_bh_state, -1, 0);
20827  EXPORT_SYMBOL_GPL(call_rcu_bh);
20828 +#endif
20830  /*
20831   * Queue an RCU callback for lazy invocation after a grace period.
20832 @@ -3213,6 +3334,7 @@
20834  EXPORT_SYMBOL_GPL(synchronize_sched);
20836 +#ifndef CONFIG_PREEMPT_RT_FULL
20837  /**
20838   * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
20839   *
20840 @@ -3239,6 +3361,7 @@
20841                 wait_rcu_gp(call_rcu_bh);
20843  EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
20844 +#endif
20846  /**
20847   * get_state_synchronize_rcu - Snapshot current RCU state
20848 @@ -3524,7 +3647,7 @@
20849                         raw_spin_unlock_irqrestore(&rnp->lock, flags);
20850                         if (wake) {
20851                                 smp_mb(); /* EGP done before wake_up(). */
20852 -                               wake_up(&rsp->expedited_wq);
20853 +                               swake_up(&rsp->expedited_wq);
20854                         }
20855                         break;
20856                 }
20857 @@ -3781,7 +3904,7 @@
20858         jiffies_start = jiffies;
20860         for (;;) {
20861 -               ret = wait_event_interruptible_timeout(
20862 +               ret = swait_event_timeout(
20863                                 rsp->expedited_wq,
20864                                 sync_rcu_preempt_exp_done(rnp_root),
20865                                 jiffies_stall);
20866 @@ -3789,7 +3912,7 @@
20867                         return;
20868                 if (ret < 0) {
20869                         /* Hit a signal, disable CPU stall warnings. */
20870 -                       wait_event(rsp->expedited_wq,
20871 +                       swait_event(rsp->expedited_wq,
20872                                    sync_rcu_preempt_exp_done(rnp_root));
20873                         return;
20874                 }
20875 @@ -4101,6 +4224,7 @@
20876         mutex_unlock(&rsp->barrier_mutex);
20879 +#ifndef CONFIG_PREEMPT_RT_FULL
20880  /**
20881   * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
20882   */
20883 @@ -4109,6 +4233,7 @@
20884         _rcu_barrier(&rcu_bh_state);
20886  EXPORT_SYMBOL_GPL(rcu_barrier_bh);
20887 +#endif
20889  /**
20890   * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
20891 @@ -4455,8 +4580,8 @@
20892                 }
20893         }
20895 -       init_waitqueue_head(&rsp->gp_wq);
20896 -       init_waitqueue_head(&rsp->expedited_wq);
20897 +       init_swait_queue_head(&rsp->gp_wq);
20898 +       init_swait_queue_head(&rsp->expedited_wq);
20899         rnp = rsp->level[rcu_num_lvls - 1];
20900         for_each_possible_cpu(i) {
20901                 while (i > rnp->grphi)
20902 @@ -4576,12 +4701,13 @@
20904         rcu_bootup_announce();
20905         rcu_init_geometry();
20906 +#ifndef CONFIG_PREEMPT_RT_FULL
20907         rcu_init_one(&rcu_bh_state, &rcu_bh_data);
20908 +#endif
20909         rcu_init_one(&rcu_sched_state, &rcu_sched_data);
20910         if (dump_tree)
20911                 rcu_dump_rcu_node_tree(&rcu_sched_state);
20912         __rcu_init_preempt();
20913 -       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
20915         /*
20916          * We don't need protection against CPU-hotplug here because
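
The tree.c hunks above convert RCU's grace-period, expedited, and no-CBs wait queues from wait_queue_head_t to simple wait queues (swait), whose wake paths do bounded work and can therefore be driven from raw-spinlock or irq-off context on RT. A minimal sketch of the swait API as the patch uses it; the flag and function names here are illustrative only:

#include <linux/swait.h>

static struct swait_queue_head gp_wait;
static bool gp_flag;

static void gp_wait_setup(void)
{
	init_swait_queue_head(&gp_wait);	/* cf. init_swait_queue_head() above */
}

/* Waiter side: sleeps until gp_flag becomes true, -ERESTARTSYS on a signal. */
static int wait_for_flag(void)
{
	return swait_event_interruptible(gp_wait, READ_ONCE(gp_flag));
}

/* Waker side: safe to call from atomic context; wakes at most one waiter. */
static void set_flag(void)
{
	WRITE_ONCE(gp_flag, true);
	swake_up(&gp_wait);
}
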
20917 diff -Nur linux-4.4.46.orig/kernel/rcu/tree.h linux-4.4.46/kernel/rcu/tree.h
20918 --- linux-4.4.46.orig/kernel/rcu/tree.h 2017-02-01 08:31:11.000000000 +0100
20919 +++ linux-4.4.46/kernel/rcu/tree.h      2017-02-03 17:18:10.931619212 +0100
20920 @@ -27,6 +27,7 @@
20921  #include <linux/threads.h>
20922  #include <linux/cpumask.h>
20923  #include <linux/seqlock.h>
20924 +#include <linux/swait.h>
20925  #include <linux/stop_machine.h>
20927  /*
20928 @@ -241,7 +242,7 @@
20929                                 /* Refused to boost: not sure why, though. */
20930                                 /*  This can happen due to race conditions. */
20931  #ifdef CONFIG_RCU_NOCB_CPU
20932 -       wait_queue_head_t nocb_gp_wq[2];
20933 +       struct swait_queue_head nocb_gp_wq[2];
20934                                 /* Place for rcu_nocb_kthread() to wait GP. */
20935  #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
20936         int need_future_gp[2];
20937 @@ -393,7 +394,7 @@
20938         atomic_long_t nocb_q_count_lazy; /*  invocation (all stages). */
20939         struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
20940         struct rcu_head **nocb_follower_tail;
20941 -       wait_queue_head_t nocb_wq;      /* For nocb kthreads to sleep on. */
20942 +       struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
20943         struct task_struct *nocb_kthread;
20944         int nocb_defer_wakeup;          /* Defer wakeup of nocb_kthread. */
20946 @@ -472,7 +473,7 @@
20947         unsigned long gpnum;                    /* Current gp number. */
20948         unsigned long completed;                /* # of last completed gp. */
20949         struct task_struct *gp_kthread;         /* Task for grace periods. */
20950 -       wait_queue_head_t gp_wq;                /* Where GP task waits. */
20951 +       struct swait_queue_head gp_wq;          /* Where GP task waits. */
20952         short gp_flags;                         /* Commands for GP task. */
20953         short gp_state;                         /* GP kthread sleep state. */
20955 @@ -504,7 +505,7 @@
20956         atomic_long_t expedited_workdone3;      /* # done by others #3. */
20957         atomic_long_t expedited_normal;         /* # fallbacks to normal. */
20958         atomic_t expedited_need_qs;             /* # CPUs left to check in. */
20959 -       wait_queue_head_t expedited_wq;         /* Wait for check-ins. */
20960 +       struct swait_queue_head expedited_wq;   /* Wait for check-ins. */
20961         int ncpus_snap;                         /* # CPUs seen last time. */
20963         unsigned long jiffies_force_qs;         /* Time at which to invoke */
20964 @@ -556,18 +557,18 @@
20965   */
20966  extern struct rcu_state rcu_sched_state;
20968 +#ifndef CONFIG_PREEMPT_RT_FULL
20969  extern struct rcu_state rcu_bh_state;
20970 +#endif
20972  #ifdef CONFIG_PREEMPT_RCU
20973  extern struct rcu_state rcu_preempt_state;
20974  #endif /* #ifdef CONFIG_PREEMPT_RCU */
20976 -#ifdef CONFIG_RCU_BOOST
20977  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
20978  DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
20979  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
20980  DECLARE_PER_CPU(char, rcu_cpu_has_work);
20981 -#endif /* #ifdef CONFIG_RCU_BOOST */
20983  #ifndef RCU_TREE_NONCORE
20985 @@ -587,10 +588,9 @@
20986  static void __init __rcu_init_preempt(void);
20987  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
20988  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
20989 -static void invoke_rcu_callbacks_kthread(void);
20990  static bool rcu_is_callbacks_kthread(void);
20991 +static void rcu_cpu_kthread_setup(unsigned int cpu);
20992  #ifdef CONFIG_RCU_BOOST
20993 -static void rcu_preempt_do_callbacks(void);
20994  static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
20995                                                  struct rcu_node *rnp);
20996  #endif /* #ifdef CONFIG_RCU_BOOST */
20997 @@ -607,7 +607,8 @@
20998  static void increment_cpu_stall_ticks(void);
20999  static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
21000  static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
21001 -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
21002 +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
21003 +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
21004  static void rcu_init_one_nocb(struct rcu_node *rnp);
21005  static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
21006                             bool lazy, unsigned long flags);
21007 diff -Nur linux-4.4.46.orig/kernel/rcu/tree_plugin.h linux-4.4.46/kernel/rcu/tree_plugin.h
21008 --- linux-4.4.46.orig/kernel/rcu/tree_plugin.h  2017-02-01 08:31:11.000000000 +0100
21009 +++ linux-4.4.46/kernel/rcu/tree_plugin.h       2017-02-03 17:18:10.931619212 +0100
21010 @@ -24,25 +24,10 @@
21011   *        Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21012   */
21014 -#include <linux/delay.h>
21015 -#include <linux/gfp.h>
21016 -#include <linux/oom.h>
21017 -#include <linux/smpboot.h>
21018 -#include "../time/tick-internal.h"
21020  #ifdef CONFIG_RCU_BOOST
21022  #include "../locking/rtmutex_common.h"
21025 - * Control variables for per-CPU and per-rcu_node kthreads.  These
21026 - * handle all flavors of RCU.
21027 - */
21028 -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
21029 -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
21030 -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
21031 -DEFINE_PER_CPU(char, rcu_cpu_has_work);
21033  #else /* #ifdef CONFIG_RCU_BOOST */
21035  /*
21036 @@ -55,6 +40,14 @@
21038  #endif /* #else #ifdef CONFIG_RCU_BOOST */
21041 + * Control variables for per-CPU and per-rcu_node kthreads.  These
21042 + * handle all flavors of RCU.
21043 + */
21044 +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
21045 +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
21046 +DEFINE_PER_CPU(char, rcu_cpu_has_work);
21048  #ifdef CONFIG_RCU_NOCB_CPU
21049  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
21050  static bool have_rcu_nocb_mask;            /* Was rcu_nocb_mask allocated? */
21051 @@ -432,7 +425,7 @@
21052         }
21054         /* Hardware IRQ handlers cannot block, complain if they get here. */
21055 -       if (in_irq() || in_serving_softirq()) {
21056 +       if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
21057                 lockdep_rcu_suspicious(__FILE__, __LINE__,
21058                                        "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
21059                 pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
21060 @@ -645,15 +638,6 @@
21061                 t->rcu_read_unlock_special.b.need_qs = true;
21064 -#ifdef CONFIG_RCU_BOOST
21066 -static void rcu_preempt_do_callbacks(void)
21068 -       rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
21071 -#endif /* #ifdef CONFIG_RCU_BOOST */
21073  /*
21074   * Queue a preemptible-RCU callback for invocation after a grace period.
21075   */
21076 @@ -930,6 +914,19 @@
21078  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
21081 + * If boosting, set rcuc kthreads to realtime priority.
21082 + */
21083 +static void rcu_cpu_kthread_setup(unsigned int cpu)
21085 +#ifdef CONFIG_RCU_BOOST
21086 +       struct sched_param sp;
21088 +       sp.sched_priority = kthread_prio;
21089 +       sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
21090 +#endif /* #ifdef CONFIG_RCU_BOOST */
21093  #ifdef CONFIG_RCU_BOOST
21095  #include "../locking/rtmutex_common.h"
21096 @@ -961,16 +958,6 @@
21098  #endif /* #else #ifdef CONFIG_RCU_TRACE */
21100 -static void rcu_wake_cond(struct task_struct *t, int status)
21102 -       /*
21103 -        * If the thread is yielding, only wake it when this
21104 -        * is invoked from idle
21105 -        */
21106 -       if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
21107 -               wake_up_process(t);
21110  /*
21111   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
21112   * or ->boost_tasks, advancing the pointer to the next task in the
21113 @@ -1115,23 +1102,6 @@
21116  /*
21117 - * Wake up the per-CPU kthread to invoke RCU callbacks.
21118 - */
21119 -static void invoke_rcu_callbacks_kthread(void)
21121 -       unsigned long flags;
21123 -       local_irq_save(flags);
21124 -       __this_cpu_write(rcu_cpu_has_work, 1);
21125 -       if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
21126 -           current != __this_cpu_read(rcu_cpu_kthread_task)) {
21127 -               rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
21128 -                             __this_cpu_read(rcu_cpu_kthread_status));
21129 -       }
21130 -       local_irq_restore(flags);
21134   * Is the current CPU running the RCU-callbacks kthread?
21135   * Caller must have preemption disabled.
21136   */
21137 @@ -1186,67 +1156,6 @@
21138         return 0;
21141 -static void rcu_kthread_do_work(void)
21143 -       rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
21144 -       rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
21145 -       rcu_preempt_do_callbacks();
21148 -static void rcu_cpu_kthread_setup(unsigned int cpu)
21150 -       struct sched_param sp;
21152 -       sp.sched_priority = kthread_prio;
21153 -       sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
21156 -static void rcu_cpu_kthread_park(unsigned int cpu)
21158 -       per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
21161 -static int rcu_cpu_kthread_should_run(unsigned int cpu)
21163 -       return __this_cpu_read(rcu_cpu_has_work);
21167 - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
21168 - * RCU softirq used in flavors and configurations of RCU that do not
21169 - * support RCU priority boosting.
21170 - */
21171 -static void rcu_cpu_kthread(unsigned int cpu)
21173 -       unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
21174 -       char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
21175 -       int spincnt;
21177 -       for (spincnt = 0; spincnt < 10; spincnt++) {
21178 -               trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
21179 -               local_bh_disable();
21180 -               *statusp = RCU_KTHREAD_RUNNING;
21181 -               this_cpu_inc(rcu_cpu_kthread_loops);
21182 -               local_irq_disable();
21183 -               work = *workp;
21184 -               *workp = 0;
21185 -               local_irq_enable();
21186 -               if (work)
21187 -                       rcu_kthread_do_work();
21188 -               local_bh_enable();
21189 -               if (*workp == 0) {
21190 -                       trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
21191 -                       *statusp = RCU_KTHREAD_WAITING;
21192 -                       return;
21193 -               }
21194 -       }
21195 -       *statusp = RCU_KTHREAD_YIELDING;
21196 -       trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
21197 -       schedule_timeout_interruptible(2);
21198 -       trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
21199 -       *statusp = RCU_KTHREAD_WAITING;
21202  /*
21203   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
21204   * served by the rcu_node in question.  The CPU hotplug lock is still
21205 @@ -1276,26 +1185,12 @@
21206         free_cpumask_var(cm);
21209 -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
21210 -       .store                  = &rcu_cpu_kthread_task,
21211 -       .thread_should_run      = rcu_cpu_kthread_should_run,
21212 -       .thread_fn              = rcu_cpu_kthread,
21213 -       .thread_comm            = "rcuc/%u",
21214 -       .setup                  = rcu_cpu_kthread_setup,
21215 -       .park                   = rcu_cpu_kthread_park,
21218  /*
21219   * Spawn boost kthreads -- called as soon as the scheduler is running.
21220   */
21221  static void __init rcu_spawn_boost_kthreads(void)
21223         struct rcu_node *rnp;
21224 -       int cpu;
21226 -       for_each_possible_cpu(cpu)
21227 -               per_cpu(rcu_cpu_has_work, cpu) = 0;
21228 -       BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
21229         rcu_for_each_leaf_node(rcu_state_p, rnp)
21230                 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
21232 @@ -1318,11 +1213,6 @@
21233         raw_spin_unlock_irqrestore(&rnp->lock, flags);
21236 -static void invoke_rcu_callbacks_kthread(void)
21238 -       WARN_ON_ONCE(1);
21241  static bool rcu_is_callbacks_kthread(void)
21243         return false;
21244 @@ -1346,7 +1236,7 @@
21246  #endif /* #else #ifdef CONFIG_RCU_BOOST */
21248 -#if !defined(CONFIG_RCU_FAST_NO_HZ)
21249 +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
21251  /*
21252   * Check to see if any future RCU-related work will need to be done
21253 @@ -1363,7 +1253,9 @@
21254         return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
21255                ? 0 : rcu_cpu_has_callbacks(NULL);
21257 +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
21259 +#if !defined(CONFIG_RCU_FAST_NO_HZ)
21260  /*
21261   * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
21262   * after it.
21263 @@ -1459,6 +1351,8 @@
21264         return cbs_ready;
21267 +#ifndef CONFIG_PREEMPT_RT_FULL
21269  /*
21270   * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
21271   * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
21272 @@ -1504,6 +1398,7 @@
21273         *nextevt = basemono + dj * TICK_NSEC;
21274         return 0;
21276 +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
21278  /*
21279   * Prepare a CPU for idle from an RCU perspective.  The first major task
21280 @@ -1822,9 +1717,9 @@
21281   * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
21282   * grace period.
21283   */
21284 -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
21285 +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
21287 -       wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
21288 +       swake_up_all(sq);
21291  /*
21292 @@ -1840,10 +1735,15 @@
21293         rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
21296 +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
21298 +       return &rnp->nocb_gp_wq[rnp->completed & 0x1];
21301  static void rcu_init_one_nocb(struct rcu_node *rnp)
21303 -       init_waitqueue_head(&rnp->nocb_gp_wq[0]);
21304 -       init_waitqueue_head(&rnp->nocb_gp_wq[1]);
21305 +       init_swait_queue_head(&rnp->nocb_gp_wq[0]);
21306 +       init_swait_queue_head(&rnp->nocb_gp_wq[1]);
21309  #ifndef CONFIG_RCU_NOCB_CPU_ALL
21310 @@ -1868,7 +1768,7 @@
21311         if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
21312                 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
21313                 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
21314 -               wake_up(&rdp_leader->nocb_wq);
21315 +               swake_up(&rdp_leader->nocb_wq);
21316         }
21319 @@ -2081,7 +1981,7 @@
21320          */
21321         trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
21322         for (;;) {
21323 -               wait_event_interruptible(
21324 +               swait_event_interruptible(
21325                         rnp->nocb_gp_wq[c & 0x1],
21326                         (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
21327                 if (likely(d))
21328 @@ -2109,7 +2009,7 @@
21329         /* Wait for callbacks to appear. */
21330         if (!rcu_nocb_poll) {
21331                 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
21332 -               wait_event_interruptible(my_rdp->nocb_wq,
21333 +               swait_event_interruptible(my_rdp->nocb_wq,
21334                                 !READ_ONCE(my_rdp->nocb_leader_sleep));
21335                 /* Memory barrier handled by smp_mb() calls below and repoll. */
21336         } else if (firsttime) {
21337 @@ -2184,7 +2084,7 @@
21338                          * List was empty, wake up the follower.
21339                          * Memory barriers supplied by atomic_long_add().
21340                          */
21341 -                       wake_up(&rdp->nocb_wq);
21342 +                       swake_up(&rdp->nocb_wq);
21343                 }
21344         }
21346 @@ -2205,7 +2105,7 @@
21347                 if (!rcu_nocb_poll) {
21348                         trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
21349                                             "FollowerSleep");
21350 -                       wait_event_interruptible(rdp->nocb_wq,
21351 +                       swait_event_interruptible(rdp->nocb_wq,
21352                                                  READ_ONCE(rdp->nocb_follower_head));
21353                 } else if (firsttime) {
21354                         /* Don't drown trace log with "Poll"! */
21355 @@ -2365,7 +2265,7 @@
21356  static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
21358         rdp->nocb_tail = &rdp->nocb_head;
21359 -       init_waitqueue_head(&rdp->nocb_wq);
21360 +       init_swait_queue_head(&rdp->nocb_wq);
21361         rdp->nocb_follower_tail = &rdp->nocb_follower_head;
21364 @@ -2515,7 +2415,7 @@
21365         return false;
21368 -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
21369 +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
21373 @@ -2523,6 +2423,11 @@
21377 +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
21379 +       return NULL;
21382  static void rcu_init_one_nocb(struct rcu_node *rnp)
21385 diff -Nur linux-4.4.46.orig/kernel/rcu/update.c linux-4.4.46/kernel/rcu/update.c
21386 --- linux-4.4.46.orig/kernel/rcu/update.c       2017-02-01 08:31:11.000000000 +0100
21387 +++ linux-4.4.46/kernel/rcu/update.c    2017-02-03 17:18:10.931619212 +0100
21388 @@ -276,6 +276,7 @@
21390  EXPORT_SYMBOL_GPL(rcu_read_lock_held);
21392 +#ifndef CONFIG_PREEMPT_RT_FULL
21393  /**
21394   * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
21395   *
21396 @@ -302,6 +303,7 @@
21397         return in_softirq() || irqs_disabled();
21399  EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
21400 +#endif
21402  #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
21404 diff -Nur linux-4.4.46.orig/kernel/relay.c linux-4.4.46/kernel/relay.c
21405 --- linux-4.4.46.orig/kernel/relay.c    2017-02-01 08:31:11.000000000 +0100
21406 +++ linux-4.4.46/kernel/relay.c 2017-02-03 17:18:10.931619212 +0100
21407 @@ -336,6 +336,10 @@
21409         struct rchan_buf *buf = (struct rchan_buf *)data;
21410         wake_up_interruptible(&buf->read_wait);
21411 +       /*
21412 +        * Stupid polling for now:
21413 +        */
21414 +       mod_timer(&buf->timer, jiffies + 1);
21417  /**
21418 @@ -353,6 +357,7 @@
21419                 init_waitqueue_head(&buf->read_wait);
21420                 kref_init(&buf->kref);
21421                 setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
21422 +               mod_timer(&buf->timer, jiffies + 1);
21423         } else
21424                 del_timer_sync(&buf->timer);
21426 @@ -736,15 +741,6 @@
21427                 else
21428                         buf->early_bytes += buf->chan->subbuf_size -
21429                                             buf->padding[old_subbuf];
21430 -               smp_mb();
21431 -               if (waitqueue_active(&buf->read_wait))
21432 -                       /*
21433 -                        * Calling wake_up_interruptible() from here
21434 -                        * will deadlock if we happen to be logging
21435 -                        * from the scheduler (trying to re-grab
21436 -                        * rq->lock), so defer it.
21437 -                        */
21438 -                       mod_timer(&buf->timer, jiffies + 1);
21439         }
21441         old = buf->data;
21442 diff -Nur linux-4.4.46.orig/kernel/sched/completion.c linux-4.4.46/kernel/sched/completion.c
21443 --- linux-4.4.46.orig/kernel/sched/completion.c 2017-02-01 08:31:11.000000000 +0100
21444 +++ linux-4.4.46/kernel/sched/completion.c      2017-02-03 17:18:10.931619212 +0100
21445 @@ -30,10 +30,10 @@
21447         unsigned long flags;
21449 -       spin_lock_irqsave(&x->wait.lock, flags);
21450 +       raw_spin_lock_irqsave(&x->wait.lock, flags);
21451         x->done++;
21452 -       __wake_up_locked(&x->wait, TASK_NORMAL, 1);
21453 -       spin_unlock_irqrestore(&x->wait.lock, flags);
21454 +       swake_up_locked(&x->wait);
21455 +       raw_spin_unlock_irqrestore(&x->wait.lock, flags);
21457  EXPORT_SYMBOL(complete);
21459 @@ -50,10 +50,10 @@
21461         unsigned long flags;
21463 -       spin_lock_irqsave(&x->wait.lock, flags);
21464 +       raw_spin_lock_irqsave(&x->wait.lock, flags);
21465         x->done += UINT_MAX/2;
21466 -       __wake_up_locked(&x->wait, TASK_NORMAL, 0);
21467 -       spin_unlock_irqrestore(&x->wait.lock, flags);
21468 +       swake_up_all_locked(&x->wait);
21469 +       raw_spin_unlock_irqrestore(&x->wait.lock, flags);
21471  EXPORT_SYMBOL(complete_all);
21473 @@ -62,20 +62,20 @@
21474                    long (*action)(long), long timeout, int state)
21476         if (!x->done) {
21477 -               DECLARE_WAITQUEUE(wait, current);
21478 +               DECLARE_SWAITQUEUE(wait);
21480 -               __add_wait_queue_tail_exclusive(&x->wait, &wait);
21481 +               __prepare_to_swait(&x->wait, &wait);
21482                 do {
21483                         if (signal_pending_state(state, current)) {
21484                                 timeout = -ERESTARTSYS;
21485                                 break;
21486                         }
21487                         __set_current_state(state);
21488 -                       spin_unlock_irq(&x->wait.lock);
21489 +                       raw_spin_unlock_irq(&x->wait.lock);
21490                         timeout = action(timeout);
21491 -                       spin_lock_irq(&x->wait.lock);
21492 +                       raw_spin_lock_irq(&x->wait.lock);
21493                 } while (!x->done && timeout);
21494 -               __remove_wait_queue(&x->wait, &wait);
21495 +               __finish_swait(&x->wait, &wait);
21496                 if (!x->done)
21497                         return timeout;
21498         }
21499 @@ -89,9 +89,9 @@
21501         might_sleep();
21503 -       spin_lock_irq(&x->wait.lock);
21504 +       raw_spin_lock_irq(&x->wait.lock);
21505         timeout = do_wait_for_common(x, action, timeout, state);
21506 -       spin_unlock_irq(&x->wait.lock);
21507 +       raw_spin_unlock_irq(&x->wait.lock);
21508         return timeout;
21511 @@ -277,12 +277,12 @@
21512         if (!READ_ONCE(x->done))
21513                 return 0;
21515 -       spin_lock_irqsave(&x->wait.lock, flags);
21516 +       raw_spin_lock_irqsave(&x->wait.lock, flags);
21517         if (!x->done)
21518                 ret = 0;
21519         else
21520                 x->done--;
21521 -       spin_unlock_irqrestore(&x->wait.lock, flags);
21522 +       raw_spin_unlock_irqrestore(&x->wait.lock, flags);
21523         return ret;
21525  EXPORT_SYMBOL(try_wait_for_completion);
21526 @@ -311,7 +311,7 @@
21527          * after it's acquired the lock.
21528          */
21529         smp_rmb();
21530 -       spin_unlock_wait(&x->wait.lock);
21531 +       raw_spin_unlock_wait(&x->wait.lock);
21532         return true;
21534  EXPORT_SYMBOL(completion_done);
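
The completion.c hunk above moves struct completion from a wait_queue_head_t protected by a spinlock_t to a swait queue protected by a raw spinlock, so complete() remains usable from contexts that must not take sleeping locks on RT. The caller-visible completion API is unchanged; a minimal usage sketch with illustrative names:

#include <linux/completion.h>

static DECLARE_COMPLETION(setup_done);

/* Waiter side, e.g. a probe or init path: blocks until setup_finished() runs. */
static void wait_for_setup(void)
{
	wait_for_completion(&setup_done);
}

/* Waker side, e.g. an interrupt handler or workqueue callback. */
static void setup_finished(void)
{
	complete(&setup_done);	/* with this patch: raw lock + swake_up_locked() */
}
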
21535 diff -Nur linux-4.4.46.orig/kernel/sched/core.c linux-4.4.46/kernel/sched/core.c
21536 --- linux-4.4.46.orig/kernel/sched/core.c       2017-02-01 08:31:11.000000000 +0100
21537 +++ linux-4.4.46/kernel/sched/core.c    2017-02-03 17:18:10.931619212 +0100
21538 @@ -260,7 +260,11 @@
21539   * Number of tasks to iterate in a single balance run.
21540   * Limited because this is done with IRQs disabled.
21541   */
21542 +#ifndef CONFIG_PREEMPT_RT_FULL
21543  const_debug unsigned int sysctl_sched_nr_migrate = 32;
21544 +#else
21545 +const_debug unsigned int sysctl_sched_nr_migrate = 8;
21546 +#endif
21548  /*
21549   * period over which we average the RT time consumption, measured
21550 @@ -438,6 +442,7 @@
21552         hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
21553         rq->hrtick_timer.function = hrtick;
21554 +       rq->hrtick_timer.irqsafe = 1;
21556  #else  /* CONFIG_SCHED_HRTICK */
21557  static inline void hrtick_clear(struct rq *rq)
21558 @@ -542,7 +547,7 @@
21559         head->lastp = &node->next;
21562 -void wake_up_q(struct wake_q_head *head)
21563 +void __wake_up_q(struct wake_q_head *head, bool sleeper)
21565         struct wake_q_node *node = head->first;
21567 @@ -559,7 +564,10 @@
21568                  * wake_up_process() implies a wmb() to pair with the queueing
21569                  * in wake_q_add() so as not to miss wakeups.
21570                  */
21571 -               wake_up_process(task);
21572 +               if (sleeper)
21573 +                       wake_up_lock_sleeper(task);
21574 +               else
21575 +                       wake_up_process(task);
21576                 put_task_struct(task);
21577         }
21579 @@ -595,6 +603,38 @@
21580                 trace_sched_wake_idle_without_ipi(cpu);
21583 +#ifdef CONFIG_PREEMPT_LAZY
21584 +void resched_curr_lazy(struct rq *rq)
21586 +       struct task_struct *curr = rq->curr;
21587 +       int cpu;
21589 +       if (!sched_feat(PREEMPT_LAZY)) {
21590 +               resched_curr(rq);
21591 +               return;
21592 +       }
21594 +       lockdep_assert_held(&rq->lock);
21596 +       if (test_tsk_need_resched(curr))
21597 +               return;
21599 +       if (test_tsk_need_resched_lazy(curr))
21600 +               return;
21602 +       set_tsk_need_resched_lazy(curr);
21604 +       cpu = cpu_of(rq);
21605 +       if (cpu == smp_processor_id())
21606 +               return;
21608 +       /* NEED_RESCHED_LAZY must be visible before we test polling */
21609 +       smp_mb();
21610 +       if (!tsk_is_polling(curr))
21611 +               smp_send_reschedule(cpu);
21613 +#endif
21615  void resched_cpu(int cpu)
21617         struct rq *rq = cpu_rq(cpu);
21618 @@ -618,11 +658,14 @@
21619   */
21620  int get_nohz_timer_target(void)
21622 -       int i, cpu = smp_processor_id();
21623 +       int i, cpu;
21624         struct sched_domain *sd;
21626 +       preempt_disable_rt();
21627 +       cpu = smp_processor_id();
21629         if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
21630 -               return cpu;
21631 +               goto preempt_en_rt;
21633         rcu_read_lock();
21634         for_each_domain(cpu, sd) {
21635 @@ -641,6 +684,8 @@
21636                 cpu = housekeeping_any_cpu();
21637  unlock:
21638         rcu_read_unlock();
21639 +preempt_en_rt:
21640 +       preempt_enable_rt();
21641         return cpu;
21643  /*
21644 @@ -1174,6 +1219,11 @@
21646         lockdep_assert_held(&p->pi_lock);
21648 +       if (__migrate_disabled(p)) {
21649 +               cpumask_copy(&p->cpus_allowed, new_mask);
21650 +               return;
21651 +       }
21653         queued = task_on_rq_queued(p);
21654         running = task_current(rq, p);
21656 @@ -1196,6 +1246,84 @@
21657                 enqueue_task(rq, p, ENQUEUE_RESTORE);
21660 +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
21661 +static DEFINE_MUTEX(sched_down_mutex);
21662 +static cpumask_t sched_down_cpumask;
21664 +void tell_sched_cpu_down_begin(int cpu)
21666 +       mutex_lock(&sched_down_mutex);
21667 +       cpumask_set_cpu(cpu, &sched_down_cpumask);
21668 +       mutex_unlock(&sched_down_mutex);
21671 +void tell_sched_cpu_down_done(int cpu)
21673 +       mutex_lock(&sched_down_mutex);
21674 +       cpumask_clear_cpu(cpu, &sched_down_cpumask);
21675 +       mutex_unlock(&sched_down_mutex);
21678 +/**
21679 + * migrate_me - try to move the current task off this cpu
21680 + *
21681 + * Used by the pin_current_cpu() code to try to get tasks
21682 + * to move off the current CPU as it is going down.
21683 + * It will only move the task if the task isn't pinned to
21684 + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
21685 + * and the task is in a RUNNING state. Otherwise the
21686 + * movement of the task will wake it up (change its state
21687 + * to running) when the task did not expect it.
21688 + *
21689 + * Returns 1 if it succeeded in moving the current task
21690 + *         0 otherwise.
21691 + */
21692 +int migrate_me(void)
21694 +       struct task_struct *p = current;
21695 +       struct migration_arg arg;
21696 +       struct cpumask *cpumask;
21697 +       struct cpumask *mask;
21698 +       unsigned long flags;
21699 +       unsigned int dest_cpu;
21700 +       struct rq *rq;
21702 +       /*
21703 +        * We cannot migrate tasks bound to a CPU or tasks that are not
21704 +        * running. The movement of the task will wake it up.
21705 +        */
21706 +       if (p->flags & PF_NO_SETAFFINITY || p->state)
21707 +               return 0;
21709 +       mutex_lock(&sched_down_mutex);
21710 +       rq = task_rq_lock(p, &flags);
21712 +       cpumask = this_cpu_ptr(&sched_cpumasks);
21713 +       mask = &p->cpus_allowed;
21715 +       cpumask_andnot(cpumask, mask, &sched_down_cpumask);
21717 +       if (!cpumask_weight(cpumask)) {
21718 +               /* It's only on this CPU? */
21719 +               task_rq_unlock(rq, p, &flags);
21720 +               mutex_unlock(&sched_down_mutex);
21721 +               return 0;
21722 +       }
21724 +       dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
21726 +       arg.task = p;
21727 +       arg.dest_cpu = dest_cpu;
21729 +       task_rq_unlock(rq, p, &flags);
21731 +       stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
21732 +       tlb_migrate_finish(p->mm);
21733 +       mutex_unlock(&sched_down_mutex);
21735 +       return 1;
21738  /*
21739   * Change a given task's CPU affinity. Migrate the thread to a
21740   * proper CPU and schedule it away if the CPU it's executing on
21741 @@ -1235,7 +1363,7 @@
21742         do_set_cpus_allowed(p, new_mask);
21744         /* Can the task run on the task's current CPU? If so, we're done */
21745 -       if (cpumask_test_cpu(task_cpu(p), new_mask))
21746 +       if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
21747                 goto out;
21749         dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
21750 @@ -1411,6 +1539,18 @@
21751         return ret;
21754 +static bool check_task_state(struct task_struct *p, long match_state)
21756 +       bool match = false;
21758 +       raw_spin_lock_irq(&p->pi_lock);
21759 +       if (p->state == match_state || p->saved_state == match_state)
21760 +               match = true;
21761 +       raw_spin_unlock_irq(&p->pi_lock);
21763 +       return match;
21766  /*
21767   * wait_task_inactive - wait for a thread to unschedule.
21768   *
21769 @@ -1455,7 +1595,7 @@
21770                  * is actually now running somewhere else!
21771                  */
21772                 while (task_running(rq, p)) {
21773 -                       if (match_state && unlikely(p->state != match_state))
21774 +                       if (match_state && !check_task_state(p, match_state))
21775                                 return 0;
21776                         cpu_relax();
21777                 }
21778 @@ -1470,7 +1610,8 @@
21779                 running = task_running(rq, p);
21780                 queued = task_on_rq_queued(p);
21781                 ncsw = 0;
21782 -               if (!match_state || p->state == match_state)
21783 +               if (!match_state || p->state == match_state ||
21784 +                   p->saved_state == match_state)
21785                         ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
21786                 task_rq_unlock(rq, p, &flags);
21788 @@ -1627,7 +1768,7 @@
21790         lockdep_assert_held(&p->pi_lock);
21792 -       if (p->nr_cpus_allowed > 1)
21793 +       if (tsk_nr_cpus_allowed(p) > 1)
21794                 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
21796         /*
21797 @@ -1707,10 +1848,6 @@
21799         activate_task(rq, p, en_flags);
21800         p->on_rq = TASK_ON_RQ_QUEUED;
21802 -       /* if a worker is waking up, notify workqueue */
21803 -       if (p->flags & PF_WQ_WORKER)
21804 -               wq_worker_waking_up(p, cpu_of(rq));
21807  /*
21808 @@ -1937,8 +2074,27 @@
21809          */
21810         smp_mb__before_spinlock();
21811         raw_spin_lock_irqsave(&p->pi_lock, flags);
21812 -       if (!(p->state & state))
21813 +       if (!(p->state & state)) {
21814 +               /*
21815 +                * The task might be running due to a spinlock sleeper
21816 +                * wakeup. Check the saved state and set it to running
21817 +                * if the wakeup condition is true.
21818 +                */
21819 +               if (!(wake_flags & WF_LOCK_SLEEPER)) {
21820 +                       if (p->saved_state & state) {
21821 +                               p->saved_state = TASK_RUNNING;
21822 +                               success = 1;
21823 +                       }
21824 +               }
21825                 goto out;
21826 +       }
21828 +       /*
21829 +        * If this is a regular wakeup, then we can unconditionally
21830 +        * clear the saved state of a "lock sleeper".
21831 +        */
21832 +       if (!(wake_flags & WF_LOCK_SLEEPER))
21833 +               p->saved_state = TASK_RUNNING;
21835         trace_sched_waking(p);
21837 @@ -2030,52 +2186,6 @@
21840  /**
21841 - * try_to_wake_up_local - try to wake up a local task with rq lock held
21842 - * @p: the thread to be awakened
21843 - *
21844 - * Put @p on the run-queue if it's not already there. The caller must
21845 - * ensure that this_rq() is locked, @p is bound to this_rq() and not
21846 - * the current task.
21847 - */
21848 -static void try_to_wake_up_local(struct task_struct *p)
21850 -       struct rq *rq = task_rq(p);
21852 -       if (WARN_ON_ONCE(rq != this_rq()) ||
21853 -           WARN_ON_ONCE(p == current))
21854 -               return;
21856 -       lockdep_assert_held(&rq->lock);
21858 -       if (!raw_spin_trylock(&p->pi_lock)) {
21859 -               /*
21860 -                * This is OK, because current is on_cpu, which avoids it being
21861 -                * picked for load-balance and preemption/IRQs are still
21862 -                * disabled avoiding further scheduler activity on it and we've
21863 -                * not yet picked a replacement task.
21864 -                */
21865 -               lockdep_unpin_lock(&rq->lock);
21866 -               raw_spin_unlock(&rq->lock);
21867 -               raw_spin_lock(&p->pi_lock);
21868 -               raw_spin_lock(&rq->lock);
21869 -               lockdep_pin_lock(&rq->lock);
21870 -       }
21872 -       if (!(p->state & TASK_NORMAL))
21873 -               goto out;
21875 -       trace_sched_waking(p);
21877 -       if (!task_on_rq_queued(p))
21878 -               ttwu_activate(rq, p, ENQUEUE_WAKEUP);
21880 -       ttwu_do_wakeup(rq, p, 0);
21881 -       ttwu_stat(p, smp_processor_id(), 0);
21882 -out:
21883 -       raw_spin_unlock(&p->pi_lock);
21886 -/**
21887   * wake_up_process - Wake up a specific process
21888   * @p: The process to be woken up.
21889   *
21890 @@ -2093,6 +2203,18 @@
21892  EXPORT_SYMBOL(wake_up_process);
21894 +/**
21895 + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
21896 + * @p: The process to be woken up.
21897 + *
21898 + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
21899 + * the nature of the wakeup.
21900 + */
21901 +int wake_up_lock_sleeper(struct task_struct *p)
21903 +       return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
21906  int wake_up_state(struct task_struct *p, unsigned int state)
21908         return try_to_wake_up(p, state, 0);
21909 @@ -2279,6 +2401,9 @@
21910         p->on_cpu = 0;
21911  #endif
21912         init_task_preempt_count(p);
21913 +#ifdef CONFIG_HAVE_PREEMPT_LAZY
21914 +       task_thread_info(p)->preempt_lazy_count = 0;
21915 +#endif
21916  #ifdef CONFIG_SMP
21917         plist_node_init(&p->pushable_tasks, MAX_PRIO);
21918         RB_CLEAR_NODE(&p->pushable_dl_tasks);
21919 @@ -2603,8 +2728,12 @@
21920         finish_arch_post_lock_switch();
21922         fire_sched_in_preempt_notifiers(current);
21923 +       /*
21924 +        * We use mmdrop_delayed() here so we don't have to do the
21925 +        * full __mmdrop() when we are the last user.
21926 +        */
21927         if (mm)
21928 -               mmdrop(mm);
21929 +               mmdrop_delayed(mm);
21930         if (unlikely(prev_state == TASK_DEAD)) {
21931                 if (prev->sched_class->task_dead)
21932                         prev->sched_class->task_dead(prev);
21933 @@ -2935,16 +3064,6 @@
21935  #endif
21937 -notrace unsigned long get_parent_ip(unsigned long addr)
21939 -       if (in_lock_functions(addr)) {
21940 -               addr = CALLER_ADDR2;
21941 -               if (in_lock_functions(addr))
21942 -                       addr = CALLER_ADDR3;
21943 -       }
21944 -       return addr;
21947  #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
21948                                 defined(CONFIG_PREEMPT_TRACER))
21950 @@ -2966,7 +3085,7 @@
21951                                 PREEMPT_MASK - 10);
21952  #endif
21953         if (preempt_count() == val) {
21954 -               unsigned long ip = get_parent_ip(CALLER_ADDR1);
21955 +               unsigned long ip = get_lock_parent_ip();
21956  #ifdef CONFIG_DEBUG_PREEMPT
21957                 current->preempt_disable_ip = ip;
21958  #endif
21959 @@ -2993,7 +3112,7 @@
21960  #endif
21962         if (preempt_count() == val)
21963 -               trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
21964 +               trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
21965         __preempt_count_sub(val);
21967  EXPORT_SYMBOL(preempt_count_sub);
21968 @@ -3048,6 +3167,77 @@
21969         schedstat_inc(this_rq(), sched_count);
21972 +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
21974 +void migrate_disable(void)
21976 +       struct task_struct *p = current;
21978 +       if (in_atomic() || irqs_disabled()) {
21979 +#ifdef CONFIG_SCHED_DEBUG
21980 +               p->migrate_disable_atomic++;
21981 +#endif
21982 +               return;
21983 +       }
21985 +#ifdef CONFIG_SCHED_DEBUG
21986 +       if (unlikely(p->migrate_disable_atomic)) {
21987 +               tracing_off();
21988 +               WARN_ON_ONCE(1);
21989 +       }
21990 +#endif
21992 +       if (p->migrate_disable) {
21993 +               p->migrate_disable++;
21994 +               return;
21995 +       }
21997 +       preempt_disable();
21998 +       preempt_lazy_disable();
21999 +       pin_current_cpu();
22000 +       p->migrate_disable = 1;
22001 +       preempt_enable();
22003 +EXPORT_SYMBOL(migrate_disable);
22005 +void migrate_enable(void)
22007 +       struct task_struct *p = current;
22009 +       if (in_atomic() || irqs_disabled()) {
22010 +#ifdef CONFIG_SCHED_DEBUG
22011 +               p->migrate_disable_atomic--;
22012 +#endif
22013 +               return;
22014 +       }
22016 +#ifdef CONFIG_SCHED_DEBUG
22017 +       if (unlikely(p->migrate_disable_atomic)) {
22018 +               tracing_off();
22019 +               WARN_ON_ONCE(1);
22020 +       }
22021 +#endif
22022 +       WARN_ON_ONCE(p->migrate_disable <= 0);
22024 +       if (p->migrate_disable > 1) {
22025 +               p->migrate_disable--;
22026 +               return;
22027 +       }
22029 +       preempt_disable();
22030 +       /*
22031 +        * Clearing migrate_disable causes tsk_cpus_allowed to
22032 +        * show the tasks original cpu affinity.
22033 +        * show the task's original cpu affinity.
22034 +       p->migrate_disable = 0;
22036 +       unpin_current_cpu();
22037 +       preempt_enable();
22038 +       preempt_lazy_enable();
22040 +EXPORT_SYMBOL(migrate_enable);
22041 +#endif
22043  /*
22044   * Pick up the highest-prio task:
22045   */
22046 @@ -3172,19 +3362,6 @@
22047                 } else {
22048                         deactivate_task(rq, prev, DEQUEUE_SLEEP);
22049                         prev->on_rq = 0;
22051 -                       /*
22052 -                        * If a worker went to sleep, notify and ask workqueue
22053 -                        * whether it wants to wake up a task to maintain
22054 -                        * concurrency.
22055 -                        */
22056 -                       if (prev->flags & PF_WQ_WORKER) {
22057 -                               struct task_struct *to_wakeup;
22059 -                               to_wakeup = wq_worker_sleeping(prev, cpu);
22060 -                               if (to_wakeup)
22061 -                                       try_to_wake_up_local(to_wakeup);
22062 -                       }
22063                 }
22064                 switch_count = &prev->nvcsw;
22065         }
22066 @@ -3194,6 +3371,7 @@
22068         next = pick_next_task(rq, prev);
22069         clear_tsk_need_resched(prev);
22070 +       clear_tsk_need_resched_lazy(prev);
22071         clear_preempt_need_resched();
22072         rq->clock_skip_update = 0;
22074 @@ -3215,9 +3393,20 @@
22076  static inline void sched_submit_work(struct task_struct *tsk)
22078 -       if (!tsk->state || tsk_is_pi_blocked(tsk))
22079 +       if (!tsk->state)
22080                 return;
22081         /*
22082 +        * If a worker went to sleep, notify and ask workqueue whether
22083 +        * it wants to wake up a task to maintain concurrency.
22084 +        */
22085 +       if (tsk->flags & PF_WQ_WORKER)
22086 +               wq_worker_sleeping(tsk);
22089 +       if (tsk_is_pi_blocked(tsk))
22090 +               return;
22092 +       /*
22093          * If we are going to sleep and we have plugged IO queued,
22094          * make sure to submit it to avoid deadlocks.
22095          */
22096 @@ -3225,6 +3414,12 @@
22097                 blk_schedule_flush_plug(tsk);
22100 +static void sched_update_worker(struct task_struct *tsk)
22102 +       if (tsk->flags & PF_WQ_WORKER)
22103 +               wq_worker_running(tsk);
22106  asmlinkage __visible void __sched schedule(void)
22108         struct task_struct *tsk = current;
22109 @@ -3235,6 +3430,7 @@
22110                 __schedule(false);
22111                 sched_preempt_enable_no_resched();
22112         } while (need_resched());
22113 +       sched_update_worker(tsk);
22115  EXPORT_SYMBOL(schedule);
22117 @@ -3283,6 +3479,30 @@
22118         } while (need_resched());
22121 +#ifdef CONFIG_PREEMPT_LAZY
22123 + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since
22124 + * that is set by an RT task. Otherwise we try to avoid being scheduled
22125 + * out as long as the preempt_lazy_count counter is > 0.
22126 + */
22127 +static __always_inline int preemptible_lazy(void)
22129 +       if (test_thread_flag(TIF_NEED_RESCHED))
22130 +               return 1;
22131 +       if (current_thread_info()->preempt_lazy_count)
22132 +               return 0;
22133 +       return 1;
22136 +#else
22138 +static inline int preemptible_lazy(void)
22140 +       return 1;
22143 +#endif
22145  #ifdef CONFIG_PREEMPT
22146  /*
22147   * this is the entry point to schedule() from in-kernel preemption
22148 @@ -3297,6 +3517,8 @@
22149          */
22150         if (likely(!preemptible()))
22151                 return;
22152 +       if (!preemptible_lazy())
22153 +               return;
22155         preempt_schedule_common();
22157 @@ -3323,6 +3545,8 @@
22159         if (likely(!preemptible()))
22160                 return;
22161 +       if (!preemptible_lazy())
22162 +               return;
22164         do {
22165                 preempt_disable_notrace();
22166 @@ -3332,7 +3556,16 @@
22167                  * an infinite recursion.
22168                  */
22169                 prev_ctx = exception_enter();
22170 +               /*
22171 +                * The add/subtract must not be traced by the function
22172 +                * tracer. But we still want to account for the
22173 +                * preempt off latency tracer. Since the _notrace versions
22174 +                * of add/subtract skip the accounting for the latency tracer
22175 +                * we must force it manually.
22176 +                */
22177 +               start_critical_timings();
22178                 __schedule(true);
22179 +               stop_critical_timings();
22180                 exception_exit(prev_ctx);
22182                 preempt_enable_no_resched_notrace();
22183 @@ -4676,6 +4909,7 @@
22185  EXPORT_SYMBOL(__cond_resched_lock);
22187 +#ifndef CONFIG_PREEMPT_RT_FULL
22188  int __sched __cond_resched_softirq(void)
22190         BUG_ON(!in_softirq());
22191 @@ -4689,6 +4923,7 @@
22192         return 0;
22194  EXPORT_SYMBOL(__cond_resched_softirq);
22195 +#endif
22197  /**
22198   * yield - yield the current processor to other threads.
22199 @@ -5055,7 +5290,9 @@
22201         /* Set the preempt count _outside_ the spinlocks! */
22202         init_idle_preempt_count(idle, cpu);
22204 +#ifdef CONFIG_HAVE_PREEMPT_LAZY
22205 +       task_thread_info(idle)->preempt_lazy_count = 0;
22206 +#endif
22207         /*
22208          * The idle tasks have their own, simple scheduling class:
22209          */
22210 @@ -5196,6 +5433,8 @@
22211  #endif /* CONFIG_NUMA_BALANCING */
22213  #ifdef CONFIG_HOTPLUG_CPU
22214 +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
22216  /*
22217   * Ensures that the idle task is using init_mm right before its cpu goes
22218   * offline.
22219 @@ -5210,7 +5449,11 @@
22220                 switch_mm(mm, &init_mm, current);
22221                 finish_arch_post_lock_switch();
22222         }
22223 -       mmdrop(mm);
22224 +       /*
22225 +        * Defer the cleanup to an alive cpu. On RT we can neither
22226 +        * call mmdrop() nor mmdrop_delayed() from here.
22227 +        */
22228 +       per_cpu(idle_last_mm, smp_processor_id()) = mm;
22231  /*
22232 @@ -5583,6 +5826,10 @@
22234         case CPU_DEAD:
22235                 calc_load_migrate(rq);
22236 +               if (per_cpu(idle_last_mm, cpu)) {
22237 +                       mmdrop(per_cpu(idle_last_mm, cpu));
22238 +                       per_cpu(idle_last_mm, cpu) = NULL;
22239 +               }
22240                 break;
22241  #endif
22242         }
22243 @@ -7566,7 +7813,7 @@
22244  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
22245  static inline int preempt_count_equals(int preempt_offset)
22247 -       int nested = preempt_count() + rcu_preempt_depth();
22248 +       int nested = preempt_count() + sched_rcu_preempt_depth();
22250         return (nested == preempt_offset);
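The migrate_disable()/migrate_enable() pair added to core.c above exists because preempt_disable() sections must not take sleeping locks, which is exactly what ordinary spinlocks become under PREEMPT_RT_FULL. The sketch below shows the intended usage pattern for per-CPU data: the task stays pinned to its CPU but remains preemptible, so it may still block on the (now sleeping) spinlock. The example_* names are invented for illustration; only migrate_disable()/migrate_enable() come from the patch.

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>

struct example_pcpu {
	spinlock_t lock;		/* a sleeping lock on PREEMPT_RT_FULL */
	unsigned long events;
};

/* each per-CPU lock is assumed to be set up with spin_lock_init() at init time */
static DEFINE_PER_CPU(struct example_pcpu, example_data);

static void example_account(void)
{
	struct example_pcpu *p;

	migrate_disable();		/* stay on this CPU, remain preemptible */
	p = this_cpu_ptr(&example_data);
	spin_lock(&p->lock);		/* may sleep on RT; that is fine here */
	p->events++;
	spin_unlock(&p->lock);
	migrate_enable();
}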
22252 diff -Nur linux-4.4.46.orig/kernel/sched/cpudeadline.c linux-4.4.46/kernel/sched/cpudeadline.c
22253 --- linux-4.4.46.orig/kernel/sched/cpudeadline.c        2017-02-01 08:31:11.000000000 +0100
22254 +++ linux-4.4.46/kernel/sched/cpudeadline.c     2017-02-03 17:18:10.931619212 +0100
22255 @@ -103,10 +103,10 @@
22256         const struct sched_dl_entity *dl_se = &p->dl;
22258         if (later_mask &&
22259 -           cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
22260 +           cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
22261                 best_cpu = cpumask_any(later_mask);
22262                 goto out;
22263 -       } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
22264 +       } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
22265                         dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
22266                 best_cpu = cpudl_maximum(cp);
22267                 if (later_mask)
22268 diff -Nur linux-4.4.46.orig/kernel/sched/cpupri.c linux-4.4.46/kernel/sched/cpupri.c
22269 --- linux-4.4.46.orig/kernel/sched/cpupri.c     2017-02-01 08:31:11.000000000 +0100
22270 +++ linux-4.4.46/kernel/sched/cpupri.c  2017-02-03 17:18:10.931619212 +0100
22271 @@ -103,11 +103,11 @@
22272                 if (skip)
22273                         continue;
22275 -               if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
22276 +               if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
22277                         continue;
22279                 if (lowest_mask) {
22280 -                       cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
22281 +                       cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
22283                         /*
22284                          * We have to ensure that we have at least one bit
22285 diff -Nur linux-4.4.46.orig/kernel/sched/cputime.c linux-4.4.46/kernel/sched/cputime.c
22286 --- linux-4.4.46.orig/kernel/sched/cputime.c    2017-02-01 08:31:11.000000000 +0100
22287 +++ linux-4.4.46/kernel/sched/cputime.c 2017-02-03 17:18:10.931619212 +0100
22288 @@ -685,7 +685,7 @@
22290         unsigned long long delta = vtime_delta(tsk);
22292 -       WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
22293 +       WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
22294         tsk->vtime_snap += delta;
22296         /* CHECKME: always safe to convert nsecs to cputime? */
22297 @@ -701,37 +701,37 @@
22299  void vtime_account_system(struct task_struct *tsk)
22301 -       write_seqlock(&tsk->vtime_seqlock);
22302 +       write_seqcount_begin(&tsk->vtime_seqcount);
22303         __vtime_account_system(tsk);
22304 -       write_sequnlock(&tsk->vtime_seqlock);
22305 +       write_seqcount_end(&tsk->vtime_seqcount);
22308  void vtime_gen_account_irq_exit(struct task_struct *tsk)
22310 -       write_seqlock(&tsk->vtime_seqlock);
22311 +       write_seqcount_begin(&tsk->vtime_seqcount);
22312         __vtime_account_system(tsk);
22313         if (context_tracking_in_user())
22314                 tsk->vtime_snap_whence = VTIME_USER;
22315 -       write_sequnlock(&tsk->vtime_seqlock);
22316 +       write_seqcount_end(&tsk->vtime_seqcount);
22319  void vtime_account_user(struct task_struct *tsk)
22321         cputime_t delta_cpu;
22323 -       write_seqlock(&tsk->vtime_seqlock);
22324 +       write_seqcount_begin(&tsk->vtime_seqcount);
22325         delta_cpu = get_vtime_delta(tsk);
22326         tsk->vtime_snap_whence = VTIME_SYS;
22327         account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
22328 -       write_sequnlock(&tsk->vtime_seqlock);
22329 +       write_seqcount_end(&tsk->vtime_seqcount);
22332  void vtime_user_enter(struct task_struct *tsk)
22334 -       write_seqlock(&tsk->vtime_seqlock);
22335 +       write_seqcount_begin(&tsk->vtime_seqcount);
22336         __vtime_account_system(tsk);
22337         tsk->vtime_snap_whence = VTIME_USER;
22338 -       write_sequnlock(&tsk->vtime_seqlock);
22339 +       write_seqcount_end(&tsk->vtime_seqcount);
22342  void vtime_guest_enter(struct task_struct *tsk)
22343 @@ -743,19 +743,19 @@
22344          * synchronization against the reader (task_gtime())
22345          * that can thus safely catch up with a tickless delta.
22346          */
22347 -       write_seqlock(&tsk->vtime_seqlock);
22348 +       write_seqcount_begin(&tsk->vtime_seqcount);
22349         __vtime_account_system(tsk);
22350         current->flags |= PF_VCPU;
22351 -       write_sequnlock(&tsk->vtime_seqlock);
22352 +       write_seqcount_end(&tsk->vtime_seqcount);
22354  EXPORT_SYMBOL_GPL(vtime_guest_enter);
22356  void vtime_guest_exit(struct task_struct *tsk)
22358 -       write_seqlock(&tsk->vtime_seqlock);
22359 +       write_seqcount_begin(&tsk->vtime_seqcount);
22360         __vtime_account_system(tsk);
22361         current->flags &= ~PF_VCPU;
22362 -       write_sequnlock(&tsk->vtime_seqlock);
22363 +       write_seqcount_end(&tsk->vtime_seqcount);
22365  EXPORT_SYMBOL_GPL(vtime_guest_exit);
22367 @@ -768,24 +768,26 @@
22369  void arch_vtime_task_switch(struct task_struct *prev)
22371 -       write_seqlock(&prev->vtime_seqlock);
22372 -       prev->vtime_snap_whence = VTIME_SLEEPING;
22373 -       write_sequnlock(&prev->vtime_seqlock);
22374 +       write_seqcount_begin(&prev->vtime_seqcount);
22375 +       prev->vtime_snap_whence = VTIME_INACTIVE;
22376 +       write_seqcount_end(&prev->vtime_seqcount);
22378 -       write_seqlock(&current->vtime_seqlock);
22379 +       write_seqcount_begin(&current->vtime_seqcount);
22380         current->vtime_snap_whence = VTIME_SYS;
22381         current->vtime_snap = sched_clock_cpu(smp_processor_id());
22382 -       write_sequnlock(&current->vtime_seqlock);
22383 +       write_seqcount_end(&current->vtime_seqcount);
22386  void vtime_init_idle(struct task_struct *t, int cpu)
22388         unsigned long flags;
22390 -       write_seqlock_irqsave(&t->vtime_seqlock, flags);
22391 +       local_irq_save(flags);
22392 +       write_seqcount_begin(&t->vtime_seqcount);
22393         t->vtime_snap_whence = VTIME_SYS;
22394         t->vtime_snap = sched_clock_cpu(cpu);
22395 -       write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
22396 +       write_seqcount_end(&t->vtime_seqcount);
22397 +       local_irq_restore(flags);
22400  cputime_t task_gtime(struct task_struct *t)
22401 @@ -797,13 +799,13 @@
22402                 return t->gtime;
22404         do {
22405 -               seq = read_seqbegin(&t->vtime_seqlock);
22406 +               seq = read_seqcount_begin(&t->vtime_seqcount);
22408                 gtime = t->gtime;
22409                 if (t->flags & PF_VCPU)
22410                         gtime += vtime_delta(t);
22412 -       } while (read_seqretry(&t->vtime_seqlock, seq));
22413 +       } while (read_seqcount_retry(&t->vtime_seqcount, seq));
22415         return gtime;
22417 @@ -826,7 +828,7 @@
22418                 *udelta = 0;
22419                 *sdelta = 0;
22421 -               seq = read_seqbegin(&t->vtime_seqlock);
22422 +               seq = read_seqcount_begin(&t->vtime_seqcount);
22424                 if (u_dst)
22425                         *u_dst = *u_src;
22426 @@ -834,7 +836,7 @@
22427                         *s_dst = *s_src;
22429                 /* Task is sleeping, nothing to add */
22430 -               if (t->vtime_snap_whence == VTIME_SLEEPING ||
22431 +               if (t->vtime_snap_whence == VTIME_INACTIVE ||
22432                     is_idle_task(t))
22433                         continue;
22435 @@ -850,7 +852,7 @@
22436                         if (t->vtime_snap_whence == VTIME_SYS)
22437                                 *sdelta = delta;
22438                 }
22439 -       } while (read_seqretry(&t->vtime_seqlock, seq));
22440 +       } while (read_seqcount_retry(&t->vtime_seqcount, seq));
22444 diff -Nur linux-4.4.46.orig/kernel/sched/deadline.c linux-4.4.46/kernel/sched/deadline.c
22445 --- linux-4.4.46.orig/kernel/sched/deadline.c   2017-02-01 08:31:11.000000000 +0100
22446 +++ linux-4.4.46/kernel/sched/deadline.c        2017-02-03 17:18:10.935619367 +0100
22447 @@ -134,7 +134,7 @@
22449         struct task_struct *p = dl_task_of(dl_se);
22451 -       if (p->nr_cpus_allowed > 1)
22452 +       if (tsk_nr_cpus_allowed(p) > 1)
22453                 dl_rq->dl_nr_migratory++;
22455         update_dl_migration(dl_rq);
22456 @@ -144,7 +144,7 @@
22458         struct task_struct *p = dl_task_of(dl_se);
22460 -       if (p->nr_cpus_allowed > 1)
22461 +       if (tsk_nr_cpus_allowed(p) > 1)
22462                 dl_rq->dl_nr_migratory--;
22464         update_dl_migration(dl_rq);
22465 @@ -697,6 +697,7 @@
22467         hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
22468         timer->function = dl_task_timer;
22469 +       timer->irqsafe = 1;
22472  static
22473 @@ -989,7 +990,7 @@
22475         enqueue_dl_entity(&p->dl, pi_se, flags);
22477 -       if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
22478 +       if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
22479                 enqueue_pushable_dl_task(rq, p);
22482 @@ -1067,9 +1068,9 @@
22483          * try to make it stay here, it might be important.
22484          */
22485         if (unlikely(dl_task(curr)) &&
22486 -           (curr->nr_cpus_allowed < 2 ||
22487 +           (tsk_nr_cpus_allowed(curr) < 2 ||
22488              !dl_entity_preempt(&p->dl, &curr->dl)) &&
22489 -           (p->nr_cpus_allowed > 1)) {
22490 +           (tsk_nr_cpus_allowed(p) > 1)) {
22491                 int target = find_later_rq(p);
22493                 if (target != -1 &&
22494 @@ -1090,7 +1091,7 @@
22495          * Current can't be migrated, useless to reschedule,
22496          * let's hope p can move out.
22497          */
22498 -       if (rq->curr->nr_cpus_allowed == 1 ||
22499 +       if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
22500             cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
22501                 return;
22503 @@ -1098,7 +1099,7 @@
22504          * p is migratable, so let's not schedule it and
22505          * see if it is pushed or pulled somewhere else.
22506          */
22507 -       if (p->nr_cpus_allowed != 1 &&
22508 +       if (tsk_nr_cpus_allowed(p) != 1 &&
22509             cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
22510                 return;
22512 @@ -1212,7 +1213,7 @@
22514         update_curr_dl(rq);
22516 -       if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
22517 +       if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
22518                 enqueue_pushable_dl_task(rq, p);
22521 @@ -1335,7 +1336,7 @@
22522         if (unlikely(!later_mask))
22523                 return -1;
22525 -       if (task->nr_cpus_allowed == 1)
22526 +       if (tsk_nr_cpus_allowed(task) == 1)
22527                 return -1;
22529         /*
22530 @@ -1441,7 +1442,7 @@
22531                 if (double_lock_balance(rq, later_rq)) {
22532                         if (unlikely(task_rq(task) != rq ||
22533                                      !cpumask_test_cpu(later_rq->cpu,
22534 -                                                      &task->cpus_allowed) ||
22535 +                                                      tsk_cpus_allowed(task)) ||
22536                                      task_running(rq, task) ||
22537                                      !task_on_rq_queued(task))) {
22538                                 double_unlock_balance(rq, later_rq);
22539 @@ -1480,7 +1481,7 @@
22541         BUG_ON(rq->cpu != task_cpu(p));
22542         BUG_ON(task_current(rq, p));
22543 -       BUG_ON(p->nr_cpus_allowed <= 1);
22544 +       BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
22546         BUG_ON(!task_on_rq_queued(p));
22547         BUG_ON(!dl_task(p));
22548 @@ -1519,7 +1520,7 @@
22549          */
22550         if (dl_task(rq->curr) &&
22551             dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
22552 -           rq->curr->nr_cpus_allowed > 1) {
22553 +           tsk_nr_cpus_allowed(rq->curr) > 1) {
22554                 resched_curr(rq);
22555                 return 0;
22556         }
22557 @@ -1666,9 +1667,9 @@
22559         if (!task_running(rq, p) &&
22560             !test_tsk_need_resched(rq->curr) &&
22561 -           p->nr_cpus_allowed > 1 &&
22562 +           tsk_nr_cpus_allowed(p) > 1 &&
22563             dl_task(rq->curr) &&
22564 -           (rq->curr->nr_cpus_allowed < 2 ||
22565 +           (tsk_nr_cpus_allowed(rq->curr) < 2 ||
22566              !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
22567                 push_dl_tasks(rq);
22568         }
22569 @@ -1769,7 +1770,7 @@
22571         if (task_on_rq_queued(p) && rq->curr != p) {
22572  #ifdef CONFIG_SMP
22573 -               if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
22574 +               if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
22575                         queue_push_tasks(rq);
22576  #else
22577                 if (dl_task(rq->curr))
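deadline.c (and rt.c further below) systematically replace p->cpus_allowed / p->nr_cpus_allowed with tsk_cpus_allowed(p) / tsk_nr_cpus_allowed(p). The point is that a migrate-disabled task must be treated as if its affinity were exactly its current CPU, otherwise the push/pull logic could try to move it. The helpers themselves are defined elsewhere in this series; the sketch below only approximates their shape, and __migrate_disabled() is assumed to be the accessor for p->migrate_disable.

/* Approximate shape of the helpers as added to <linux/sched.h> by this
 * series; not part of the hunks above. */
static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
{
	if (__migrate_disabled(p))
		return cpumask_of(task_cpu(p));	/* pinned: only this CPU */
	return &p->cpus_allowed;
}

static inline int tsk_nr_cpus_allowed(struct task_struct *p)
{
	if (__migrate_disabled(p))
		return 1;
	return p->nr_cpus_allowed;
}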
22578 diff -Nur linux-4.4.46.orig/kernel/sched/debug.c linux-4.4.46/kernel/sched/debug.c
22579 --- linux-4.4.46.orig/kernel/sched/debug.c      2017-02-01 08:31:11.000000000 +0100
22580 +++ linux-4.4.46/kernel/sched/debug.c   2017-02-03 17:18:10.935619367 +0100
22581 @@ -251,6 +251,9 @@
22582         P(rt_throttled);
22583         PN(rt_time);
22584         PN(rt_runtime);
22585 +#ifdef CONFIG_SMP
22586 +       P(rt_nr_migratory);
22587 +#endif
22589  #undef PN
22590  #undef P
22591 @@ -635,6 +638,10 @@
22592  #endif
22593         P(policy);
22594         P(prio);
22595 +#ifdef CONFIG_PREEMPT_RT_FULL
22596 +       P(migrate_disable);
22597 +#endif
22598 +       P(nr_cpus_allowed);
22599  #undef PN
22600  #undef __PN
22601  #undef P
22602 diff -Nur linux-4.4.46.orig/kernel/sched/fair.c linux-4.4.46/kernel/sched/fair.c
22603 --- linux-4.4.46.orig/kernel/sched/fair.c       2017-02-01 08:31:11.000000000 +0100
22604 +++ linux-4.4.46/kernel/sched/fair.c    2017-02-03 17:18:10.935619367 +0100
22605 @@ -3166,7 +3166,7 @@
22606         ideal_runtime = sched_slice(cfs_rq, curr);
22607         delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
22608         if (delta_exec > ideal_runtime) {
22609 -               resched_curr(rq_of(cfs_rq));
22610 +               resched_curr_lazy(rq_of(cfs_rq));
22611                 /*
22612                  * The current task ran long enough, ensure it doesn't get
22613                  * re-elected due to buddy favours.
22614 @@ -3190,7 +3190,7 @@
22615                 return;
22617         if (delta > ideal_runtime)
22618 -               resched_curr(rq_of(cfs_rq));
22619 +               resched_curr_lazy(rq_of(cfs_rq));
22622  static void
22623 @@ -3330,7 +3330,7 @@
22624          * validating it and just reschedule.
22625          */
22626         if (queued) {
22627 -               resched_curr(rq_of(cfs_rq));
22628 +               resched_curr_lazy(rq_of(cfs_rq));
22629                 return;
22630         }
22631         /*
22632 @@ -3512,7 +3512,7 @@
22633          * hierarchy can be throttled
22634          */
22635         if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
22636 -               resched_curr(rq_of(cfs_rq));
22637 +               resched_curr_lazy(rq_of(cfs_rq));
22640  static __always_inline
22641 @@ -4124,7 +4124,7 @@
22643                 if (delta < 0) {
22644                         if (rq->curr == p)
22645 -                               resched_curr(rq);
22646 +                               resched_curr_lazy(rq);
22647                         return;
22648                 }
22649                 hrtick_start(rq, delta);
22650 @@ -5213,7 +5213,7 @@
22651         return;
22653  preempt:
22654 -       resched_curr(rq);
22655 +       resched_curr_lazy(rq);
22656         /*
22657          * Only set the backward buddy when the current task is still
22658          * on the rq. This can happen when a wakeup gets interleaved
22659 @@ -7964,7 +7964,7 @@
22660                  * 'current' within the tree based on its new key value.
22661                  */
22662                 swap(curr->vruntime, se->vruntime);
22663 -               resched_curr(rq);
22664 +               resched_curr_lazy(rq);
22665         }
22667         se->vruntime -= cfs_rq->min_vruntime;
22668 @@ -7989,7 +7989,7 @@
22669          */
22670         if (rq->curr == p) {
22671                 if (p->prio > oldprio)
22672 -                       resched_curr(rq);
22673 +                       resched_curr_lazy(rq);
22674         } else
22675                 check_preempt_curr(rq, p, 0);
22677 diff -Nur linux-4.4.46.orig/kernel/sched/features.h linux-4.4.46/kernel/sched/features.h
22678 --- linux-4.4.46.orig/kernel/sched/features.h   2017-02-01 08:31:11.000000000 +0100
22679 +++ linux-4.4.46/kernel/sched/features.h        2017-02-03 17:18:10.935619367 +0100
22680 @@ -45,11 +45,19 @@
22681   */
22682  SCHED_FEAT(NONTASK_CAPACITY, true)
22684 +#ifdef CONFIG_PREEMPT_RT_FULL
22685 +SCHED_FEAT(TTWU_QUEUE, false)
22686 +# ifdef CONFIG_PREEMPT_LAZY
22687 +SCHED_FEAT(PREEMPT_LAZY, true)
22688 +# endif
22689 +#else
22691  /*
22692   * Queue remote wakeups on the target CPU and process them
22693   * using the scheduler IPI. Reduces rq->lock contention/bounces.
22694   */
22695  SCHED_FEAT(TTWU_QUEUE, true)
22696 +#endif
22698  #ifdef HAVE_RT_PUSH_IPI
22699  /*
22700 diff -Nur linux-4.4.46.orig/kernel/sched/Makefile linux-4.4.46/kernel/sched/Makefile
22701 --- linux-4.4.46.orig/kernel/sched/Makefile     2017-02-01 08:31:11.000000000 +0100
22702 +++ linux-4.4.46/kernel/sched/Makefile  2017-02-03 17:18:10.931619212 +0100
22703 @@ -13,7 +13,7 @@
22705  obj-y += core.o loadavg.o clock.o cputime.o
22706  obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
22707 -obj-y += wait.o completion.o idle.o
22708 +obj-y += wait.o swait.o swork.o completion.o idle.o
22709  obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
22710  obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
22711  obj-$(CONFIG_SCHEDSTATS) += stats.o
22712 diff -Nur linux-4.4.46.orig/kernel/sched/rt.c linux-4.4.46/kernel/sched/rt.c
22713 --- linux-4.4.46.orig/kernel/sched/rt.c 2017-02-01 08:31:11.000000000 +0100
22714 +++ linux-4.4.46/kernel/sched/rt.c      2017-02-03 17:18:10.935619367 +0100
22715 @@ -47,6 +47,7 @@
22717         hrtimer_init(&rt_b->rt_period_timer,
22718                         CLOCK_MONOTONIC, HRTIMER_MODE_REL);
22719 +       rt_b->rt_period_timer.irqsafe = 1;
22720         rt_b->rt_period_timer.function = sched_rt_period_timer;
22723 @@ -93,6 +94,7 @@
22724         rt_rq->push_cpu = nr_cpu_ids;
22725         raw_spin_lock_init(&rt_rq->push_lock);
22726         init_irq_work(&rt_rq->push_work, push_irq_work_func);
22727 +       rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
22728  #endif
22729  #endif /* CONFIG_SMP */
22730         /* We start is dequeued state, because no RT tasks are queued */
22731 @@ -326,7 +328,7 @@
22732         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
22734         rt_rq->rt_nr_total++;
22735 -       if (p->nr_cpus_allowed > 1)
22736 +       if (tsk_nr_cpus_allowed(p) > 1)
22737                 rt_rq->rt_nr_migratory++;
22739         update_rt_migration(rt_rq);
22740 @@ -343,7 +345,7 @@
22741         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
22743         rt_rq->rt_nr_total--;
22744 -       if (p->nr_cpus_allowed > 1)
22745 +       if (tsk_nr_cpus_allowed(p) > 1)
22746                 rt_rq->rt_nr_migratory--;
22748         update_rt_migration(rt_rq);
22749 @@ -1262,7 +1264,7 @@
22751         enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
22753 -       if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
22754 +       if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
22755                 enqueue_pushable_task(rq, p);
22758 @@ -1351,7 +1353,7 @@
22759          * will have to sort it out.
22760          */
22761         if (curr && unlikely(rt_task(curr)) &&
22762 -           (curr->nr_cpus_allowed < 2 ||
22763 +           (tsk_nr_cpus_allowed(curr) < 2 ||
22764              curr->prio <= p->prio)) {
22765                 int target = find_lowest_rq(p);
22767 @@ -1375,7 +1377,7 @@
22768          * Current can't be migrated, useless to reschedule,
22769          * let's hope p can move out.
22770          */
22771 -       if (rq->curr->nr_cpus_allowed == 1 ||
22772 +       if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
22773             !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
22774                 return;
22776 @@ -1383,7 +1385,7 @@
22777          * p is migratable, so let's not schedule it and
22778          * see if it is pushed or pulled somewhere else.
22779          */
22780 -       if (p->nr_cpus_allowed != 1
22781 +       if (tsk_nr_cpus_allowed(p) != 1
22782             && cpupri_find(&rq->rd->cpupri, p, NULL))
22783                 return;
22785 @@ -1517,7 +1519,7 @@
22786          * The previous task needs to be made eligible for pushing
22787          * if it is still active
22788          */
22789 -       if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
22790 +       if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
22791                 enqueue_pushable_task(rq, p);
22794 @@ -1567,7 +1569,7 @@
22795         if (unlikely(!lowest_mask))
22796                 return -1;
22798 -       if (task->nr_cpus_allowed == 1)
22799 +       if (tsk_nr_cpus_allowed(task) == 1)
22800                 return -1; /* No other targets possible */
22802         if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
22803 @@ -1699,7 +1701,7 @@
22805         BUG_ON(rq->cpu != task_cpu(p));
22806         BUG_ON(task_current(rq, p));
22807 -       BUG_ON(p->nr_cpus_allowed <= 1);
22808 +       BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
22810         BUG_ON(!task_on_rq_queued(p));
22811         BUG_ON(!rt_task(p));
22812 @@ -2059,9 +2061,9 @@
22814         if (!task_running(rq, p) &&
22815             !test_tsk_need_resched(rq->curr) &&
22816 -           p->nr_cpus_allowed > 1 &&
22817 +           tsk_nr_cpus_allowed(p) > 1 &&
22818             (dl_task(rq->curr) || rt_task(rq->curr)) &&
22819 -           (rq->curr->nr_cpus_allowed < 2 ||
22820 +           (tsk_nr_cpus_allowed(rq->curr) < 2 ||
22821              rq->curr->prio <= p->prio))
22822                 push_rt_tasks(rq);
22824 @@ -2134,7 +2136,7 @@
22825          */
22826         if (task_on_rq_queued(p) && rq->curr != p) {
22827  #ifdef CONFIG_SMP
22828 -               if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
22829 +               if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
22830                         queue_push_tasks(rq);
22831  #else
22832                 if (p->prio < rq->curr->prio)
22833 diff -Nur linux-4.4.46.orig/kernel/sched/sched.h linux-4.4.46/kernel/sched/sched.h
22834 --- linux-4.4.46.orig/kernel/sched/sched.h      2017-02-01 08:31:11.000000000 +0100
22835 +++ linux-4.4.46/kernel/sched/sched.h   2017-02-03 17:18:10.935619367 +0100
22836 @@ -1100,6 +1100,7 @@
22837  #define WF_SYNC                0x01            /* waker goes to sleep after wakeup */
22838  #define WF_FORK                0x02            /* child wakeup after fork */
22839  #define WF_MIGRATED    0x4             /* internal use, task got migrated */
22840 +#define WF_LOCK_SLEEPER        0x08            /* wakeup spinlock "sleeper" */
22842  /*
22843   * To aid in avoiding the subversion of "niceness" due to uneven distribution
22844 @@ -1299,6 +1300,15 @@
22845  extern void resched_curr(struct rq *rq);
22846  extern void resched_cpu(int cpu);
22848 +#ifdef CONFIG_PREEMPT_LAZY
22849 +extern void resched_curr_lazy(struct rq *rq);
22850 +#else
22851 +static inline void resched_curr_lazy(struct rq *rq)
22853 +       resched_curr(rq);
22855 +#endif
22857  extern struct rt_bandwidth def_rt_bandwidth;
22858  extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
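sched.h above declares resched_curr_lazy() with a fallback to resched_curr() when CONFIG_PREEMPT_LAZY is off. The lazy variant itself lives in core.c elsewhere in this series; the simplified sketch below only conveys the assumed idea: a lazy resched request marks TIF_NEED_RESCHED_LAZY and does not preempt while preempt_lazy_count is non-zero, whereas a real TIF_NEED_RESCHED (set for RT tasks) still preempts immediately (see preemptible_lazy() in the core.c hunks above).

/* Simplified sketch, not the series' exact implementation; uses the
 * scheduler-internal struct rq and the *_need_resched_lazy() helpers the
 * series adds alongside clear_tsk_need_resched_lazy(). */
static void sketch_resched_curr_lazy(struct rq *rq)
{
	struct task_struct *curr = rq->curr;

	if (!sched_feat(PREEMPT_LAZY)) {
		resched_curr(rq);		/* behave exactly as before */
		return;
	}
	if (test_tsk_need_resched(curr))
		return;				/* hard request already pending */
	if (test_tsk_need_resched_lazy(curr))
		return;				/* already marked lazily */
	set_tsk_need_resched_lazy(curr);	/* deferred preemption request */
	/* the real code also kicks remote CPUs; omitted here */
}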
22860 diff -Nur linux-4.4.46.orig/kernel/sched/swait.c linux-4.4.46/kernel/sched/swait.c
22861 --- linux-4.4.46.orig/kernel/sched/swait.c      1970-01-01 01:00:00.000000000 +0100
22862 +++ linux-4.4.46/kernel/sched/swait.c   2017-02-03 17:18:10.935619367 +0100
22863 @@ -0,0 +1,143 @@
22864 +#include <linux/sched.h>
22865 +#include <linux/swait.h>
22866 +#include <linux/suspend.h>
22868 +void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
22869 +                            struct lock_class_key *key)
22871 +       raw_spin_lock_init(&q->lock);
22872 +       lockdep_set_class_and_name(&q->lock, key, name);
22873 +       INIT_LIST_HEAD(&q->task_list);
22875 +EXPORT_SYMBOL(__init_swait_queue_head);
22878 + * The thing about the wake_up_state() return value; I think we can ignore it.
22879 + *
22880 + * If for some reason it would return 0, that means the previously waiting
22881 + * task is already running, so it will observe condition true (or has already).
22882 + */
22883 +void swake_up_locked(struct swait_queue_head *q)
22885 +       struct swait_queue *curr;
22887 +       if (list_empty(&q->task_list))
22888 +               return;
22890 +       curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
22891 +       wake_up_process(curr->task);
22892 +       list_del_init(&curr->task_list);
22894 +EXPORT_SYMBOL(swake_up_locked);
22896 +void swake_up_all_locked(struct swait_queue_head *q)
22898 +       struct swait_queue *curr;
22899 +       int wakes = 0;
22901 +       while (!list_empty(&q->task_list)) {
22903 +               curr = list_first_entry(&q->task_list, typeof(*curr),
22904 +                                       task_list);
22905 +               wake_up_process(curr->task);
22906 +               list_del_init(&curr->task_list);
22907 +               wakes++;
22908 +       }
22909 +       if (pm_in_action)
22910 +               return;
22911 +       WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
22913 +EXPORT_SYMBOL(swake_up_all_locked);
22915 +void swake_up(struct swait_queue_head *q)
22917 +       unsigned long flags;
22919 +       if (!swait_active(q))
22920 +               return;
22922 +       raw_spin_lock_irqsave(&q->lock, flags);
22923 +       swake_up_locked(q);
22924 +       raw_spin_unlock_irqrestore(&q->lock, flags);
22926 +EXPORT_SYMBOL(swake_up);
22929 + * Does not allow usage with IRQs disabled, since we must be able to
22930 + * release IRQs to guarantee bounded hold time.
22931 + */
22932 +void swake_up_all(struct swait_queue_head *q)
22934 +       struct swait_queue *curr;
22935 +       LIST_HEAD(tmp);
22937 +       if (!swait_active(q))
22938 +               return;
22940 +       raw_spin_lock_irq(&q->lock);
22941 +       list_splice_init(&q->task_list, &tmp);
22942 +       while (!list_empty(&tmp)) {
22943 +               curr = list_first_entry(&tmp, typeof(*curr), task_list);
22945 +               wake_up_state(curr->task, TASK_NORMAL);
22946 +               list_del_init(&curr->task_list);
22948 +               if (list_empty(&tmp))
22949 +                       break;
22951 +               raw_spin_unlock_irq(&q->lock);
22952 +               raw_spin_lock_irq(&q->lock);
22953 +       }
22954 +       raw_spin_unlock_irq(&q->lock);
22956 +EXPORT_SYMBOL(swake_up_all);
22958 +void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
22960 +       wait->task = current;
22961 +       if (list_empty(&wait->task_list))
22962 +               list_add(&wait->task_list, &q->task_list);
22965 +void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
22967 +       unsigned long flags;
22969 +       raw_spin_lock_irqsave(&q->lock, flags);
22970 +       __prepare_to_swait(q, wait);
22971 +       set_current_state(state);
22972 +       raw_spin_unlock_irqrestore(&q->lock, flags);
22974 +EXPORT_SYMBOL(prepare_to_swait);
22976 +long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
22978 +       if (signal_pending_state(state, current))
22979 +               return -ERESTARTSYS;
22981 +       prepare_to_swait(q, wait, state);
22983 +       return 0;
22985 +EXPORT_SYMBOL(prepare_to_swait_event);
22987 +void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
22989 +       __set_current_state(TASK_RUNNING);
22990 +       if (!list_empty(&wait->task_list))
22991 +               list_del_init(&wait->task_list);
22994 +void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
22996 +       unsigned long flags;
22998 +       __set_current_state(TASK_RUNNING);
23000 +       if (!list_empty_careful(&wait->task_list)) {
23001 +               raw_spin_lock_irqsave(&q->lock, flags);
23002 +               list_del_init(&wait->task_list);
23003 +               raw_spin_unlock_irqrestore(&q->lock, flags);
23004 +       }
23006 +EXPORT_SYMBOL(finish_swait);
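kernel/sched/swait.c above adds the "simple" waitqueues the rest of this series builds on: every operation nests under a raw spinlock and swake_up() wakes at most one waiter, so the wakeup path stays usable from hard-irq context on RT. A minimal usage sketch follows; the demo_* names and the ready flag are invented, only the swait API comes from the new file.

#include <linux/compiler.h>
#include <linux/swait.h>

static struct swait_queue_head demo_wq;
static bool demo_ready;

static void demo_setup(void)
{
	init_swait_queue_head(&demo_wq);
}

/* consumer (e.g. a kthread): sleeps interruptibly until the condition holds */
static int demo_wait(void)
{
	return swait_event_interruptible(demo_wq, READ_ONCE(demo_ready));
}

/* producer: may run from hard irq context */
static void demo_signal(void)
{
	WRITE_ONCE(demo_ready, true);
	swake_up(&demo_wq);
}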
23007 diff -Nur linux-4.4.46.orig/kernel/sched/swork.c linux-4.4.46/kernel/sched/swork.c
23008 --- linux-4.4.46.orig/kernel/sched/swork.c      1970-01-01 01:00:00.000000000 +0100
23009 +++ linux-4.4.46/kernel/sched/swork.c   2017-02-03 17:18:10.935619367 +0100
23010 @@ -0,0 +1,173 @@
23012 + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
23013 + *
23014 + * Provides a PREEMPT_RT_FULL-safe framework for enqueuing callbacks
23015 + * from irq context. The callbacks are executed in kthread context.
23016 + */
23018 +#include <linux/swait.h>
23019 +#include <linux/swork.h>
23020 +#include <linux/kthread.h>
23021 +#include <linux/slab.h>
23022 +#include <linux/spinlock.h>
23023 +#include <linux/export.h>
23025 +#define SWORK_EVENT_PENDING     (1 << 0)
23027 +static DEFINE_MUTEX(worker_mutex);
23028 +static struct sworker *glob_worker;
23030 +struct sworker {
23031 +       struct list_head events;
23032 +       struct swait_queue_head wq;
23034 +       raw_spinlock_t lock;
23036 +       struct task_struct *task;
23037 +       int refs;
23040 +static bool swork_readable(struct sworker *worker)
23042 +       bool r;
23044 +       if (kthread_should_stop())
23045 +               return true;
23047 +       raw_spin_lock_irq(&worker->lock);
23048 +       r = !list_empty(&worker->events);
23049 +       raw_spin_unlock_irq(&worker->lock);
23051 +       return r;
23054 +static int swork_kthread(void *arg)
23056 +       struct sworker *worker = arg;
23058 +       for (;;) {
23059 +               swait_event_interruptible(worker->wq,
23060 +                                       swork_readable(worker));
23061 +               if (kthread_should_stop())
23062 +                       break;
23064 +               raw_spin_lock_irq(&worker->lock);
23065 +               while (!list_empty(&worker->events)) {
23066 +                       struct swork_event *sev;
23068 +                       sev = list_first_entry(&worker->events,
23069 +                                       struct swork_event, item);
23070 +                       list_del(&sev->item);
23071 +                       raw_spin_unlock_irq(&worker->lock);
23073 +                       WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
23074 +                                                        &sev->flags));
23075 +                       sev->func(sev);
23076 +                       raw_spin_lock_irq(&worker->lock);
23077 +               }
23078 +               raw_spin_unlock_irq(&worker->lock);
23079 +       }
23080 +       return 0;
23083 +static struct sworker *swork_create(void)
23085 +       struct sworker *worker;
23087 +       worker = kzalloc(sizeof(*worker), GFP_KERNEL);
23088 +       if (!worker)
23089 +               return ERR_PTR(-ENOMEM);
23091 +       INIT_LIST_HEAD(&worker->events);
23092 +       raw_spin_lock_init(&worker->lock);
23093 +       init_swait_queue_head(&worker->wq);
23095 +       worker->task = kthread_run(swork_kthread, worker, "kswork");
23096 +       if (IS_ERR(worker->task)) {
23097 +               kfree(worker);
23098 +               return ERR_PTR(-ENOMEM);
23099 +       }
23101 +       return worker;
23104 +static void swork_destroy(struct sworker *worker)
23106 +       kthread_stop(worker->task);
23108 +       WARN_ON(!list_empty(&worker->events));
23109 +       kfree(worker);
23112 +/**
23113 + * swork_queue - queue swork
23114 + *
23115 + * Returns %false if @sev was already on a queue, %true otherwise.
23116 + *
23117 + * The work is queued and processed on a random CPU
23118 + */
23119 +bool swork_queue(struct swork_event *sev)
23121 +       unsigned long flags;
23123 +       if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
23124 +               return false;
23126 +       raw_spin_lock_irqsave(&glob_worker->lock, flags);
23127 +       list_add_tail(&sev->item, &glob_worker->events);
23128 +       raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
23130 +       swake_up(&glob_worker->wq);
23131 +       return true;
23133 +EXPORT_SYMBOL_GPL(swork_queue);
23135 +/**
23136 + * swork_get - get an instance of the sworker
23137 + *
23138 + * Returns a negative error code if the initialization of the worker
23139 + * failed, %0 otherwise.
23140 + *
23141 + */
23142 +int swork_get(void)
23144 +       struct sworker *worker;
23146 +       mutex_lock(&worker_mutex);
23147 +       if (!glob_worker) {
23148 +               worker = swork_create();
23149 +               if (IS_ERR(worker)) {
23150 +                       mutex_unlock(&worker_mutex);
23151 +                       return -ENOMEM;
23152 +               }
23154 +               glob_worker = worker;
23155 +       }
23157 +       glob_worker->refs++;
23158 +       mutex_unlock(&worker_mutex);
23160 +       return 0;
23162 +EXPORT_SYMBOL_GPL(swork_get);
23164 +/**
23165 + * swork_put - puts an instance of the sworker
23166 + *
23167 + * Will destroy the sworker thread. This function must not be called until all
23168 + * queued events have been completed.
23169 + */
23170 +void swork_put(void)
23172 +       mutex_lock(&worker_mutex);
23174 +       glob_worker->refs--;
23175 +       if (glob_worker->refs > 0)
23176 +               goto out;
23178 +       swork_destroy(glob_worker);
23179 +       glob_worker = NULL;
23180 +out:
23181 +       mutex_unlock(&worker_mutex);
23183 +EXPORT_SYMBOL_GPL(swork_put);
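The swork framework above gives irq context a way to hand callbacks to the preemptible "kswork" kthread, which is what PREEMPT_RT_FULL needs when the deferred work would take sleeping locks. A hedged usage sketch follows: the demo_* names are invented, and <linux/swork.h> is assumed to expose struct swork_event with the .func, .flags and .item members that swork_queue() and swork_kthread() manipulate above.

#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/swork.h>

static void demo_swork_fn(struct swork_event *sev)
{
	/* runs in the kswork kthread, fully preemptible, may sleep */
	pr_info("deferred work executed\n");
}

static struct swork_event demo_event = {
	.func = demo_swork_fn,
};

/* would be registered with request_irq() during probe */
static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	/* hard-irq safe: swork_queue() only takes a raw lock and swake_up() */
	swork_queue(&demo_event);
	return IRQ_HANDLED;
}

static int __init demo_init(void)
{
	return swork_get();	/* creates the kswork thread on first use */
}

static void __exit demo_exit(void)
{
	swork_put();		/* last reference stops and frees the worker */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");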
23184 diff -Nur linux-4.4.46.orig/kernel/signal.c linux-4.4.46/kernel/signal.c
23185 --- linux-4.4.46.orig/kernel/signal.c   2017-02-01 08:31:11.000000000 +0100
23186 +++ linux-4.4.46/kernel/signal.c        2017-02-03 17:18:10.935619367 +0100
23187 @@ -14,6 +14,7 @@
23188  #include <linux/export.h>
23189  #include <linux/init.h>
23190  #include <linux/sched.h>
23191 +#include <linux/sched/rt.h>
23192  #include <linux/fs.h>
23193  #include <linux/tty.h>
23194  #include <linux/binfmts.h>
23195 @@ -352,13 +353,30 @@
23196         return false;
23199 +static inline struct sigqueue *get_task_cache(struct task_struct *t)
23201 +       struct sigqueue *q = t->sigqueue_cache;
23203 +       if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
23204 +               return NULL;
23205 +       return q;
23208 +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
23210 +       if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
23211 +               return 0;
23212 +       return 1;
23215  /*
23216   * allocate a new signal queue record
23217   * - this may be called without locks if and only if t == current, otherwise an
23218   *   appropriate lock must be held to stop the target task from exiting
23219   */
23220  static struct sigqueue *
23221 -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
23222 +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
23223 +                   int override_rlimit, int fromslab)
23225         struct sigqueue *q = NULL;
23226         struct user_struct *user;
23227 @@ -375,7 +393,10 @@
23228         if (override_rlimit ||
23229             atomic_read(&user->sigpending) <=
23230                         task_rlimit(t, RLIMIT_SIGPENDING)) {
23231 -               q = kmem_cache_alloc(sigqueue_cachep, flags);
23232 +               if (!fromslab)
23233 +                       q = get_task_cache(t);
23234 +               if (!q)
23235 +                       q = kmem_cache_alloc(sigqueue_cachep, flags);
23236         } else {
23237                 print_dropped_signal(sig);
23238         }
23239 @@ -392,6 +413,13 @@
23240         return q;
23243 +static struct sigqueue *
23244 +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
23245 +                int override_rlimit)
23247 +       return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
23250  static void __sigqueue_free(struct sigqueue *q)
23252         if (q->flags & SIGQUEUE_PREALLOC)
23253 @@ -401,6 +429,21 @@
23254         kmem_cache_free(sigqueue_cachep, q);
23257 +static void sigqueue_free_current(struct sigqueue *q)
23259 +       struct user_struct *up;
23261 +       if (q->flags & SIGQUEUE_PREALLOC)
23262 +               return;
23264 +       up = q->user;
23265 +       if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
23266 +               atomic_dec(&up->sigpending);
23267 +               free_uid(up);
23268 +       } else
23269 +                 __sigqueue_free(q);
23272  void flush_sigqueue(struct sigpending *queue)
23274         struct sigqueue *q;
23275 @@ -414,6 +457,21 @@
23278  /*
23279 + * Called from __exit_signal. Flush tsk->pending and
23280 + * tsk->sigqueue_cache
23281 + */
23282 +void flush_task_sigqueue(struct task_struct *tsk)
23284 +       struct sigqueue *q;
23286 +       flush_sigqueue(&tsk->pending);
23288 +       q = get_task_cache(tsk);
23289 +       if (q)
23290 +               kmem_cache_free(sigqueue_cachep, q);
23294   * Flush all pending signals for this kthread.
23295   */
23296  void flush_signals(struct task_struct *t)
23297 @@ -525,7 +583,7 @@
23298  still_pending:
23299                 list_del_init(&first->list);
23300                 copy_siginfo(info, &first->info);
23301 -               __sigqueue_free(first);
23302 +               sigqueue_free_current(first);
23303         } else {
23304                 /*
23305                  * Ok, it wasn't in the queue.  This must be
23306 @@ -560,6 +618,8 @@
23308         int signr;
23310 +       WARN_ON_ONCE(tsk != current);
23312         /* We only dequeue private signals from ourselves, we don't let
23313          * signalfd steal them
23314          */
23315 @@ -1156,8 +1216,8 @@
23316   * We don't want to have recursive SIGSEGV's etc, for example,
23317   * that is why we also clear SIGNAL_UNKILLABLE.
23318   */
23319 -int
23320 -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
23321 +static int
23322 +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
23324         unsigned long int flags;
23325         int ret, blocked, ignored;
23326 @@ -1182,6 +1242,39 @@
23327         return ret;
23330 +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
23333 + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
23334 + * since it can not enable preemption, and the signal code's spin_locks
23335 + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
23336 + * send the signal on exit of the trap.
23337 + */
23338 +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
23339 +       if (in_atomic()) {
23340 +               if (WARN_ON_ONCE(t != current))
23341 +                       return 0;
23342 +               if (WARN_ON_ONCE(t->forced_info.si_signo))
23343 +                       return 0;
23345 +               if (is_si_special(info)) {
23346 +                       WARN_ON_ONCE(info != SEND_SIG_PRIV);
23347 +                       t->forced_info.si_signo = sig;
23348 +                       t->forced_info.si_errno = 0;
23349 +                       t->forced_info.si_code = SI_KERNEL;
23350 +                       t->forced_info.si_pid = 0;
23351 +                       t->forced_info.si_uid = 0;
23352 +               } else {
23353 +                       t->forced_info = *info;
23354 +               }
23356 +               set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
23357 +               return 0;
23358 +       }
23359 +#endif
23360 +       return do_force_sig_info(sig, info, t);
23363  /*
23364   * Nuke all other threads in the group.
23365   */
23366 @@ -1216,12 +1309,12 @@
23367                  * Disable interrupts early to avoid deadlocks.
23368                  * See rcu_read_unlock() comment header for details.
23369                  */
23370 -               local_irq_save(*flags);
23371 +               local_irq_save_nort(*flags);
23372                 rcu_read_lock();
23373                 sighand = rcu_dereference(tsk->sighand);
23374                 if (unlikely(sighand == NULL)) {
23375                         rcu_read_unlock();
23376 -                       local_irq_restore(*flags);
23377 +                       local_irq_restore_nort(*flags);
23378                         break;
23379                 }
23380                 /*
23381 @@ -1242,7 +1335,7 @@
23382                 }
23383                 spin_unlock(&sighand->siglock);
23384                 rcu_read_unlock();
23385 -               local_irq_restore(*flags);
23386 +               local_irq_restore_nort(*flags);
23387         }
23389         return sighand;
23390 @@ -1485,7 +1578,8 @@
23391   */
23392  struct sigqueue *sigqueue_alloc(void)
23394 -       struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
23395 +       /* Preallocated sigqueue objects always from the slabcache ! */
23396 +       struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
23398         if (q)
23399                 q->flags |= SIGQUEUE_PREALLOC;
23400 @@ -1846,15 +1940,7 @@
23401                 if (gstop_done && ptrace_reparented(current))
23402                         do_notify_parent_cldstop(current, false, why);
23404 -               /*
23405 -                * Don't want to allow preemption here, because
23406 -                * sys_ptrace() needs this task to be inactive.
23407 -                *
23408 -                * XXX: implement read_unlock_no_resched().
23409 -                */
23410 -               preempt_disable();
23411                 read_unlock(&tasklist_lock);
23412 -               preempt_enable_no_resched();
23413                 freezable_schedule();
23414         } else {
23415                 /*
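The force_sig_info() change above only stores the pending info in current->forced_info and sets TIF_NOTIFY_RESUME when the trap context cannot take the (now sleeping) sighand lock. The consuming half lives in the architecture signal-return paths elsewhere in this series; the fragment below merely sketches the shape it is assumed to take there and is not part of the hunks shown above.

#include <linux/sched.h>
#include <linux/signal.h>

/* Sketch of the assumed arch-side consumer, run on return to user space
 * (e.g. from do_signal()). */
static void demo_deliver_delayed_signal(void)
{
#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
	if (unlikely(current->forced_info.si_signo)) {
		force_sig_info(current->forced_info.si_signo,
			       &current->forced_info, current);
		current->forced_info.si_signo = 0;
	}
#endif
}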
23416 diff -Nur linux-4.4.46.orig/kernel/softirq.c linux-4.4.46/kernel/softirq.c
23417 --- linux-4.4.46.orig/kernel/softirq.c  2017-02-01 08:31:11.000000000 +0100
23418 +++ linux-4.4.46/kernel/softirq.c       2017-02-03 17:18:10.935619367 +0100
23419 @@ -21,10 +21,12 @@
23420  #include <linux/freezer.h>
23421  #include <linux/kthread.h>
23422  #include <linux/rcupdate.h>
23423 +#include <linux/delay.h>
23424  #include <linux/ftrace.h>
23425  #include <linux/smp.h>
23426  #include <linux/smpboot.h>
23427  #include <linux/tick.h>
23428 +#include <linux/locallock.h>
23429  #include <linux/irq.h>
23431  #define CREATE_TRACE_POINTS
23432 @@ -56,12 +58,108 @@
23433  static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
23435  DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
23436 +#ifdef CONFIG_PREEMPT_RT_FULL
23437 +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
23438 +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
23439 +#endif
23441  const char * const softirq_to_name[NR_SOFTIRQS] = {
23442         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
23443         "TASKLET", "SCHED", "HRTIMER", "RCU"
23444  };
23446 +#ifdef CONFIG_NO_HZ_COMMON
23447 +# ifdef CONFIG_PREEMPT_RT_FULL
23449 +struct softirq_runner {
23450 +       struct task_struct *runner[NR_SOFTIRQS];
23453 +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
23455 +static inline void softirq_set_runner(unsigned int sirq)
23457 +       struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
23459 +       sr->runner[sirq] = current;
23462 +static inline void softirq_clr_runner(unsigned int sirq)
23464 +       struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
23466 +       sr->runner[sirq] = NULL;
23470 + * On preempt-rt a softirq running context might be blocked on a
23471 + * lock. There might be no other runnable task on this CPU because the
23472 + * lock owner runs on some other CPU. So we have to go into idle with
23473 + * the pending bit set. Therefore we need to check this, otherwise we
23474 + * warn about false positives which confuses users and defeats the
23475 + * whole purpose of this test.
23476 + *
23477 + * This code is called with interrupts disabled.
23478 + */
23479 +void softirq_check_pending_idle(void)
23481 +       static int rate_limit;
23482 +       struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
23483 +       u32 warnpending;
23484 +       int i;
23486 +       if (rate_limit >= 10)
23487 +               return;
23489 +       warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
23490 +       for (i = 0; i < NR_SOFTIRQS; i++) {
23491 +               struct task_struct *tsk = sr->runner[i];
23493 +               /*
23494 +                * The wakeup code in rtmutex.c wakes up the task
23495 +                * _before_ it sets pi_blocked_on to NULL under
23496 +                * tsk->pi_lock. So we need to check for both: state
23497 +                * and pi_blocked_on.
23498 +                */
23499 +               if (tsk) {
23500 +                       raw_spin_lock(&tsk->pi_lock);
23501 +                       if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
23502 +                               /* Clear all bits pending in that task */
23503 +                               warnpending &= ~(tsk->softirqs_raised);
23504 +                               warnpending &= ~(1 << i);
23505 +                       }
23506 +                       raw_spin_unlock(&tsk->pi_lock);
23507 +               }
23508 +       }
23510 +       if (warnpending) {
23511 +               printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
23512 +                      warnpending);
23513 +               rate_limit++;
23514 +       }
23516 +# else
23518 + * On !PREEMPT_RT we just printk rate limited:
23519 + */
23520 +void softirq_check_pending_idle(void)
23522 +       static int rate_limit;
23524 +       if (rate_limit < 10 &&
23525 +                       (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
23526 +               printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
23527 +                      local_softirq_pending());
23528 +               rate_limit++;
23529 +       }
23531 +# endif
23533 +#else /* !CONFIG_NO_HZ_COMMON */
23534 +static inline void softirq_set_runner(unsigned int sirq) { }
23535 +static inline void softirq_clr_runner(unsigned int sirq) { }
23536 +#endif
23538  /*
23539   * we cannot loop indefinitely here to avoid userspace starvation,
23540   * but we also don't want to introduce a worst case 1/HZ latency
23541 @@ -77,6 +175,79 @@
23542                 wake_up_process(tsk);
23545 +#ifdef CONFIG_PREEMPT_RT_FULL
23546 +static void wakeup_timer_softirqd(void)
23548 +       /* Interrupts are disabled: no need to stop preemption */
23549 +       struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
23551 +       if (tsk && tsk->state != TASK_RUNNING)
23552 +               wake_up_process(tsk);
23554 +#endif
23556 +static void handle_softirq(unsigned int vec_nr)
23558 +       struct softirq_action *h = softirq_vec + vec_nr;
23559 +       int prev_count;
23561 +       prev_count = preempt_count();
23563 +       kstat_incr_softirqs_this_cpu(vec_nr);
23565 +       trace_softirq_entry(vec_nr);
23566 +       h->action(h);
23567 +       trace_softirq_exit(vec_nr);
23568 +       if (unlikely(prev_count != preempt_count())) {
23569 +               pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
23570 +                      vec_nr, softirq_to_name[vec_nr], h->action,
23571 +                      prev_count, preempt_count());
23572 +               preempt_count_set(prev_count);
23573 +       }
23576 +#ifndef CONFIG_PREEMPT_RT_FULL
23577 +static inline int ksoftirqd_softirq_pending(void)
23579 +       return local_softirq_pending();
23582 +static void handle_pending_softirqs(u32 pending)
23584 +       struct softirq_action *h = softirq_vec;
23585 +       int softirq_bit;
23587 +       local_irq_enable();
23589 +       h = softirq_vec;
23591 +       while ((softirq_bit = ffs(pending))) {
23592 +               unsigned int vec_nr;
23594 +               h += softirq_bit - 1;
23595 +               vec_nr = h - softirq_vec;
23596 +               handle_softirq(vec_nr);
23598 +               h++;
23599 +               pending >>= softirq_bit;
23600 +       }
23602 +       rcu_bh_qs();
23603 +       local_irq_disable();
23606 +static void run_ksoftirqd(unsigned int cpu)
23608 +       local_irq_disable();
23609 +       if (ksoftirqd_softirq_pending()) {
23610 +               __do_softirq();
23611 +               local_irq_enable();
23612 +               cond_resched_rcu_qs();
23613 +               return;
23614 +       }
23615 +       local_irq_enable();
23618  /*
23619   * preempt_count and SOFTIRQ_OFFSET usage:
23620   * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
23621 @@ -116,9 +287,9 @@
23623         if (preempt_count() == cnt) {
23624  #ifdef CONFIG_DEBUG_PREEMPT
23625 -               current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
23626 +               current->preempt_disable_ip = get_lock_parent_ip();
23627  #endif
23628 -               trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
23629 +               trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
23630         }
23632  EXPORT_SYMBOL(__local_bh_disable_ip);
23633 @@ -232,10 +403,8 @@
23634         unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
23635         unsigned long old_flags = current->flags;
23636         int max_restart = MAX_SOFTIRQ_RESTART;
23637 -       struct softirq_action *h;
23638         bool in_hardirq;
23639         __u32 pending;
23640 -       int softirq_bit;
23642         /*
23643          * Mask out PF_MEMALLOC s current task context is borrowed for the
23644 @@ -254,36 +423,7 @@
23645         /* Reset the pending bitmask before enabling irqs */
23646         set_softirq_pending(0);
23648 -       local_irq_enable();
23650 -       h = softirq_vec;
23652 -       while ((softirq_bit = ffs(pending))) {
23653 -               unsigned int vec_nr;
23654 -               int prev_count;
23656 -               h += softirq_bit - 1;
23658 -               vec_nr = h - softirq_vec;
23659 -               prev_count = preempt_count();
23661 -               kstat_incr_softirqs_this_cpu(vec_nr);
23663 -               trace_softirq_entry(vec_nr);
23664 -               h->action(h);
23665 -               trace_softirq_exit(vec_nr);
23666 -               if (unlikely(prev_count != preempt_count())) {
23667 -                       pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
23668 -                              vec_nr, softirq_to_name[vec_nr], h->action,
23669 -                              prev_count, preempt_count());
23670 -                       preempt_count_set(prev_count);
23671 -               }
23672 -               h++;
23673 -               pending >>= softirq_bit;
23674 -       }
23676 -       rcu_bh_qs();
23677 -       local_irq_disable();
23678 +       handle_pending_softirqs(pending);
23680         pending = local_softirq_pending();
23681         if (pending) {
23682 @@ -320,6 +460,310 @@
23685  /*
23686 + * This function must run with irqs disabled!
23687 + */
23688 +void raise_softirq_irqoff(unsigned int nr)
23690 +       __raise_softirq_irqoff(nr);
23692 +       /*
23693 +        * If we're in an interrupt or softirq, we're done
23694 +        * (this also catches softirq-disabled code). We will
23695 +        * actually run the softirq once we return from
23696 +        * the irq or softirq.
23697 +        *
23698 +        * Otherwise we wake up ksoftirqd to make sure we
23699 +        * schedule the softirq soon.
23700 +        */
23701 +       if (!in_interrupt())
23702 +               wakeup_softirqd();
23705 +void __raise_softirq_irqoff(unsigned int nr)
23707 +       trace_softirq_raise(nr);
23708 +       or_softirq_pending(1UL << nr);
23711 +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
23712 +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
23713 +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
23715 +#else /* !PREEMPT_RT_FULL */
23718 + * On RT we serialize softirq execution with a cpu local lock per softirq
23719 + */
23720 +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
23722 +void __init softirq_early_init(void)
23724 +       int i;
23726 +       for (i = 0; i < NR_SOFTIRQS; i++)
23727 +               local_irq_lock_init(local_softirq_locks[i]);
23730 +static void lock_softirq(int which)
23732 +       local_lock(local_softirq_locks[which]);
23735 +static void unlock_softirq(int which)
23737 +       local_unlock(local_softirq_locks[which]);
23740 +static void do_single_softirq(int which)
23742 +       unsigned long old_flags = current->flags;
23744 +       current->flags &= ~PF_MEMALLOC;
23745 +       vtime_account_irq_enter(current);
23746 +       current->flags |= PF_IN_SOFTIRQ;
23747 +       lockdep_softirq_enter();
23748 +       local_irq_enable();
23749 +       handle_softirq(which);
23750 +       local_irq_disable();
23751 +       lockdep_softirq_exit();
23752 +       current->flags &= ~PF_IN_SOFTIRQ;
23753 +       vtime_account_irq_enter(current);
23754 +       tsk_restore_flags(current, old_flags, PF_MEMALLOC);
23758 + * Called with interrupts disabled. Process softirqs which were raised
23759 + * in current context (or on behalf of ksoftirqd).
23760 + */
23761 +static void do_current_softirqs(void)
23763 +       while (current->softirqs_raised) {
23764 +               int i = __ffs(current->softirqs_raised);
23765 +               unsigned int pending, mask = (1U << i);
23767 +               current->softirqs_raised &= ~mask;
23768 +               local_irq_enable();
23770 +               /*
23771 +                * If the lock is contended, we boost the owner to
23772 +                * process the softirq or leave the critical section
23773 +                * now.
23774 +                */
23775 +               lock_softirq(i);
23776 +               local_irq_disable();
23777 +               softirq_set_runner(i);
23778 +               /*
23779 +                * Check with the local_softirq_pending() bits,
23780 +                * whether we need to process this still or if someone
23781 +                * else took care of it.
23782 +                */
23783 +               pending = local_softirq_pending();
23784 +               if (pending & mask) {
23785 +                       set_softirq_pending(pending & ~mask);
23786 +                       do_single_softirq(i);
23787 +               }
23788 +               softirq_clr_runner(i);
23789 +               WARN_ON(current->softirq_nestcnt != 1);
23790 +               local_irq_enable();
23791 +               unlock_softirq(i);
23792 +               local_irq_disable();
23793 +       }
23796 +void __local_bh_disable(void)
23798 +       if (++current->softirq_nestcnt == 1)
23799 +               migrate_disable();
23801 +EXPORT_SYMBOL(__local_bh_disable);
23803 +void __local_bh_enable(void)
23805 +       if (WARN_ON(current->softirq_nestcnt == 0))
23806 +               return;
23808 +       local_irq_disable();
23809 +       if (current->softirq_nestcnt == 1 && current->softirqs_raised)
23810 +               do_current_softirqs();
23811 +       local_irq_enable();
23813 +       if (--current->softirq_nestcnt == 0)
23814 +               migrate_enable();
23816 +EXPORT_SYMBOL(__local_bh_enable);
23818 +void _local_bh_enable(void)
23820 +       if (WARN_ON(current->softirq_nestcnt == 0))
23821 +               return;
23822 +       if (--current->softirq_nestcnt == 0)
23823 +               migrate_enable();
23825 +EXPORT_SYMBOL(_local_bh_enable);
23827 +int in_serving_softirq(void)
23829 +       return current->flags & PF_IN_SOFTIRQ;
23831 +EXPORT_SYMBOL(in_serving_softirq);
23833 +/* Called with preemption disabled */
23834 +static void run_ksoftirqd(unsigned int cpu)
23836 +       local_irq_disable();
23837 +       current->softirq_nestcnt++;
23839 +       do_current_softirqs();
23840 +       current->softirq_nestcnt--;
23841 +       local_irq_enable();
23842 +       cond_resched_rcu_qs();
23846 + * Called from netif_rx_ni(). Preemption enabled, but migration
23847 + * disabled. So the cpu can't go away under us.
23848 + */
23849 +void thread_do_softirq(void)
23851 +       if (!in_serving_softirq() && current->softirqs_raised) {
23852 +               current->softirq_nestcnt++;
23853 +               do_current_softirqs();
23854 +               current->softirq_nestcnt--;
23855 +       }
23858 +static void do_raise_softirq_irqoff(unsigned int nr)
23860 +       unsigned int mask;
23862 +       mask = 1UL << nr;
23864 +       trace_softirq_raise(nr);
23865 +       or_softirq_pending(mask);
23867 +       /*
23868 +        * If we are not in a hard interrupt and inside a bh disabled
23869 +        * region, we simply raise the flag on current. local_bh_enable()
23870 +        * will make sure that the softirq is executed. Otherwise we
23871 +        * delegate it to ksoftirqd.
23872 +        */
23873 +       if (!in_irq() && current->softirq_nestcnt)
23874 +               current->softirqs_raised |= mask;
23875 +       else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
23876 +               return;
23878 +       if (mask & TIMER_SOFTIRQS)
23879 +               __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
23880 +       else
23881 +               __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
23884 +static void wakeup_proper_softirq(unsigned int nr)
23886 +       if ((1UL << nr) & TIMER_SOFTIRQS)
23887 +               wakeup_timer_softirqd();
23888 +       else
23889 +               wakeup_softirqd();
23893 +void __raise_softirq_irqoff(unsigned int nr)
23895 +       do_raise_softirq_irqoff(nr);
23896 +       if (!in_irq() && !current->softirq_nestcnt)
23897 +               wakeup_proper_softirq(nr);
23901 + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
23902 + */
23903 +void __raise_softirq_irqoff_ksoft(unsigned int nr)
23905 +       unsigned int mask;
23907 +       if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
23908 +                        !__this_cpu_read(ktimer_softirqd)))
23909 +               return;
23910 +       mask = 1UL << nr;
23912 +       trace_softirq_raise(nr);
23913 +       or_softirq_pending(mask);
23914 +       if (mask & TIMER_SOFTIRQS)
23915 +               __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
23916 +       else
23917 +               __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
23918 +       wakeup_proper_softirq(nr);
23922 + * This function must run with irqs disabled!
23923 + */
23924 +void raise_softirq_irqoff(unsigned int nr)
23926 +       do_raise_softirq_irqoff(nr);
23928 +       /*
23929 +        * If we're in a hard interrupt we let the irq return code deal
23930 +        * with the wakeup of ksoftirqd.
23931 +        */
23932 +       if (in_irq())
23933 +               return;
23934 +       /*
23935 +        * If we are in thread context but outside of a bh disabled
23936 +        * region, we need to wake ksoftirqd as well.
23937 +        *
23938 +        * CHECKME: Some of the places which do that could be wrapped
23939 +        * into local_bh_disable/enable pairs. Though it's unclear
23940 +        * whether this is worth the effort. To find those places just
23941 +        * raise a WARN() if the condition is met.
23942 +        */
23943 +       if (!current->softirq_nestcnt)
23944 +               wakeup_proper_softirq(nr);
23947 +static inline int ksoftirqd_softirq_pending(void)
23949 +       return current->softirqs_raised;
23952 +static inline void local_bh_disable_nort(void) { }
23953 +static inline void _local_bh_enable_nort(void) { }
23955 +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
23957 +       /* Take over all but timer pending softirqs when starting */
23958 +       local_irq_disable();
23959 +       current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
23960 +       local_irq_enable();
23963 +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
23965 +       struct sched_param param = { .sched_priority = 1 };
23967 +       sched_setscheduler(current, SCHED_FIFO, &param);
23969 +       /* Take over timer pending softirqs when starting */
23970 +       local_irq_disable();
23971 +       current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
23972 +       local_irq_enable();
23975 +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
23976 +                                                   bool online)
23978 +       struct sched_param param = { .sched_priority = 0 };
23980 +       sched_setscheduler(current, SCHED_NORMAL, &param);
23983 +static int ktimer_softirqd_should_run(unsigned int cpu)
23985 +       return current->softirqs_raised;
23988 +#endif /* PREEMPT_RT_FULL */
23990   * Enter an interrupt context.
23991   */
23992  void irq_enter(void)
23993 @@ -330,9 +774,9 @@
23994                  * Prevent raise_softirq from needlessly waking up ksoftirqd
23995                  * here, as softirq will be serviced on return from interrupt.
23996                  */
23997 -               local_bh_disable();
23998 +               local_bh_disable_nort();
23999                 tick_irq_enter();
24000 -               _local_bh_enable();
24001 +               _local_bh_enable_nort();
24002         }
24004         __irq_enter();
24005 @@ -340,6 +784,7 @@
24007  static inline void invoke_softirq(void)
24009 +#ifndef CONFIG_PREEMPT_RT_FULL
24010         if (!force_irqthreads) {
24011  #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
24012                 /*
24013 @@ -359,6 +804,18 @@
24014         } else {
24015                 wakeup_softirqd();
24016         }
24017 +#else /* PREEMPT_RT_FULL */
24018 +       unsigned long flags;
24020 +       local_irq_save(flags);
24021 +       if (__this_cpu_read(ksoftirqd) &&
24022 +                       __this_cpu_read(ksoftirqd)->softirqs_raised)
24023 +               wakeup_softirqd();
24024 +       if (__this_cpu_read(ktimer_softirqd) &&
24025 +                       __this_cpu_read(ktimer_softirqd)->softirqs_raised)
24026 +               wakeup_timer_softirqd();
24027 +       local_irq_restore(flags);
24028 +#endif
24031  static inline void tick_irq_exit(void)
24032 @@ -395,26 +852,6 @@
24033         trace_hardirq_exit(); /* must be last! */
24037 - * This function must run with irqs disabled!
24038 - */
24039 -inline void raise_softirq_irqoff(unsigned int nr)
24041 -       __raise_softirq_irqoff(nr);
24043 -       /*
24044 -        * If we're in an interrupt or softirq, we're done
24045 -        * (this also catches softirq-disabled code). We will
24046 -        * actually run the softirq once we return from
24047 -        * the irq or softirq.
24048 -        *
24049 -        * Otherwise we wake up ksoftirqd to make sure we
24050 -        * schedule the softirq soon.
24051 -        */
24052 -       if (!in_interrupt())
24053 -               wakeup_softirqd();
24056  void raise_softirq(unsigned int nr)
24058         unsigned long flags;
24059 @@ -424,12 +861,6 @@
24060         local_irq_restore(flags);
24063 -void __raise_softirq_irqoff(unsigned int nr)
24065 -       trace_softirq_raise(nr);
24066 -       or_softirq_pending(1UL << nr);
24069  void open_softirq(int nr, void (*action)(struct softirq_action *))
24071         softirq_vec[nr].action = action;
24072 @@ -446,15 +877,45 @@
24073  static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
24074  static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
24076 +static inline void
24077 +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
24079 +       if (tasklet_trylock(t)) {
24080 +again:
24081 +               /* We may have been preempted before tasklet_trylock
24082 +                * and __tasklet_action may have already run.
24083 +                * So double check the sched bit while the tasklet
24084 +                * is locked before adding it to the list.
24085 +                */
24086 +               if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
24087 +                       t->next = NULL;
24088 +                       *head->tail = t;
24089 +                       head->tail = &(t->next);
24090 +                       raise_softirq_irqoff(nr);
24091 +                       tasklet_unlock(t);
24092 +               } else {
24093 +                       /* This is subtle. If we hit the corner case above,
24094 +                        * it is possible that we get preempted right here,
24095 +                        * and another task has successfully called
24096 +                        * tasklet_schedule(), then this function, and
24097 +                        * failed on the trylock. Thus we must be sure
24098 +                        * before releasing the tasklet lock, that the
24099 +                        * SCHED_BIT is clear. Otherwise the tasklet
24100 +                        * may get its SCHED_BIT set, but not added to the
24101 +                        * list
24102 +                        */
24103 +                       if (!tasklet_tryunlock(t))
24104 +                               goto again;
24105 +               }
24106 +       }
24109  void __tasklet_schedule(struct tasklet_struct *t)
24111         unsigned long flags;
24113         local_irq_save(flags);
24114 -       t->next = NULL;
24115 -       *__this_cpu_read(tasklet_vec.tail) = t;
24116 -       __this_cpu_write(tasklet_vec.tail, &(t->next));
24117 -       raise_softirq_irqoff(TASKLET_SOFTIRQ);
24118 +       __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
24119         local_irq_restore(flags);
24121  EXPORT_SYMBOL(__tasklet_schedule);
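The SCHED/RUN hand-off that __tasklet_common_schedule() relies on can be hard to see in the diff. Below is a standalone C11 sketch of just the two state bits and the compare-and-swap rule; the demo_* helpers are illustrative simplifications of tasklet_trylock()/tasklet_tryunlock(), not the kernel's definitions.

#include <stdatomic.h>
#include <stdbool.h>

#define ST_SCHED (1u << 0)      /* tasklet is (about to be) queued */
#define ST_RUN   (1u << 1)      /* a runner currently owns the tasklet */

/* trylock: claim RUN; fails if some runner already owns the tasklet */
static bool demo_trylock(atomic_uint *state)
{
        return !(atomic_fetch_or(state, ST_RUN) & ST_RUN);
}

/* tryunlock: drop RUN only while SCHED is clear; if a concurrent
 * schedule set SCHED meanwhile, the CAS fails and the caller must
 * loop and requeue the tasklet before it may release the lock */
static bool demo_tryunlock(atomic_uint *state)
{
        unsigned int expected = ST_RUN;
        return atomic_compare_exchange_strong(state, &expected, 0);
}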
24122 @@ -464,10 +925,7 @@
24123         unsigned long flags;
24125         local_irq_save(flags);
24126 -       t->next = NULL;
24127 -       *__this_cpu_read(tasklet_hi_vec.tail) = t;
24128 -       __this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
24129 -       raise_softirq_irqoff(HI_SOFTIRQ);
24130 +       __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
24131         local_irq_restore(flags);
24133  EXPORT_SYMBOL(__tasklet_hi_schedule);
24134 @@ -476,82 +934,122 @@
24136         BUG_ON(!irqs_disabled());
24138 -       t->next = __this_cpu_read(tasklet_hi_vec.head);
24139 -       __this_cpu_write(tasklet_hi_vec.head, t);
24140 -       __raise_softirq_irqoff(HI_SOFTIRQ);
24141 +       __tasklet_hi_schedule(t);
24143  EXPORT_SYMBOL(__tasklet_hi_schedule_first);
24145 -static void tasklet_action(struct softirq_action *a)
24146 +void  tasklet_enable(struct tasklet_struct *t)
24148 -       struct tasklet_struct *list;
24149 +       if (!atomic_dec_and_test(&t->count))
24150 +               return;
24151 +       if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
24152 +               tasklet_schedule(t);
24154 +EXPORT_SYMBOL(tasklet_enable);
24156 -       local_irq_disable();
24157 -       list = __this_cpu_read(tasklet_vec.head);
24158 -       __this_cpu_write(tasklet_vec.head, NULL);
24159 -       __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
24160 -       local_irq_enable();
24161 +static void __tasklet_action(struct softirq_action *a,
24162 +                            struct tasklet_struct *list)
24164 +       int loops = 1000000;
24166         while (list) {
24167                 struct tasklet_struct *t = list;
24169                 list = list->next;
24171 -               if (tasklet_trylock(t)) {
24172 -                       if (!atomic_read(&t->count)) {
24173 -                               if (!test_and_clear_bit(TASKLET_STATE_SCHED,
24174 -                                                       &t->state))
24175 -                                       BUG();
24176 -                               t->func(t->data);
24177 -                               tasklet_unlock(t);
24178 -                               continue;
24179 -                       }
24180 -                       tasklet_unlock(t);
24181 +               /*
24182 +                * Should always succeed - after a tasklet got on the
24183 +                * list (after getting the SCHED bit set from 0 to 1),
24184 +                * nothing but the tasklet softirq it got queued to can
24185 +                * lock it:
24186 +                */
24187 +               if (!tasklet_trylock(t)) {
24188 +                       WARN_ON(1);
24189 +                       continue;
24190                 }
24192 -               local_irq_disable();
24193                 t->next = NULL;
24194 -               *__this_cpu_read(tasklet_vec.tail) = t;
24195 -               __this_cpu_write(tasklet_vec.tail, &(t->next));
24196 -               __raise_softirq_irqoff(TASKLET_SOFTIRQ);
24197 -               local_irq_enable();
24199 +               /*
24200 +                * If we cannot handle the tasklet because it's disabled,
24201 +                * mark it as pending. tasklet_enable() will later
24202 +                * re-schedule the tasklet.
24203 +                */
24204 +               if (unlikely(atomic_read(&t->count))) {
24205 +out_disabled:
24206 +                       /* implicit unlock: */
24207 +                       wmb();
24208 +                       t->state = TASKLET_STATEF_PENDING;
24209 +                       continue;
24210 +               }
24212 +               /*
24213 +                * After this point on the tasklet might be rescheduled
24214 +                * on another CPU, but it can only be added to another
24215 +                * CPU's tasklet list if we unlock the tasklet (which we
24216 +                * don't do yet).
24217 +                */
24218 +               if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
24219 +                       WARN_ON(1);
24221 +again:
24222 +               t->func(t->data);
24224 +               /*
24225 +                * Try to unlock the tasklet. We must use cmpxchg, because
24226 +                * another CPU might have scheduled or disabled the tasklet.
24227 +                * We only allow the STATE_RUN -> 0 transition here.
24228 +                */
24229 +               while (!tasklet_tryunlock(t)) {
24230 +                       /*
24231 +                        * If it got disabled meanwhile, bail out:
24232 +                        */
24233 +                       if (atomic_read(&t->count))
24234 +                               goto out_disabled;
24235 +                       /*
24236 +                        * If it got scheduled meanwhile, re-execute
24237 +                        * the tasklet function:
24238 +                        */
24239 +                       if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
24240 +                               goto again;
24241 +                       if (!--loops) {
24242 +                               printk("hm, tasklet state: %08lx\n", t->state);
24243 +                               WARN_ON(1);
24244 +                               tasklet_unlock(t);
24245 +                               break;
24246 +                       }
24247 +               }
24248         }
24251 +static void tasklet_action(struct softirq_action *a)
24253 +       struct tasklet_struct *list;
24255 +       local_irq_disable();
24257 +       list = __this_cpu_read(tasklet_vec.head);
24258 +       __this_cpu_write(tasklet_vec.head, NULL);
24259 +       __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
24261 +       local_irq_enable();
24263 +       __tasklet_action(a, list);
24266  static void tasklet_hi_action(struct softirq_action *a)
24268         struct tasklet_struct *list;
24270         local_irq_disable();
24272         list = __this_cpu_read(tasklet_hi_vec.head);
24273         __this_cpu_write(tasklet_hi_vec.head, NULL);
24274         __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
24275 -       local_irq_enable();
24277 -       while (list) {
24278 -               struct tasklet_struct *t = list;
24280 -               list = list->next;
24282 -               if (tasklet_trylock(t)) {
24283 -                       if (!atomic_read(&t->count)) {
24284 -                               if (!test_and_clear_bit(TASKLET_STATE_SCHED,
24285 -                                                       &t->state))
24286 -                                       BUG();
24287 -                               t->func(t->data);
24288 -                               tasklet_unlock(t);
24289 -                               continue;
24290 -                       }
24291 -                       tasklet_unlock(t);
24292 -               }
24293 +       local_irq_enable();
24295 -               local_irq_disable();
24296 -               t->next = NULL;
24297 -               *__this_cpu_read(tasklet_hi_vec.tail) = t;
24298 -               __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
24299 -               __raise_softirq_irqoff(HI_SOFTIRQ);
24300 -               local_irq_enable();
24301 -       }
24302 +       __tasklet_action(a, list);
24305  void tasklet_init(struct tasklet_struct *t,
24306 @@ -572,7 +1070,7 @@
24308         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
24309                 do {
24310 -                       yield();
24311 +                       msleep(1);
24312                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
24313         }
24314         tasklet_unlock_wait(t);
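From a driver's point of view, the net effect of the tasklet changes above is that disable/enable pairs behave sanely on RT: a tasklet that fires while disabled is parked as PENDING and re-raised by tasklet_enable() instead of being busy-spun on. A minimal driver-style sketch; all demo_* names are hypothetical.

#include <linux/interrupt.h>

static void demo_tasklet_fn(unsigned long data)
{
        /* bottom-half work; on RT this runs in the ksoftirqd thread */
}

static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);

static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
        tasklet_schedule(&demo_tasklet);        /* sets SCHED and queues it */
        return IRQ_HANDLED;
}

static void demo_reconfigure(void)
{
        tasklet_disable(&demo_tasklet);  /* count > 0: __tasklet_action() defers it as PENDING */
        /* ... safely touch state shared with the tasklet ... */
        tasklet_enable(&demo_tasklet);   /* re-schedules the tasklet if PENDING was set */
}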
24315 @@ -646,25 +1144,26 @@
24316         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
24319 -static int ksoftirqd_should_run(unsigned int cpu)
24321 -       return local_softirq_pending();
24324 -static void run_ksoftirqd(unsigned int cpu)
24325 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
24326 +void tasklet_unlock_wait(struct tasklet_struct *t)
24328 -       local_irq_disable();
24329 -       if (local_softirq_pending()) {
24330 +       while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
24331                 /*
24332 -                * We can safely run softirq on inline stack, as we are not deep
24333 -                * in the task stack here.
24334 +                * Hack for now to avoid this busy-loop:
24335                  */
24336 -               __do_softirq();
24337 -               local_irq_enable();
24338 -               cond_resched_rcu_qs();
24339 -               return;
24340 +#ifdef CONFIG_PREEMPT_RT_FULL
24341 +               msleep(1);
24342 +#else
24343 +               barrier();
24344 +#endif
24345         }
24346 -       local_irq_enable();
24348 +EXPORT_SYMBOL(tasklet_unlock_wait);
24349 +#endif
24351 +static int ksoftirqd_should_run(unsigned int cpu)
24353 +       return ksoftirqd_softirq_pending();
24356  #ifdef CONFIG_HOTPLUG_CPU
24357 @@ -746,16 +1245,31 @@
24359  static struct smp_hotplug_thread softirq_threads = {
24360         .store                  = &ksoftirqd,
24361 +       .setup                  = ksoftirqd_set_sched_params,
24362         .thread_should_run      = ksoftirqd_should_run,
24363         .thread_fn              = run_ksoftirqd,
24364         .thread_comm            = "ksoftirqd/%u",
24365  };
24367 +#ifdef CONFIG_PREEMPT_RT_FULL
24368 +static struct smp_hotplug_thread softirq_timer_threads = {
24369 +       .store                  = &ktimer_softirqd,
24370 +       .setup                  = ktimer_softirqd_set_sched_params,
24371 +       .cleanup                = ktimer_softirqd_clr_sched_params,
24372 +       .thread_should_run      = ktimer_softirqd_should_run,
24373 +       .thread_fn              = run_ksoftirqd,
24374 +       .thread_comm            = "ktimersoftd/%u",
24376 +#endif
24378  static __init int spawn_ksoftirqd(void)
24380         register_cpu_notifier(&cpu_nfb);
24382         BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
24383 +#ifdef CONFIG_PREEMPT_RT_FULL
24384 +       BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
24385 +#endif
24387         return 0;
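Taken together, the softirq.c changes above serialize each softirq vector with its own per-CPU lock, re-check the pending bit under that lock, and split timer/hrtimer work out to a separate ktimersoftd/%u thread. A standalone pthread sketch of just the locking idea (not kernel code; all names are illustrative):

#include <pthread.h>
#include <stdatomic.h>

#define NR_VEC 10

static pthread_mutex_t vec_lock[NR_VEC];        /* like local_softirq_locks[] */
static atomic_uint pending;                     /* one bit per vector */
static void (*handler[NR_VEC])(void);

static void demo_early_init(void)               /* like softirq_early_init() */
{
        for (int i = 0; i < NR_VEC; i++)
                pthread_mutex_init(&vec_lock[i], NULL);
}

static void demo_raise(unsigned int nr)         /* like or_softirq_pending() */
{
        atomic_fetch_or(&pending, 1u << nr);
}

static void demo_run(unsigned int nr)           /* like do_current_softirqs(), one bit */
{
        pthread_mutex_lock(&vec_lock[nr]);      /* lock_softirq(nr): serializes this vector */
        /* re-check under the lock: another context may have handled the
         * vector while we were blocked on the contended lock */
        if (atomic_fetch_and(&pending, ~(1u << nr)) & (1u << nr))
                handler[nr]();
        pthread_mutex_unlock(&vec_lock[nr]);
}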
24389 diff -Nur linux-4.4.46.orig/kernel/stop_machine.c linux-4.4.46/kernel/stop_machine.c
24390 --- linux-4.4.46.orig/kernel/stop_machine.c     2017-02-01 08:31:11.000000000 +0100
24391 +++ linux-4.4.46/kernel/stop_machine.c  2017-02-03 17:18:10.935619367 +0100
24392 @@ -37,7 +37,7 @@
24393  struct cpu_stopper {
24394         struct task_struct      *thread;
24396 -       spinlock_t              lock;
24397 +       raw_spinlock_t          lock;
24398         bool                    enabled;        /* is this stopper enabled? */
24399         struct list_head        works;          /* list of pending works */
24401 @@ -86,12 +86,12 @@
24402         struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
24403         unsigned long flags;
24405 -       spin_lock_irqsave(&stopper->lock, flags);
24406 +       raw_spin_lock_irqsave(&stopper->lock, flags);
24407         if (stopper->enabled)
24408                 __cpu_stop_queue_work(stopper, work);
24409         else
24410                 cpu_stop_signal_done(work->done, false);
24411 -       spin_unlock_irqrestore(&stopper->lock, flags);
24412 +       raw_spin_unlock_irqrestore(&stopper->lock, flags);
24415  /**
24416 @@ -224,8 +224,8 @@
24417         int err;
24419         lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
24420 -       spin_lock_irq(&stopper1->lock);
24421 -       spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
24422 +       raw_spin_lock_irq(&stopper1->lock);
24423 +       raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
24425         err = -ENOENT;
24426         if (!stopper1->enabled || !stopper2->enabled)
24427 @@ -235,8 +235,8 @@
24428         __cpu_stop_queue_work(stopper1, work1);
24429         __cpu_stop_queue_work(stopper2, work2);
24430  unlock:
24431 -       spin_unlock(&stopper2->lock);
24432 -       spin_unlock_irq(&stopper1->lock);
24433 +       raw_spin_unlock(&stopper2->lock);
24434 +       raw_spin_unlock_irq(&stopper1->lock);
24435         lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
24437         return err;
24438 @@ -258,7 +258,7 @@
24439         struct cpu_stop_work work1, work2;
24440         struct multi_stop_data msdata;
24442 -       preempt_disable();
24443 +       preempt_disable_nort();
24444         msdata = (struct multi_stop_data){
24445                 .fn = fn,
24446                 .data = arg,
24447 @@ -278,11 +278,11 @@
24448         if (cpu1 > cpu2)
24449                 swap(cpu1, cpu2);
24450         if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) {
24451 -               preempt_enable();
24452 +               preempt_enable_nort();
24453                 return -ENOENT;
24454         }
24456 -       preempt_enable();
24457 +       preempt_enable_nort();
24459         wait_for_completion(&done.completion);
24461 @@ -315,17 +315,20 @@
24463  static void queue_stop_cpus_work(const struct cpumask *cpumask,
24464                                  cpu_stop_fn_t fn, void *arg,
24465 -                                struct cpu_stop_done *done)
24466 +                                struct cpu_stop_done *done, bool inactive)
24468         struct cpu_stop_work *work;
24469         unsigned int cpu;
24471         /*
24472 -        * Disable preemption while queueing to avoid getting
24473 -        * preempted by a stopper which might wait for other stoppers
24474 -        * to enter @fn which can lead to deadlock.
24475 +        * Make sure that all work is queued on all cpus before
24476 +        * any of the cpus can execute it.
24477          */
24478 -       lg_global_lock(&stop_cpus_lock);
24479 +       if (!inactive)
24480 +               lg_global_lock(&stop_cpus_lock);
24481 +       else
24482 +               lg_global_trylock_relax(&stop_cpus_lock);
24484         for_each_cpu(cpu, cpumask) {
24485                 work = &per_cpu(cpu_stopper.stop_work, cpu);
24486                 work->fn = fn;
24487 @@ -342,7 +345,7 @@
24488         struct cpu_stop_done done;
24490         cpu_stop_init_done(&done, cpumask_weight(cpumask));
24491 -       queue_stop_cpus_work(cpumask, fn, arg, &done);
24492 +       queue_stop_cpus_work(cpumask, fn, arg, &done, false);
24493         wait_for_completion(&done.completion);
24494         return done.executed ? done.ret : -ENOENT;
24496 @@ -422,9 +425,9 @@
24497         unsigned long flags;
24498         int run;
24500 -       spin_lock_irqsave(&stopper->lock, flags);
24501 +       raw_spin_lock_irqsave(&stopper->lock, flags);
24502         run = !list_empty(&stopper->works);
24503 -       spin_unlock_irqrestore(&stopper->lock, flags);
24504 +       raw_spin_unlock_irqrestore(&stopper->lock, flags);
24505         return run;
24508 @@ -436,13 +439,13 @@
24510  repeat:
24511         work = NULL;
24512 -       spin_lock_irq(&stopper->lock);
24513 +       raw_spin_lock_irq(&stopper->lock);
24514         if (!list_empty(&stopper->works)) {
24515                 work = list_first_entry(&stopper->works,
24516                                         struct cpu_stop_work, list);
24517                 list_del_init(&work->list);
24518         }
24519 -       spin_unlock_irq(&stopper->lock);
24520 +       raw_spin_unlock_irq(&stopper->lock);
24522         if (work) {
24523                 cpu_stop_fn_t fn = work->fn;
24524 @@ -450,6 +453,16 @@
24525                 struct cpu_stop_done *done = work->done;
24526                 char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
24528 +               /*
24529 +                * Wait until the stopper finished scheduling on all
24530 +                * cpus
24531 +                */
24532 +               lg_global_lock(&stop_cpus_lock);
24533 +               /*
24534 +                * Let other cpu threads continue as well
24535 +                */
24536 +               lg_global_unlock(&stop_cpus_lock);
24538                 /* cpu stop callbacks are not allowed to sleep */
24539                 preempt_disable();
24541 @@ -520,10 +533,12 @@
24542         for_each_possible_cpu(cpu) {
24543                 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
24545 -               spin_lock_init(&stopper->lock);
24546 +               raw_spin_lock_init(&stopper->lock);
24547                 INIT_LIST_HEAD(&stopper->works);
24548         }
24550 +       lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
24552         BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
24553         stop_machine_unpark(raw_smp_processor_id());
24554         stop_machine_initialized = true;
24555 @@ -620,7 +635,7 @@
24556         set_state(&msdata, MULTI_STOP_PREPARE);
24557         cpu_stop_init_done(&done, num_active_cpus());
24558         queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
24559 -                            &done);
24560 +                            &done, true);
24561         ret = multi_cpu_stop(&msdata);
24563         /* Busy wait for completion. */
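The stop_machine.c hunks above are a mechanical spinlock_t -> raw_spinlock_t conversion: on PREEMPT_RT a spinlock_t becomes a sleeping rtmutex, which the stopper queueing paths (running with interrupts off) must not take. A kernel-style sketch of the pattern with illustrative names:

#include <linux/spinlock.h>
#include <linux/list.h>

struct demo_queue {
        raw_spinlock_t          lock;   /* was: spinlock_t lock; */
        struct list_head        items;
};

static void demo_queue_add(struct demo_queue *q, struct list_head *item)
{
        unsigned long flags;

        /* raw_spin_lock_* stays a real spinning lock on RT, so this is
         * legal from contexts that cannot sleep */
        raw_spin_lock_irqsave(&q->lock, flags);
        list_add_tail(item, &q->items);
        raw_spin_unlock_irqrestore(&q->lock, flags);
}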
24564 diff -Nur linux-4.4.46.orig/kernel/time/hrtimer.c linux-4.4.46/kernel/time/hrtimer.c
24565 --- linux-4.4.46.orig/kernel/time/hrtimer.c     2017-02-01 08:31:11.000000000 +0100
24566 +++ linux-4.4.46/kernel/time/hrtimer.c  2017-02-03 17:18:10.939619522 +0100
24567 @@ -48,11 +48,13 @@
24568  #include <linux/sched/rt.h>
24569  #include <linux/sched/deadline.h>
24570  #include <linux/timer.h>
24571 +#include <linux/kthread.h>
24572  #include <linux/freezer.h>
24574  #include <asm/uaccess.h>
24576  #include <trace/events/timer.h>
24577 +#include <trace/events/hist.h>
24579  #include "tick-internal.h"
24581 @@ -717,6 +719,44 @@
24583  static DECLARE_WORK(hrtimer_work, clock_was_set_work);
24585 +#ifdef CONFIG_PREEMPT_RT_FULL
24587 + * RT can not call schedule_work from real interrupt context.
24588 + * Need to make a thread to do the real work.
24589 + */
24590 +static struct task_struct *clock_set_delay_thread;
24591 +static bool do_clock_set_delay;
24593 +static int run_clock_set_delay(void *ignore)
24595 +       while (!kthread_should_stop()) {
24596 +               set_current_state(TASK_INTERRUPTIBLE);
24597 +               if (do_clock_set_delay) {
24598 +                       do_clock_set_delay = false;
24599 +                       schedule_work(&hrtimer_work);
24600 +               }
24601 +               schedule();
24602 +       }
24603 +       __set_current_state(TASK_RUNNING);
24604 +       return 0;
24607 +void clock_was_set_delayed(void)
24609 +       do_clock_set_delay = true;
24610 +       /* Make visible before waking up process */
24611 +       smp_wmb();
24612 +       wake_up_process(clock_set_delay_thread);
24615 +static __init int create_clock_set_delay_thread(void)
24617 +       clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
24618 +       BUG_ON(!clock_set_delay_thread);
24619 +       return 0;
24621 +early_initcall(create_clock_set_delay_thread);
24622 +#else /* PREEMPT_RT_FULL */
24623  /*
24624   * Called from timekeeping and resume code to reprogramm the hrtimer
24625   * interrupt device on all cpus.
24626 @@ -725,6 +765,7 @@
24628         schedule_work(&hrtimer_work);
24630 +#endif
24632  #else
24634 @@ -734,11 +775,8 @@
24635  static inline void hrtimer_switch_to_hres(void) { }
24636  static inline void
24637  hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
24638 -static inline int hrtimer_reprogram(struct hrtimer *timer,
24639 -                                   struct hrtimer_clock_base *base)
24641 -       return 0;
24643 +static inline void hrtimer_reprogram(struct hrtimer *timer,
24644 +                                    struct hrtimer_clock_base *base) { }
24645  static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
24646  static inline void retrigger_next_event(void *arg) { }
24648 @@ -870,6 +908,32 @@
24650  EXPORT_SYMBOL_GPL(hrtimer_forward);
24652 +#ifdef CONFIG_PREEMPT_RT_BASE
24653 +# define wake_up_timer_waiters(b)      wake_up(&(b)->wait)
24655 +/**
24656 + * hrtimer_wait_for_timer - Wait for a running timer
24657 + *
24658 + * @timer:     timer to wait for
24659 + *
24660 + * The function waits in case the timers callback function is
24661 + * currently executed on the waitqueue of the timer base. The
24662 + * waitqueue is woken up after the timer callback function has
24663 + * finished execution.
24664 + */
24665 +void hrtimer_wait_for_timer(const struct hrtimer *timer)
24667 +       struct hrtimer_clock_base *base = timer->base;
24669 +       if (base && base->cpu_base && !timer->irqsafe)
24670 +               wait_event(base->cpu_base->wait,
24671 +                               !(hrtimer_callback_running(timer)));
24674 +#else
24675 +# define wake_up_timer_waiters(b)      do { } while (0)
24676 +#endif
24678  /*
24679   * enqueue_hrtimer - internal function to (re)start a timer
24680   *
24681 @@ -911,6 +975,11 @@
24682         if (!(state & HRTIMER_STATE_ENQUEUED))
24683                 return;
24685 +       if (unlikely(!list_empty(&timer->cb_entry))) {
24686 +               list_del_init(&timer->cb_entry);
24687 +               return;
24688 +       }
24690         if (!timerqueue_del(&base->active, &timer->node))
24691                 cpu_base->active_bases &= ~(1 << base->index);
24693 @@ -1006,7 +1075,16 @@
24694         new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
24696         timer_stats_hrtimer_set_start_info(timer);
24697 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
24698 +       {
24699 +               ktime_t now = new_base->get_time();
24701 +               if (ktime_to_ns(tim) < ktime_to_ns(now))
24702 +                       timer->praecox = now;
24703 +               else
24704 +                       timer->praecox = ktime_set(0, 0);
24705 +       }
24706 +#endif
24707         leftmost = enqueue_hrtimer(timer, new_base);
24708         if (!leftmost)
24709                 goto unlock;
24710 @@ -1078,7 +1156,7 @@
24712                 if (ret >= 0)
24713                         return ret;
24714 -               cpu_relax();
24715 +               hrtimer_wait_for_timer(timer);
24716         }
24718  EXPORT_SYMBOL_GPL(hrtimer_cancel);
24719 @@ -1142,6 +1220,7 @@
24721         base = hrtimer_clockid_to_base(clock_id);
24722         timer->base = &cpu_base->clock_base[base];
24723 +       INIT_LIST_HEAD(&timer->cb_entry);
24724         timerqueue_init(&timer->node);
24726  #ifdef CONFIG_TIMER_STATS
24727 @@ -1182,6 +1261,7 @@
24728                 seq = raw_read_seqcount_begin(&cpu_base->seq);
24730                 if (timer->state != HRTIMER_STATE_INACTIVE ||
24731 +                   cpu_base->running_soft == timer ||
24732                     cpu_base->running == timer)
24733                         return true;
24735 @@ -1280,10 +1360,112 @@
24736         cpu_base->running = NULL;
24739 +#ifdef CONFIG_PREEMPT_RT_BASE
24740 +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
24741 +                                struct hrtimer_clock_base *base)
24743 +       int leftmost;
24745 +       if (restart != HRTIMER_NORESTART &&
24746 +           !(timer->state & HRTIMER_STATE_ENQUEUED)) {
24748 +               leftmost = enqueue_hrtimer(timer, base);
24749 +               if (!leftmost)
24750 +                       return;
24751 +#ifdef CONFIG_HIGH_RES_TIMERS
24752 +               if (!hrtimer_is_hres_active(timer)) {
24753 +                       /*
24754 +                        * Kick to reschedule the next tick to handle the new timer
24755 +                        * on dynticks target.
24756 +                        */
24757 +                       if (base->cpu_base->nohz_active)
24758 +                               wake_up_nohz_cpu(base->cpu_base->cpu);
24759 +               } else {
24761 +                       hrtimer_reprogram(timer, base);
24762 +               }
24763 +#endif
24764 +       }
24768 + * The changes in mainline which removed the callback modes from
24769 + * hrtimer are not yet working with -rt. The non wakeup_process()
24770 + * based callbacks which involve sleeping locks need to be treated
24771 + * separately.
24772 + */
24773 +static void hrtimer_rt_run_pending(void)
24775 +       enum hrtimer_restart (*fn)(struct hrtimer *);
24776 +       struct hrtimer_cpu_base *cpu_base;
24777 +       struct hrtimer_clock_base *base;
24778 +       struct hrtimer *timer;
24779 +       int index, restart;
24781 +       local_irq_disable();
24782 +       cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
24784 +       raw_spin_lock(&cpu_base->lock);
24786 +       for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
24787 +               base = &cpu_base->clock_base[index];
24789 +               while (!list_empty(&base->expired)) {
24790 +                       timer = list_first_entry(&base->expired,
24791 +                                                struct hrtimer, cb_entry);
24793 +                       /*
24794 +                        * Same as the above __run_hrtimer function
24795 +                        * just we run with interrupts enabled.
24796 +                        */
24797 +                       debug_deactivate(timer);
24798 +                       cpu_base->running_soft = timer;
24799 +                       raw_write_seqcount_barrier(&cpu_base->seq);
24801 +                       __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
24802 +                       timer_stats_account_hrtimer(timer);
24803 +                       fn = timer->function;
24805 +                       raw_spin_unlock_irq(&cpu_base->lock);
24806 +                       restart = fn(timer);
24807 +                       raw_spin_lock_irq(&cpu_base->lock);
24809 +                       hrtimer_rt_reprogram(restart, timer, base);
24810 +                       raw_write_seqcount_barrier(&cpu_base->seq);
24812 +                       WARN_ON_ONCE(cpu_base->running_soft != timer);
24813 +                       cpu_base->running_soft = NULL;
24814 +               }
24815 +       }
24817 +       raw_spin_unlock_irq(&cpu_base->lock);
24819 +       wake_up_timer_waiters(cpu_base);
24822 +static int hrtimer_rt_defer(struct hrtimer *timer)
24824 +       if (timer->irqsafe)
24825 +               return 0;
24827 +       __remove_hrtimer(timer, timer->base, timer->state, 0);
24828 +       list_add_tail(&timer->cb_entry, &timer->base->expired);
24829 +       return 1;
24832 +#else
24834 +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
24836 +#endif
24838 +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
24840  static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
24842         struct hrtimer_clock_base *base = cpu_base->clock_base;
24843         unsigned int active = cpu_base->active_bases;
24844 +       int raise = 0;
24846         for (; active; base++, active >>= 1) {
24847                 struct timerqueue_node *node;
24848 @@ -1299,6 +1481,15 @@
24850                         timer = container_of(node, struct hrtimer, node);
24852 +                       trace_hrtimer_interrupt(raw_smp_processor_id(),
24853 +                           ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
24854 +                               timer->praecox : hrtimer_get_expires(timer),
24855 +                               basenow)),
24856 +                           current,
24857 +                           timer->function == hrtimer_wakeup ?
24858 +                           container_of(timer, struct hrtimer_sleeper,
24859 +                               timer)->task : NULL);
24861                         /*
24862                          * The immediate goal for using the softexpires is
24863                          * minimizing wakeups, not running timers at the
24864 @@ -1314,9 +1505,14 @@
24865                         if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
24866                                 break;
24868 -                       __run_hrtimer(cpu_base, base, timer, &basenow);
24869 +                       if (!hrtimer_rt_defer(timer))
24870 +                               __run_hrtimer(cpu_base, base, timer, &basenow);
24871 +                       else
24872 +                               raise = 1;
24873                 }
24874         }
24875 +       if (raise)
24876 +               raise_softirq_irqoff(HRTIMER_SOFTIRQ);
24879  #ifdef CONFIG_HIGH_RES_TIMERS
24880 @@ -1479,16 +1675,18 @@
24881  void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
24883         sl->timer.function = hrtimer_wakeup;
24884 +       sl->timer.irqsafe = 1;
24885         sl->task = task;
24887  EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
24889 -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
24890 +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
24891 +                               unsigned long state)
24893         hrtimer_init_sleeper(t, current);
24895         do {
24896 -               set_current_state(TASK_INTERRUPTIBLE);
24897 +               set_current_state(state);
24898                 hrtimer_start_expires(&t->timer, mode);
24900                 if (likely(t->task))
24901 @@ -1530,7 +1728,8 @@
24902                                 HRTIMER_MODE_ABS);
24903         hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
24905 -       if (do_nanosleep(&t, HRTIMER_MODE_ABS))
24906 +       /* cpu_chill() does not care about restart state. */
24907 +       if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
24908                 goto out;
24910         rmtp = restart->nanosleep.rmtp;
24911 @@ -1547,8 +1746,10 @@
24912         return ret;
24915 -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
24916 -                      const enum hrtimer_mode mode, const clockid_t clockid)
24917 +static long
24918 +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
24919 +                   const enum hrtimer_mode mode, const clockid_t clockid,
24920 +                   unsigned long state)
24922         struct restart_block *restart;
24923         struct hrtimer_sleeper t;
24924 @@ -1561,7 +1762,7 @@
24926         hrtimer_init_on_stack(&t.timer, clockid, mode);
24927         hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
24928 -       if (do_nanosleep(&t, mode))
24929 +       if (do_nanosleep(&t, mode, state))
24930                 goto out;
24932         /* Absolute timers do not update the rmtp value and restart: */
24933 @@ -1588,6 +1789,12 @@
24934         return ret;
24937 +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
24938 +                      const enum hrtimer_mode mode, const clockid_t clockid)
24940 +       return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
24943  SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
24944                 struct timespec __user *, rmtp)
24946 @@ -1602,6 +1809,26 @@
24947         return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
24950 +#ifdef CONFIG_PREEMPT_RT_FULL
24952 + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
24953 + */
24954 +void cpu_chill(void)
24956 +       struct timespec tu = {
24957 +               .tv_nsec = NSEC_PER_MSEC,
24958 +       };
24959 +       unsigned int freeze_flag = current->flags & PF_NOFREEZE;
24961 +       current->flags |= PF_NOFREEZE;
24962 +       __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
24963 +                           TASK_UNINTERRUPTIBLE);
24964 +       if (!freeze_flag)
24965 +               current->flags &= ~PF_NOFREEZE;
24967 +EXPORT_SYMBOL(cpu_chill);
24968 +#endif
24970  /*
24971   * Functions related to boot-time initialization:
24972   */
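cpu_chill() exists so that retry loops which would otherwise spin with cpu_relax() (and, under SCHED_FIFO, potentially starve the very owner they are waiting for) sleep for a millisecond instead. A sketch of the intended call-site shape; the bit-wait loop is hypothetical, and the assumption that the RT series declares cpu_chill() via linux/delay.h (mapping it back to cpu_relax() on non-RT configs) should be checked against the headers in this patch.

#include <linux/bitops.h>
#include <linux/delay.h>        /* cpu_chill() on RT builds (assumed location) */

#define DEMO_BUSY_BIT   0

static void demo_wait_until_idle(unsigned long *state)
{
        /* cpu_chill() sleeps ~1ms on PREEMPT_RT_FULL; on other configs the
         * RT series falls back to a plain cpu_relax() busy-wait */
        while (test_bit(DEMO_BUSY_BIT, state))
                cpu_chill();
}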
24973 @@ -1613,10 +1840,14 @@
24974         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
24975                 cpu_base->clock_base[i].cpu_base = cpu_base;
24976                 timerqueue_init_head(&cpu_base->clock_base[i].active);
24977 +               INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
24978         }
24980         cpu_base->cpu = cpu;
24981         hrtimer_init_hres(cpu_base);
24982 +#ifdef CONFIG_PREEMPT_RT_BASE
24983 +       init_waitqueue_head(&cpu_base->wait);
24984 +#endif
24987  #ifdef CONFIG_HOTPLUG_CPU
24988 @@ -1714,11 +1945,21 @@
24989         .notifier_call = hrtimer_cpu_notify,
24990  };
24992 +#ifdef CONFIG_PREEMPT_RT_BASE
24993 +static void run_hrtimer_softirq(struct softirq_action *h)
24995 +       hrtimer_rt_run_pending();
24997 +#endif
24999  void __init hrtimers_init(void)
25001         hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
25002                           (void *)(long)smp_processor_id());
25003         register_cpu_notifier(&hrtimers_nb);
25004 +#ifdef CONFIG_PREEMPT_RT_BASE
25005 +       open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
25006 +#endif
25009  /**
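The practical rule this introduces for hrtimer users on RT: a callback that only does hard-irq-safe work (typically a bare wake_up_process(), as in hrtimer_init_sleeper() above) sets the new .irqsafe field and keeps running from the timer interrupt; everything else is left at 0 and is deferred to HRTIMER_SOFTIRQ via hrtimer_rt_run_pending(). A sketch with hypothetical demo_* names; only .irqsafe comes from this patch, the rest is standard hrtimer API.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static enum hrtimer_restart demo_wakeup_cb(struct hrtimer *t)
{
        /* does nothing that can sleep, so hard irq context is fine */
        return HRTIMER_NORESTART;
}

static void demo_arm(struct hrtimer *t)
{
        hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        t->function = demo_wakeup_cb;
        t->irqsafe  = 1;        /* 0 (default): callback deferred to the softirq thread on RT */
        hrtimer_start(t, ms_to_ktime(10), HRTIMER_MODE_REL);
}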
25010 diff -Nur linux-4.4.46.orig/kernel/time/itimer.c linux-4.4.46/kernel/time/itimer.c
25011 --- linux-4.4.46.orig/kernel/time/itimer.c      2017-02-01 08:31:11.000000000 +0100
25012 +++ linux-4.4.46/kernel/time/itimer.c   2017-02-03 17:18:10.939619522 +0100
25013 @@ -213,6 +213,7 @@
25014                 /* We are sharing ->siglock with it_real_fn() */
25015                 if (hrtimer_try_to_cancel(timer) < 0) {
25016                         spin_unlock_irq(&tsk->sighand->siglock);
25017 +                       hrtimer_wait_for_timer(&tsk->signal->real_timer);
25018                         goto again;
25019                 }
25020                 expires = timeval_to_ktime(value->it_value);
25021 diff -Nur linux-4.4.46.orig/kernel/time/jiffies.c linux-4.4.46/kernel/time/jiffies.c
25022 --- linux-4.4.46.orig/kernel/time/jiffies.c     2017-02-01 08:31:11.000000000 +0100
25023 +++ linux-4.4.46/kernel/time/jiffies.c  2017-02-03 17:18:10.939619522 +0100
25024 @@ -74,7 +74,8 @@
25025         .max_cycles     = 10,
25026  };
25028 -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
25029 +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
25030 +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
25032  #if (BITS_PER_LONG < 64)
25033  u64 get_jiffies_64(void)
25034 @@ -83,9 +84,9 @@
25035         u64 ret;
25037         do {
25038 -               seq = read_seqbegin(&jiffies_lock);
25039 +               seq = read_seqcount_begin(&jiffies_seq);
25040                 ret = jiffies_64;
25041 -       } while (read_seqretry(&jiffies_lock, seq));
25042 +       } while (read_seqcount_retry(&jiffies_seq, seq));
25043         return ret;
25045  EXPORT_SYMBOL(get_jiffies_64);
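The jiffies change replaces a seqlock_t with a raw spinlock for the writer plus a bare seqcount_t for readers: the write side must stay non-sleeping on RT, while readers such as get_jiffies_64() only need the sequence counter. A kernel-style sketch of the split with illustrative names:

#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/types.h>

static DEFINE_RAW_SPINLOCK(demo_lock);
static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static u64 demo_value;

static void demo_update(u64 v)          /* writer, e.g. the timekeeping tick */
{
        raw_spin_lock(&demo_lock);
        write_seqcount_begin(&demo_seq);
        demo_value = v;
        write_seqcount_end(&demo_seq);
        raw_spin_unlock(&demo_lock);
}

static u64 demo_read(void)              /* lockless reader, like get_jiffies_64() */
{
        unsigned int seq;
        u64 v;

        do {
                seq = read_seqcount_begin(&demo_seq);
                v = demo_value;
        } while (read_seqcount_retry(&demo_seq, seq));

        return v;
}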
25046 diff -Nur linux-4.4.46.orig/kernel/time/ntp.c linux-4.4.46/kernel/time/ntp.c
25047 --- linux-4.4.46.orig/kernel/time/ntp.c 2017-02-01 08:31:11.000000000 +0100
25048 +++ linux-4.4.46/kernel/time/ntp.c      2017-02-03 17:18:10.939619522 +0100
25049 @@ -10,6 +10,7 @@
25050  #include <linux/workqueue.h>
25051  #include <linux/hrtimer.h>
25052  #include <linux/jiffies.h>
25053 +#include <linux/kthread.h>
25054  #include <linux/math64.h>
25055  #include <linux/timex.h>
25056  #include <linux/time.h>
25057 @@ -562,10 +563,52 @@
25058                            &sync_cmos_work, timespec64_to_jiffies(&next));
25061 +#ifdef CONFIG_PREEMPT_RT_FULL
25063 + * RT can not call schedule_delayed_work from real interrupt context.
25064 + * Need to make a thread to do the real work.
25065 + */
25066 +static struct task_struct *cmos_delay_thread;
25067 +static bool do_cmos_delay;
25069 +static int run_cmos_delay(void *ignore)
25071 +       while (!kthread_should_stop()) {
25072 +               set_current_state(TASK_INTERRUPTIBLE);
25073 +               if (do_cmos_delay) {
25074 +                       do_cmos_delay = false;
25075 +                       queue_delayed_work(system_power_efficient_wq,
25076 +                                          &sync_cmos_work, 0);
25077 +               }
25078 +               schedule();
25079 +       }
25080 +       __set_current_state(TASK_RUNNING);
25081 +       return 0;
25084 +void ntp_notify_cmos_timer(void)
25086 +       do_cmos_delay = true;
25087 +       /* Make visible before waking up process */
25088 +       smp_wmb();
25089 +       wake_up_process(cmos_delay_thread);
25092 +static __init int create_cmos_delay_thread(void)
25094 +       cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
25095 +       BUG_ON(!cmos_delay_thread);
25096 +       return 0;
25098 +early_initcall(create_cmos_delay_thread);
25100 +#else
25102  void ntp_notify_cmos_timer(void)
25104         queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
25106 +#endif /* CONFIG_PREEMPT_RT_FULL */
25108  #else
25109  void ntp_notify_cmos_timer(void) { }
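The same deferral idiom appears twice in this patch (kclksetdelayd in hrtimer.c above, kcmosdelayd here): hard interrupt context on RT must not call schedule_work()/schedule_delayed_work(), so it only sets a flag and wakes a helper kthread which issues the call from process context. A condensed sketch of the idiom; the demo_* names and do_deferred_work() are hypothetical.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/init.h>

static struct task_struct *demo_thread;
static bool demo_pending;

static void do_deferred_work(void)
{
        /* stand-in for the schedule_work()/queue_delayed_work() call above */
}

static int demo_thread_fn(void *unused)
{
        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (demo_pending) {
                        demo_pending = false;
                        __set_current_state(TASK_RUNNING);
                        do_deferred_work();
                        continue;       /* re-check before sleeping */
                }
                schedule();
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

void demo_notify(void)                  /* safe from hard irq context */
{
        demo_pending = true;
        smp_wmb();                      /* flag visible before the wakeup */
        wake_up_process(demo_thread);
}

static int __init demo_thread_init(void)
{
        demo_thread = kthread_run(demo_thread_fn, NULL, "demodelayd");
        return PTR_ERR_OR_ZERO(demo_thread);
}
early_initcall(demo_thread_init);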
25110 diff -Nur linux-4.4.46.orig/kernel/time/posix-cpu-timers.c linux-4.4.46/kernel/time/posix-cpu-timers.c
25111 --- linux-4.4.46.orig/kernel/time/posix-cpu-timers.c    2017-02-01 08:31:11.000000000 +0100
25112 +++ linux-4.4.46/kernel/time/posix-cpu-timers.c 2017-02-03 17:18:10.939619522 +0100
25113 @@ -3,6 +3,7 @@
25114   */
25116  #include <linux/sched.h>
25117 +#include <linux/sched/rt.h>
25118  #include <linux/posix-timers.h>
25119  #include <linux/errno.h>
25120  #include <linux/math64.h>
25121 @@ -650,7 +651,7 @@
25122         /*
25123          * Disarm any old timer after extracting its expiry time.
25124          */
25125 -       WARN_ON_ONCE(!irqs_disabled());
25126 +       WARN_ON_ONCE_NONRT(!irqs_disabled());
25128         ret = 0;
25129         old_incr = timer->it.cpu.incr;
25130 @@ -1092,7 +1093,7 @@
25131         /*
25132          * Now re-arm for the new expiry time.
25133          */
25134 -       WARN_ON_ONCE(!irqs_disabled());
25135 +       WARN_ON_ONCE_NONRT(!irqs_disabled());
25136         arm_timer(timer);
25137         unlock_task_sighand(p, &flags);
25139 @@ -1183,13 +1184,13 @@
25140   * already updated our counts.  We need to check if any timers fire now.
25141   * Interrupts are disabled.
25142   */
25143 -void run_posix_cpu_timers(struct task_struct *tsk)
25144 +static void __run_posix_cpu_timers(struct task_struct *tsk)
25146         LIST_HEAD(firing);
25147         struct k_itimer *timer, *next;
25148         unsigned long flags;
25150 -       WARN_ON_ONCE(!irqs_disabled());
25151 +       WARN_ON_ONCE_NONRT(!irqs_disabled());
25153         /*
25154          * The fast path checks that there are no expired thread or thread
25155 @@ -1243,6 +1244,190 @@
25156         }
25159 +#ifdef CONFIG_PREEMPT_RT_BASE
25160 +#include <linux/kthread.h>
25161 +#include <linux/cpu.h>
25162 +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
25163 +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
25165 +static int posix_cpu_timers_thread(void *data)
25167 +       int cpu = (long)data;
25169 +       BUG_ON(per_cpu(posix_timer_task,cpu) != current);
25171 +       while (!kthread_should_stop()) {
25172 +               struct task_struct *tsk = NULL;
25173 +               struct task_struct *next = NULL;
25175 +               if (cpu_is_offline(cpu))
25176 +                       goto wait_to_die;
25178 +               /* grab task list */
25179 +               raw_local_irq_disable();
25180 +               tsk = per_cpu(posix_timer_tasklist, cpu);
25181 +               per_cpu(posix_timer_tasklist, cpu) = NULL;
25182 +               raw_local_irq_enable();
25184 +               /* it's possible the list is empty, just return */
25185 +               if (!tsk) {
25186 +                       set_current_state(TASK_INTERRUPTIBLE);
25187 +                       schedule();
25188 +                       __set_current_state(TASK_RUNNING);
25189 +                       continue;
25190 +               }
25192 +               /* Process task list */
25193 +               while (1) {
25194 +                       /* save next */
25195 +                       next = tsk->posix_timer_list;
25197 +                       /* run the task timers, clear its ptr and
25198 +                        * unreference it
25199 +                        */
25200 +                       __run_posix_cpu_timers(tsk);
25201 +                       tsk->posix_timer_list = NULL;
25202 +                       put_task_struct(tsk);
25204 +                       /* check if this is the last on the list */
25205 +                       if (next == tsk)
25206 +                               break;
25207 +                       tsk = next;
25208 +               }
25209 +       }
25210 +       return 0;
25212 +wait_to_die:
25213 +       /* Wait for kthread_stop */
25214 +       set_current_state(TASK_INTERRUPTIBLE);
25215 +       while (!kthread_should_stop()) {
25216 +               schedule();
25217 +               set_current_state(TASK_INTERRUPTIBLE);
25218 +       }
25219 +       __set_current_state(TASK_RUNNING);
25220 +       return 0;
25223 +static inline int __fastpath_timer_check(struct task_struct *tsk)
25225 +       /* tsk == current, ensure it is safe to use ->signal/sighand */
25226 +       if (unlikely(tsk->exit_state))
25227 +               return 0;
25229 +       if (!task_cputime_zero(&tsk->cputime_expires))
25230 +                       return 1;
25232 +       if (!task_cputime_zero(&tsk->signal->cputime_expires))
25233 +                       return 1;
25235 +       return 0;
25238 +void run_posix_cpu_timers(struct task_struct *tsk)
25240 +       unsigned long cpu = smp_processor_id();
25241 +       struct task_struct *tasklist;
25243 +       BUG_ON(!irqs_disabled());
25244 +       if(!per_cpu(posix_timer_task, cpu))
25245 +               return;
25246 +       /* get per-cpu references */
25247 +       tasklist = per_cpu(posix_timer_tasklist, cpu);
25249 +       /* check to see if we're already queued */
25250 +       if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
25251 +               get_task_struct(tsk);
25252 +               if (tasklist) {
25253 +                       tsk->posix_timer_list = tasklist;
25254 +               } else {
25255 +                       /*
25256 +                        * The list is terminated by a self-pointing
25257 +                        * task_struct
25258 +                        */
25259 +                       tsk->posix_timer_list = tsk;
25260 +               }
25261 +               per_cpu(posix_timer_tasklist, cpu) = tsk;
25263 +               wake_up_process(per_cpu(posix_timer_task, cpu));
25264 +       }
25268 + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
25269 + * Here we can start up the necessary posix timer thread for the new CPU.
25270 + */
25271 +static int posix_cpu_thread_call(struct notifier_block *nfb,
25272 +                                unsigned long action, void *hcpu)
25274 +       int cpu = (long)hcpu;
25275 +       struct task_struct *p;
25276 +       struct sched_param param;
25278 +       switch (action) {
25279 +       case CPU_UP_PREPARE:
25280 +               p = kthread_create(posix_cpu_timers_thread, hcpu,
25281 +                                       "posixcputmr/%d",cpu);
25282 +               if (IS_ERR(p))
25283 +                       return NOTIFY_BAD;
25284 +               p->flags |= PF_NOFREEZE;
25285 +               kthread_bind(p, cpu);
25286 +               /* Must be high prio to avoid getting starved */
25287 +               param.sched_priority = MAX_RT_PRIO-1;
25288 +               sched_setscheduler(p, SCHED_FIFO, &param);
25289 +               per_cpu(posix_timer_task,cpu) = p;
25290 +               break;
25291 +       case CPU_ONLINE:
25292 +               /* Strictly unnecessary, as first user will wake it. */
25293 +               wake_up_process(per_cpu(posix_timer_task,cpu));
25294 +               break;
25295 +#ifdef CONFIG_HOTPLUG_CPU
25296 +       case CPU_UP_CANCELED:
25297 +               /* Unbind it from offline cpu so it can run.  Fall thru. */
25298 +               kthread_bind(per_cpu(posix_timer_task, cpu),
25299 +                            cpumask_any(cpu_online_mask));
25300 +               kthread_stop(per_cpu(posix_timer_task,cpu));
25301 +               per_cpu(posix_timer_task,cpu) = NULL;
25302 +               break;
25303 +       case CPU_DEAD:
25304 +               kthread_stop(per_cpu(posix_timer_task,cpu));
25305 +               per_cpu(posix_timer_task,cpu) = NULL;
25306 +               break;
25307 +#endif
25308 +       }
25309 +       return NOTIFY_OK;
25312 +/* Register at highest priority so that task migration (migrate_all_tasks)
25313 + * happens before everything else.
25314 + */
25315 +static struct notifier_block posix_cpu_thread_notifier = {
25316 +       .notifier_call = posix_cpu_thread_call,
25317 +       .priority = 10
25320 +static int __init posix_cpu_thread_init(void)
25322 +       void *hcpu = (void *)(long)smp_processor_id();
25323 +       /* Start one for boot CPU. */
25324 +       unsigned long cpu;
25326 +       /* init the per-cpu posix_timer_tasklets */
25327 +       for_each_possible_cpu(cpu)
25328 +               per_cpu(posix_timer_tasklist, cpu) = NULL;
25330 +       posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
25331 +       posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
25332 +       register_cpu_notifier(&posix_cpu_thread_notifier);
25333 +       return 0;
25335 +early_initcall(posix_cpu_thread_init);
25336 +#else /* CONFIG_PREEMPT_RT_BASE */
25337 +void run_posix_cpu_timers(struct task_struct *tsk)
25339 +       __run_posix_cpu_timers(tsk);
25341 +#endif /* CONFIG_PREEMPT_RT_BASE */
25343  /*
25344   * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
25345   * The tsk->sighand->siglock must be held by the caller.
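
The per-CPU tasklist used by the RT code above is a singly linked list whose last element points at itself rather than at NULL, so producers can push at the head without a tail pointer and the consumer knows it has reached the end when next == node. A stand-alone sketch of just that convention (plain user-space C, invented names, no per-CPU or refcounting details):

#include <stdio.h>
#include <stdlib.h>

struct work_item {
        int id;
        struct work_item *next;         /* points at itself on the last element */
};

static struct work_item *pending;       /* plays the role of posix_timer_tasklist */

static void queue_item(struct work_item *item)
{
        /* Empty list: terminate by pointing the new item at itself. */
        item->next = pending ? pending : item;
        pending = item;
}

static void drain_items(void)
{
        struct work_item *item = pending;

        pending = NULL;                 /* grab the whole list in one go */
        while (item) {
                struct work_item *next = item->next;
                int last = (next == item);      /* self-pointer marks the end */

                printf("processing item %d\n", item->id);
                free(item);
                if (last)
                        break;
                item = next;
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct work_item *it = malloc(sizeof(*it));

                it->id = i;
                queue_item(it);
        }
        drain_items();
        return 0;
}

In the patch the producer runs with interrupts disabled and the consumer is the high-priority posixcputmr kthread, but the list discipline is exactly this.
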
25346 diff -Nur linux-4.4.46.orig/kernel/time/posix-timers.c linux-4.4.46/kernel/time/posix-timers.c
25347 --- linux-4.4.46.orig/kernel/time/posix-timers.c        2017-02-01 08:31:11.000000000 +0100
25348 +++ linux-4.4.46/kernel/time/posix-timers.c     2017-02-03 17:18:10.939619522 +0100
25349 @@ -506,6 +506,7 @@
25350  static struct pid *good_sigevent(sigevent_t * event)
25352         struct task_struct *rtn = current->group_leader;
25353 +       int sig = event->sigev_signo;
25355         if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
25356                 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
25357 @@ -514,7 +515,8 @@
25358                 return NULL;
25360         if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
25361 -           ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
25362 +           (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
25363 +            sig_kernel_coredump(sig)))
25364                 return NULL;
25366         return task_pid(rtn);
25367 @@ -826,6 +828,20 @@
25368         return overrun;
25372 + * Protected by RCU!
25373 + */
25374 +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
25376 +#ifdef CONFIG_PREEMPT_RT_FULL
25377 +       if (kc->timer_set == common_timer_set)
25378 +               hrtimer_wait_for_timer(&timr->it.real.timer);
25379 +       else
25380 +               /* FIXME: Whacky hack for posix-cpu-timers */
25381 +               schedule_timeout(1);
25382 +#endif
25385  /* Set a POSIX.1b interval timer. */
25386  /* timr->it_lock is taken. */
25387  static int
25388 @@ -903,6 +919,7 @@
25389         if (!timr)
25390                 return -EINVAL;
25392 +       rcu_read_lock();
25393         kc = clockid_to_kclock(timr->it_clock);
25394         if (WARN_ON_ONCE(!kc || !kc->timer_set))
25395                 error = -EINVAL;
25396 @@ -911,9 +928,12 @@
25398         unlock_timer(timr, flag);
25399         if (error == TIMER_RETRY) {
25400 +               timer_wait_for_callback(kc, timr);
25401                 rtn = NULL;     // We already got the old time...
25402 +               rcu_read_unlock();
25403                 goto retry;
25404         }
25405 +       rcu_read_unlock();
25407         if (old_setting && !error &&
25408             copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
25409 @@ -951,10 +971,15 @@
25410         if (!timer)
25411                 return -EINVAL;
25413 +       rcu_read_lock();
25414         if (timer_delete_hook(timer) == TIMER_RETRY) {
25415                 unlock_timer(timer, flags);
25416 +               timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
25417 +                                       timer);
25418 +               rcu_read_unlock();
25419                 goto retry_delete;
25420         }
25421 +       rcu_read_unlock();
25423         spin_lock(&current->sighand->siglock);
25424         list_del(&timer->list);
25425 @@ -980,8 +1005,18 @@
25426  retry_delete:
25427         spin_lock_irqsave(&timer->it_lock, flags);
25429 +       /* On RT we can race with a deletion */
25430 +       if (!timer->it_signal) {
25431 +               unlock_timer(timer, flags);
25432 +               return;
25433 +       }
25435         if (timer_delete_hook(timer) == TIMER_RETRY) {
25436 +               rcu_read_lock();
25437                 unlock_timer(timer, flags);
25438 +               timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
25439 +                                       timer);
25440 +               rcu_read_unlock();
25441                 goto retry_delete;
25442         }
25443         list_del(&timer->list);
25444 diff -Nur linux-4.4.46.orig/kernel/time/tick-broadcast-hrtimer.c linux-4.4.46/kernel/time/tick-broadcast-hrtimer.c
25445 --- linux-4.4.46.orig/kernel/time/tick-broadcast-hrtimer.c      2017-02-01 08:31:11.000000000 +0100
25446 +++ linux-4.4.46/kernel/time/tick-broadcast-hrtimer.c   2017-02-03 17:18:10.939619522 +0100
25447 @@ -106,5 +106,6 @@
25449         hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
25450         bctimer.function = bc_handler;
25451 +       bctimer.irqsafe = true;
25452         clockevents_register_device(&ce_broadcast_hrtimer);
25454 diff -Nur linux-4.4.46.orig/kernel/time/tick-common.c linux-4.4.46/kernel/time/tick-common.c
25455 --- linux-4.4.46.orig/kernel/time/tick-common.c 2017-02-01 08:31:11.000000000 +0100
25456 +++ linux-4.4.46/kernel/time/tick-common.c      2017-02-03 17:18:10.939619522 +0100
25457 @@ -79,13 +79,15 @@
25458  static void tick_periodic(int cpu)
25460         if (tick_do_timer_cpu == cpu) {
25461 -               write_seqlock(&jiffies_lock);
25462 +               raw_spin_lock(&jiffies_lock);
25463 +               write_seqcount_begin(&jiffies_seq);
25465                 /* Keep track of the next tick event */
25466                 tick_next_period = ktime_add(tick_next_period, tick_period);
25468                 do_timer(1);
25469 -               write_sequnlock(&jiffies_lock);
25470 +               write_seqcount_end(&jiffies_seq);
25471 +               raw_spin_unlock(&jiffies_lock);
25472                 update_wall_time();
25473         }
25475 @@ -157,9 +159,9 @@
25476                 ktime_t next;
25478                 do {
25479 -                       seq = read_seqbegin(&jiffies_lock);
25480 +                       seq = read_seqcount_begin(&jiffies_seq);
25481                         next = tick_next_period;
25482 -               } while (read_seqretry(&jiffies_lock, seq));
25483 +               } while (read_seqcount_retry(&jiffies_seq, seq));
25485                 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
25487 diff -Nur linux-4.4.46.orig/kernel/time/tick-sched.c linux-4.4.46/kernel/time/tick-sched.c
25488 --- linux-4.4.46.orig/kernel/time/tick-sched.c  2017-02-01 08:31:11.000000000 +0100
25489 +++ linux-4.4.46/kernel/time/tick-sched.c       2017-02-03 17:18:10.939619522 +0100
25490 @@ -62,7 +62,8 @@
25491                 return;
25493         /* Reevalute with jiffies_lock held */
25494 -       write_seqlock(&jiffies_lock);
25495 +       raw_spin_lock(&jiffies_lock);
25496 +       write_seqcount_begin(&jiffies_seq);
25498         delta = ktime_sub(now, last_jiffies_update);
25499         if (delta.tv64 >= tick_period.tv64) {
25500 @@ -85,10 +86,12 @@
25501                 /* Keep the tick_next_period variable up to date */
25502                 tick_next_period = ktime_add(last_jiffies_update, tick_period);
25503         } else {
25504 -               write_sequnlock(&jiffies_lock);
25505 +               write_seqcount_end(&jiffies_seq);
25506 +               raw_spin_unlock(&jiffies_lock);
25507                 return;
25508         }
25509 -       write_sequnlock(&jiffies_lock);
25510 +       write_seqcount_end(&jiffies_seq);
25511 +       raw_spin_unlock(&jiffies_lock);
25512         update_wall_time();
25515 @@ -99,12 +102,14 @@
25517         ktime_t period;
25519 -       write_seqlock(&jiffies_lock);
25520 +       raw_spin_lock(&jiffies_lock);
25521 +       write_seqcount_begin(&jiffies_seq);
25522         /* Did we start the jiffies update yet ? */
25523         if (last_jiffies_update.tv64 == 0)
25524                 last_jiffies_update = tick_next_period;
25525         period = last_jiffies_update;
25526 -       write_sequnlock(&jiffies_lock);
25527 +       write_seqcount_end(&jiffies_seq);
25528 +       raw_spin_unlock(&jiffies_lock);
25529         return period;
25532 @@ -176,6 +181,11 @@
25533                 return false;
25534         }
25536 +       if (!arch_irq_work_has_interrupt()) {
25537 +               trace_tick_stop(0, "missing irq work interrupt\n");
25538 +               return false;
25539 +       }
25541         /* sched_clock_tick() needs us? */
25542  #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
25543         /*
25544 @@ -204,6 +214,7 @@
25546  static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
25547         .func = nohz_full_kick_work_func,
25548 +       .flags = IRQ_WORK_HARD_IRQ,
25549  };
25551  /*
25552 @@ -578,10 +589,10 @@
25554         /* Read jiffies and the time when jiffies were updated last */
25555         do {
25556 -               seq = read_seqbegin(&jiffies_lock);
25557 +               seq = read_seqcount_begin(&jiffies_seq);
25558                 basemono = last_jiffies_update.tv64;
25559                 basejiff = jiffies;
25560 -       } while (read_seqretry(&jiffies_lock, seq));
25561 +       } while (read_seqcount_retry(&jiffies_seq, seq));
25562         ts->last_jiffies = basejiff;
25564         if (rcu_needs_cpu(basemono, &next_rcu) ||
25565 @@ -753,14 +764,7 @@
25566                 return false;
25568         if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
25569 -               static int ratelimit;
25571 -               if (ratelimit < 10 &&
25572 -                   (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
25573 -                       pr_warn("NOHZ: local_softirq_pending %02x\n",
25574 -                               (unsigned int) local_softirq_pending());
25575 -                       ratelimit++;
25576 -               }
25577 +               softirq_check_pending_idle();
25578                 return false;
25579         }
25581 @@ -1100,6 +1104,7 @@
25582          * Emulate tick processing via per-CPU hrtimers:
25583          */
25584         hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
25585 +       ts->sched_timer.irqsafe = 1;
25586         ts->sched_timer.function = tick_sched_timer;
25588         /* Get the next period (per cpu) */
25589 diff -Nur linux-4.4.46.orig/kernel/time/timekeeping.c linux-4.4.46/kernel/time/timekeeping.c
25590 --- linux-4.4.46.orig/kernel/time/timekeeping.c 2017-02-01 08:31:11.000000000 +0100
25591 +++ linux-4.4.46/kernel/time/timekeeping.c      2017-02-03 17:18:10.939619522 +0100
25592 @@ -2070,8 +2070,10 @@
25593   */
25594  void xtime_update(unsigned long ticks)
25596 -       write_seqlock(&jiffies_lock);
25597 +       raw_spin_lock(&jiffies_lock);
25598 +       write_seqcount_begin(&jiffies_seq);
25599         do_timer(ticks);
25600 -       write_sequnlock(&jiffies_lock);
25601 +       write_seqcount_end(&jiffies_seq);
25602 +       raw_spin_unlock(&jiffies_lock);
25603         update_wall_time();
25605 diff -Nur linux-4.4.46.orig/kernel/time/timekeeping.h linux-4.4.46/kernel/time/timekeeping.h
25606 --- linux-4.4.46.orig/kernel/time/timekeeping.h 2017-02-01 08:31:11.000000000 +0100
25607 +++ linux-4.4.46/kernel/time/timekeeping.h      2017-02-03 17:18:10.939619522 +0100
25608 @@ -19,7 +19,8 @@
25609  extern void do_timer(unsigned long ticks);
25610  extern void update_wall_time(void);
25612 -extern seqlock_t jiffies_lock;
25613 +extern raw_spinlock_t jiffies_lock;
25614 +extern seqcount_t jiffies_seq;
25616  #define CS_NAME_LEN    32
25618 diff -Nur linux-4.4.46.orig/kernel/time/timer.c linux-4.4.46/kernel/time/timer.c
25619 --- linux-4.4.46.orig/kernel/time/timer.c       2017-02-01 08:31:11.000000000 +0100
25620 +++ linux-4.4.46/kernel/time/timer.c    2017-02-03 17:18:10.939619522 +0100
25621 @@ -80,6 +80,9 @@
25622  struct tvec_base {
25623         spinlock_t lock;
25624         struct timer_list *running_timer;
25625 +#ifdef CONFIG_PREEMPT_RT_FULL
25626 +       wait_queue_head_t wait_for_running_timer;
25627 +#endif
25628         unsigned long timer_jiffies;
25629         unsigned long next_timer;
25630         unsigned long active_timers;
25631 @@ -777,6 +780,39 @@
25632                 cpu_relax();
25633         }
25635 +#ifdef CONFIG_PREEMPT_RT_FULL
25636 +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
25637 +                                                 struct tvec_base *old,
25638 +                                                 struct tvec_base *new)
25640 +       /*
25641 +        * We cannot do the below because we might be preempted and
25642 +        * then the preempter would see NULL and loop forever.
25643 +        */
25644 +       if (spin_trylock(&new->lock)) {
25645 +               WRITE_ONCE(timer->flags,
25646 +                          (timer->flags & ~TIMER_BASEMASK) | new->cpu);
25647 +               spin_unlock(&old->lock);
25648 +               return new;
25649 +       }
25650 +       return old;
25653 +#else
25654 +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
25655 +                                                 struct tvec_base *old,
25656 +                                                 struct tvec_base *new)
25658 +       /* See the comment in lock_timer_base() */
25659 +       timer->flags |= TIMER_MIGRATING;
25661 +       spin_unlock(&old->lock);
25662 +       spin_lock(&new->lock);
25663 +       WRITE_ONCE(timer->flags,
25664 +                  (timer->flags & ~TIMER_BASEMASK) | new->cpu);
25665 +       return new;
25667 +#endif
25669  static inline int
25670  __mod_timer(struct timer_list *timer, unsigned long expires,
25671 @@ -807,16 +843,8 @@
25672                  * handler yet has not finished. This also guarantees that
25673                  * the timer is serialized wrt itself.
25674                  */
25675 -               if (likely(base->running_timer != timer)) {
25676 -                       /* See the comment in lock_timer_base() */
25677 -                       timer->flags |= TIMER_MIGRATING;
25679 -                       spin_unlock(&base->lock);
25680 -                       base = new_base;
25681 -                       spin_lock(&base->lock);
25682 -                       WRITE_ONCE(timer->flags,
25683 -                                  (timer->flags & ~TIMER_BASEMASK) | base->cpu);
25684 -               }
25685 +               if (likely(base->running_timer != timer))
25686 +                       base = switch_timer_base(timer, base, new_base);
25687         }
25689         timer->expires = expires;
25690 @@ -1006,6 +1034,33 @@
25692  EXPORT_SYMBOL_GPL(add_timer_on);
25694 +#ifdef CONFIG_PREEMPT_RT_FULL
25696 + * Wait for a running timer
25697 + */
25698 +static void wait_for_running_timer(struct timer_list *timer)
25700 +       struct tvec_base *base;
25701 +       u32 tf = timer->flags;
25703 +       if (tf & TIMER_MIGRATING)
25704 +               return;
25706 +       base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
25707 +       wait_event(base->wait_for_running_timer,
25708 +                  base->running_timer != timer);
25711 +# define wakeup_timer_waiters(b)       wake_up_all(&(b)->wait_for_running_timer)
25712 +#else
25713 +static inline void wait_for_running_timer(struct timer_list *timer)
25715 +       cpu_relax();
25718 +# define wakeup_timer_waiters(b)       do { } while (0)
25719 +#endif
25721  /**
25722   * del_timer - deactive a timer.
25723   * @timer: the timer to be deactivated
25724 @@ -1063,7 +1118,7 @@
25726  EXPORT_SYMBOL(try_to_del_timer_sync);
25728 -#ifdef CONFIG_SMP
25729 +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
25730  /**
25731   * del_timer_sync - deactivate a timer and wait for the handler to finish.
25732   * @timer: the timer to be deactivated
25733 @@ -1123,7 +1178,7 @@
25734                 int ret = try_to_del_timer_sync(timer);
25735                 if (ret >= 0)
25736                         return ret;
25737 -               cpu_relax();
25738 +               wait_for_running_timer(timer);
25739         }
25741  EXPORT_SYMBOL(del_timer_sync);
25742 @@ -1248,16 +1303,18 @@
25743                         if (irqsafe) {
25744                                 spin_unlock(&base->lock);
25745                                 call_timer_fn(timer, fn, data);
25746 +                               base->running_timer = NULL;
25747                                 spin_lock(&base->lock);
25748                         } else {
25749                                 spin_unlock_irq(&base->lock);
25750                                 call_timer_fn(timer, fn, data);
25751 +                               base->running_timer = NULL;
25752                                 spin_lock_irq(&base->lock);
25753                         }
25754                 }
25755         }
25756 -       base->running_timer = NULL;
25757         spin_unlock_irq(&base->lock);
25758 +       wakeup_timer_waiters(base);
25761  #ifdef CONFIG_NO_HZ_COMMON
25762 @@ -1390,6 +1447,14 @@
25763         if (cpu_is_offline(smp_processor_id()))
25764                 return expires;
25766 +#ifdef CONFIG_PREEMPT_RT_FULL
25767 +       /*
25768 +        * On PREEMPT_RT we cannot sleep here. As a result we can't take
25769 +        * the base lock to check when the next timer is pending and so
25770 +        * we assume the next jiffy.
25771 +        */
25772 +       return basem + TICK_NSEC;
25773 +#endif
25774         spin_lock(&base->lock);
25775         if (base->active_timers) {
25776                 if (time_before_eq(base->next_timer, base->timer_jiffies))
25777 @@ -1416,13 +1481,13 @@
25779         /* Note: this timer irq context must be accounted for as well. */
25780         account_process_tick(p, user_tick);
25781 +       scheduler_tick();
25782         run_local_timers();
25783         rcu_check_callbacks(user_tick);
25784 -#ifdef CONFIG_IRQ_WORK
25785 +#if defined(CONFIG_IRQ_WORK)
25786         if (in_irq())
25787                 irq_work_tick();
25788  #endif
25789 -       scheduler_tick();
25790         run_posix_cpu_timers(p);
25793 @@ -1433,6 +1498,8 @@
25795         struct tvec_base *base = this_cpu_ptr(&tvec_bases);
25797 +       irq_work_tick_soft();
25799         if (time_after_eq(jiffies, base->timer_jiffies))
25800                 __run_timers(base);
25802 @@ -1589,7 +1656,7 @@
25804         BUG_ON(cpu_online(cpu));
25805         old_base = per_cpu_ptr(&tvec_bases, cpu);
25806 -       new_base = get_cpu_ptr(&tvec_bases);
25807 +       new_base = get_local_ptr(&tvec_bases);
25808         /*
25809          * The caller is globally serialized and nobody else
25810          * takes two locks at once, deadlock is not possible.
25811 @@ -1613,7 +1680,7 @@
25813         spin_unlock(&old_base->lock);
25814         spin_unlock_irq(&new_base->lock);
25815 -       put_cpu_ptr(&tvec_bases);
25816 +       put_local_ptr(&tvec_bases);
25819  static int timer_cpu_notify(struct notifier_block *self,
25820 @@ -1645,6 +1712,9 @@
25822         base->cpu = cpu;
25823         spin_lock_init(&base->lock);
25824 +#ifdef CONFIG_PREEMPT_RT_FULL
25825 +       init_waitqueue_head(&base->wait_for_running_timer);
25826 +#endif
25828         base->timer_jiffies = jiffies;
25829         base->next_timer = base->timer_jiffies;
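
Two RT changes in the timer.c diff above share one idea. First, switch_timer_base() only trylocks the new base and keeps the timer on the old one if that fails, instead of marking it TIMER_MIGRATING and blocking, because a preempted migration would leave other tasks spinning on the intermediate state. Second, del_timer_sync() waits on a per-base waitqueue instead of busy-looping with cpu_relax(). A minimal two-mutex sketch of the trylock fallback (invented names, user-space pthreads):

#include <pthread.h>
#include <stdio.h>

struct timer_base {
        pthread_mutex_t lock;
        int cpu;
};

struct simple_timer {
        int base_cpu;                   /* which base currently owns the timer */
};

/* Caller holds old->lock; returns the base that ends up owning the timer,
 * whose lock the caller holds on return. */
static struct timer_base *switch_base(struct simple_timer *t,
                                      struct timer_base *old,
                                      struct timer_base *new)
{
        if (pthread_mutex_trylock(&new->lock) == 0) {
                t->base_cpu = new->cpu;         /* migrate under both locks */
                pthread_mutex_unlock(&old->lock);
                return new;
        }
        return old;                     /* contended: leave the timer where it is */
}

int main(void)
{
        struct timer_base base0 = { .cpu = 0 };
        struct timer_base base1 = { .cpu = 1 };
        struct simple_timer t = { .base_cpu = 0 };
        struct timer_base *owner;

        pthread_mutex_init(&base0.lock, NULL);
        pthread_mutex_init(&base1.lock, NULL);

        pthread_mutex_lock(&base0.lock);
        owner = switch_base(&t, &base0, &base1);
        printf("timer now lives on base %d\n", owner->cpu);
        pthread_mutex_unlock(&owner->lock);
        return 0;
}

Staying on the old base when the trylock fails is harmless: the timer still fires, just on a less ideal CPU, and a later re-arm can try to migrate it again.
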
25830 diff -Nur linux-4.4.46.orig/kernel/trace/Kconfig linux-4.4.46/kernel/trace/Kconfig
25831 --- linux-4.4.46.orig/kernel/trace/Kconfig      2017-02-01 08:31:11.000000000 +0100
25832 +++ linux-4.4.46/kernel/trace/Kconfig   2017-02-03 17:18:10.939619522 +0100
25833 @@ -187,6 +187,24 @@
25834           enabled. This option and the preempt-off timing option can be
25835           used together or separately.)
25837 +config INTERRUPT_OFF_HIST
25838 +       bool "Interrupts-off Latency Histogram"
25839 +       depends on IRQSOFF_TRACER
25840 +       help
25841 +         This option generates continuously updated histograms (one per cpu)
25842 +         of the duration of time periods with interrupts disabled. The
25843 +         histograms are disabled by default. To enable them, write a non-zero
25844 +         number to
25846 +             /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
25848 +         If PREEMPT_OFF_HIST is also selected, additional histograms (one
25849 +         per cpu) are generated that accumulate the duration of time periods
25850 +         when both interrupts and preemption are disabled. The histogram data
25851 +         will be located in the debug file system at
25853 +             /sys/kernel/debug/tracing/latency_hist/irqsoff
25855  config PREEMPT_TRACER
25856         bool "Preemption-off Latency Tracer"
25857         default n
25858 @@ -211,6 +229,24 @@
25859           enabled. This option and the irqs-off timing option can be
25860           used together or separately.)
25862 +config PREEMPT_OFF_HIST
25863 +       bool "Preemption-off Latency Histogram"
25864 +       depends on PREEMPT_TRACER
25865 +       help
25866 +         This option generates continuously updated histograms (one per cpu)
25867 +         of the duration of time periods with preemption disabled. The
25868 +         histograms are disabled by default. To enable them, write a non-zero
25869 +         number to
25871 +             /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
25873 +         If INTERRUPT_OFF_HIST is also selected, additional histograms (one
25874 +         per cpu) are generated that accumulate the duration of time periods
25875 +         when both interrupts and preemption are disabled. The histogram data
25876 +         will be located in the debug file system at
25878 +             /sys/kernel/debug/tracing/latency_hist/preemptoff
25880  config SCHED_TRACER
25881         bool "Scheduling Latency Tracer"
25882         select GENERIC_TRACER
25883 @@ -221,6 +257,74 @@
25884           This tracer tracks the latency of the highest priority task
25885           to be scheduled in, starting from the point it has woken up.
25887 +config WAKEUP_LATENCY_HIST
25888 +       bool "Scheduling Latency Histogram"
25889 +       depends on SCHED_TRACER
25890 +       help
25891 +         This option generates continuously updated histograms (one per cpu)
25892 +         of the scheduling latency of the highest priority task.
25893 +         The histograms are disabled by default. To enable them, write a
25894 +         non-zero number to
25896 +             /sys/kernel/debug/tracing/latency_hist/enable/wakeup
25898 +         Two different algorithms are used, one to determine the latency of
25899 +         processes that exclusively use the highest priority of the system and
25900 +         another one to determine the latency of processes that share the
25901 +         highest system priority with other processes. The former is used to
25902 +         improve hardware and system software, the latter to optimize the
25903 +         priority design of a given system. The histogram data will be
25904 +         located in the debug file system at
25906 +             /sys/kernel/debug/tracing/latency_hist/wakeup
25908 +         and
25910 +             /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
25912 +         If both Scheduling Latency Histogram and Missed Timer Offsets
25913 +         Histogram are selected, additional histogram data will be collected
25914 +         that contain, in addition to the wakeup latency, the timer latency, in
25915 +         case the wakeup was triggered by an expired timer. These histograms
25916 +         are available in the
25918 +             /sys/kernel/debug/tracing/latency_hist/timerandwakeup
25920 +         directory. They reflect the apparent interrupt and scheduling latency
25921 +         and are best suited to determining the worst-case latency of a given
25922 +         system. To enable these histograms, write a non-zero number to
25924 +             /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
25926 +config MISSED_TIMER_OFFSETS_HIST
25927 +       depends on HIGH_RES_TIMERS
25928 +       select GENERIC_TRACER
25929 +       bool "Missed Timer Offsets Histogram"
25930 +       help
25931 +         Generate a histogram of missed timer offsets in microseconds. The
25932 +         histograms are disabled by default. To enable them, write a non-zero
25933 +         number to
25935 +             /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
25937 +         The histogram data will be located in the debug file system at
25939 +             /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
25941 +         If both Scheduling Latency Histogram and Missed Timer Offsets
25942 +         Histogram are selected, additional histogram data will be collected
25943 +         that contain, in addition to the wakeup latency, the timer latency, in
25944 +         case the wakeup was triggered by an expired timer. These histograms
25945 +         are available in the
25947 +             /sys/kernel/debug/tracing/latency_hist/timerandwakeup
25949 +         directory. They reflect the apparent interrupt and scheduling latency
25950 +         and are best suited to determining the worst-case latency of a given
25951 +         system. To enable these histograms, write a non-zero number to
25953 +             /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
25955  config ENABLE_DEFAULT_TRACERS
25956         bool "Trace process context switches and events"
25957         depends on !GENERIC_TRACER
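
All four help texts above share the same activation mechanism: the histograms stay off until a non-zero number is written to the matching file under /sys/kernel/debug/tracing/latency_hist/enable/. A minimal C equivalent of writing "1" into the wakeup enable file, assuming the kernel was built with these options and debugfs is mounted at the usual location (normally requires root):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *path =
                "/sys/kernel/debug/tracing/latency_hist/enable/wakeup";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror(path);           /* not mounted, not root, or option off */
                return 1;
        }
        if (write(fd, "1", 1) != 1) {
                perror("write");
                close(fd);
                return 1;
        }
        close(fd);
        puts("wakeup latency histogram enabled");
        return 0;
}

The accumulated histograms can then be read back from the per-latency-type files named in the help texts, e.g. .../latency_hist/wakeup.
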
25958 diff -Nur linux-4.4.46.orig/kernel/trace/latency_hist.c linux-4.4.46/kernel/trace/latency_hist.c
25959 --- linux-4.4.46.orig/kernel/trace/latency_hist.c       1970-01-01 01:00:00.000000000 +0100
25960 +++ linux-4.4.46/kernel/trace/latency_hist.c    2017-02-03 17:18:10.939619522 +0100
25961 @@ -0,0 +1,1178 @@
25963 + * kernel/trace/latency_hist.c
25964 + *
25965 + * Add support for histograms of preemption-off latency and
25966 + * interrupt-off latency and wakeup latency; it depends on
25967 + * Real-Time Preemption Support.
25968 + *
25969 + *  Copyright (C) 2005 MontaVista Software, Inc.
25970 + *  Yi Yang <yyang@ch.mvista.com>
25971 + *
25972 + *  Converted to work with the new latency tracer.
25973 + *  Copyright (C) 2008 Red Hat, Inc.
25974 + *    Steven Rostedt <srostedt@redhat.com>
25975 + *
25976 + */
25977 +#include <linux/module.h>
25978 +#include <linux/debugfs.h>
25979 +#include <linux/seq_file.h>
25980 +#include <linux/percpu.h>
25981 +#include <linux/kallsyms.h>
25982 +#include <linux/uaccess.h>
25983 +#include <linux/sched.h>
25984 +#include <linux/sched/rt.h>
25985 +#include <linux/slab.h>
25986 +#include <linux/atomic.h>
25987 +#include <asm/div64.h>
25989 +#include "trace.h"
25990 +#include <trace/events/sched.h>
25992 +#define NSECS_PER_USECS 1000L
25994 +#define CREATE_TRACE_POINTS
25995 +#include <trace/events/hist.h>
25997 +enum {
25998 +       IRQSOFF_LATENCY = 0,
25999 +       PREEMPTOFF_LATENCY,
26000 +       PREEMPTIRQSOFF_LATENCY,
26001 +       WAKEUP_LATENCY,
26002 +       WAKEUP_LATENCY_SHAREDPRIO,
26003 +       MISSED_TIMER_OFFSETS,
26004 +       TIMERANDWAKEUP_LATENCY,
26005 +       MAX_LATENCY_TYPE,
26008 +#define MAX_ENTRY_NUM 10240
26010 +struct hist_data {
26011 +       atomic_t hist_mode; /* 0 log, 1 don't log */
26012 +       long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
26013 +       long min_lat;
26014 +       long max_lat;
26015 +       unsigned long long below_hist_bound_samples;
26016 +       unsigned long long above_hist_bound_samples;
26017 +       long long accumulate_lat;
26018 +       unsigned long long total_samples;
26019 +       unsigned long long hist_array[MAX_ENTRY_NUM];
26022 +struct enable_data {
26023 +       int latency_type;
26024 +       int enabled;
26027 +static char *latency_hist_dir_root = "latency_hist";
26029 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26030 +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
26031 +static char *irqsoff_hist_dir = "irqsoff";
26032 +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
26033 +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
26034 +#endif
26036 +#ifdef CONFIG_PREEMPT_OFF_HIST
26037 +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
26038 +static char *preemptoff_hist_dir = "preemptoff";
26039 +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
26040 +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
26041 +#endif
26043 +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
26044 +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
26045 +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
26046 +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
26047 +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
26048 +#endif
26050 +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
26051 +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
26052 +static struct enable_data preemptirqsoff_enabled_data = {
26053 +       .latency_type = PREEMPTIRQSOFF_LATENCY,
26054 +       .enabled = 0,
26056 +#endif
26058 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26059 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26060 +struct maxlatproc_data {
26061 +       char comm[FIELD_SIZEOF(struct task_struct, comm)];
26062 +       char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
26063 +       int pid;
26064 +       int current_pid;
26065 +       int prio;
26066 +       int current_prio;
26067 +       long latency;
26068 +       long timeroffset;
26069 +       cycle_t timestamp;
26071 +#endif
26073 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26074 +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
26075 +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
26076 +static char *wakeup_latency_hist_dir = "wakeup";
26077 +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
26078 +static notrace void probe_wakeup_latency_hist_start(void *v,
26079 +       struct task_struct *p);
26080 +static notrace void probe_wakeup_latency_hist_stop(void *v,
26081 +       bool preempt, struct task_struct *prev, struct task_struct *next);
26082 +static notrace void probe_sched_migrate_task(void *,
26083 +       struct task_struct *task, int cpu);
26084 +static struct enable_data wakeup_latency_enabled_data = {
26085 +       .latency_type = WAKEUP_LATENCY,
26086 +       .enabled = 0,
26088 +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
26089 +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
26090 +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
26091 +static DEFINE_PER_CPU(int, wakeup_sharedprio);
26092 +static unsigned long wakeup_pid;
26093 +#endif
26095 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26096 +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
26097 +static char *missed_timer_offsets_dir = "missed_timer_offsets";
26098 +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
26099 +       long long offset, struct task_struct *curr, struct task_struct *task);
26100 +static struct enable_data missed_timer_offsets_enabled_data = {
26101 +       .latency_type = MISSED_TIMER_OFFSETS,
26102 +       .enabled = 0,
26104 +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
26105 +static unsigned long missed_timer_offsets_pid;
26106 +#endif
26108 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26109 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26110 +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
26111 +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
26112 +static struct enable_data timerandwakeup_enabled_data = {
26113 +       .latency_type = TIMERANDWAKEUP_LATENCY,
26114 +       .enabled = 0,
26116 +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
26117 +#endif
26119 +void notrace latency_hist(int latency_type, int cpu, long latency,
26120 +                         long timeroffset, cycle_t stop,
26121 +                         struct task_struct *p)
26123 +       struct hist_data *my_hist;
26124 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26125 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26126 +       struct maxlatproc_data *mp = NULL;
26127 +#endif
26129 +       if (!cpu_possible(cpu) || latency_type < 0 ||
26130 +           latency_type >= MAX_LATENCY_TYPE)
26131 +               return;
26133 +       switch (latency_type) {
26134 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26135 +       case IRQSOFF_LATENCY:
26136 +               my_hist = &per_cpu(irqsoff_hist, cpu);
26137 +               break;
26138 +#endif
26139 +#ifdef CONFIG_PREEMPT_OFF_HIST
26140 +       case PREEMPTOFF_LATENCY:
26141 +               my_hist = &per_cpu(preemptoff_hist, cpu);
26142 +               break;
26143 +#endif
26144 +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
26145 +       case PREEMPTIRQSOFF_LATENCY:
26146 +               my_hist = &per_cpu(preemptirqsoff_hist, cpu);
26147 +               break;
26148 +#endif
26149 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26150 +       case WAKEUP_LATENCY:
26151 +               my_hist = &per_cpu(wakeup_latency_hist, cpu);
26152 +               mp = &per_cpu(wakeup_maxlatproc, cpu);
26153 +               break;
26154 +       case WAKEUP_LATENCY_SHAREDPRIO:
26155 +               my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
26156 +               mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
26157 +               break;
26158 +#endif
26159 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26160 +       case MISSED_TIMER_OFFSETS:
26161 +               my_hist = &per_cpu(missed_timer_offsets, cpu);
26162 +               mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
26163 +               break;
26164 +#endif
26165 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26166 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26167 +       case TIMERANDWAKEUP_LATENCY:
26168 +               my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
26169 +               mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
26170 +               break;
26171 +#endif
26173 +       default:
26174 +               return;
26175 +       }
26177 +       latency += my_hist->offset;
26179 +       if (atomic_read(&my_hist->hist_mode) == 0)
26180 +               return;
26182 +       if (latency < 0 || latency >= MAX_ENTRY_NUM) {
26183 +               if (latency < 0)
26184 +                       my_hist->below_hist_bound_samples++;
26185 +               else
26186 +                       my_hist->above_hist_bound_samples++;
26187 +       } else
26188 +               my_hist->hist_array[latency]++;
26190 +       if (unlikely(latency > my_hist->max_lat ||
26191 +           my_hist->min_lat == LONG_MAX)) {
26192 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26193 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26194 +               if (latency_type == WAKEUP_LATENCY ||
26195 +                   latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
26196 +                   latency_type == MISSED_TIMER_OFFSETS ||
26197 +                   latency_type == TIMERANDWAKEUP_LATENCY) {
26198 +                       strncpy(mp->comm, p->comm, sizeof(mp->comm));
26199 +                       strncpy(mp->current_comm, current->comm,
26200 +                           sizeof(mp->current_comm));
26201 +                       mp->pid = task_pid_nr(p);
26202 +                       mp->current_pid = task_pid_nr(current);
26203 +                       mp->prio = p->prio;
26204 +                       mp->current_prio = current->prio;
26205 +                       mp->latency = latency;
26206 +                       mp->timeroffset = timeroffset;
26207 +                       mp->timestamp = stop;
26208 +               }
26209 +#endif
26210 +               my_hist->max_lat = latency;
26211 +       }
26212 +       if (unlikely(latency < my_hist->min_lat))
26213 +               my_hist->min_lat = latency;
26214 +       my_hist->total_samples++;
26215 +       my_hist->accumulate_lat += latency;
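
latency_hist() above implements a fixed-size histogram: the sample is shifted by a per-histogram offset (MAX_ENTRY_NUM/2 for a bipolar scale), out-of-range samples only bump the below/above counters, and min, max and a running sum for the average are kept beside the per-bin counts. The same bookkeeping, stripped of the per-CPU and tracing plumbing (invented names, user-space C):

#include <limits.h>
#include <stdio.h>

#define NUM_BINS 32

struct mini_hist {
        long offset;                    /* NUM_BINS/2 gives a bipolar scale */
        long min_lat, max_lat;
        long long accumulated;
        unsigned long long below, above, total;
        unsigned long long bins[NUM_BINS];
};

static void hist_record(struct mini_hist *h, long latency)
{
        long idx = latency + h->offset;

        if (idx < 0)
                h->below++;             /* too small to fit the array */
        else if (idx >= NUM_BINS)
                h->above++;             /* too large to fit the array */
        else
                h->bins[idx]++;

        if (latency > h->max_lat || h->min_lat == LONG_MAX)
                h->max_lat = latency;
        if (latency < h->min_lat)
                h->min_lat = latency;
        h->total++;
        h->accumulated += latency;
}

int main(void)
{
        struct mini_hist h = { .offset = NUM_BINS / 2,
                               .min_lat = LONG_MAX, .max_lat = LONG_MIN };
        long samples[] = { -3, 0, 4, 4, 7, 100 };       /* 100 falls above range */

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                hist_record(&h, samples[i]);

        printf("min=%ld max=%ld avg=%lld total=%llu above=%llu below=%llu\n",
               h.min_lat, h.max_lat,
               h.accumulated / (long long)h.total, h.total, h.above, h.below);
        return 0;
}
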
26218 +static void *l_start(struct seq_file *m, loff_t *pos)
26220 +       loff_t *index_ptr = NULL;
26221 +       loff_t index = *pos;
26222 +       struct hist_data *my_hist = m->private;
26224 +       if (index == 0) {
26225 +               char minstr[32], avgstr[32], maxstr[32];
26227 +               atomic_dec(&my_hist->hist_mode);
26229 +               if (likely(my_hist->total_samples)) {
26230 +                       long avg = (long) div64_s64(my_hist->accumulate_lat,
26231 +                           my_hist->total_samples);
26232 +                       snprintf(minstr, sizeof(minstr), "%ld",
26233 +                           my_hist->min_lat - my_hist->offset);
26234 +                       snprintf(avgstr, sizeof(avgstr), "%ld",
26235 +                           avg - my_hist->offset);
26236 +                       snprintf(maxstr, sizeof(maxstr), "%ld",
26237 +                           my_hist->max_lat - my_hist->offset);
26238 +               } else {
26239 +                       strcpy(minstr, "<undef>");
26240 +                       strcpy(avgstr, minstr);
26241 +                       strcpy(maxstr, minstr);
26242 +               }
26244 +               seq_printf(m, "#Minimum latency: %s microseconds\n"
26245 +                          "#Average latency: %s microseconds\n"
26246 +                          "#Maximum latency: %s microseconds\n"
26247 +                          "#Total samples: %llu\n"
26248 +                          "#There are %llu samples lower than %ld"
26249 +                          " microseconds.\n"
26250 +                          "#There are %llu samples greater or equal"
26251 +                          " than %ld microseconds.\n"
26252 +                          "#usecs\t%16s\n",
26253 +                          minstr, avgstr, maxstr,
26254 +                          my_hist->total_samples,
26255 +                          my_hist->below_hist_bound_samples,
26256 +                          -my_hist->offset,
26257 +                          my_hist->above_hist_bound_samples,
26258 +                          MAX_ENTRY_NUM - my_hist->offset,
26259 +                          "samples");
26260 +       }
26261 +       if (index < MAX_ENTRY_NUM) {
26262 +               index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
26263 +               if (index_ptr)
26264 +                       *index_ptr = index;
26265 +       }
26267 +       return index_ptr;
26270 +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
26272 +       loff_t *index_ptr = p;
26273 +       struct hist_data *my_hist = m->private;
26275 +       if (++*pos >= MAX_ENTRY_NUM) {
26276 +               atomic_inc(&my_hist->hist_mode);
26277 +               return NULL;
26278 +       }
26279 +       *index_ptr = *pos;
26280 +       return index_ptr;
26283 +static void l_stop(struct seq_file *m, void *p)
26285 +       kfree(p);
26288 +static int l_show(struct seq_file *m, void *p)
26290 +       int index = *(loff_t *) p;
26291 +       struct hist_data *my_hist = m->private;
26293 +       seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
26294 +           my_hist->hist_array[index]);
26295 +       return 0;
26298 +static const struct seq_operations latency_hist_seq_op = {
26299 +       .start = l_start,
26300 +       .next  = l_next,
26301 +       .stop  = l_stop,
26302 +       .show  = l_show
26305 +static int latency_hist_open(struct inode *inode, struct file *file)
26307 +       int ret;
26309 +       ret = seq_open(file, &latency_hist_seq_op);
26310 +       if (!ret) {
26311 +               struct seq_file *seq = file->private_data;
26312 +               seq->private = inode->i_private;
26313 +       }
26314 +       return ret;
26317 +static const struct file_operations latency_hist_fops = {
26318 +       .open = latency_hist_open,
26319 +       .read = seq_read,
26320 +       .llseek = seq_lseek,
26321 +       .release = seq_release,
26324 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26325 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26326 +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
26328 +       mp->comm[0] = mp->current_comm[0] = '\0';
26329 +       mp->prio = mp->current_prio = mp->pid = mp->current_pid =
26330 +           mp->latency = mp->timeroffset = -1;
26331 +       mp->timestamp = 0;
26333 +#endif
26335 +static void hist_reset(struct hist_data *hist)
26337 +       atomic_dec(&hist->hist_mode);
26339 +       memset(hist->hist_array, 0, sizeof(hist->hist_array));
26340 +       hist->below_hist_bound_samples = 0ULL;
26341 +       hist->above_hist_bound_samples = 0ULL;
26342 +       hist->min_lat = LONG_MAX;
26343 +       hist->max_lat = LONG_MIN;
26344 +       hist->total_samples = 0ULL;
26345 +       hist->accumulate_lat = 0LL;
26347 +       atomic_inc(&hist->hist_mode);
26350 +static ssize_t
26351 +latency_hist_reset(struct file *file, const char __user *a,
26352 +                  size_t size, loff_t *off)
26354 +       int cpu;
26355 +       struct hist_data *hist = NULL;
26356 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26357 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26358 +       struct maxlatproc_data *mp = NULL;
26359 +#endif
26360 +       off_t latency_type = (off_t) file->private_data;
26362 +       for_each_online_cpu(cpu) {
26364 +               switch (latency_type) {
26365 +#ifdef CONFIG_PREEMPT_OFF_HIST
26366 +               case PREEMPTOFF_LATENCY:
26367 +                       hist = &per_cpu(preemptoff_hist, cpu);
26368 +                       break;
26369 +#endif
26370 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26371 +               case IRQSOFF_LATENCY:
26372 +                       hist = &per_cpu(irqsoff_hist, cpu);
26373 +                       break;
26374 +#endif
26375 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26376 +               case PREEMPTIRQSOFF_LATENCY:
26377 +                       hist = &per_cpu(preemptirqsoff_hist, cpu);
26378 +                       break;
26379 +#endif
26380 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26381 +               case WAKEUP_LATENCY:
26382 +                       hist = &per_cpu(wakeup_latency_hist, cpu);
26383 +                       mp = &per_cpu(wakeup_maxlatproc, cpu);
26384 +                       break;
26385 +               case WAKEUP_LATENCY_SHAREDPRIO:
26386 +                       hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
26387 +                       mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
26388 +                       break;
26389 +#endif
26390 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26391 +               case MISSED_TIMER_OFFSETS:
26392 +                       hist = &per_cpu(missed_timer_offsets, cpu);
26393 +                       mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
26394 +                       break;
26395 +#endif
26396 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26397 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26398 +               case TIMERANDWAKEUP_LATENCY:
26399 +                       hist = &per_cpu(timerandwakeup_latency_hist, cpu);
26400 +                       mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
26401 +                       break;
26402 +#endif
26403 +               }
26405 +               hist_reset(hist);
26406 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26407 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26408 +               if (latency_type == WAKEUP_LATENCY ||
26409 +                   latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
26410 +                   latency_type == MISSED_TIMER_OFFSETS ||
26411 +                   latency_type == TIMERANDWAKEUP_LATENCY)
26412 +                       clear_maxlatprocdata(mp);
26413 +#endif
26414 +       }
26416 +       return size;
26419 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26420 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26421 +static ssize_t
26422 +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
26424 +       char buf[64];
26425 +       int r;
26426 +       unsigned long *this_pid = file->private_data;
26428 +       r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
26429 +       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
26432 +static ssize_t do_pid(struct file *file, const char __user *ubuf,
26433 +                     size_t cnt, loff_t *ppos)
26435 +       char buf[64];
26436 +       unsigned long pid;
26437 +       unsigned long *this_pid = file->private_data;
26439 +       if (cnt >= sizeof(buf))
26440 +               return -EINVAL;
26442 +       if (copy_from_user(&buf, ubuf, cnt))
26443 +               return -EFAULT;
26445 +       buf[cnt] = '\0';
26447 +       if (kstrtoul(buf, 10, &pid))
26448 +               return -EINVAL;
26450 +       *this_pid = pid;
26452 +       return cnt;
26454 +#endif
26456 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26457 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26458 +static ssize_t
26459 +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
26461 +       int r;
26462 +       struct maxlatproc_data *mp = file->private_data;
26463 +       int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
26464 +       unsigned long long t;
26465 +       unsigned long usecs, secs;
26466 +       char *buf;
26468 +       if (mp->pid == -1 || mp->current_pid == -1) {
26469 +               buf = "(none)\n";
26470 +               return simple_read_from_buffer(ubuf, cnt, ppos, buf,
26471 +                   strlen(buf));
26472 +       }
26474 +       buf = kmalloc(strmaxlen, GFP_KERNEL);
26475 +       if (buf == NULL)
26476 +               return -ENOMEM;
26478 +       t = ns2usecs(mp->timestamp);
26479 +       usecs = do_div(t, USEC_PER_SEC);
26480 +       secs = (unsigned long) t;
26481 +       r = snprintf(buf, strmaxlen,
26482 +           "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
26483 +           MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
26484 +           mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
26485 +           secs, usecs);
26486 +       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
26487 +       kfree(buf);
26488 +       return r;
26490 +#endif
26492 +static ssize_t
26493 +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
26495 +       char buf[64];
26496 +       struct enable_data *ed = file->private_data;
26497 +       int r;
26499 +       r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
26500 +       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
26503 +static ssize_t
26504 +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
26506 +       char buf[64];
26507 +       long enable;
26508 +       struct enable_data *ed = file->private_data;
26510 +       if (cnt >= sizeof(buf))
26511 +               return -EINVAL;
26513 +       if (copy_from_user(&buf, ubuf, cnt))
26514 +               return -EFAULT;
26516 +       buf[cnt] = 0;
26518 +       if (kstrtoul(buf, 10, &enable))
26519 +               return -EINVAL;
26521 +       if ((enable && ed->enabled) || (!enable && !ed->enabled))
26522 +               return cnt;
26524 +       if (enable) {
26525 +               int ret;
26527 +               switch (ed->latency_type) {
26528 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
26529 +               case PREEMPTIRQSOFF_LATENCY:
26530 +                       ret = register_trace_preemptirqsoff_hist(
26531 +                           probe_preemptirqsoff_hist, NULL);
26532 +                       if (ret) {
26533 +                               pr_info("wakeup trace: Couldn't assign "
26534 +                                   "probe_preemptirqsoff_hist "
26535 +                                   "to trace_preemptirqsoff_hist\n");
26536 +                               return ret;
26537 +                       }
26538 +                       break;
26539 +#endif
26540 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26541 +               case WAKEUP_LATENCY:
26542 +                       ret = register_trace_sched_wakeup(
26543 +                           probe_wakeup_latency_hist_start, NULL);
26544 +                       if (ret) {
26545 +                               pr_info("wakeup trace: Couldn't assign "
26546 +                                   "probe_wakeup_latency_hist_start "
26547 +                                   "to trace_sched_wakeup\n");
26548 +                               return ret;
26549 +                       }
26550 +                       ret = register_trace_sched_wakeup_new(
26551 +                           probe_wakeup_latency_hist_start, NULL);
26552 +                       if (ret) {
26553 +                               pr_info("wakeup trace: Couldn't assign "
26554 +                                   "probe_wakeup_latency_hist_start "
26555 +                                   "to trace_sched_wakeup_new\n");
26556 +                               unregister_trace_sched_wakeup(
26557 +                                   probe_wakeup_latency_hist_start, NULL);
26558 +                               return ret;
26559 +                       }
26560 +                       ret = register_trace_sched_switch(
26561 +                           probe_wakeup_latency_hist_stop, NULL);
26562 +                       if (ret) {
26563 +                               pr_info("wakeup trace: Couldn't assign "
26564 +                                   "probe_wakeup_latency_hist_stop "
26565 +                                   "to trace_sched_switch\n");
26566 +                               unregister_trace_sched_wakeup(
26567 +                                   probe_wakeup_latency_hist_start, NULL);
26568 +                               unregister_trace_sched_wakeup_new(
26569 +                                   probe_wakeup_latency_hist_start, NULL);
26570 +                               return ret;
26571 +                       }
26572 +                       ret = register_trace_sched_migrate_task(
26573 +                           probe_sched_migrate_task, NULL);
26574 +                       if (ret) {
26575 +                               pr_info("wakeup trace: Couldn't assign "
26576 +                                   "probe_sched_migrate_task "
26577 +                                   "to trace_sched_migrate_task\n");
26578 +                               unregister_trace_sched_wakeup(
26579 +                                   probe_wakeup_latency_hist_start, NULL);
26580 +                               unregister_trace_sched_wakeup_new(
26581 +                                   probe_wakeup_latency_hist_start, NULL);
26582 +                               unregister_trace_sched_switch(
26583 +                                   probe_wakeup_latency_hist_stop, NULL);
26584 +                               return ret;
26585 +                       }
26586 +                       break;
26587 +#endif
26588 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26589 +               case MISSED_TIMER_OFFSETS:
26590 +                       ret = register_trace_hrtimer_interrupt(
26591 +                           probe_hrtimer_interrupt, NULL);
26592 +                       if (ret) {
26593 +                               pr_info("wakeup trace: Couldn't assign "
26594 +                                   "probe_hrtimer_interrupt "
26595 +                                   "to trace_hrtimer_interrupt\n");
26596 +                               return ret;
26597 +                       }
26598 +                       break;
26599 +#endif
26600 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
26601 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26602 +               case TIMERANDWAKEUP_LATENCY:
26603 +                       if (!wakeup_latency_enabled_data.enabled ||
26604 +                           !missed_timer_offsets_enabled_data.enabled)
26605 +                               return -EINVAL;
26606 +                       break;
26607 +#endif
26608 +               default:
26609 +                       break;
26610 +               }
26611 +       } else {
26612 +               switch (ed->latency_type) {
26613 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
26614 +               case PREEMPTIRQSOFF_LATENCY:
26615 +                       {
26616 +                               int cpu;
26618 +                               unregister_trace_preemptirqsoff_hist(
26619 +                                   probe_preemptirqsoff_hist, NULL);
26620 +                               for_each_online_cpu(cpu) {
26621 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26622 +                                       per_cpu(hist_irqsoff_counting,
26623 +                                           cpu) = 0;
26624 +#endif
26625 +#ifdef CONFIG_PREEMPT_OFF_HIST
26626 +                                       per_cpu(hist_preemptoff_counting,
26627 +                                           cpu) = 0;
26628 +#endif
26629 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26630 +                                       per_cpu(hist_preemptirqsoff_counting,
26631 +                                           cpu) = 0;
26632 +#endif
26633 +                               }
26634 +                       }
26635 +                       break;
26636 +#endif
26637 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26638 +               case WAKEUP_LATENCY:
26639 +                       {
26640 +                               int cpu;
26642 +                               unregister_trace_sched_wakeup(
26643 +                                   probe_wakeup_latency_hist_start, NULL);
26644 +                               unregister_trace_sched_wakeup_new(
26645 +                                   probe_wakeup_latency_hist_start, NULL);
26646 +                               unregister_trace_sched_switch(
26647 +                                   probe_wakeup_latency_hist_stop, NULL);
26648 +                               unregister_trace_sched_migrate_task(
26649 +                                   probe_sched_migrate_task, NULL);
26651 +                               for_each_online_cpu(cpu) {
26652 +                                       per_cpu(wakeup_task, cpu) = NULL;
26653 +                                       per_cpu(wakeup_sharedprio, cpu) = 0;
26654 +                               }
26655 +                       }
26656 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26657 +                       timerandwakeup_enabled_data.enabled = 0;
26658 +#endif
26659 +                       break;
26660 +#endif
26661 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26662 +               case MISSED_TIMER_OFFSETS:
26663 +                       unregister_trace_hrtimer_interrupt(
26664 +                           probe_hrtimer_interrupt, NULL);
26665 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26666 +                       timerandwakeup_enabled_data.enabled = 0;
26667 +#endif
26668 +                       break;
26669 +#endif
26670 +               default:
26671 +                       break;
26672 +               }
26673 +       }
26674 +       ed->enabled = enable;
26675 +       return cnt;
26676 +}
26677 +
26678 +static const struct file_operations latency_hist_reset_fops = {
26679 +       .open = tracing_open_generic,
26680 +       .write = latency_hist_reset,
26681 +};
26682 +
26683 +static const struct file_operations enable_fops = {
26684 +       .open = tracing_open_generic,
26685 +       .read = show_enable,
26686 +       .write = do_enable,
26687 +};
26688 +
26689 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26690 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26691 +static const struct file_operations pid_fops = {
26692 +       .open = tracing_open_generic,
26693 +       .read = show_pid,
26694 +       .write = do_pid,
26695 +};
26696 +
26697 +static const struct file_operations maxlatproc_fops = {
26698 +       .open = tracing_open_generic,
26699 +       .read = show_maxlatproc,
26700 +};
26701 +#endif
26702 +
26703 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
26704 +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
26705 +       int starthist)
26706 +{
26707 +       int cpu = raw_smp_processor_id();
26708 +       int time_set = 0;
26709 +
26710 +       if (starthist) {
26711 +               cycle_t uninitialized_var(start);
26713 +               if (!preempt_count() && !irqs_disabled())
26714 +                       return;
26716 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26717 +               if ((reason == IRQS_OFF || reason == TRACE_START) &&
26718 +                   !per_cpu(hist_irqsoff_counting, cpu)) {
26719 +                       per_cpu(hist_irqsoff_counting, cpu) = 1;
26720 +                       start = ftrace_now(cpu);
26721 +                       time_set++;
26722 +                       per_cpu(hist_irqsoff_start, cpu) = start;
26723 +               }
26724 +#endif
26726 +#ifdef CONFIG_PREEMPT_OFF_HIST
26727 +               if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
26728 +                   !per_cpu(hist_preemptoff_counting, cpu)) {
26729 +                       per_cpu(hist_preemptoff_counting, cpu) = 1;
26730 +                       if (!(time_set++))
26731 +                               start = ftrace_now(cpu);
26732 +                       per_cpu(hist_preemptoff_start, cpu) = start;
26733 +               }
26734 +#endif
26736 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26737 +               if (per_cpu(hist_irqsoff_counting, cpu) &&
26738 +                   per_cpu(hist_preemptoff_counting, cpu) &&
26739 +                   !per_cpu(hist_preemptirqsoff_counting, cpu)) {
26740 +                       per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
26741 +                       if (!time_set)
26742 +                               start = ftrace_now(cpu);
26743 +                       per_cpu(hist_preemptirqsoff_start, cpu) = start;
26744 +               }
26745 +#endif
26746 +       } else {
26747 +               cycle_t uninitialized_var(stop);
26749 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26750 +               if ((reason == IRQS_ON || reason == TRACE_STOP) &&
26751 +                   per_cpu(hist_irqsoff_counting, cpu)) {
26752 +                       cycle_t start = per_cpu(hist_irqsoff_start, cpu);
26754 +                       stop = ftrace_now(cpu);
26755 +                       time_set++;
26756 +                       if (start) {
26757 +                               long latency = ((long) (stop - start)) /
26758 +                                   NSECS_PER_USECS;
26760 +                               latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
26761 +                                   stop, NULL);
26762 +                       }
26763 +                       per_cpu(hist_irqsoff_counting, cpu) = 0;
26764 +               }
26765 +#endif
26767 +#ifdef CONFIG_PREEMPT_OFF_HIST
26768 +               if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
26769 +                   per_cpu(hist_preemptoff_counting, cpu)) {
26770 +                       cycle_t start = per_cpu(hist_preemptoff_start, cpu);
26772 +                       if (!(time_set++))
26773 +                               stop = ftrace_now(cpu);
26774 +                       if (start) {
26775 +                               long latency = ((long) (stop - start)) /
26776 +                                   NSECS_PER_USECS;
26778 +                               latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
26779 +                                   0, stop, NULL);
26780 +                       }
26781 +                       per_cpu(hist_preemptoff_counting, cpu) = 0;
26782 +               }
26783 +#endif
26785 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
26786 +               if ((!per_cpu(hist_irqsoff_counting, cpu) ||
26787 +                    !per_cpu(hist_preemptoff_counting, cpu)) &&
26788 +                  per_cpu(hist_preemptirqsoff_counting, cpu)) {
26789 +                       cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
26791 +                       if (!time_set)
26792 +                               stop = ftrace_now(cpu);
26793 +                       if (start) {
26794 +                               long latency = ((long) (stop - start)) /
26795 +                                   NSECS_PER_USECS;
26797 +                               latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
26798 +                                   latency, 0, stop, NULL);
26799 +                       }
26800 +                       per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
26801 +               }
26802 +#endif
26803 +       }
26805 +#endif
26807 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26808 +static DEFINE_RAW_SPINLOCK(wakeup_lock);
26809 +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
26810 +       int cpu)
26812 +       int old_cpu = task_cpu(task);
26814 +       if (cpu != old_cpu) {
26815 +               unsigned long flags;
26816 +               struct task_struct *cpu_wakeup_task;
26818 +               raw_spin_lock_irqsave(&wakeup_lock, flags);
26820 +               cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
26821 +               if (task == cpu_wakeup_task) {
26822 +                       put_task_struct(cpu_wakeup_task);
26823 +                       per_cpu(wakeup_task, old_cpu) = NULL;
26824 +                       cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
26825 +                       get_task_struct(cpu_wakeup_task);
26826 +               }
26828 +               raw_spin_unlock_irqrestore(&wakeup_lock, flags);
26829 +       }
26832 +static notrace void probe_wakeup_latency_hist_start(void *v,
26833 +       struct task_struct *p)
26835 +       unsigned long flags;
26836 +       struct task_struct *curr = current;
26837 +       int cpu = task_cpu(p);
26838 +       struct task_struct *cpu_wakeup_task;
26840 +       raw_spin_lock_irqsave(&wakeup_lock, flags);
26842 +       cpu_wakeup_task = per_cpu(wakeup_task, cpu);
26844 +       if (wakeup_pid) {
26845 +               if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
26846 +                   p->prio == curr->prio)
26847 +                       per_cpu(wakeup_sharedprio, cpu) = 1;
26848 +               if (likely(wakeup_pid != task_pid_nr(p)))
26849 +                       goto out;
26850 +       } else {
26851 +               if (likely(!rt_task(p)) ||
26852 +                   (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
26853 +                   p->prio > curr->prio)
26854 +                       goto out;
26855 +               if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
26856 +                   p->prio == curr->prio)
26857 +                       per_cpu(wakeup_sharedprio, cpu) = 1;
26858 +       }
26860 +       if (cpu_wakeup_task)
26861 +               put_task_struct(cpu_wakeup_task);
26862 +       cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
26863 +       get_task_struct(cpu_wakeup_task);
26864 +       cpu_wakeup_task->preempt_timestamp_hist =
26865 +               ftrace_now(raw_smp_processor_id());
26866 +out:
26867 +       raw_spin_unlock_irqrestore(&wakeup_lock, flags);
26870 +static notrace void probe_wakeup_latency_hist_stop(void *v,
26871 +       bool preempt, struct task_struct *prev, struct task_struct *next)
26873 +       unsigned long flags;
26874 +       int cpu = task_cpu(next);
26875 +       long latency;
26876 +       cycle_t stop;
26877 +       struct task_struct *cpu_wakeup_task;
26879 +       raw_spin_lock_irqsave(&wakeup_lock, flags);
26881 +       cpu_wakeup_task = per_cpu(wakeup_task, cpu);
26883 +       if (cpu_wakeup_task == NULL)
26884 +               goto out;
26886 +       /* Already running? */
26887 +       if (unlikely(current == cpu_wakeup_task))
26888 +               goto out_reset;
26890 +       if (next != cpu_wakeup_task) {
26891 +               if (next->prio < cpu_wakeup_task->prio)
26892 +                       goto out_reset;
26894 +               if (next->prio == cpu_wakeup_task->prio)
26895 +                       per_cpu(wakeup_sharedprio, cpu) = 1;
26897 +               goto out;
26898 +       }
26900 +       if (current->prio == cpu_wakeup_task->prio)
26901 +               per_cpu(wakeup_sharedprio, cpu) = 1;
26903 +       /*
26904 +        * The task we are waiting for is about to be switched to.
26905 +        * Calculate latency and store it in histogram.
26906 +        */
26907 +       stop = ftrace_now(raw_smp_processor_id());
26909 +       latency = ((long) (stop - next->preempt_timestamp_hist)) /
26910 +           NSECS_PER_USECS;
26912 +       if (per_cpu(wakeup_sharedprio, cpu)) {
26913 +               latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
26914 +                   next);
26915 +               per_cpu(wakeup_sharedprio, cpu) = 0;
26916 +       } else {
26917 +               latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
26918 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26919 +               if (timerandwakeup_enabled_data.enabled) {
26920 +                       latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
26921 +                           next->timer_offset + latency, next->timer_offset,
26922 +                           stop, next);
26923 +               }
26924 +#endif
26925 +       }
26927 +out_reset:
26928 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26929 +       next->timer_offset = 0;
26930 +#endif
26931 +       put_task_struct(cpu_wakeup_task);
26932 +       per_cpu(wakeup_task, cpu) = NULL;
26933 +out:
26934 +       raw_spin_unlock_irqrestore(&wakeup_lock, flags);
26936 +#endif
26938 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
26939 +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
26940 +       long long latency_ns, struct task_struct *curr,
26941 +       struct task_struct *task)
26943 +       if (latency_ns <= 0 && task != NULL && rt_task(task) &&
26944 +           (task->prio < curr->prio ||
26945 +           (task->prio == curr->prio &&
26946 +           !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
26947 +               long latency;
26948 +               cycle_t now;
26950 +               if (missed_timer_offsets_pid) {
26951 +                       if (likely(missed_timer_offsets_pid !=
26952 +                           task_pid_nr(task)))
26953 +                               return;
26954 +               }
26956 +               now = ftrace_now(cpu);
26957 +               latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
26958 +               latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
26959 +                   task);
26960 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26961 +               task->timer_offset = latency;
26962 +#endif
26963 +       }
26965 +#endif
26967 +static __init int latency_hist_init(void)
26969 +       struct dentry *latency_hist_root = NULL;
26970 +       struct dentry *dentry;
26971 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
26972 +       struct dentry *dentry_sharedprio;
26973 +#endif
26974 +       struct dentry *entry;
26975 +       struct dentry *enable_root;
26976 +       int i = 0;
26977 +       struct hist_data *my_hist;
26978 +       char name[64];
26979 +       char *cpufmt = "CPU%d";
26980 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
26981 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
26982 +       char *cpufmt_maxlatproc = "max_latency-CPU%d";
26983 +       struct maxlatproc_data *mp = NULL;
26984 +#endif
26986 +       dentry = tracing_init_dentry();
26987 +       latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
26988 +       enable_root = debugfs_create_dir("enable", latency_hist_root);
26990 +#ifdef CONFIG_INTERRUPT_OFF_HIST
26991 +       dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
26992 +       for_each_possible_cpu(i) {
26993 +               sprintf(name, cpufmt, i);
26994 +               entry = debugfs_create_file(name, 0444, dentry,
26995 +                   &per_cpu(irqsoff_hist, i), &latency_hist_fops);
26996 +               my_hist = &per_cpu(irqsoff_hist, i);
26997 +               atomic_set(&my_hist->hist_mode, 1);
26998 +               my_hist->min_lat = LONG_MAX;
26999 +       }
27000 +       entry = debugfs_create_file("reset", 0644, dentry,
27001 +           (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
27002 +#endif
27004 +#ifdef CONFIG_PREEMPT_OFF_HIST
27005 +       dentry = debugfs_create_dir(preemptoff_hist_dir,
27006 +           latency_hist_root);
27007 +       for_each_possible_cpu(i) {
27008 +               sprintf(name, cpufmt, i);
27009 +               entry = debugfs_create_file(name, 0444, dentry,
27010 +                   &per_cpu(preemptoff_hist, i), &latency_hist_fops);
27011 +               my_hist = &per_cpu(preemptoff_hist, i);
27012 +               atomic_set(&my_hist->hist_mode, 1);
27013 +               my_hist->min_lat = LONG_MAX;
27014 +       }
27015 +       entry = debugfs_create_file("reset", 0644, dentry,
27016 +           (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
27017 +#endif
27019 +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
27020 +       dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
27021 +           latency_hist_root);
27022 +       for_each_possible_cpu(i) {
27023 +               sprintf(name, cpufmt, i);
27024 +               entry = debugfs_create_file(name, 0444, dentry,
27025 +                   &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
27026 +               my_hist = &per_cpu(preemptirqsoff_hist, i);
27027 +               atomic_set(&my_hist->hist_mode, 1);
27028 +               my_hist->min_lat = LONG_MAX;
27029 +       }
27030 +       entry = debugfs_create_file("reset", 0644, dentry,
27031 +           (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
27032 +#endif
27034 +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
27035 +       entry = debugfs_create_file("preemptirqsoff", 0644,
27036 +           enable_root, (void *)&preemptirqsoff_enabled_data,
27037 +           &enable_fops);
27038 +#endif
27040 +#ifdef CONFIG_WAKEUP_LATENCY_HIST
27041 +       dentry = debugfs_create_dir(wakeup_latency_hist_dir,
27042 +           latency_hist_root);
27043 +       dentry_sharedprio = debugfs_create_dir(
27044 +           wakeup_latency_hist_dir_sharedprio, dentry);
27045 +       for_each_possible_cpu(i) {
27046 +               sprintf(name, cpufmt, i);
27048 +               entry = debugfs_create_file(name, 0444, dentry,
27049 +                   &per_cpu(wakeup_latency_hist, i),
27050 +                   &latency_hist_fops);
27051 +               my_hist = &per_cpu(wakeup_latency_hist, i);
27052 +               atomic_set(&my_hist->hist_mode, 1);
27053 +               my_hist->min_lat = LONG_MAX;
27055 +               entry = debugfs_create_file(name, 0444, dentry_sharedprio,
27056 +                   &per_cpu(wakeup_latency_hist_sharedprio, i),
27057 +                   &latency_hist_fops);
27058 +               my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
27059 +               atomic_set(&my_hist->hist_mode, 1);
27060 +               my_hist->min_lat = LONG_MAX;
27062 +               sprintf(name, cpufmt_maxlatproc, i);
27064 +               mp = &per_cpu(wakeup_maxlatproc, i);
27065 +               entry = debugfs_create_file(name, 0444, dentry, mp,
27066 +                   &maxlatproc_fops);
27067 +               clear_maxlatprocdata(mp);
27069 +               mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
27070 +               entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
27071 +                   &maxlatproc_fops);
27072 +               clear_maxlatprocdata(mp);
27073 +       }
27074 +       entry = debugfs_create_file("pid", 0644, dentry,
27075 +           (void *)&wakeup_pid, &pid_fops);
27076 +       entry = debugfs_create_file("reset", 0644, dentry,
27077 +           (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
27078 +       entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
27079 +           (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
27080 +       entry = debugfs_create_file("wakeup", 0644,
27081 +           enable_root, (void *)&wakeup_latency_enabled_data,
27082 +           &enable_fops);
27083 +#endif
27085 +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
27086 +       dentry = debugfs_create_dir(missed_timer_offsets_dir,
27087 +           latency_hist_root);
27088 +       for_each_possible_cpu(i) {
27089 +               sprintf(name, cpufmt, i);
27090 +               entry = debugfs_create_file(name, 0444, dentry,
27091 +                   &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
27092 +               my_hist = &per_cpu(missed_timer_offsets, i);
27093 +               atomic_set(&my_hist->hist_mode, 1);
27094 +               my_hist->min_lat = LONG_MAX;
27096 +               sprintf(name, cpufmt_maxlatproc, i);
27097 +               mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
27098 +               entry = debugfs_create_file(name, 0444, dentry, mp,
27099 +                   &maxlatproc_fops);
27100 +               clear_maxlatprocdata(mp);
27101 +       }
27102 +       entry = debugfs_create_file("pid", 0644, dentry,
27103 +           (void *)&missed_timer_offsets_pid, &pid_fops);
27104 +       entry = debugfs_create_file("reset", 0644, dentry,
27105 +           (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
27106 +       entry = debugfs_create_file("missed_timer_offsets", 0644,
27107 +           enable_root, (void *)&missed_timer_offsets_enabled_data,
27108 +           &enable_fops);
27109 +#endif
27111 +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
27112 +       defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
27113 +       dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
27114 +           latency_hist_root);
27115 +       for_each_possible_cpu(i) {
27116 +               sprintf(name, cpufmt, i);
27117 +               entry = debugfs_create_file(name, 0444, dentry,
27118 +                   &per_cpu(timerandwakeup_latency_hist, i),
27119 +                   &latency_hist_fops);
27120 +               my_hist = &per_cpu(timerandwakeup_latency_hist, i);
27121 +               atomic_set(&my_hist->hist_mode, 1);
27122 +               my_hist->min_lat = LONG_MAX;
27124 +               sprintf(name, cpufmt_maxlatproc, i);
27125 +               mp = &per_cpu(timerandwakeup_maxlatproc, i);
27126 +               entry = debugfs_create_file(name, 0444, dentry, mp,
27127 +                   &maxlatproc_fops);
27128 +               clear_maxlatprocdata(mp);
27129 +       }
27130 +       entry = debugfs_create_file("reset", 0644, dentry,
27131 +           (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
27132 +       entry = debugfs_create_file("timerandwakeup", 0644,
27133 +           enable_root, (void *)&timerandwakeup_enabled_data,
27134 +           &enable_fops);
27135 +#endif
27136 +       return 0;
27137 +}
27138 +
27139 +device_initcall(latency_hist_init);
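The initcall above creates the per-CPU histogram files plus the per-type "reset", "pid" and "max_latency-CPU%d" files and the switches under "enable". A minimal user-space sketch of how that interface is typically exercised follows; the absolute paths are assumptions (the directory names come from latency_hist_dir_root and the *_hist_dir strings defined earlier in this patch, not in the hunk shown here).

/*
 * User-space sketch only: drives the debugfs files created by
 * latency_hist_init().  The mount point and the "latency_hist"/"wakeup"
 * directory names are assumptions taken from earlier parts of this patch.
 */
#include <stdio.h>

#define HIST_DIR "/sys/kernel/debug/tracing/latency_hist"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char line[256];
	FILE *f;

	/* Flip ed->enabled for the wakeup histograms via do_enable(). */
	if (write_str(HIST_DIR "/enable/wakeup", "1"))
		return 1;

	/* A write to the per-type reset file (latency_hist_reset) clears the buckets. */
	write_str(HIST_DIR "/wakeup/reset", "1");

	/* ... run the latency-sensitive workload here ... */

	/* Per-CPU files (cpufmt "CPU%d") are rendered by latency_hist_fops. */
	f = fopen(HIST_DIR "/wakeup/CPU0", "r");
	if (!f) {
		perror("CPU0");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}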
27140 diff -Nur linux-4.4.46.orig/kernel/trace/Makefile linux-4.4.46/kernel/trace/Makefile
27141 --- linux-4.4.46.orig/kernel/trace/Makefile     2017-02-01 08:31:11.000000000 +0100
27142 +++ linux-4.4.46/kernel/trace/Makefile  2017-02-03 17:18:10.939619522 +0100
27143 @@ -40,6 +40,10 @@
27144  obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
27145  obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
27146  obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
27147 +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
27148 +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
27149 +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
27150 +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
27151  obj-$(CONFIG_NOP_TRACER) += trace_nop.o
27152  obj-$(CONFIG_STACK_TRACER) += trace_stack.o
27153  obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
27154 diff -Nur linux-4.4.46.orig/kernel/trace/trace.c linux-4.4.46/kernel/trace/trace.c
27155 --- linux-4.4.46.orig/kernel/trace/trace.c      2017-02-01 08:31:11.000000000 +0100
27156 +++ linux-4.4.46/kernel/trace/trace.c   2017-02-03 17:18:10.943619676 +0100
27157 @@ -1652,6 +1652,7 @@
27158         struct task_struct *tsk = current;
27160         entry->preempt_count            = pc & 0xff;
27161 +       entry->preempt_lazy_count       = preempt_lazy_count();
27162         entry->pid                      = (tsk) ? tsk->pid : 0;
27163         entry->flags =
27164  #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
27165 @@ -1661,8 +1662,11 @@
27166  #endif
27167                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
27168                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
27169 -               (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
27170 +               (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
27171 +               (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
27172                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
27174 +       entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
27176  EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
27178 @@ -2555,14 +2559,17 @@
27180  static void print_lat_help_header(struct seq_file *m)
27182 -       seq_puts(m, "#                  _------=> CPU#            \n"
27183 -                   "#                 / _-----=> irqs-off        \n"
27184 -                   "#                | / _----=> need-resched    \n"
27185 -                   "#                || / _---=> hardirq/softirq \n"
27186 -                   "#                ||| / _--=> preempt-depth   \n"
27187 -                   "#                |||| /     delay            \n"
27188 -                   "#  cmd     pid   ||||| time  |   caller      \n"
27189 -                   "#     \\   /      |||||  \\    |   /         \n");
27190 +       seq_puts(m, "#                  _--------=> CPU#              \n"
27191 +                   "#                 / _-------=> irqs-off          \n"
27192 +                   "#                | / _------=> need-resched      \n"
27193 +                   "#                || / _-----=> need-resched_lazy \n"
27194 +                   "#                ||| / _----=> hardirq/softirq   \n"
27195 +                   "#                |||| / _---=> preempt-depth     \n"
27196 +                   "#                ||||| / _--=> preempt-lazy-depth\n"
27197 +                   "#                |||||| / _-=> migrate-disable   \n"
27198 +                   "#                ||||||| /     delay             \n"
27199 +                   "# cmd     pid    |||||||| time   |  caller       \n"
27200 +                   "#     \\   /      ||||||||   \\    |  /            \n");
27203  static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
27204 @@ -2588,11 +2595,14 @@
27205         print_event_info(buf, m);
27206         seq_puts(m, "#                              _-----=> irqs-off\n"
27207                     "#                             / _----=> need-resched\n"
27208 -                   "#                            | / _---=> hardirq/softirq\n"
27209 -                   "#                            || / _--=> preempt-depth\n"
27210 -                   "#                            ||| /     delay\n"
27211 -                   "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
27212 -                   "#              | |       |   ||||       |         |\n");
27213 +                   "#                            |/  _-----=> need-resched_lazy\n"
27214 +                   "#                            || / _---=> hardirq/softirq\n"
27215 +                   "#                            ||| / _--=> preempt-depth\n"
27216 +                   "#                            |||| / _-=> preempt-lazy-depth\n"
27217 +                   "#                            ||||| / _-=> migrate-disable   \n"
27218 +                   "#                            |||||| /    delay\n"
27219 +                   "#           TASK-PID   CPU#  |||||||   TIMESTAMP  FUNCTION\n"
27220 +                   "#              | |       |   |||||||      |         |\n");
27223  void
27224 diff -Nur linux-4.4.46.orig/kernel/trace/trace_events.c linux-4.4.46/kernel/trace/trace_events.c
27225 --- linux-4.4.46.orig/kernel/trace/trace_events.c       2017-02-01 08:31:11.000000000 +0100
27226 +++ linux-4.4.46/kernel/trace/trace_events.c    2017-02-03 17:18:10.943619676 +0100
27227 @@ -188,6 +188,8 @@
27228         __common_field(unsigned char, flags);
27229         __common_field(unsigned char, preempt_count);
27230         __common_field(int, pid);
27231 +       __common_field(unsigned short, migrate_disable);
27232 +       __common_field(unsigned short, padding);
27234         return ret;
27236 @@ -244,6 +246,14 @@
27238         local_save_flags(fbuffer->flags);
27239         fbuffer->pc = preempt_count();
27240 +       /*
27241 +        * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
27242 +        * preemption (adding one to the preempt_count). Since we are
27243 +        * interested in the preempt_count at the time the tracepoint was
27244 +        * hit, we need to subtract one to offset the increment.
27245 +        */
27246 +       if (IS_ENABLED(CONFIG_PREEMPT))
27247 +               fbuffer->pc--;
27248         fbuffer->trace_file = trace_file;
27250         fbuffer->event =
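The two hunks above add migrate_disable (plus an explicit padding word) to the common event fields and compensate for the extra preempt_count that the tracepoint itself contributes on a preemptible kernel. Below is a stand-alone sketch of the resulting common-field layout; the struct is a user-space stand-in in registration order, not the kernel's struct trace_entry, and the idea that the padding word keeps the header a multiple of four bytes is an inference, not something stated in the patch.

/*
 * User-space stand-in for the common event header fields after this hunk,
 * in the order they are registered (the type word is registered just
 * before the context shown above).
 */
#include <stdio.h>
#include <stddef.h>

struct common_header {
	unsigned short type;
	unsigned char  flags;
	unsigned char  preempt_count;
	int            pid;
	unsigned short migrate_disable;	/* added by this hunk */
	unsigned short padding;		/* added by this hunk */
};

int main(void)
{
	printf("migrate_disable at offset %zu, header size %zu\n",
	       offsetof(struct common_header, migrate_disable),
	       sizeof(struct common_header));
	return 0;
}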
27251 diff -Nur linux-4.4.46.orig/kernel/trace/trace.h linux-4.4.46/kernel/trace/trace.h
27252 --- linux-4.4.46.orig/kernel/trace/trace.h      2017-02-01 08:31:11.000000000 +0100
27253 +++ linux-4.4.46/kernel/trace/trace.h   2017-02-03 17:18:10.943619676 +0100
27254 @@ -117,6 +117,7 @@
27255   *  NEED_RESCHED       - reschedule is requested
27256   *  HARDIRQ            - inside an interrupt handler
27257   *  SOFTIRQ            - inside a softirq handler
27258 + *  NEED_RESCHED_LAZY  - lazy reschedule is requested
27259   */
27260  enum trace_flag_type {
27261         TRACE_FLAG_IRQS_OFF             = 0x01,
27262 @@ -125,6 +126,7 @@
27263         TRACE_FLAG_HARDIRQ              = 0x08,
27264         TRACE_FLAG_SOFTIRQ              = 0x10,
27265         TRACE_FLAG_PREEMPT_RESCHED      = 0x20,
27266 +       TRACE_FLAG_NEED_RESCHED_LAZY    = 0x40,
27267  };
27269  #define TRACE_BUF_SIZE         1024
27270 diff -Nur linux-4.4.46.orig/kernel/trace/trace_irqsoff.c linux-4.4.46/kernel/trace/trace_irqsoff.c
27271 --- linux-4.4.46.orig/kernel/trace/trace_irqsoff.c      2017-02-01 08:31:11.000000000 +0100
27272 +++ linux-4.4.46/kernel/trace/trace_irqsoff.c   2017-02-03 17:18:10.943619676 +0100
27273 @@ -13,6 +13,7 @@
27274  #include <linux/uaccess.h>
27275  #include <linux/module.h>
27276  #include <linux/ftrace.h>
27277 +#include <trace/events/hist.h>
27279  #include "trace.h"
27281 @@ -424,11 +425,13 @@
27283         if (preempt_trace() || irq_trace())
27284                 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27285 +       trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
27287  EXPORT_SYMBOL_GPL(start_critical_timings);
27289  void stop_critical_timings(void)
27291 +       trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
27292         if (preempt_trace() || irq_trace())
27293                 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27295 @@ -438,6 +441,7 @@
27296  #ifdef CONFIG_PROVE_LOCKING
27297  void time_hardirqs_on(unsigned long a0, unsigned long a1)
27299 +       trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
27300         if (!preempt_trace() && irq_trace())
27301                 stop_critical_timing(a0, a1);
27303 @@ -446,6 +450,7 @@
27305         if (!preempt_trace() && irq_trace())
27306                 start_critical_timing(a0, a1);
27307 +       trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
27310  #else /* !CONFIG_PROVE_LOCKING */
27311 @@ -471,6 +476,7 @@
27312   */
27313  void trace_hardirqs_on(void)
27315 +       trace_preemptirqsoff_hist(IRQS_ON, 0);
27316         if (!preempt_trace() && irq_trace())
27317                 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27319 @@ -480,11 +486,13 @@
27321         if (!preempt_trace() && irq_trace())
27322                 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
27323 +       trace_preemptirqsoff_hist(IRQS_OFF, 1);
27325  EXPORT_SYMBOL(trace_hardirqs_off);
27327  __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
27329 +       trace_preemptirqsoff_hist(IRQS_ON, 0);
27330         if (!preempt_trace() && irq_trace())
27331                 stop_critical_timing(CALLER_ADDR0, caller_addr);
27333 @@ -494,6 +502,7 @@
27335         if (!preempt_trace() && irq_trace())
27336                 start_critical_timing(CALLER_ADDR0, caller_addr);
27337 +       trace_preemptirqsoff_hist(IRQS_OFF, 1);
27339  EXPORT_SYMBOL(trace_hardirqs_off_caller);
27341 @@ -503,12 +512,14 @@
27342  #ifdef CONFIG_PREEMPT_TRACER
27343  void trace_preempt_on(unsigned long a0, unsigned long a1)
27345 +       trace_preemptirqsoff_hist(PREEMPT_ON, 0);
27346         if (preempt_trace() && !irq_trace())
27347                 stop_critical_timing(a0, a1);
27350  void trace_preempt_off(unsigned long a0, unsigned long a1)
27352 +       trace_preemptirqsoff_hist(PREEMPT_ON, 1);
27353         if (preempt_trace() && !irq_trace())
27354                 start_critical_timing(a0, a1);
27356 diff -Nur linux-4.4.46.orig/kernel/trace/trace_output.c linux-4.4.46/kernel/trace/trace_output.c
27357 --- linux-4.4.46.orig/kernel/trace/trace_output.c       2017-02-01 08:31:11.000000000 +0100
27358 +++ linux-4.4.46/kernel/trace/trace_output.c    2017-02-03 17:18:10.943619676 +0100
27359 @@ -386,6 +386,7 @@
27361         char hardsoft_irq;
27362         char need_resched;
27363 +       char need_resched_lazy;
27364         char irqs_off;
27365         int hardirq;
27366         int softirq;
27367 @@ -413,6 +414,8 @@
27368                 need_resched = '.';
27369                 break;
27370         }
27371 +       need_resched_lazy =
27372 +               (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
27374         hardsoft_irq =
27375                 (hardirq && softirq) ? 'H' :
27376 @@ -420,14 +423,25 @@
27377                 softirq ? 's' :
27378                 '.';
27380 -       trace_seq_printf(s, "%c%c%c",
27381 -                        irqs_off, need_resched, hardsoft_irq);
27382 +       trace_seq_printf(s, "%c%c%c%c",
27383 +                        irqs_off, need_resched, need_resched_lazy,
27384 +                        hardsoft_irq);
27386         if (entry->preempt_count)
27387                 trace_seq_printf(s, "%x", entry->preempt_count);
27388         else
27389                 trace_seq_putc(s, '.');
27391 +       if (entry->preempt_lazy_count)
27392 +               trace_seq_printf(s, "%x", entry->preempt_lazy_count);
27393 +       else
27394 +               trace_seq_putc(s, '.');
27396 +       if (entry->migrate_disable)
27397 +               trace_seq_printf(s, "%x", entry->migrate_disable);
27398 +       else
27399 +               trace_seq_putc(s, '.');
27401         return !trace_seq_has_overflowed(s);
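After this hunk the latency-format field string carries four flag characters (irqs-off, need-resched, the new need-resched-lazy 'L', hardirq/softirq) followed by preempt-depth, preempt-lazy-depth and migrate-disable, each printed as a hex digit or '.'. A small user-space decoder along the same lines is sketched below: the TRACE_FLAG_* values come from the trace.h hunk earlier in this patch, the irqs-off and need-resched characters are passed in pre-computed because their mapping lives in the unmodified part of trace_output.c, and the 'h' case is assumed to follow that unmodified code.

/*
 * User-space sketch of the field string emitted above: four flag
 * characters followed by three hex-or-dot counters.
 */
#include <stdio.h>

#define TRACE_FLAG_HARDIRQ		0x08
#define TRACE_FLAG_SOFTIRQ		0x10
#define TRACE_FLAG_NEED_RESCHED_LAZY	0x40

static void print_lat_fields(unsigned char flags, char irqs_off,
			     char need_resched, unsigned char preempt_count,
			     unsigned char preempt_lazy_count,
			     unsigned char migrate_disable)
{
	char need_resched_lazy =
		(flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
	int hardirq = flags & TRACE_FLAG_HARDIRQ;
	int softirq = flags & TRACE_FLAG_SOFTIRQ;
	char hardsoft_irq = (hardirq && softirq) ? 'H' :
			    hardirq ? 'h' :
			    softirq ? 's' : '.';

	printf("%c%c%c%c", irqs_off, need_resched, need_resched_lazy,
	       hardsoft_irq);
	if (preempt_count)
		printf("%x", preempt_count);
	else
		putchar('.');
	if (preempt_lazy_count)
		printf("%x", preempt_lazy_count);
	else
		putchar('.');
	if (migrate_disable)
		printf("%x", migrate_disable);
	else
		putchar('.');
	putchar('\n');
}

int main(void)
{
	/* e.g. lazy resched pending, in softirq, preempt depth 1 */
	print_lat_fields(TRACE_FLAG_SOFTIRQ | TRACE_FLAG_NEED_RESCHED_LAZY,
			 'd', 'n', 1, 0, 1);	/* prints "dnLs1.1" */
	return 0;
}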
27404 diff -Nur linux-4.4.46.orig/kernel/user.c linux-4.4.46/kernel/user.c
27405 --- linux-4.4.46.orig/kernel/user.c     2017-02-01 08:31:11.000000000 +0100
27406 +++ linux-4.4.46/kernel/user.c  2017-02-03 17:18:10.943619676 +0100
27407 @@ -161,11 +161,11 @@
27408         if (!up)
27409                 return;
27411 -       local_irq_save(flags);
27412 +       local_irq_save_nort(flags);
27413         if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
27414                 free_user(up, flags);
27415         else
27416 -               local_irq_restore(flags);
27417 +               local_irq_restore_nort(flags);
27420  struct user_struct *alloc_uid(kuid_t uid)
27421 diff -Nur linux-4.4.46.orig/kernel/watchdog.c linux-4.4.46/kernel/watchdog.c
27422 --- linux-4.4.46.orig/kernel/watchdog.c 2017-02-01 08:31:11.000000000 +0100
27423 +++ linux-4.4.46/kernel/watchdog.c      2017-02-03 17:18:10.943619676 +0100
27424 @@ -299,6 +299,8 @@
27426  #ifdef CONFIG_HARDLOCKUP_DETECTOR
27428 +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
27430  static struct perf_event_attr wd_hw_attr = {
27431         .type           = PERF_TYPE_HARDWARE,
27432         .config         = PERF_COUNT_HW_CPU_CYCLES,
27433 @@ -332,6 +334,13 @@
27434                 /* only print hardlockups once */
27435                 if (__this_cpu_read(hard_watchdog_warn) == true)
27436                         return;
27437 +               /*
27438 +                * If early-printk is enabled then make sure we do not
27439 +                * lock up in printk() and kill console logging:
27440 +                */
27441 +               printk_kill();
27443 +               raw_spin_lock(&watchdog_output_lock);
27445                 pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
27446                 print_modules();
27447 @@ -349,8 +358,9 @@
27448                                 !test_and_set_bit(0, &hardlockup_allcpu_dumped))
27449                         trigger_allbutself_cpu_backtrace();
27451 +               raw_spin_unlock(&watchdog_output_lock);
27452                 if (hardlockup_panic)
27453 -                       panic("Hard LOCKUP");
27454 +                       nmi_panic(regs, "Hard LOCKUP");
27456                 __this_cpu_write(hard_watchdog_warn, true);
27457                 return;
27458 @@ -496,6 +506,7 @@
27459         /* kick off the timer for the hardlockup detector */
27460         hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
27461         hrtimer->function = watchdog_timer_fn;
27462 +       hrtimer->irqsafe = 1;
27464         /* Enable the perf event */
27465         watchdog_nmi_enable(cpu);
27466 diff -Nur linux-4.4.46.orig/kernel/workqueue.c linux-4.4.46/kernel/workqueue.c
27467 --- linux-4.4.46.orig/kernel/workqueue.c        2017-02-01 08:31:11.000000000 +0100
27468 +++ linux-4.4.46/kernel/workqueue.c     2017-02-03 17:18:10.943619676 +0100
27469 @@ -48,6 +48,8 @@
27470  #include <linux/nodemask.h>
27471  #include <linux/moduleparam.h>
27472  #include <linux/uaccess.h>
27473 +#include <linux/locallock.h>
27474 +#include <linux/delay.h>
27476  #include "workqueue_internal.h"
27478 @@ -121,11 +123,16 @@
27479   *    cpu or grabbing pool->lock is enough for read access.  If
27480   *    POOL_DISASSOCIATED is set, it's identical to L.
27481   *
27482 + *    On RT we need the extra protection via rt_lock_idle_list() for
27483 + *    the list manipulations against read access from
27484 + *    wq_worker_sleeping(). All other places are nicely serialized via
27485 + *    pool->lock.
27486 + *
27487   * A: pool->attach_mutex protected.
27488   *
27489   * PL: wq_pool_mutex protected.
27490   *
27491 - * PR: wq_pool_mutex protected for writes.  Sched-RCU protected for reads.
27492 + * PR: wq_pool_mutex protected for writes.  RCU protected for reads.
27493   *
27494   * PW: wq_pool_mutex and wq->mutex protected for writes.  Either for reads.
27495   *
27496 @@ -134,7 +141,7 @@
27497   *
27498   * WQ: wq->mutex protected.
27499   *
27500 - * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
27501 + * WR: wq->mutex protected for writes.  RCU protected for reads.
27502   *
27503   * MD: wq_mayday_lock protected.
27504   */
27505 @@ -183,7 +190,7 @@
27506         atomic_t                nr_running ____cacheline_aligned_in_smp;
27508         /*
27509 -        * Destruction of pool is sched-RCU protected to allow dereferences
27510 +        * Destruction of pool is RCU protected to allow dereferences
27511          * from get_work_pool().
27512          */
27513         struct rcu_head         rcu;
27514 @@ -212,7 +219,7 @@
27515         /*
27516          * Release of unbound pwq is punted to system_wq.  See put_pwq()
27517          * and pwq_unbound_release_workfn() for details.  pool_workqueue
27518 -        * itself is also sched-RCU protected so that the first pwq can be
27519 +        * itself is also RCU protected so that the first pwq can be
27520          * determined without grabbing wq->mutex.
27521          */
27522         struct work_struct      unbound_release_work;
27523 @@ -331,6 +338,8 @@
27524  struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
27525  EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
27527 +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
27529  static int worker_thread(void *__worker);
27530  static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
27532 @@ -338,20 +347,20 @@
27533  #include <trace/events/workqueue.h>
27535  #define assert_rcu_or_pool_mutex()                                     \
27536 -       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
27537 +       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
27538                          !lockdep_is_held(&wq_pool_mutex),              \
27539 -                        "sched RCU or wq_pool_mutex should be held")
27540 +                        "RCU or wq_pool_mutex should be held")
27542  #define assert_rcu_or_wq_mutex(wq)                                     \
27543 -       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
27544 +       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
27545                          !lockdep_is_held(&wq->mutex),                  \
27546 -                        "sched RCU or wq->mutex should be held")
27547 +                        "RCU or wq->mutex should be held")
27549  #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                       \
27550 -       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&                 \
27551 +       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
27552                          !lockdep_is_held(&wq->mutex) &&                \
27553                          !lockdep_is_held(&wq_pool_mutex),              \
27554 -                        "sched RCU, wq->mutex or wq_pool_mutex should be held")
27555 +                        "RCU, wq->mutex or wq_pool_mutex should be held")
27557  #define for_each_cpu_worker_pool(pool, cpu)                            \
27558         for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];               \
27559 @@ -363,7 +372,7 @@
27560   * @pool: iteration cursor
27561   * @pi: integer used for iteration
27562   *
27563 - * This must be called either with wq_pool_mutex held or sched RCU read
27564 + * This must be called either with wq_pool_mutex held or RCU read
27565   * locked.  If the pool needs to be used beyond the locking in effect, the
27566   * caller is responsible for guaranteeing that the pool stays online.
27567   *
27568 @@ -395,7 +404,7 @@
27569   * @pwq: iteration cursor
27570   * @wq: the target workqueue
27571   *
27572 - * This must be called either with wq->mutex held or sched RCU read locked.
27573 + * This must be called either with wq->mutex held or RCU read locked.
27574   * If the pwq needs to be used beyond the locking in effect, the caller is
27575   * responsible for guaranteeing that the pwq stays online.
27576   *
27577 @@ -407,6 +416,31 @@
27578                 if (({ assert_rcu_or_wq_mutex(wq); false; })) { }       \
27579                 else
27581 +#ifdef CONFIG_PREEMPT_RT_BASE
27582 +static inline void rt_lock_idle_list(struct worker_pool *pool)
27583 +{
27584 +       preempt_disable();
27585 +}
27586 +static inline void rt_unlock_idle_list(struct worker_pool *pool)
27587 +{
27588 +       preempt_enable();
27589 +}
27590 +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
27591 +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
27592 +#else
27593 +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
27594 +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
27595 +static inline void sched_lock_idle_list(struct worker_pool *pool)
27596 +{
27597 +       spin_lock_irq(&pool->lock);
27598 +}
27599 +static inline void sched_unlock_idle_list(struct worker_pool *pool)
27600 +{
27601 +       spin_unlock_irq(&pool->lock);
27602 +}
27603 +#endif
27606  #ifdef CONFIG_DEBUG_OBJECTS_WORK
27608  static struct debug_obj_descr work_debug_descr;
27609 @@ -557,7 +591,7 @@
27610   * @wq: the target workqueue
27611   * @node: the node ID
27612   *
27613 - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
27614 + * This must be called with any of wq_pool_mutex, wq->mutex or RCU
27615   * read locked.
27616   * If the pwq needs to be used beyond the locking in effect, the caller is
27617   * responsible for guaranteeing that the pwq stays online.
27618 @@ -701,8 +735,8 @@
27619   * @work: the work item of interest
27620   *
27621   * Pools are created and destroyed under wq_pool_mutex, and allows read
27622 - * access under sched-RCU read lock.  As such, this function should be
27623 - * called under wq_pool_mutex or with preemption disabled.
27624 + * access under RCU read lock.  As such, this function should be
27625 + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
27626   *
27627   * All fields of the returned pool are accessible as long as the above
27628   * mentioned locking is in effect.  If the returned pool needs to be used
27629 @@ -839,51 +873,44 @@
27630   */
27631  static void wake_up_worker(struct worker_pool *pool)
27633 -       struct worker *worker = first_idle_worker(pool);
27634 +       struct worker *worker;
27636 +       rt_lock_idle_list(pool);
27638 +       worker = first_idle_worker(pool);
27640         if (likely(worker))
27641                 wake_up_process(worker->task);
27643 +       rt_unlock_idle_list(pool);
27646  /**
27647 - * wq_worker_waking_up - a worker is waking up
27648 - * @task: task waking up
27649 - * @cpu: CPU @task is waking up to
27650 + * wq_worker_running - a worker is running again
27651 + * @task: task returning from sleep
27652   *
27653 - * This function is called during try_to_wake_up() when a worker is
27654 - * being awoken.
27655 - *
27656 - * CONTEXT:
27657 - * spin_lock_irq(rq->lock)
27658 + * This function is called when a worker returns from schedule()
27659   */
27660 -void wq_worker_waking_up(struct task_struct *task, int cpu)
27661 +void wq_worker_running(struct task_struct *task)
27663         struct worker *worker = kthread_data(task);
27665 -       if (!(worker->flags & WORKER_NOT_RUNNING)) {
27666 -               WARN_ON_ONCE(worker->pool->cpu != cpu);
27667 +       if (!worker->sleeping)
27668 +               return;
27669 +       if (!(worker->flags & WORKER_NOT_RUNNING))
27670                 atomic_inc(&worker->pool->nr_running);
27671 -       }
27672 +       worker->sleeping = 0;
27675  /**
27676   * wq_worker_sleeping - a worker is going to sleep
27677   * @task: task going to sleep
27678 - * @cpu: CPU in question, must be the current CPU number
27679 - *
27680 - * This function is called during schedule() when a busy worker is
27681 - * going to sleep.  Worker on the same cpu can be woken up by
27682 - * returning pointer to its task.
27683 - *
27684 - * CONTEXT:
27685 - * spin_lock_irq(rq->lock)
27686 - *
27687 - * Return:
27688 - * Worker task on @cpu to wake up, %NULL if none.
27689 + * This function is called from schedule() when a busy worker is
27690 + * going to sleep.
27691   */
27692 -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
27693 +void wq_worker_sleeping(struct task_struct *task)
27695 -       struct worker *worker = kthread_data(task), *to_wakeup = NULL;
27696 +       struct worker *worker = kthread_data(task);
27697         struct worker_pool *pool;
27699         /*
27700 @@ -892,29 +919,26 @@
27701          * checking NOT_RUNNING.
27702          */
27703         if (worker->flags & WORKER_NOT_RUNNING)
27704 -               return NULL;
27705 +               return;
27707         pool = worker->pool;
27709 -       /* this can only happen on the local cpu */
27710 -       if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
27711 -               return NULL;
27712 +       if (WARN_ON_ONCE(worker->sleeping))
27713 +               return;
27715 +       worker->sleeping = 1;
27717         /*
27718          * The counterpart of the following dec_and_test, implied mb,
27719          * worklist not empty test sequence is in insert_work().
27720          * Please read comment there.
27721 -        *
27722 -        * NOT_RUNNING is clear.  This means that we're bound to and
27723 -        * running on the local cpu w/ rq lock held and preemption
27724 -        * disabled, which in turn means that none else could be
27725 -        * manipulating idle_list, so dereferencing idle_list without pool
27726 -        * lock is safe.
27727          */
27728         if (atomic_dec_and_test(&pool->nr_running) &&
27729 -           !list_empty(&pool->worklist))
27730 -               to_wakeup = first_idle_worker(pool);
27731 -       return to_wakeup ? to_wakeup->task : NULL;
27732 +           !list_empty(&pool->worklist)) {
27733 +               sched_lock_idle_list(pool);
27734 +               wake_up_worker(pool);
27735 +               sched_unlock_idle_list(pool);
27736 +       }
27739  /**
27740 @@ -1108,12 +1132,12 @@
27742         if (pwq) {
27743                 /*
27744 -                * As both pwqs and pools are sched-RCU protected, the
27745 +                * As both pwqs and pools are RCU protected, the
27746                  * following lock operations are safe.
27747                  */
27748 -               spin_lock_irq(&pwq->pool->lock);
27749 +               local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
27750                 put_pwq(pwq);
27751 -               spin_unlock_irq(&pwq->pool->lock);
27752 +               local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
27753         }
27756 @@ -1215,7 +1239,7 @@
27757         struct worker_pool *pool;
27758         struct pool_workqueue *pwq;
27760 -       local_irq_save(*flags);
27761 +       local_lock_irqsave(pendingb_lock, *flags);
27763         /* try to steal the timer if it exists */
27764         if (is_dwork) {
27765 @@ -1234,6 +1258,7 @@
27766         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
27767                 return 0;
27769 +       rcu_read_lock();
27770         /*
27771          * The queueing is in progress, or it is already queued. Try to
27772          * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
27773 @@ -1272,14 +1297,16 @@
27774                 set_work_pool_and_keep_pending(work, pool->id);
27776                 spin_unlock(&pool->lock);
27777 +               rcu_read_unlock();
27778                 return 1;
27779         }
27780         spin_unlock(&pool->lock);
27781  fail:
27782 -       local_irq_restore(*flags);
27783 +       rcu_read_unlock();
27784 +       local_unlock_irqrestore(pendingb_lock, *flags);
27785         if (work_is_canceling(work))
27786                 return -ENOENT;
27787 -       cpu_relax();
27788 +       cpu_chill();
27789         return -EAGAIN;
27792 @@ -1348,7 +1375,7 @@
27793          * queued or lose PENDING.  Grabbing PENDING and queueing should
27794          * happen with IRQ disabled.
27795          */
27796 -       WARN_ON_ONCE(!irqs_disabled());
27797 +       WARN_ON_ONCE_NONRT(!irqs_disabled());
27799         debug_work_activate(work);
27801 @@ -1356,6 +1383,8 @@
27802         if (unlikely(wq->flags & __WQ_DRAINING) &&
27803             WARN_ON_ONCE(!is_chained_work(wq)))
27804                 return;
27806 +       rcu_read_lock();
27807  retry:
27808         if (req_cpu == WORK_CPU_UNBOUND)
27809                 cpu = raw_smp_processor_id();
27810 @@ -1412,10 +1441,8 @@
27811         /* pwq determined, queue */
27812         trace_workqueue_queue_work(req_cpu, pwq, work);
27814 -       if (WARN_ON(!list_empty(&work->entry))) {
27815 -               spin_unlock(&pwq->pool->lock);
27816 -               return;
27817 -       }
27818 +       if (WARN_ON(!list_empty(&work->entry)))
27819 +               goto out;
27821         pwq->nr_in_flight[pwq->work_color]++;
27822         work_flags = work_color_to_flags(pwq->work_color);
27823 @@ -1431,7 +1458,9 @@
27825         insert_work(pwq, work, worklist, work_flags);
27827 +out:
27828         spin_unlock(&pwq->pool->lock);
27829 +       rcu_read_unlock();
27832  /**
27833 @@ -1451,14 +1480,14 @@
27834         bool ret = false;
27835         unsigned long flags;
27837 -       local_irq_save(flags);
27838 +       local_lock_irqsave(pendingb_lock,flags);
27840         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
27841                 __queue_work(cpu, wq, work);
27842                 ret = true;
27843         }
27845 -       local_irq_restore(flags);
27846 +       local_unlock_irqrestore(pendingb_lock, flags);
27847         return ret;
27849  EXPORT_SYMBOL(queue_work_on);
27850 @@ -1525,14 +1554,14 @@
27851         unsigned long flags;
27853         /* read the comment in __queue_work() */
27854 -       local_irq_save(flags);
27855 +       local_lock_irqsave(pendingb_lock, flags);
27857         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
27858                 __queue_delayed_work(cpu, wq, dwork, delay);
27859                 ret = true;
27860         }
27862 -       local_irq_restore(flags);
27863 +       local_unlock_irqrestore(pendingb_lock, flags);
27864         return ret;
27866  EXPORT_SYMBOL(queue_delayed_work_on);
27867 @@ -1567,7 +1596,7 @@
27869         if (likely(ret >= 0)) {
27870                 __queue_delayed_work(cpu, wq, dwork, delay);
27871 -               local_irq_restore(flags);
27872 +               local_unlock_irqrestore(pendingb_lock, flags);
27873         }
27875         /* -ENOENT from try_to_grab_pending() becomes %true */
27876 @@ -1600,7 +1629,9 @@
27877         worker->last_active = jiffies;
27879         /* idle_list is LIFO */
27880 +       rt_lock_idle_list(pool);
27881         list_add(&worker->entry, &pool->idle_list);
27882 +       rt_unlock_idle_list(pool);
27884         if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
27885                 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
27886 @@ -1633,7 +1664,9 @@
27887                 return;
27888         worker_clr_flags(worker, WORKER_IDLE);
27889         pool->nr_idle--;
27890 +       rt_lock_idle_list(pool);
27891         list_del_init(&worker->entry);
27892 +       rt_unlock_idle_list(pool);
27895  static struct worker *alloc_worker(int node)
27896 @@ -1799,7 +1832,9 @@
27897         pool->nr_workers--;
27898         pool->nr_idle--;
27900 +       rt_lock_idle_list(pool);
27901         list_del_init(&worker->entry);
27902 +       rt_unlock_idle_list(pool);
27903         worker->flags |= WORKER_DIE;
27904         wake_up_process(worker->task);
27906 @@ -2716,14 +2751,14 @@
27908         might_sleep();
27910 -       local_irq_disable();
27911 +       rcu_read_lock();
27912         pool = get_work_pool(work);
27913         if (!pool) {
27914 -               local_irq_enable();
27915 +               rcu_read_unlock();
27916                 return false;
27917         }
27919 -       spin_lock(&pool->lock);
27920 +       spin_lock_irq(&pool->lock);
27921         /* see the comment in try_to_grab_pending() with the same code */
27922         pwq = get_work_pwq(work);
27923         if (pwq) {
27924 @@ -2750,10 +2785,11 @@
27925         else
27926                 lock_map_acquire_read(&pwq->wq->lockdep_map);
27927         lock_map_release(&pwq->wq->lockdep_map);
27929 +       rcu_read_unlock();
27930         return true;
27931  already_gone:
27932         spin_unlock_irq(&pool->lock);
27933 +       rcu_read_unlock();
27934         return false;
27937 @@ -2840,7 +2876,7 @@
27939         /* tell other tasks trying to grab @work to back off */
27940         mark_work_canceling(work);
27941 -       local_irq_restore(flags);
27942 +       local_unlock_irqrestore(pendingb_lock, flags);
27944         flush_work(work);
27945         clear_work_data(work);
27946 @@ -2895,10 +2931,10 @@
27947   */
27948  bool flush_delayed_work(struct delayed_work *dwork)
27950 -       local_irq_disable();
27951 +       local_lock_irq(pendingb_lock);
27952         if (del_timer_sync(&dwork->timer))
27953                 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
27954 -       local_irq_enable();
27955 +       local_unlock_irq(pendingb_lock);
27956         return flush_work(&dwork->work);
27958  EXPORT_SYMBOL(flush_delayed_work);
27959 @@ -2933,7 +2969,7 @@
27961         set_work_pool_and_clear_pending(&dwork->work,
27962                                         get_work_pool_id(&dwork->work));
27963 -       local_irq_restore(flags);
27964 +       local_unlock_irqrestore(pendingb_lock, flags);
27965         return ret;
27967  EXPORT_SYMBOL(cancel_delayed_work);
27968 @@ -3161,7 +3197,7 @@
27969   * put_unbound_pool - put a worker_pool
27970   * @pool: worker_pool to put
27971   *
27972 - * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
27973 + * Put @pool.  If its refcnt reaches zero, it gets destroyed in RCU
27974   * safe manner.  get_unbound_pool() calls this function on its failure path
27975   * and this function should be able to release pools which went through,
27976   * successfully or not, init_worker_pool().
27977 @@ -3215,8 +3251,8 @@
27978         del_timer_sync(&pool->idle_timer);
27979         del_timer_sync(&pool->mayday_timer);
27981 -       /* sched-RCU protected to allow dereferences from get_work_pool() */
27982 -       call_rcu_sched(&pool->rcu, rcu_free_pool);
27983 +       /* RCU protected to allow dereferences from get_work_pool() */
27984 +       call_rcu(&pool->rcu, rcu_free_pool);
27987  /**
27988 @@ -3323,14 +3359,14 @@
27989         put_unbound_pool(pool);
27990         mutex_unlock(&wq_pool_mutex);
27992 -       call_rcu_sched(&pwq->rcu, rcu_free_pwq);
27993 +       call_rcu(&pwq->rcu, rcu_free_pwq);
27995         /*
27996          * If we're the last pwq going away, @wq is already dead and no one
27997          * is gonna access it anymore.  Schedule RCU free.
27998          */
27999         if (is_last)
28000 -               call_rcu_sched(&wq->rcu, rcu_free_wq);
28001 +               call_rcu(&wq->rcu, rcu_free_wq);
28004  /**
28005 @@ -3983,7 +4019,7 @@
28006                  * The base ref is never dropped on per-cpu pwqs.  Directly
28007                  * schedule RCU free.
28008                  */
28009 -               call_rcu_sched(&wq->rcu, rcu_free_wq);
28010 +               call_rcu(&wq->rcu, rcu_free_wq);
28011         } else {
28012                 /*
28013                  * We're the sole accessor of @wq at this point.  Directly
28014 @@ -4076,7 +4112,8 @@
28015         struct pool_workqueue *pwq;
28016         bool ret;
28018 -       rcu_read_lock_sched();
28019 +       rcu_read_lock();
28020 +       preempt_disable();
28022         if (cpu == WORK_CPU_UNBOUND)
28023                 cpu = smp_processor_id();
28024 @@ -4087,7 +4124,8 @@
28025                 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
28027         ret = !list_empty(&pwq->delayed_works);
28028 -       rcu_read_unlock_sched();
28029 +       preempt_enable();
28030 +       rcu_read_unlock();
28032         return ret;
28034 @@ -4113,15 +4151,15 @@
28035         if (work_pending(work))
28036                 ret |= WORK_BUSY_PENDING;
28038 -       local_irq_save(flags);
28039 +       rcu_read_lock();
28040         pool = get_work_pool(work);
28041         if (pool) {
28042 -               spin_lock(&pool->lock);
28043 +               spin_lock_irqsave(&pool->lock, flags);
28044                 if (find_worker_executing_work(pool, work))
28045                         ret |= WORK_BUSY_RUNNING;
28046 -               spin_unlock(&pool->lock);
28047 +               spin_unlock_irqrestore(&pool->lock, flags);
28048         }
28049 -       local_irq_restore(flags);
28050 +       rcu_read_unlock();
28052         return ret;
28054 @@ -4310,7 +4348,7 @@
28055         unsigned long flags;
28056         int pi;
28058 -       rcu_read_lock_sched();
28059 +       rcu_read_lock();
28061         pr_info("Showing busy workqueues and worker pools:\n");
28063 @@ -4361,7 +4399,7 @@
28064                 spin_unlock_irqrestore(&pool->lock, flags);
28065         }
28067 -       rcu_read_unlock_sched();
28068 +       rcu_read_unlock();
28071  /*
28072 @@ -4722,16 +4760,16 @@
28073                  * nr_active is monotonically decreasing.  It's safe
28074                  * to peek without lock.
28075                  */
28076 -               rcu_read_lock_sched();
28077 +               rcu_read_lock();
28078                 for_each_pwq(pwq, wq) {
28079                         WARN_ON_ONCE(pwq->nr_active < 0);
28080                         if (pwq->nr_active) {
28081                                 busy = true;
28082 -                               rcu_read_unlock_sched();
28083 +                               rcu_read_unlock();
28084                                 goto out_unlock;
28085                         }
28086                 }
28087 -               rcu_read_unlock_sched();
28088 +               rcu_read_unlock();
28089         }
28090  out_unlock:
28091         mutex_unlock(&wq_pool_mutex);
28092 @@ -4921,7 +4959,8 @@
28093         const char *delim = "";
28094         int node, written = 0;
28096 -       rcu_read_lock_sched();
28097 +       get_online_cpus();
28098 +       rcu_read_lock();
28099         for_each_node(node) {
28100                 written += scnprintf(buf + written, PAGE_SIZE - written,
28101                                      "%s%d:%d", delim, node,
28102 @@ -4929,7 +4968,8 @@
28103                 delim = " ";
28104         }
28105         written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
28106 -       rcu_read_unlock_sched();
28107 +       rcu_read_unlock();
28108 +       put_online_cpus();
28110         return written;
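
The kernel/workqueue.c hunks above make two related conversions. First, the irq-off sections guarding the pending-work bookkeeping become sections of the pendingb_lock local lock (local_irq_save/restore -> local_lock_irqsave/local_unlock_irqrestore), which is presumably declared earlier in this file's hunks with DEFINE_LOCAL_IRQ_LOCK(). Second, sched-RCU is downgraded to plain RCU (rcu_read_lock_sched -> rcu_read_lock, call_rcu_sched -> call_rcu), because once these sections are preemptible on RT, only regular RCU read-side critical sections still cover them. A minimal sketch of the local-lock idiom, using the locallock.h API that this patch introduces; the per-CPU counter and function name are illustrative only:

#include <linux/percpu.h>
#include <linux/locallock.h>

static DEFINE_PER_CPU(unsigned long, deferred_count);
static DEFINE_LOCAL_IRQ_LOCK(deferred_lock);

static void deferred_count_inc(void)
{
	unsigned long flags;

	/*
	 * On !PREEMPT_RT this compiles down to local_irq_save(), so the
	 * original semantics are unchanged.  On PREEMPT_RT it takes a
	 * per-CPU sleeping lock and pins the task to its CPU, so the
	 * section stays preemptible yet still serialises against every
	 * other user of deferred_lock on this CPU.
	 */
	local_lock_irqsave(deferred_lock, flags);
	__this_cpu_inc(deferred_count);
	local_unlock_irqrestore(deferred_lock, flags);
}

The same reasoning explains the spin_lock(&pool->lock) -> spin_lock_irq(&pool->lock) change above: with the surrounding local_irq_disable() replaced by rcu_read_lock(), the pool lock now has to disable interrupts itself.
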
28112 diff -Nur linux-4.4.46.orig/kernel/workqueue_internal.h linux-4.4.46/kernel/workqueue_internal.h
28113 --- linux-4.4.46.orig/kernel/workqueue_internal.h       2017-02-01 08:31:11.000000000 +0100
28114 +++ linux-4.4.46/kernel/workqueue_internal.h    2017-02-03 17:18:10.943619676 +0100
28115 @@ -43,6 +43,7 @@
28116         unsigned long           last_active;    /* L: last active timestamp */
28117         unsigned int            flags;          /* X: flags */
28118         int                     id;             /* I: worker id */
28119 +       int                     sleeping;       /* None */
28121         /*
28122          * Opaque string set with work_set_desc().  Printed out with task
28123 @@ -68,7 +69,7 @@
28124   * Scheduler hooks for concurrency managed workqueue.  Only to be used from
28125   * sched/core.c and workqueue.c.
28126   */
28127 -void wq_worker_waking_up(struct task_struct *task, int cpu);
28128 -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
28129 +void wq_worker_running(struct task_struct *task);
28130 +void wq_worker_sleeping(struct task_struct *task);
28132  #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
28133 diff -Nur linux-4.4.46.orig/lib/debugobjects.c linux-4.4.46/lib/debugobjects.c
28134 --- linux-4.4.46.orig/lib/debugobjects.c        2017-02-01 08:31:11.000000000 +0100
28135 +++ linux-4.4.46/lib/debugobjects.c     2017-02-03 17:18:10.943619676 +0100
28136 @@ -309,7 +309,10 @@
28137         struct debug_obj *obj;
28138         unsigned long flags;
28140 -       fill_pool();
28141 +#ifdef CONFIG_PREEMPT_RT_FULL
28142 +       if (preempt_count() == 0 && !irqs_disabled())
28143 +#endif
28144 +               fill_pool();
28146         db = get_bucket((unsigned long) addr);
28148 diff -Nur linux-4.4.46.orig/lib/idr.c linux-4.4.46/lib/idr.c
28149 --- linux-4.4.46.orig/lib/idr.c 2017-02-01 08:31:11.000000000 +0100
28150 +++ linux-4.4.46/lib/idr.c      2017-02-03 17:18:10.943619676 +0100
28151 @@ -30,6 +30,7 @@
28152  #include <linux/idr.h>
28153  #include <linux/spinlock.h>
28154  #include <linux/percpu.h>
28155 +#include <linux/locallock.h>
28157  #define MAX_IDR_SHIFT          (sizeof(int) * 8 - 1)
28158  #define MAX_IDR_BIT            (1U << MAX_IDR_SHIFT)
28159 @@ -45,6 +46,37 @@
28160  static DEFINE_PER_CPU(int, idr_preload_cnt);
28161  static DEFINE_SPINLOCK(simple_ida_lock);
28163 +#ifdef CONFIG_PREEMPT_RT_FULL
28164 +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
28166 +static inline void idr_preload_lock(void)
28168 +       local_lock(idr_lock);
28171 +static inline void idr_preload_unlock(void)
28173 +       local_unlock(idr_lock);
28176 +void idr_preload_end(void)
28178 +       idr_preload_unlock();
28180 +EXPORT_SYMBOL(idr_preload_end);
28181 +#else
28182 +static inline void idr_preload_lock(void)
28184 +       preempt_disable();
28187 +static inline void idr_preload_unlock(void)
28189 +       preempt_enable();
28191 +#endif
28194  /* the maximum ID which can be allocated given idr->layers */
28195  static int idr_max(int layers)
28197 @@ -115,14 +147,14 @@
28198          * context.  See idr_preload() for details.
28199          */
28200         if (!in_interrupt()) {
28201 -               preempt_disable();
28202 +               idr_preload_lock();
28203                 new = __this_cpu_read(idr_preload_head);
28204                 if (new) {
28205                         __this_cpu_write(idr_preload_head, new->ary[0]);
28206                         __this_cpu_dec(idr_preload_cnt);
28207                         new->ary[0] = NULL;
28208                 }
28209 -               preempt_enable();
28210 +               idr_preload_unlock();
28211                 if (new)
28212                         return new;
28213         }
28214 @@ -366,7 +398,6 @@
28215         idr_mark_full(pa, id);
28219  /**
28220   * idr_preload - preload for idr_alloc()
28221   * @gfp_mask: allocation mask to use for preloading
28222 @@ -401,7 +432,7 @@
28223         WARN_ON_ONCE(in_interrupt());
28224         might_sleep_if(gfpflags_allow_blocking(gfp_mask));
28226 -       preempt_disable();
28227 +       idr_preload_lock();
28229         /*
28230          * idr_alloc() is likely to succeed w/o full idr_layer buffer and
28231 @@ -413,9 +444,9 @@
28232         while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
28233                 struct idr_layer *new;
28235 -               preempt_enable();
28236 +               idr_preload_unlock();
28237                 new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
28238 -               preempt_disable();
28239 +               idr_preload_lock();
28240                 if (!new)
28241                         break;
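
The lib/idr.c hunks wrap the preload fast path in idr_preload_lock()/idr_preload_unlock(): plain preempt_disable()/preempt_enable() on !RT, a local lock on RT, which is also why an out-of-line, exported idr_preload_end() appears in the RT branch above. The caller-side contract does not change. A hedged caller-side sketch using the 4.4-era IDR API (idr_preload/idr_alloc/idr_preload_end); the idr instance, lock and return-value handling are illustrative:

#include <linux/idr.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>

static DEFINE_IDR(obj_idr);
static DEFINE_SPINLOCK(obj_lock);

static int obj_register(void *obj)
{
	int id;

	/* May sleep: preallocate idr layers outside the spinlock. */
	idr_preload(GFP_KERNEL);

	spin_lock(&obj_lock);
	/* GFP_NOWAIT: the allocation must be satisfied from the preload. */
	id = idr_alloc(&obj_idr, obj, 0, 0, GFP_NOWAIT);
	spin_unlock(&obj_lock);

	/* Undoes whatever idr_preload() took: preempt_enable() or local_unlock(). */
	idr_preload_end();

	return id;	/* >= 0 on success, negative errno on failure */
}
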
28243 diff -Nur linux-4.4.46.orig/lib/Kconfig linux-4.4.46/lib/Kconfig
28244 --- linux-4.4.46.orig/lib/Kconfig       2017-02-01 08:31:11.000000000 +0100
28245 +++ linux-4.4.46/lib/Kconfig    2017-02-03 17:18:10.943619676 +0100
28246 @@ -397,6 +397,7 @@
28248  config CPUMASK_OFFSTACK
28249         bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
28250 +       depends on !PREEMPT_RT_FULL
28251         help
28252           Use dynamic allocation for cpumask_var_t, instead of putting
28253           them on the stack.  This is a bit more expensive, but avoids
28254 diff -Nur linux-4.4.46.orig/lib/locking-selftest.c linux-4.4.46/lib/locking-selftest.c
28255 --- linux-4.4.46.orig/lib/locking-selftest.c    2017-02-01 08:31:11.000000000 +0100
28256 +++ linux-4.4.46/lib/locking-selftest.c 2017-02-03 17:18:10.947619830 +0100
28257 @@ -590,6 +590,8 @@
28258  #include "locking-selftest-spin-hardirq.h"
28259  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
28261 +#ifndef CONFIG_PREEMPT_RT_FULL
28263  #include "locking-selftest-rlock-hardirq.h"
28264  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
28266 @@ -605,9 +607,12 @@
28267  #include "locking-selftest-wlock-softirq.h"
28268  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
28270 +#endif
28272  #undef E1
28273  #undef E2
28275 +#ifndef CONFIG_PREEMPT_RT_FULL
28276  /*
28277   * Enabling hardirqs with a softirq-safe lock held:
28278   */
28279 @@ -640,6 +645,8 @@
28280  #undef E1
28281  #undef E2
28283 +#endif
28285  /*
28286   * Enabling irqs with an irq-safe lock held:
28287   */
28288 @@ -663,6 +670,8 @@
28289  #include "locking-selftest-spin-hardirq.h"
28290  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
28292 +#ifndef CONFIG_PREEMPT_RT_FULL
28294  #include "locking-selftest-rlock-hardirq.h"
28295  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
28297 @@ -678,6 +687,8 @@
28298  #include "locking-selftest-wlock-softirq.h"
28299  GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
28301 +#endif
28303  #undef E1
28304  #undef E2
28306 @@ -709,6 +720,8 @@
28307  #include "locking-selftest-spin-hardirq.h"
28308  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
28310 +#ifndef CONFIG_PREEMPT_RT_FULL
28312  #include "locking-selftest-rlock-hardirq.h"
28313  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
28315 @@ -724,6 +737,8 @@
28316  #include "locking-selftest-wlock-softirq.h"
28317  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
28319 +#endif
28321  #undef E1
28322  #undef E2
28323  #undef E3
28324 @@ -757,6 +772,8 @@
28325  #include "locking-selftest-spin-hardirq.h"
28326  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
28328 +#ifndef CONFIG_PREEMPT_RT_FULL
28330  #include "locking-selftest-rlock-hardirq.h"
28331  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
28333 @@ -772,10 +789,14 @@
28334  #include "locking-selftest-wlock-softirq.h"
28335  GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
28337 +#endif
28339  #undef E1
28340  #undef E2
28341  #undef E3
28343 +#ifndef CONFIG_PREEMPT_RT_FULL
28345  /*
28346   * read-lock / write-lock irq inversion.
28347   *
28348 @@ -838,6 +859,10 @@
28349  #undef E2
28350  #undef E3
28352 +#endif
28354 +#ifndef CONFIG_PREEMPT_RT_FULL
28356  /*
28357   * read-lock / write-lock recursion that is actually safe.
28358   */
28359 @@ -876,6 +901,8 @@
28360  #undef E2
28361  #undef E3
28363 +#endif
28365  /*
28366   * read-lock / write-lock recursion that is unsafe.
28367   */
28368 @@ -1858,6 +1885,7 @@
28370         printk("  --------------------------------------------------------------------------\n");
28372 +#ifndef CONFIG_PREEMPT_RT_FULL
28373         /*
28374          * irq-context testcases:
28375          */
28376 @@ -1870,6 +1898,28 @@
28378         DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
28379  //     DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
28380 +#else
28381 +       /* On -rt, we only do hardirq context test for raw spinlock */
28382 +       DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
28383 +       DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
28385 +       DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
28386 +       DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
28388 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
28389 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
28390 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
28391 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
28392 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
28393 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
28395 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
28396 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
28397 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
28398 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
28399 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
28400 +       DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
28401 +#endif
28403         ww_tests();
28405 diff -Nur linux-4.4.46.orig/lib/percpu_ida.c linux-4.4.46/lib/percpu_ida.c
28406 --- linux-4.4.46.orig/lib/percpu_ida.c  2017-02-01 08:31:11.000000000 +0100
28407 +++ linux-4.4.46/lib/percpu_ida.c       2017-02-03 17:18:10.947619830 +0100
28408 @@ -26,6 +26,9 @@
28409  #include <linux/string.h>
28410  #include <linux/spinlock.h>
28411  #include <linux/percpu_ida.h>
28412 +#include <linux/locallock.h>
28414 +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
28416  struct percpu_ida_cpu {
28417         /*
28418 @@ -148,13 +151,13 @@
28419         unsigned long flags;
28420         int tag;
28422 -       local_irq_save(flags);
28423 +       local_lock_irqsave(irq_off_lock, flags);
28424         tags = this_cpu_ptr(pool->tag_cpu);
28426         /* Fastpath */
28427         tag = alloc_local_tag(tags);
28428         if (likely(tag >= 0)) {
28429 -               local_irq_restore(flags);
28430 +               local_unlock_irqrestore(irq_off_lock, flags);
28431                 return tag;
28432         }
28434 @@ -173,6 +176,7 @@
28436                 if (!tags->nr_free)
28437                         alloc_global_tags(pool, tags);
28439                 if (!tags->nr_free)
28440                         steal_tags(pool, tags);
28442 @@ -184,7 +188,7 @@
28443                 }
28445                 spin_unlock(&pool->lock);
28446 -               local_irq_restore(flags);
28447 +               local_unlock_irqrestore(irq_off_lock, flags);
28449                 if (tag >= 0 || state == TASK_RUNNING)
28450                         break;
28451 @@ -196,7 +200,7 @@
28453                 schedule();
28455 -               local_irq_save(flags);
28456 +               local_lock_irqsave(irq_off_lock, flags);
28457                 tags = this_cpu_ptr(pool->tag_cpu);
28458         }
28459         if (state != TASK_RUNNING)
28460 @@ -221,7 +225,7 @@
28462         BUG_ON(tag >= pool->nr_tags);
28464 -       local_irq_save(flags);
28465 +       local_lock_irqsave(irq_off_lock, flags);
28466         tags = this_cpu_ptr(pool->tag_cpu);
28468         spin_lock(&tags->lock);
28469 @@ -253,7 +257,7 @@
28470                 spin_unlock(&pool->lock);
28471         }
28473 -       local_irq_restore(flags);
28474 +       local_unlock_irqrestore(irq_off_lock, flags);
28476  EXPORT_SYMBOL_GPL(percpu_ida_free);
28478 @@ -345,7 +349,7 @@
28479         struct percpu_ida_cpu *remote;
28480         unsigned cpu, i, err = 0;
28482 -       local_irq_save(flags);
28483 +       local_lock_irqsave(irq_off_lock, flags);
28484         for_each_possible_cpu(cpu) {
28485                 remote = per_cpu_ptr(pool->tag_cpu, cpu);
28486                 spin_lock(&remote->lock);
28487 @@ -367,7 +371,7 @@
28488         }
28489         spin_unlock(&pool->lock);
28490  out:
28491 -       local_irq_restore(flags);
28492 +       local_unlock_irqrestore(irq_off_lock, flags);
28493         return err;
28495  EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
28496 diff -Nur linux-4.4.46.orig/lib/radix-tree.c linux-4.4.46/lib/radix-tree.c
28497 --- linux-4.4.46.orig/lib/radix-tree.c  2017-02-01 08:31:11.000000000 +0100
28498 +++ linux-4.4.46/lib/radix-tree.c       2017-02-03 17:18:10.947619830 +0100
28499 @@ -196,13 +196,14 @@
28500                  * succeed in getting a node here (and never reach
28501                  * kmem_cache_alloc)
28502                  */
28503 -               rtp = this_cpu_ptr(&radix_tree_preloads);
28504 +               rtp = &get_cpu_var(radix_tree_preloads);
28505                 if (rtp->nr) {
28506                         ret = rtp->nodes;
28507                         rtp->nodes = ret->private_data;
28508                         ret->private_data = NULL;
28509                         rtp->nr--;
28510                 }
28511 +               put_cpu_var(radix_tree_preloads);
28512                 /*
28513                  * Update the allocation stack trace as this is more useful
28514                  * for debugging.
28515 @@ -242,6 +243,7 @@
28516         call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
28519 +#ifndef CONFIG_PREEMPT_RT_FULL
28520  /*
28521   * Load up this CPU's radix_tree_node buffer with sufficient objects to
28522   * ensure that the addition of a single element in the tree cannot fail.  On
28523 @@ -310,6 +312,7 @@
28524         return 0;
28526  EXPORT_SYMBOL(radix_tree_maybe_preload);
28527 +#endif
28529  /*
28530   *     Return the maximum key which can be store into a
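
Two things change in lib/radix-tree.c: the node allocation fallback switches from this_cpu_ptr() to get_cpu_var()/put_cpu_var(), so the per-CPU preload stash is manipulated with preemption explicitly disabled instead of relying on the caller's context, and radix_tree_preload()/radix_tree_maybe_preload() are compiled out under PREEMPT_RT_FULL (their RT replacements are expected to come from the header side of this patch). A minimal sketch of the get_cpu_var() idiom with an illustrative per-CPU stash:

#include <linux/percpu.h>

struct node_stash {
	int nr;
	void *nodes[4];		/* illustrative, tiny */
};

static DEFINE_PER_CPU(struct node_stash, node_stash);

static void *stash_try_get(void)
{
	struct node_stash *st;
	void *obj = NULL;

	/*
	 * get_cpu_var() disables preemption and hands back this CPU's
	 * copy, so the read-modify-write below cannot race with the same
	 * code running again after a migration.  A bare this_cpu_ptr()
	 * would not provide that guarantee once the caller is preemptible.
	 */
	st = &get_cpu_var(node_stash);
	if (st->nr)
		obj = st->nodes[--st->nr];
	put_cpu_var(node_stash);

	return obj;
}
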
28531 diff -Nur linux-4.4.46.orig/lib/rbtree.c linux-4.4.46/lib/rbtree.c
28532 --- linux-4.4.46.orig/lib/rbtree.c      2017-02-01 08:31:11.000000000 +0100
28533 +++ linux-4.4.46/lib/rbtree.c   2017-02-03 17:18:10.947619830 +0100
28534 @@ -23,6 +23,7 @@
28536  #include <linux/rbtree_augmented.h>
28537  #include <linux/export.h>
28538 +#include <linux/rcupdate.h>
28540  /*
28541   * red-black trees properties:  http://en.wikipedia.org/wiki/Rbtree
28542 @@ -590,3 +591,13 @@
28543         return rb_left_deepest_node(root->rb_node);
28545  EXPORT_SYMBOL(rb_first_postorder);
28547 +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
28548 +                                   struct rb_node **rb_link)
28550 +       node->__rb_parent_color = (unsigned long)parent;
28551 +       node->rb_left = node->rb_right = NULL;
28553 +       rcu_assign_pointer(*rb_link, node);
28555 +EXPORT_SYMBOL(rb_link_node_rcu);
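
rb_link_node_rcu() is rb_link_node() with the final publication done through rcu_assign_pointer(), so a lockless reader walking the tree under rcu_read_lock() can never observe the new node before its parent pointer and NULL children are initialised. A writer-side sketch, with the caveat that the struct, key and lock are made up here and that lockless rbtree readers still need their usual tolerance for concurrent rebalancing (retry or fall back to a locked lookup):

#include <linux/rbtree.h>
#include <linux/spinlock.h>

struct item {
	struct rb_node	node;
	unsigned long	key;
};

static struct rb_root item_root = RB_ROOT;
static DEFINE_SPINLOCK(item_lock);	/* serialises writers only */

static void item_insert(struct item *new)
{
	struct rb_node **link, *parent = NULL;

	spin_lock(&item_lock);
	link = &item_root.rb_node;
	while (*link) {
		struct item *cur = rb_entry(*link, struct item, node);

		parent = *link;
		link = cur->key < new->key ? &parent->rb_right
					   : &parent->rb_left;
	}
	/* Fully initialise the node, then publish it with a store-release. */
	rb_link_node_rcu(&new->node, parent, link);
	rb_insert_color(&new->node, &item_root);
	spin_unlock(&item_lock);
}
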
28556 diff -Nur linux-4.4.46.orig/lib/scatterlist.c linux-4.4.46/lib/scatterlist.c
28557 --- linux-4.4.46.orig/lib/scatterlist.c 2017-02-01 08:31:11.000000000 +0100
28558 +++ linux-4.4.46/lib/scatterlist.c      2017-02-03 17:18:10.947619830 +0100
28559 @@ -620,7 +620,7 @@
28560                         flush_kernel_dcache_page(miter->page);
28562                 if (miter->__flags & SG_MITER_ATOMIC) {
28563 -                       WARN_ON_ONCE(preemptible());
28564 +                       WARN_ON_ONCE(!pagefault_disabled());
28565                         kunmap_atomic(miter->addr);
28566                 } else
28567                         kunmap(miter->page);
28568 @@ -664,7 +664,7 @@
28569         if (!sg_miter_skip(&miter, skip))
28570                 return false;
28572 -       local_irq_save(flags);
28573 +       local_irq_save_nort(flags);
28575         while (sg_miter_next(&miter) && offset < buflen) {
28576                 unsigned int len;
28577 @@ -681,7 +681,7 @@
28579         sg_miter_stop(&miter);
28581 -       local_irq_restore(flags);
28582 +       local_irq_restore_nort(flags);
28583         return offset;
28585  EXPORT_SYMBOL(sg_copy_buffer);
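
In sg_copy_buffer() the interrupt disabling becomes local_irq_save_nort()/local_irq_restore_nort(), and the SG_MITER_ATOMIC sanity check changes from WARN_ON_ONCE(preemptible()) to WARN_ON_ONCE(!pagefault_disabled()): on RT, kmap_atomic() no longer implies disabled preemption, only disabled page faults, and the hard irq-off section is only required on !RT. The _nort helpers are defined by header hunks elsewhere in this patch; purely as an assumption about their shape, they follow this pattern:

/*
 * Assumed shape of the "_nort" convention (see the include/linux hunks
 * of this patch for the authoritative definitions): the IRQ disabling
 * is real only when the kernel is not fully preemptible.
 */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif
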
28586 diff -Nur linux-4.4.46.orig/lib/smp_processor_id.c linux-4.4.46/lib/smp_processor_id.c
28587 --- linux-4.4.46.orig/lib/smp_processor_id.c    2017-02-01 08:31:11.000000000 +0100
28588 +++ linux-4.4.46/lib/smp_processor_id.c 2017-02-03 17:18:10.947619830 +0100
28589 @@ -39,8 +39,9 @@
28590         if (!printk_ratelimit())
28591                 goto out_enable;
28593 -       printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
28594 -               what1, what2, preempt_count() - 1, current->comm, current->pid);
28595 +       printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
28596 +               what1, what2, preempt_count() - 1, __migrate_disabled(current),
28597 +               current->comm, current->pid);
28599         print_symbol("caller is %s\n", (long)__builtin_return_address(0));
28600         dump_stack();
28601 diff -Nur linux-4.4.46.orig/Makefile linux-4.4.46/Makefile
28602 --- linux-4.4.46.orig/Makefile  2017-02-01 08:31:11.000000000 +0100
28603 +++ linux-4.4.46/Makefile       2017-02-03 17:18:05.627414322 +0100
28604 @@ -785,6 +785,9 @@
28605  # Prohibit date/time macros, which would make the build non-deterministic
28606  KBUILD_CFLAGS   += $(call cc-option,-Werror=date-time)
28608 +# enforce correct pointer usage
28609 +KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
28611  # use the deterministic mode of AR if available
28612  KBUILD_ARFLAGS := $(call ar-option,D)
28614 diff -Nur linux-4.4.46.orig/mm/backing-dev.c linux-4.4.46/mm/backing-dev.c
28615 --- linux-4.4.46.orig/mm/backing-dev.c  2017-02-01 08:31:11.000000000 +0100
28616 +++ linux-4.4.46/mm/backing-dev.c       2017-02-03 17:18:10.947619830 +0100
28617 @@ -457,9 +457,9 @@
28619         unsigned long flags;
28621 -       local_irq_save(flags);
28622 +       local_irq_save_nort(flags);
28623         if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
28624 -               local_irq_restore(flags);
28625 +               local_irq_restore_nort(flags);
28626                 return;
28627         }
28629 diff -Nur linux-4.4.46.orig/mm/compaction.c linux-4.4.46/mm/compaction.c
28630 --- linux-4.4.46.orig/mm/compaction.c   2017-02-01 08:31:11.000000000 +0100
28631 +++ linux-4.4.46/mm/compaction.c        2017-02-03 17:18:10.947619830 +0100
28632 @@ -1430,10 +1430,12 @@
28633                                 cc->migrate_pfn & ~((1UL << cc->order) - 1);
28635                         if (cc->last_migrated_pfn < current_block_start) {
28636 -                               cpu = get_cpu();
28637 +                               cpu = get_cpu_light();
28638 +                               local_lock_irq(swapvec_lock);
28639                                 lru_add_drain_cpu(cpu);
28640 +                               local_unlock_irq(swapvec_lock);
28641                                 drain_local_pages(zone);
28642 -                               put_cpu();
28643 +                               put_cpu_light();
28644                                 /* No more flushing until we migrate again */
28645                                 cc->last_migrated_pfn = 0;
28646                         }
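
In compact_zone(), get_cpu()/put_cpu() become get_cpu_light()/put_cpu_light() and the LRU drain is bracketed by the swapvec_lock local lock: the CPU number stays stable because the task is pinned, but preemption is not disabled across lru_add_drain_cpu() and drain_local_pages(). Purely as an assumption about their shape (the real definitions live in the include/linux hunks of this patch), the _light helpers look roughly like this:

/*
 * Assumed sketch of get_cpu_light()/put_cpu_light(): pin the task to
 * its current CPU without disabling preemption on RT; identical to
 * get_cpu()/put_cpu() otherwise.
 */
#ifdef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()	({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()	migrate_enable()
#else
# define get_cpu_light()	get_cpu()	/* preempt_disable() + CPU id */
# define put_cpu_light()	put_cpu()
#endif
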
28647 diff -Nur linux-4.4.46.orig/mm/filemap.c linux-4.4.46/mm/filemap.c
28648 --- linux-4.4.46.orig/mm/filemap.c      2017-02-01 08:31:11.000000000 +0100
28649 +++ linux-4.4.46/mm/filemap.c   2017-02-03 17:18:10.947619830 +0100
28650 @@ -144,9 +144,12 @@
28651                  * node->private_list is protected by
28652                  * mapping->tree_lock.
28653                  */
28654 -               if (!list_empty(&node->private_list))
28655 -                       list_lru_del(&workingset_shadow_nodes,
28656 +               if (!list_empty(&node->private_list)) {
28657 +                       local_lock(workingset_shadow_lock);
28658 +                       list_lru_del(&__workingset_shadow_nodes,
28659                                      &node->private_list);
28660 +                       local_unlock(workingset_shadow_lock);
28661 +               }
28662         }
28663         return 0;
28665 @@ -218,7 +221,9 @@
28666         if (!workingset_node_pages(node) &&
28667             list_empty(&node->private_list)) {
28668                 node->private_data = mapping;
28669 -               list_lru_add(&workingset_shadow_nodes, &node->private_list);
28670 +               local_lock(workingset_shadow_lock);
28671 +               list_lru_add(&__workingset_shadow_nodes, &node->private_list);
28672 +               local_unlock(workingset_shadow_lock);
28673         }
28676 diff -Nur linux-4.4.46.orig/mm/highmem.c linux-4.4.46/mm/highmem.c
28677 --- linux-4.4.46.orig/mm/highmem.c      2017-02-01 08:31:11.000000000 +0100
28678 +++ linux-4.4.46/mm/highmem.c   2017-02-03 17:18:10.947619830 +0100
28679 @@ -29,10 +29,11 @@
28680  #include <linux/kgdb.h>
28681  #include <asm/tlbflush.h>
28684 +#ifndef CONFIG_PREEMPT_RT_FULL
28685  #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
28686  DEFINE_PER_CPU(int, __kmap_atomic_idx);
28687  #endif
28688 +#endif
28690  /*
28691   * Virtual_count is not a pure "count".
28692 @@ -107,8 +108,9 @@
28693  unsigned long totalhigh_pages __read_mostly;
28694  EXPORT_SYMBOL(totalhigh_pages);
28697 +#ifndef CONFIG_PREEMPT_RT_FULL
28698  EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
28699 +#endif
28701  unsigned int nr_free_highpages (void)
28703 diff -Nur linux-4.4.46.orig/mm/Kconfig linux-4.4.46/mm/Kconfig
28704 --- linux-4.4.46.orig/mm/Kconfig        2017-02-01 08:31:11.000000000 +0100
28705 +++ linux-4.4.46/mm/Kconfig     2017-02-03 17:18:10.947619830 +0100
28706 @@ -392,7 +392,7 @@
28708  config TRANSPARENT_HUGEPAGE
28709         bool "Transparent Hugepage Support"
28710 -       depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
28711 +       depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
28712         select COMPACTION
28713         help
28714           Transparent Hugepages allows the kernel to use huge pages and
28715 diff -Nur linux-4.4.46.orig/mm/memcontrol.c linux-4.4.46/mm/memcontrol.c
28716 --- linux-4.4.46.orig/mm/memcontrol.c   2017-02-01 08:31:11.000000000 +0100
28717 +++ linux-4.4.46/mm/memcontrol.c        2017-02-03 17:18:10.947619830 +0100
28718 @@ -67,6 +67,8 @@
28719  #include <net/sock.h>
28720  #include <net/ip.h>
28721  #include <net/tcp_memcontrol.h>
28722 +#include <linux/locallock.h>
28724  #include "slab.h"
28726  #include <asm/uaccess.h>
28727 @@ -87,6 +89,7 @@
28728  #define do_swap_account                0
28729  #endif
28731 +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
28732  static const char * const mem_cgroup_stat_names[] = {
28733         "cache",
28734         "rss",
28735 @@ -1922,14 +1925,17 @@
28736   */
28737  static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
28739 -       struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
28740 +       struct memcg_stock_pcp *stock;
28741 +       int cpu = get_cpu_light();
28743 +       stock = &per_cpu(memcg_stock, cpu);
28745         if (stock->cached != memcg) { /* reset if necessary */
28746                 drain_stock(stock);
28747                 stock->cached = memcg;
28748         }
28749         stock->nr_pages += nr_pages;
28750 -       put_cpu_var(memcg_stock);
28751 +       put_cpu_light();
28754  /*
28755 @@ -1945,7 +1951,7 @@
28756                 return;
28757         /* Notify other cpus that system-wide "drain" is running */
28758         get_online_cpus();
28759 -       curcpu = get_cpu();
28760 +       curcpu = get_cpu_light();
28761         for_each_online_cpu(cpu) {
28762                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
28763                 struct mem_cgroup *memcg;
28764 @@ -1962,7 +1968,7 @@
28765                                 schedule_work_on(cpu, &stock->work);
28766                 }
28767         }
28768 -       put_cpu();
28769 +       put_cpu_light();
28770         put_online_cpus();
28771         mutex_unlock(&percpu_charge_mutex);
28773 @@ -4709,12 +4715,12 @@
28775         ret = 0;
28777 -       local_irq_disable();
28778 +       local_lock_irq(event_lock);
28779         mem_cgroup_charge_statistics(to, page, nr_pages);
28780         memcg_check_events(to, page);
28781         mem_cgroup_charge_statistics(from, page, -nr_pages);
28782         memcg_check_events(from, page);
28783 -       local_irq_enable();
28784 +       local_unlock_irq(event_lock);
28785  out_unlock:
28786         unlock_page(page);
28787  out:
28788 @@ -5504,10 +5510,10 @@
28789                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
28790         }
28792 -       local_irq_disable();
28793 +       local_lock_irq(event_lock);
28794         mem_cgroup_charge_statistics(memcg, page, nr_pages);
28795         memcg_check_events(memcg, page);
28796 -       local_irq_enable();
28797 +       local_unlock_irq(event_lock);
28799         if (do_swap_account && PageSwapCache(page)) {
28800                 swp_entry_t entry = { .val = page_private(page) };
28801 @@ -5563,14 +5569,14 @@
28802                 memcg_oom_recover(memcg);
28803         }
28805 -       local_irq_save(flags);
28806 +       local_lock_irqsave(event_lock, flags);
28807         __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
28808         __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
28809         __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
28810         __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
28811         __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
28812         memcg_check_events(memcg, dummy_page);
28813 -       local_irq_restore(flags);
28814 +       local_unlock_irqrestore(event_lock, flags);
28816         if (!mem_cgroup_is_root(memcg))
28817                 css_put_many(&memcg->css, nr_pages);
28818 @@ -5762,6 +5768,7 @@
28820         struct mem_cgroup *memcg, *swap_memcg;
28821         unsigned short oldid;
28822 +       unsigned long flags;
28824         VM_BUG_ON_PAGE(PageLRU(page), page);
28825         VM_BUG_ON_PAGE(page_count(page), page);
28826 @@ -5802,12 +5809,16 @@
28827          * important here to have the interrupts disabled because it is the
28828          * only synchronisation we have for udpating the per-CPU variables.
28829          */
28830 +       local_lock_irqsave(event_lock, flags);
28831 +#ifndef CONFIG_PREEMPT_RT_BASE
28832         VM_BUG_ON(!irqs_disabled());
28833 +#endif
28834         mem_cgroup_charge_statistics(memcg, page, -1);
28835         memcg_check_events(memcg, page);
28837         if (!mem_cgroup_is_root(memcg))
28838                 css_put(&memcg->css);
28839 +       local_unlock_irqrestore(event_lock, flags);
28842  /**
28843 diff -Nur linux-4.4.46.orig/mm/mmu_context.c linux-4.4.46/mm/mmu_context.c
28844 --- linux-4.4.46.orig/mm/mmu_context.c  2017-02-01 08:31:11.000000000 +0100
28845 +++ linux-4.4.46/mm/mmu_context.c       2017-02-03 17:18:10.947619830 +0100
28846 @@ -23,6 +23,7 @@
28847         struct task_struct *tsk = current;
28849         task_lock(tsk);
28850 +       preempt_disable_rt();
28851         active_mm = tsk->active_mm;
28852         if (active_mm != mm) {
28853                 atomic_inc(&mm->mm_count);
28854 @@ -30,6 +31,7 @@
28855         }
28856         tsk->mm = mm;
28857         switch_mm(active_mm, mm, tsk);
28858 +       preempt_enable_rt();
28859         task_unlock(tsk);
28860  #ifdef finish_arch_post_lock_switch
28861         finish_arch_post_lock_switch();
28862 diff -Nur linux-4.4.46.orig/mm/page_alloc.c linux-4.4.46/mm/page_alloc.c
28863 --- linux-4.4.46.orig/mm/page_alloc.c   2017-02-01 08:31:11.000000000 +0100
28864 +++ linux-4.4.46/mm/page_alloc.c        2017-02-03 17:18:10.951619984 +0100
28865 @@ -60,6 +60,7 @@
28866  #include <linux/page_ext.h>
28867  #include <linux/hugetlb.h>
28868  #include <linux/sched/rt.h>
28869 +#include <linux/locallock.h>
28870  #include <linux/page_owner.h>
28871  #include <linux/kthread.h>
28873 @@ -264,6 +265,18 @@
28874  EXPORT_SYMBOL(nr_online_nodes);
28875  #endif
28877 +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
28879 +#ifdef CONFIG_PREEMPT_RT_BASE
28880 +# define cpu_lock_irqsave(cpu, flags)          \
28881 +       local_lock_irqsave_on(pa_lock, flags, cpu)
28882 +# define cpu_unlock_irqrestore(cpu, flags)     \
28883 +       local_unlock_irqrestore_on(pa_lock, flags, cpu)
28884 +#else
28885 +# define cpu_lock_irqsave(cpu, flags)          local_irq_save(flags)
28886 +# define cpu_unlock_irqrestore(cpu, flags)     local_irq_restore(flags)
28887 +#endif
28889  int page_group_by_mobility_disabled __read_mostly;
28891  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
28892 @@ -786,7 +799,7 @@
28895  /*
28896 - * Frees a number of pages from the PCP lists
28897 + * Frees a number of pages which have been collected from the pcp lists.
28898   * Assumes all pages on list are in same zone, and of same order.
28899   * count is the number of pages to free.
28900   *
28901 @@ -797,18 +810,53 @@
28902   * pinned" detection logic.
28903   */
28904  static void free_pcppages_bulk(struct zone *zone, int count,
28905 -                                       struct per_cpu_pages *pcp)
28906 +                              struct list_head *list)
28908 -       int migratetype = 0;
28909 -       int batch_free = 0;
28910         int to_free = count;
28911         unsigned long nr_scanned;
28912 +       unsigned long flags;
28914 +       spin_lock_irqsave(&zone->lock, flags);
28916 -       spin_lock(&zone->lock);
28917         nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
28918         if (nr_scanned)
28919                 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
28921 +       while (!list_empty(list)) {
28922 +               struct page *page = list_first_entry(list, struct page, lru);
28923 +               int mt; /* migratetype of the to-be-freed page */
28925 +               /* must delete as __free_one_page list manipulates */
28926 +               list_del(&page->lru);
28928 +               mt = get_pcppage_migratetype(page);
28929 +               /* MIGRATE_ISOLATE page should not go to pcplists */
28930 +               VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
28931 +               /* Pageblock could have been isolated meanwhile */
28932 +               if (unlikely(has_isolate_pageblock(zone)))
28933 +                       mt = get_pageblock_migratetype(page);
28935 +               __free_one_page(page, page_to_pfn(page), zone, 0, mt);
28936 +               trace_mm_page_pcpu_drain(page, 0, mt);
28937 +               to_free--;
28938 +       }
28939 +       WARN_ON(to_free != 0);
28940 +       spin_unlock_irqrestore(&zone->lock, flags);
28944 + * Moves a number of pages from the PCP lists to free list which
28945 + * is freed outside of the locked region.
28946 + *
28947 + * Assumes all pages on list are in same zone, and of same order.
28948 + * count is the number of pages to free.
28949 + */
28950 +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
28951 +                             struct list_head *dst)
28953 +       int migratetype = 0;
28954 +       int batch_free = 0;
28956         while (to_free) {
28957                 struct page *page;
28958                 struct list_head *list;
28959 @@ -824,7 +872,7 @@
28960                         batch_free++;
28961                         if (++migratetype == MIGRATE_PCPTYPES)
28962                                 migratetype = 0;
28963 -                       list = &pcp->lists[migratetype];
28964 +                       list = &src->lists[migratetype];
28965                 } while (list_empty(list));
28967                 /* This is the only non-empty list. Free them all. */
28968 @@ -832,24 +880,12 @@
28969                         batch_free = to_free;
28971                 do {
28972 -                       int mt; /* migratetype of the to-be-freed page */
28974 -                       page = list_entry(list->prev, struct page, lru);
28975 -                       /* must delete as __free_one_page list manipulates */
28976 +                       page = list_last_entry(list, struct page, lru);
28977                         list_del(&page->lru);
28979 -                       mt = get_pcppage_migratetype(page);
28980 -                       /* MIGRATE_ISOLATE page should not go to pcplists */
28981 -                       VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
28982 -                       /* Pageblock could have been isolated meanwhile */
28983 -                       if (unlikely(has_isolate_pageblock(zone)))
28984 -                               mt = get_pageblock_migratetype(page);
28986 -                       __free_one_page(page, page_to_pfn(page), zone, 0, mt);
28987 -                       trace_mm_page_pcpu_drain(page, 0, mt);
28988 +                       list_add(&page->lru, dst);
28989                 } while (--to_free && --batch_free && !list_empty(list));
28990         }
28991 -       spin_unlock(&zone->lock);
28994  static void free_one_page(struct zone *zone,
28995 @@ -858,7 +894,9 @@
28996                                 int migratetype)
28998         unsigned long nr_scanned;
28999 -       spin_lock(&zone->lock);
29000 +       unsigned long flags;
29002 +       spin_lock_irqsave(&zone->lock, flags);
29003         nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
29004         if (nr_scanned)
29005                 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
29006 @@ -868,7 +906,7 @@
29007                 migratetype = get_pfnblock_migratetype(page, pfn);
29008         }
29009         __free_one_page(page, pfn, zone, order, migratetype);
29010 -       spin_unlock(&zone->lock);
29011 +       spin_unlock_irqrestore(&zone->lock, flags);
29014  static int free_tail_pages_check(struct page *head_page, struct page *page)
29015 @@ -1019,10 +1057,10 @@
29016                 return;
29018         migratetype = get_pfnblock_migratetype(page, pfn);
29019 -       local_irq_save(flags);
29020 +       local_lock_irqsave(pa_lock, flags);
29021         __count_vm_events(PGFREE, 1 << order);
29022         free_one_page(page_zone(page), page, pfn, order, migratetype);
29023 -       local_irq_restore(flags);
29024 +       local_unlock_irqrestore(pa_lock, flags);
29027  static void __init __free_pages_boot_core(struct page *page,
29028 @@ -1879,16 +1917,18 @@
29029  void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
29031         unsigned long flags;
29032 +       LIST_HEAD(dst);
29033         int to_drain, batch;
29035 -       local_irq_save(flags);
29036 +       local_lock_irqsave(pa_lock, flags);
29037         batch = READ_ONCE(pcp->batch);
29038         to_drain = min(pcp->count, batch);
29039         if (to_drain > 0) {
29040 -               free_pcppages_bulk(zone, to_drain, pcp);
29041 +               isolate_pcp_pages(to_drain, pcp, &dst);
29042                 pcp->count -= to_drain;
29043         }
29044 -       local_irq_restore(flags);
29045 +       local_unlock_irqrestore(pa_lock, flags);
29046 +       free_pcppages_bulk(zone, to_drain, &dst);
29048  #endif
29050 @@ -1904,16 +1944,21 @@
29051         unsigned long flags;
29052         struct per_cpu_pageset *pset;
29053         struct per_cpu_pages *pcp;
29054 +       LIST_HEAD(dst);
29055 +       int count;
29057 -       local_irq_save(flags);
29058 +       cpu_lock_irqsave(cpu, flags);
29059         pset = per_cpu_ptr(zone->pageset, cpu);
29061         pcp = &pset->pcp;
29062 -       if (pcp->count) {
29063 -               free_pcppages_bulk(zone, pcp->count, pcp);
29064 +       count = pcp->count;
29065 +       if (count) {
29066 +               isolate_pcp_pages(count, pcp, &dst);
29067                 pcp->count = 0;
29068         }
29069 -       local_irq_restore(flags);
29070 +       cpu_unlock_irqrestore(cpu, flags);
29071 +       if (count)
29072 +               free_pcppages_bulk(zone, count, &dst);
29075  /*
29076 @@ -1999,8 +2044,17 @@
29077                 else
29078                         cpumask_clear_cpu(cpu, &cpus_with_pcps);
29079         }
29080 +#ifndef CONFIG_PREEMPT_RT_BASE
29081         on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
29082                                                                 zone, 1);
29083 +#else
29084 +       for_each_cpu(cpu, &cpus_with_pcps) {
29085 +               if (zone)
29086 +                       drain_pages_zone(cpu, zone);
29087 +               else
29088 +                       drain_pages(cpu);
29089 +       }
29090 +#endif
29093  #ifdef CONFIG_HIBERNATION
29094 @@ -2056,7 +2110,7 @@
29096         migratetype = get_pfnblock_migratetype(page, pfn);
29097         set_pcppage_migratetype(page, migratetype);
29098 -       local_irq_save(flags);
29099 +       local_lock_irqsave(pa_lock, flags);
29100         __count_vm_event(PGFREE);
29102         /*
29103 @@ -2082,12 +2136,17 @@
29104         pcp->count++;
29105         if (pcp->count >= pcp->high) {
29106                 unsigned long batch = READ_ONCE(pcp->batch);
29107 -               free_pcppages_bulk(zone, batch, pcp);
29108 +               LIST_HEAD(dst);
29110 +               isolate_pcp_pages(batch, pcp, &dst);
29111                 pcp->count -= batch;
29112 +               local_unlock_irqrestore(pa_lock, flags);
29113 +               free_pcppages_bulk(zone, batch, &dst);
29114 +               return;
29115         }
29117  out:
29118 -       local_irq_restore(flags);
29119 +       local_unlock_irqrestore(pa_lock, flags);
29122  /*
29123 @@ -2222,7 +2281,7 @@
29124                 struct per_cpu_pages *pcp;
29125                 struct list_head *list;
29127 -               local_irq_save(flags);
29128 +               local_lock_irqsave(pa_lock, flags);
29129                 pcp = &this_cpu_ptr(zone->pageset)->pcp;
29130                 list = &pcp->lists[migratetype];
29131                 if (list_empty(list)) {
29132 @@ -2254,7 +2313,7 @@
29133                          */
29134                         WARN_ON_ONCE(order > 1);
29135                 }
29136 -               spin_lock_irqsave(&zone->lock, flags);
29137 +               local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
29139                 page = NULL;
29140                 if (alloc_flags & ALLOC_HARDER) {
29141 @@ -2264,11 +2323,13 @@
29142                 }
29143                 if (!page)
29144                         page = __rmqueue(zone, order, migratetype, gfp_flags);
29145 -               spin_unlock(&zone->lock);
29146 -               if (!page)
29147 +               if (!page) {
29148 +                       spin_unlock(&zone->lock);
29149                         goto failed;
29150 +               }
29151                 __mod_zone_freepage_state(zone, -(1 << order),
29152                                           get_pcppage_migratetype(page));
29153 +               spin_unlock(&zone->lock);
29154         }
29156         __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
29157 @@ -2278,13 +2339,13 @@
29159         __count_zone_vm_events(PGALLOC, zone, 1 << order);
29160         zone_statistics(preferred_zone, zone, gfp_flags);
29161 -       local_irq_restore(flags);
29162 +       local_unlock_irqrestore(pa_lock, flags);
29164         VM_BUG_ON_PAGE(bad_range(zone, page), page);
29165         return page;
29167  failed:
29168 -       local_irq_restore(flags);
29169 +       local_unlock_irqrestore(pa_lock, flags);
29170         return NULL;
29173 @@ -5953,6 +6014,7 @@
29174  void __init page_alloc_init(void)
29176         hotcpu_notifier(page_alloc_cpu_notify, 0);
29177 +       local_irq_lock_init(pa_lock);
29180  /*
29181 @@ -6847,7 +6909,7 @@
29182         struct per_cpu_pageset *pset;
29184         /* avoid races with drain_pages()  */
29185 -       local_irq_save(flags);
29186 +       local_lock_irqsave(pa_lock, flags);
29187         if (zone->pageset != &boot_pageset) {
29188                 for_each_online_cpu(cpu) {
29189                         pset = per_cpu_ptr(zone->pageset, cpu);
29190 @@ -6856,7 +6918,7 @@
29191                 free_percpu(zone->pageset);
29192                 zone->pageset = &boot_pageset;
29193         }
29194 -       local_irq_restore(flags);
29195 +       local_unlock_irqrestore(pa_lock, flags);
29198  #ifdef CONFIG_MEMORY_HOTREMOVE
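
The biggest structural change in mm/page_alloc.c is in the per-CPU page (pcp) free path: free_pcppages_bulk() used to walk the pcp lists with zone->lock held inside the irq-off section; now isolate_pcp_pages() moves the victim pages onto a private list while pa_lock is held, and free_pcppages_bulk() is called afterwards, taking zone->lock itself with spin_lock_irqsave(). The time spent under the (on RT, sleeping) local lock therefore stays short and bounded. The same "collect under the hot lock, process after dropping it" shape in isolation, as a sketch with made-up names:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cache_entry {
	struct list_head lru;
	/* payload ... */
};

static LIST_HEAD(cache_list);
static DEFINE_SPINLOCK(cache_lock);
static unsigned long cache_count;

static void cache_shrink(unsigned long nr)
{
	LIST_HEAD(victims);
	struct cache_entry *e, *tmp;

	/* Phase 1: unlink victims while holding the hot lock. */
	spin_lock(&cache_lock);
	while (nr-- && !list_empty(&cache_list)) {
		e = list_first_entry(&cache_list, struct cache_entry, lru);
		list_move(&e->lru, &victims);
		cache_count--;
	}
	spin_unlock(&cache_lock);

	/* Phase 2: the expensive part runs without the lock held. */
	list_for_each_entry_safe(e, tmp, &victims, lru) {
		list_del(&e->lru);
		kfree(e);
	}
}
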
29199 diff -Nur linux-4.4.46.orig/mm/slab.h linux-4.4.46/mm/slab.h
29200 --- linux-4.4.46.orig/mm/slab.h 2017-02-01 08:31:11.000000000 +0100
29201 +++ linux-4.4.46/mm/slab.h      2017-02-03 17:18:10.951619984 +0100
29202 @@ -324,7 +324,11 @@
29203   * The slab lists for all objects.
29204   */
29205  struct kmem_cache_node {
29206 +#ifdef CONFIG_SLUB
29207 +       raw_spinlock_t list_lock;
29208 +#else
29209         spinlock_t list_lock;
29210 +#endif
29212  #ifdef CONFIG_SLAB
29213         struct list_head slabs_partial; /* partial list first, better asm code */
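
Turning kmem_cache_node::list_lock into a raw_spinlock_t for SLUB (together with the raw_spin_lock_* conversions in the mm/slub.c hunks below) keeps it a genuinely spinning, non-preemptible lock on RT; a plain spinlock_t would turn into a sleeping rtmutex there, which the allocator's irq-disabled fast paths cannot take. A small sketch of what that choice implies for the critical section, with an illustrative structure:

#include <linux/spinlock.h>

struct node_state {
	raw_spinlock_t	lock;		/* spins on RT and !RT alike */
	unsigned long	nr_objects;
};

static void node_state_init(struct node_state *n)
{
	raw_spin_lock_init(&n->lock);
	n->nr_objects = 0;
}

static void node_account(struct node_state *n, unsigned long nr)
{
	unsigned long flags;

	/*
	 * raw_spin_lock_irqsave() really disables interrupts on both
	 * configurations, so the section must stay short and must not
	 * call anything that can sleep: no GFP_KERNEL allocations and,
	 * on RT, no ordinary spinlock_t either.
	 */
	raw_spin_lock_irqsave(&n->lock, flags);
	n->nr_objects += nr;
	raw_spin_unlock_irqrestore(&n->lock, flags);
}
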
29214 diff -Nur linux-4.4.46.orig/mm/slub.c linux-4.4.46/mm/slub.c
29215 --- linux-4.4.46.orig/mm/slub.c 2017-02-01 08:31:11.000000000 +0100
29216 +++ linux-4.4.46/mm/slub.c      2017-02-03 17:18:10.951619984 +0100
29217 @@ -1075,7 +1075,7 @@
29218         void *object = head;
29219         int cnt = 0;
29221 -       spin_lock_irqsave(&n->list_lock, *flags);
29222 +       raw_spin_lock_irqsave(&n->list_lock, *flags);
29223         slab_lock(page);
29225         if (!check_slab(s, page))
29226 @@ -1136,7 +1136,7 @@
29228  fail:
29229         slab_unlock(page);
29230 -       spin_unlock_irqrestore(&n->list_lock, *flags);
29231 +       raw_spin_unlock_irqrestore(&n->list_lock, *flags);
29232         slab_fix(s, "Object at 0x%p not freed", object);
29233         return NULL;
29235 @@ -1263,6 +1263,12 @@
29237  #endif /* CONFIG_SLUB_DEBUG */
29239 +struct slub_free_list {
29240 +       raw_spinlock_t          lock;
29241 +       struct list_head        list;
29243 +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
29245  /*
29246   * Hooks for other subsystems that check memory allocations. In a typical
29247   * production configuration these hooks all should produce no code at all.
29248 @@ -1399,10 +1405,17 @@
29249         gfp_t alloc_gfp;
29250         void *start, *p;
29251         int idx, order;
29252 +       bool enableirqs = false;
29254         flags &= gfp_allowed_mask;
29256         if (gfpflags_allow_blocking(flags))
29257 +               enableirqs = true;
29258 +#ifdef CONFIG_PREEMPT_RT_FULL
29259 +       if (system_state == SYSTEM_RUNNING)
29260 +               enableirqs = true;
29261 +#endif
29262 +       if (enableirqs)
29263                 local_irq_enable();
29265         flags |= s->allocflags;
29266 @@ -1473,7 +1486,7 @@
29267         page->frozen = 1;
29269  out:
29270 -       if (gfpflags_allow_blocking(flags))
29271 +       if (enableirqs)
29272                 local_irq_disable();
29273         if (!page)
29274                 return NULL;
29275 @@ -1529,6 +1542,16 @@
29276         __free_kmem_pages(page, order);
29279 +static void free_delayed(struct list_head *h)
29281 +       while(!list_empty(h)) {
29282 +               struct page *page = list_first_entry(h, struct page, lru);
29284 +               list_del(&page->lru);
29285 +               __free_slab(page->slab_cache, page);
29286 +       }
29289  #define need_reserve_slab_rcu                                          \
29290         (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
29292 @@ -1560,6 +1583,12 @@
29293                 }
29295                 call_rcu(head, rcu_free_slab);
29296 +       } else if (irqs_disabled()) {
29297 +               struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
29299 +               raw_spin_lock(&f->lock);
29300 +               list_add(&page->lru, &f->list);
29301 +               raw_spin_unlock(&f->lock);
29302         } else
29303                 __free_slab(s, page);
29305 @@ -1673,7 +1702,7 @@
29306         if (!n || !n->nr_partial)
29307                 return NULL;
29309 -       spin_lock(&n->list_lock);
29310 +       raw_spin_lock(&n->list_lock);
29311         list_for_each_entry_safe(page, page2, &n->partial, lru) {
29312                 void *t;
29314 @@ -1698,7 +1727,7 @@
29315                         break;
29317         }
29318 -       spin_unlock(&n->list_lock);
29319 +       raw_spin_unlock(&n->list_lock);
29320         return object;
29323 @@ -1944,7 +1973,7 @@
29324                          * that acquire_slab() will see a slab page that
29325                          * is frozen
29326                          */
29327 -                       spin_lock(&n->list_lock);
29328 +                       raw_spin_lock(&n->list_lock);
29329                 }
29330         } else {
29331                 m = M_FULL;
29332 @@ -1955,7 +1984,7 @@
29333                          * slabs from diagnostic functions will not see
29334                          * any frozen slabs.
29335                          */
29336 -                       spin_lock(&n->list_lock);
29337 +                       raw_spin_lock(&n->list_lock);
29338                 }
29339         }
29341 @@ -1990,7 +2019,7 @@
29342                 goto redo;
29344         if (lock)
29345 -               spin_unlock(&n->list_lock);
29346 +               raw_spin_unlock(&n->list_lock);
29348         if (m == M_FREE) {
29349                 stat(s, DEACTIVATE_EMPTY);
29350 @@ -2022,10 +2051,10 @@
29351                 n2 = get_node(s, page_to_nid(page));
29352                 if (n != n2) {
29353                         if (n)
29354 -                               spin_unlock(&n->list_lock);
29355 +                               raw_spin_unlock(&n->list_lock);
29357                         n = n2;
29358 -                       spin_lock(&n->list_lock);
29359 +                       raw_spin_lock(&n->list_lock);
29360                 }
29362                 do {
29363 @@ -2054,7 +2083,7 @@
29364         }
29366         if (n)
29367 -               spin_unlock(&n->list_lock);
29368 +               raw_spin_unlock(&n->list_lock);
29370         while (discard_page) {
29371                 page = discard_page;
29372 @@ -2093,14 +2122,21 @@
29373                         pobjects = oldpage->pobjects;
29374                         pages = oldpage->pages;
29375                         if (drain && pobjects > s->cpu_partial) {
29376 +                               struct slub_free_list *f;
29377                                 unsigned long flags;
29378 +                               LIST_HEAD(tofree);
29379                                 /*
29380                                  * partial array is full. Move the existing
29381                                  * set to the per node partial list.
29382                                  */
29383                                 local_irq_save(flags);
29384                                 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
29385 +                               f = this_cpu_ptr(&slub_free_list);
29386 +                               raw_spin_lock(&f->lock);
29387 +                               list_splice_init(&f->list, &tofree);
29388 +                               raw_spin_unlock(&f->lock);
29389                                 local_irq_restore(flags);
29390 +                               free_delayed(&tofree);
29391                                 oldpage = NULL;
29392                                 pobjects = 0;
29393                                 pages = 0;
29394 @@ -2172,7 +2208,22 @@
29396  static void flush_all(struct kmem_cache *s)
29398 +       LIST_HEAD(tofree);
29399 +       int cpu;
29401         on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
29402 +       for_each_online_cpu(cpu) {
29403 +               struct slub_free_list *f;
29405 +               if (!has_cpu_slab(cpu, s))
29406 +                       continue;
29408 +               f = &per_cpu(slub_free_list, cpu);
29409 +               raw_spin_lock_irq(&f->lock);
29410 +               list_splice_init(&f->list, &tofree);
29411 +               raw_spin_unlock_irq(&f->lock);
29412 +               free_delayed(&tofree);
29413 +       }
29416  /*
29417 @@ -2208,10 +2259,10 @@
29418         unsigned long x = 0;
29419         struct page *page;
29421 -       spin_lock_irqsave(&n->list_lock, flags);
29422 +       raw_spin_lock_irqsave(&n->list_lock, flags);
29423         list_for_each_entry(page, &n->partial, lru)
29424                 x += get_count(page);
29425 -       spin_unlock_irqrestore(&n->list_lock, flags);
29426 +       raw_spin_unlock_irqrestore(&n->list_lock, flags);
29427         return x;
29429  #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
29430 @@ -2349,8 +2400,10 @@
29431   * already disabled (which is the case for bulk allocation).
29432   */
29433  static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
29434 -                         unsigned long addr, struct kmem_cache_cpu *c)
29435 +                         unsigned long addr, struct kmem_cache_cpu *c,
29436 +                         struct list_head *to_free)
29438 +       struct slub_free_list *f;
29439         void *freelist;
29440         struct page *page;
29442 @@ -2410,6 +2463,13 @@
29443         VM_BUG_ON(!c->page->frozen);
29444         c->freelist = get_freepointer(s, freelist);
29445         c->tid = next_tid(c->tid);
29447 +out:
29448 +       f = this_cpu_ptr(&slub_free_list);
29449 +       raw_spin_lock(&f->lock);
29450 +       list_splice_init(&f->list, to_free);
29451 +       raw_spin_unlock(&f->lock);
29453         return freelist;
29455  new_slab:
29456 @@ -2441,7 +2501,7 @@
29457         deactivate_slab(s, page, get_freepointer(s, freelist));
29458         c->page = NULL;
29459         c->freelist = NULL;
29460 -       return freelist;
29461 +       goto out;
29464  /*
29465 @@ -2453,6 +2513,7 @@
29467         void *p;
29468         unsigned long flags;
29469 +       LIST_HEAD(tofree);
29471         local_irq_save(flags);
29472  #ifdef CONFIG_PREEMPT
29473 @@ -2464,8 +2525,9 @@
29474         c = this_cpu_ptr(s->cpu_slab);
29475  #endif
29477 -       p = ___slab_alloc(s, gfpflags, node, addr, c);
29478 +       p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
29479         local_irq_restore(flags);
29480 +       free_delayed(&tofree);
29481         return p;
29484 @@ -2652,7 +2714,7 @@
29486         do {
29487                 if (unlikely(n)) {
29488 -                       spin_unlock_irqrestore(&n->list_lock, flags);
29489 +                       raw_spin_unlock_irqrestore(&n->list_lock, flags);
29490                         n = NULL;
29491                 }
29492                 prior = page->freelist;
29493 @@ -2684,7 +2746,7 @@
29494                                  * Otherwise the list_lock will synchronize with
29495                                  * other processors updating the list of slabs.
29496                                  */
29497 -                               spin_lock_irqsave(&n->list_lock, flags);
29498 +                               raw_spin_lock_irqsave(&n->list_lock, flags);
29500                         }
29501                 }
29502 @@ -2726,7 +2788,7 @@
29503                 add_partial(n, page, DEACTIVATE_TO_TAIL);
29504                 stat(s, FREE_ADD_PARTIAL);
29505         }
29506 -       spin_unlock_irqrestore(&n->list_lock, flags);
29507 +       raw_spin_unlock_irqrestore(&n->list_lock, flags);
29508         return;
29510  slab_empty:
29511 @@ -2741,7 +2803,7 @@
29512                 remove_full(s, n, page);
29513         }
29515 -       spin_unlock_irqrestore(&n->list_lock, flags);
29516 +       raw_spin_unlock_irqrestore(&n->list_lock, flags);
29517         stat(s, FREE_SLAB);
29518         discard_slab(s, page);
29520 @@ -2913,6 +2975,7 @@
29521                           void **p)
29523         struct kmem_cache_cpu *c;
29524 +       LIST_HEAD(to_free);
29525         int i;
29527         /* memcg and kmem_cache debug support */
29528 @@ -2936,7 +2999,7 @@
29529                          * of re-populating per CPU c->freelist
29530                          */
29531                         p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
29532 -                                           _RET_IP_, c);
29533 +                                           _RET_IP_, c, &to_free);
29534                         if (unlikely(!p[i]))
29535                                 goto error;
29537 @@ -2948,6 +3011,7 @@
29538         }
29539         c->tid = next_tid(c->tid);
29540         local_irq_enable();
29541 +       free_delayed(&to_free);
29543         /* Clear memory outside IRQ disabled fastpath loop */
29544         if (unlikely(flags & __GFP_ZERO)) {
29545 @@ -3095,7 +3159,7 @@
29546  init_kmem_cache_node(struct kmem_cache_node *n)
29548         n->nr_partial = 0;
29549 -       spin_lock_init(&n->list_lock);
29550 +       raw_spin_lock_init(&n->list_lock);
29551         INIT_LIST_HEAD(&n->partial);
29552  #ifdef CONFIG_SLUB_DEBUG
29553         atomic_long_set(&n->nr_slabs, 0);
29554 @@ -3677,7 +3741,7 @@
29555                 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
29556                         INIT_LIST_HEAD(promote + i);
29558 -               spin_lock_irqsave(&n->list_lock, flags);
29559 +               raw_spin_lock_irqsave(&n->list_lock, flags);
29561                 /*
29562                  * Build lists of slabs to discard or promote.
29563 @@ -3708,7 +3772,7 @@
29564                 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
29565                         list_splice(promote + i, &n->partial);
29567 -               spin_unlock_irqrestore(&n->list_lock, flags);
29568 +               raw_spin_unlock_irqrestore(&n->list_lock, flags);
29570                 /* Release empty slabs */
29571                 list_for_each_entry_safe(page, t, &discard, lru)
29572 @@ -3884,6 +3948,12 @@
29574         static __initdata struct kmem_cache boot_kmem_cache,
29575                 boot_kmem_cache_node;
29576 +       int cpu;
29578 +       for_each_possible_cpu(cpu) {
29579 +               raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
29580 +               INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
29581 +       }
29583         if (debug_guardpage_minorder())
29584                 slub_max_order = 0;
29585 @@ -4127,7 +4197,7 @@
29586         struct page *page;
29587         unsigned long flags;
29589 -       spin_lock_irqsave(&n->list_lock, flags);
29590 +       raw_spin_lock_irqsave(&n->list_lock, flags);
29592         list_for_each_entry(page, &n->partial, lru) {
29593                 validate_slab_slab(s, page, map);
29594 @@ -4149,7 +4219,7 @@
29595                        s->name, count, atomic_long_read(&n->nr_slabs));
29597  out:
29598 -       spin_unlock_irqrestore(&n->list_lock, flags);
29599 +       raw_spin_unlock_irqrestore(&n->list_lock, flags);
29600         return count;
29603 @@ -4337,12 +4407,12 @@
29604                 if (!atomic_long_read(&n->nr_slabs))
29605                         continue;
29607 -               spin_lock_irqsave(&n->list_lock, flags);
29608 +               raw_spin_lock_irqsave(&n->list_lock, flags);
29609                 list_for_each_entry(page, &n->partial, lru)
29610                         process_slab(&t, s, page, alloc, map);
29611                 list_for_each_entry(page, &n->full, lru)
29612                         process_slab(&t, s, page, alloc, map);
29613 -               spin_unlock_irqrestore(&n->list_lock, flags);
29614 +               raw_spin_unlock_irqrestore(&n->list_lock, flags);
29615         }
29617         for (i = 0; i < t.count; i++) {
29618 diff -Nur linux-4.4.46.orig/mm/swap.c linux-4.4.46/mm/swap.c
29619 --- linux-4.4.46.orig/mm/swap.c 2017-02-01 08:31:11.000000000 +0100
29620 +++ linux-4.4.46/mm/swap.c      2017-02-03 17:18:10.951619984 +0100
29621 @@ -31,6 +31,7 @@
29622  #include <linux/memcontrol.h>
29623  #include <linux/gfp.h>
29624  #include <linux/uio.h>
29625 +#include <linux/locallock.h>
29626  #include <linux/hugetlb.h>
29627  #include <linux/page_idle.h>
29629 @@ -46,6 +47,9 @@
29630  static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
29631  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
29633 +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
29634 +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
29636  /*
29637   * This path almost never happens for VM activity - pages are normally
29638   * freed via pagevecs.  But it gets used by networking.
29639 @@ -481,11 +485,11 @@
29640                 unsigned long flags;
29642                 page_cache_get(page);
29643 -               local_irq_save(flags);
29644 +               local_lock_irqsave(rotate_lock, flags);
29645                 pvec = this_cpu_ptr(&lru_rotate_pvecs);
29646                 if (!pagevec_add(pvec, page))
29647                         pagevec_move_tail(pvec);
29648 -               local_irq_restore(flags);
29649 +               local_unlock_irqrestore(rotate_lock, flags);
29650         }
29653 @@ -536,12 +540,13 @@
29654  void activate_page(struct page *page)
29656         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
29657 -               struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
29658 +               struct pagevec *pvec = &get_locked_var(swapvec_lock,
29659 +                                                      activate_page_pvecs);
29661                 page_cache_get(page);
29662                 if (!pagevec_add(pvec, page))
29663                         pagevec_lru_move_fn(pvec, __activate_page, NULL);
29664 -               put_cpu_var(activate_page_pvecs);
29665 +               put_locked_var(swapvec_lock, activate_page_pvecs);
29666         }
29669 @@ -567,7 +572,7 @@
29671  static void __lru_cache_activate_page(struct page *page)
29673 -       struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
29674 +       struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
29675         int i;
29677         /*
29678 @@ -589,7 +594,7 @@
29679                 }
29680         }
29682 -       put_cpu_var(lru_add_pvec);
29683 +       put_locked_var(swapvec_lock, lru_add_pvec);
29686  /*
29687 @@ -630,13 +635,13 @@
29689  static void __lru_cache_add(struct page *page)
29691 -       struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
29692 +       struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
29694         page_cache_get(page);
29695         if (!pagevec_space(pvec))
29696                 __pagevec_lru_add(pvec);
29697         pagevec_add(pvec, page);
29698 -       put_cpu_var(lru_add_pvec);
29699 +       put_locked_var(swapvec_lock, lru_add_pvec);
29702  /**
29703 @@ -816,9 +821,15 @@
29704                 unsigned long flags;
29706                 /* No harm done if a racing interrupt already did this */
29707 -               local_irq_save(flags);
29708 +#ifdef CONFIG_PREEMPT_RT_BASE
29709 +               local_lock_irqsave_on(rotate_lock, flags, cpu);
29710 +               pagevec_move_tail(pvec);
29711 +               local_unlock_irqrestore_on(rotate_lock, flags, cpu);
29712 +#else
29713 +               local_lock_irqsave(rotate_lock, flags);
29714                 pagevec_move_tail(pvec);
29715 -               local_irq_restore(flags);
29716 +               local_unlock_irqrestore(rotate_lock, flags);
29717 +#endif
29718         }
29720         pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
29721 @@ -846,26 +857,47 @@
29722                 return;
29724         if (likely(get_page_unless_zero(page))) {
29725 -               struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
29726 +               struct pagevec *pvec = &get_locked_var(swapvec_lock,
29727 +                                                      lru_deactivate_file_pvecs);
29729                 if (!pagevec_add(pvec, page))
29730                         pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
29731 -               put_cpu_var(lru_deactivate_file_pvecs);
29732 +               put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
29733         }
29736  void lru_add_drain(void)
29738 -       lru_add_drain_cpu(get_cpu());
29739 -       put_cpu();
29740 +       lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
29741 +       local_unlock_cpu(swapvec_lock);
29745 +#ifdef CONFIG_PREEMPT_RT_BASE
29746 +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
29748 +       local_lock_on(swapvec_lock, cpu);
29749 +       lru_add_drain_cpu(cpu);
29750 +       local_unlock_on(swapvec_lock, cpu);
29753 +#else
29755  static void lru_add_drain_per_cpu(struct work_struct *dummy)
29757         lru_add_drain();
29760  static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
29761 +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
29763 +       struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
29765 +       INIT_WORK(work, lru_add_drain_per_cpu);
29766 +       schedule_work_on(cpu, work);
29767 +       cpumask_set_cpu(cpu, has_work);
29769 +#endif
29771  void lru_add_drain_all(void)
29773 @@ -878,20 +910,17 @@
29774         cpumask_clear(&has_work);
29776         for_each_online_cpu(cpu) {
29777 -               struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
29779                 if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
29780                     pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
29781                     pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
29782 -                   need_activate_page_drain(cpu)) {
29783 -                       INIT_WORK(work, lru_add_drain_per_cpu);
29784 -                       schedule_work_on(cpu, work);
29785 -                       cpumask_set_cpu(cpu, &has_work);
29786 -               }
29787 +                   need_activate_page_drain(cpu))
29788 +                       remote_lru_add_drain(cpu, &has_work);
29789         }
29791 +#ifndef CONFIG_PREEMPT_RT_BASE
29792         for_each_cpu(cpu, &has_work)
29793                 flush_work(&per_cpu(lru_add_drain_work, cpu));
29794 +#endif
29796         put_online_cpus();
29797         mutex_unlock(&lock);
29798 diff -Nur linux-4.4.46.orig/mm/truncate.c linux-4.4.46/mm/truncate.c
29799 --- linux-4.4.46.orig/mm/truncate.c     2017-02-01 08:31:11.000000000 +0100
29800 +++ linux-4.4.46/mm/truncate.c  2017-02-03 17:18:10.951619984 +0100
29801 @@ -56,8 +56,11 @@
29802          * protected by mapping->tree_lock.
29803          */
29804         if (!workingset_node_shadows(node) &&
29805 -           !list_empty(&node->private_list))
29806 -               list_lru_del(&workingset_shadow_nodes, &node->private_list);
29807 +           !list_empty(&node->private_list)) {
29808 +               local_lock(workingset_shadow_lock);
29809 +               list_lru_del(&__workingset_shadow_nodes, &node->private_list);
29810 +               local_unlock(workingset_shadow_lock);
29811 +       }
29812         __radix_tree_delete_node(&mapping->page_tree, node);
29813  unlock:
29814         spin_unlock_irq(&mapping->tree_lock);
29815 diff -Nur linux-4.4.46.orig/mm/vmalloc.c linux-4.4.46/mm/vmalloc.c
29816 --- linux-4.4.46.orig/mm/vmalloc.c      2017-02-01 08:31:11.000000000 +0100
29817 +++ linux-4.4.46/mm/vmalloc.c   2017-02-03 17:18:10.951619984 +0100
29818 @@ -821,7 +821,7 @@
29819         struct vmap_block *vb;
29820         struct vmap_area *va;
29821         unsigned long vb_idx;
29822 -       int node, err;
29823 +       int node, err, cpu;
29824         void *vaddr;
29826         node = numa_node_id();
29827 @@ -864,11 +864,12 @@
29828         BUG_ON(err);
29829         radix_tree_preload_end();
29831 -       vbq = &get_cpu_var(vmap_block_queue);
29832 +       cpu = get_cpu_light();
29833 +       vbq = this_cpu_ptr(&vmap_block_queue);
29834         spin_lock(&vbq->lock);
29835         list_add_tail_rcu(&vb->free_list, &vbq->free);
29836         spin_unlock(&vbq->lock);
29837 -       put_cpu_var(vmap_block_queue);
29838 +       put_cpu_light();
29840         return vaddr;
29842 @@ -937,6 +938,7 @@
29843         struct vmap_block *vb;
29844         void *vaddr = NULL;
29845         unsigned int order;
29846 +       int cpu;
29848         BUG_ON(offset_in_page(size));
29849         BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
29850 @@ -951,7 +953,8 @@
29851         order = get_order(size);
29853         rcu_read_lock();
29854 -       vbq = &get_cpu_var(vmap_block_queue);
29855 +       cpu = get_cpu_light();
29856 +       vbq = this_cpu_ptr(&vmap_block_queue);
29857         list_for_each_entry_rcu(vb, &vbq->free, free_list) {
29858                 unsigned long pages_off;
29860 @@ -974,7 +977,7 @@
29861                 break;
29862         }
29864 -       put_cpu_var(vmap_block_queue);
29865 +       put_cpu_light();
29866         rcu_read_unlock();
29868         /* Allocate new block if nothing was found */
29869 diff -Nur linux-4.4.46.orig/mm/vmstat.c linux-4.4.46/mm/vmstat.c
29870 --- linux-4.4.46.orig/mm/vmstat.c       2017-02-01 08:31:11.000000000 +0100
29871 +++ linux-4.4.46/mm/vmstat.c    2017-02-03 17:18:10.951619984 +0100
29872 @@ -226,6 +226,7 @@
29873         long x;
29874         long t;
29876 +       preempt_disable_rt();
29877         x = delta + __this_cpu_read(*p);
29879         t = __this_cpu_read(pcp->stat_threshold);
29880 @@ -235,6 +236,7 @@
29881                 x = 0;
29882         }
29883         __this_cpu_write(*p, x);
29884 +       preempt_enable_rt();
29886  EXPORT_SYMBOL(__mod_zone_page_state);
29888 @@ -267,6 +269,7 @@
29889         s8 __percpu *p = pcp->vm_stat_diff + item;
29890         s8 v, t;
29892 +       preempt_disable_rt();
29893         v = __this_cpu_inc_return(*p);
29894         t = __this_cpu_read(pcp->stat_threshold);
29895         if (unlikely(v > t)) {
29896 @@ -275,6 +278,7 @@
29897                 zone_page_state_add(v + overstep, zone, item);
29898                 __this_cpu_write(*p, -overstep);
29899         }
29900 +       preempt_enable_rt();
29903  void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
29904 @@ -289,6 +293,7 @@
29905         s8 __percpu *p = pcp->vm_stat_diff + item;
29906         s8 v, t;
29908 +       preempt_disable_rt();
29909         v = __this_cpu_dec_return(*p);
29910         t = __this_cpu_read(pcp->stat_threshold);
29911         if (unlikely(v < - t)) {
29912 @@ -297,6 +302,7 @@
29913                 zone_page_state_add(v - overstep, zone, item);
29914                 __this_cpu_write(*p, overstep);
29915         }
29916 +       preempt_enable_rt();
29919  void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
29920 diff -Nur linux-4.4.46.orig/mm/workingset.c linux-4.4.46/mm/workingset.c
29921 --- linux-4.4.46.orig/mm/workingset.c   2017-02-01 08:31:11.000000000 +0100
29922 +++ linux-4.4.46/mm/workingset.c        2017-02-03 17:18:10.951619984 +0100
29923 @@ -264,7 +264,8 @@
29924   * point where they would still be useful.
29925   */
29927 -struct list_lru workingset_shadow_nodes;
29928 +struct list_lru __workingset_shadow_nodes;
29929 +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
29931  static unsigned long count_shadow_nodes(struct shrinker *shrinker,
29932                                         struct shrink_control *sc)
29933 @@ -274,9 +275,9 @@
29934         unsigned long pages;
29936         /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
29937 -       local_irq_disable();
29938 -       shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
29939 -       local_irq_enable();
29940 +       local_lock_irq(workingset_shadow_lock);
29941 +       shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
29942 +       local_unlock_irq(workingset_shadow_lock);
29944         pages = node_present_pages(sc->nid);
29945         /*
29946 @@ -361,9 +362,9 @@
29947         spin_unlock(&mapping->tree_lock);
29948         ret = LRU_REMOVED_RETRY;
29949  out:
29950 -       local_irq_enable();
29951 +       local_unlock_irq(workingset_shadow_lock);
29952         cond_resched();
29953 -       local_irq_disable();
29954 +       local_lock_irq(workingset_shadow_lock);
29955         spin_lock(lru_lock);
29956         return ret;
29958 @@ -374,10 +375,10 @@
29959         unsigned long ret;
29961         /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
29962 -       local_irq_disable();
29963 -       ret =  list_lru_shrink_walk(&workingset_shadow_nodes, sc,
29964 +       local_lock_irq(workingset_shadow_lock);
29965 +       ret =  list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
29966                                     shadow_lru_isolate, NULL);
29967 -       local_irq_enable();
29968 +       local_unlock_irq(workingset_shadow_lock);
29969         return ret;
29972 @@ -398,7 +399,7 @@
29974         int ret;
29976 -       ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
29977 +       ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
29978         if (ret)
29979                 goto err;
29980         ret = register_shrinker(&workingset_shadow_shrinker);
29981 @@ -406,7 +407,7 @@
29982                 goto err_list_lru;
29983         return 0;
29984  err_list_lru:
29985 -       list_lru_destroy(&workingset_shadow_nodes);
29986 +       list_lru_destroy(&__workingset_shadow_nodes);
29987  err:
29988         return ret;
29990 diff -Nur linux-4.4.46.orig/mm/zsmalloc.c linux-4.4.46/mm/zsmalloc.c
29991 --- linux-4.4.46.orig/mm/zsmalloc.c     2017-02-01 08:31:11.000000000 +0100
29992 +++ linux-4.4.46/mm/zsmalloc.c  2017-02-03 17:18:10.951619984 +0100
29993 @@ -64,6 +64,7 @@
29994  #include <linux/debugfs.h>
29995  #include <linux/zsmalloc.h>
29996  #include <linux/zpool.h>
29997 +#include <linux/locallock.h>
29999  /*
30000   * This must be power of 2 and greater than of equal to sizeof(link_free).
30001 @@ -403,6 +404,7 @@
30003  /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
30004  static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
30005 +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
30007  static int is_first_page(struct page *page)
30009 @@ -1289,7 +1291,7 @@
30010         class = pool->size_class[class_idx];
30011         off = obj_idx_to_offset(page, obj_idx, class->size);
30013 -       area = &get_cpu_var(zs_map_area);
30014 +       area = &get_locked_var(zs_map_area_lock, zs_map_area);
30015         area->vm_mm = mm;
30016         if (off + class->size <= PAGE_SIZE) {
30017                 /* this object is contained entirely within a page */
30018 @@ -1342,7 +1344,7 @@
30020                 __zs_unmap_object(area, pages, off, class->size);
30021         }
30022 -       put_cpu_var(zs_map_area);
30023 +       put_locked_var(zs_map_area_lock, zs_map_area);
30024         unpin_tag(handle);
30026  EXPORT_SYMBOL_GPL(zs_unmap_object);
30027 diff -Nur linux-4.4.46.orig/net/core/dev.c linux-4.4.46/net/core/dev.c
30028 --- linux-4.4.46.orig/net/core/dev.c    2017-02-01 08:31:11.000000000 +0100
30029 +++ linux-4.4.46/net/core/dev.c 2017-02-03 17:18:10.955620139 +0100
30030 @@ -186,6 +186,7 @@
30031  static DEFINE_HASHTABLE(napi_hash, 8);
30033  static seqcount_t devnet_rename_seq;
30034 +static DEFINE_MUTEX(devnet_rename_mutex);
30036  static inline void dev_base_seq_inc(struct net *net)
30038 @@ -207,14 +208,14 @@
30039  static inline void rps_lock(struct softnet_data *sd)
30041  #ifdef CONFIG_RPS
30042 -       spin_lock(&sd->input_pkt_queue.lock);
30043 +       raw_spin_lock(&sd->input_pkt_queue.raw_lock);
30044  #endif
30047  static inline void rps_unlock(struct softnet_data *sd)
30049  #ifdef CONFIG_RPS
30050 -       spin_unlock(&sd->input_pkt_queue.lock);
30051 +       raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
30052  #endif
30055 @@ -884,7 +885,8 @@
30056         strcpy(name, dev->name);
30057         rcu_read_unlock();
30058         if (read_seqcount_retry(&devnet_rename_seq, seq)) {
30059 -               cond_resched();
30060 +               mutex_lock(&devnet_rename_mutex);
30061 +               mutex_unlock(&devnet_rename_mutex);
30062                 goto retry;
30063         }
30065 @@ -1153,20 +1155,17 @@
30066         if (dev->flags & IFF_UP)
30067                 return -EBUSY;
30069 -       write_seqcount_begin(&devnet_rename_seq);
30070 +       mutex_lock(&devnet_rename_mutex);
30071 +       __raw_write_seqcount_begin(&devnet_rename_seq);
30073 -       if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
30074 -               write_seqcount_end(&devnet_rename_seq);
30075 -               return 0;
30076 -       }
30077 +       if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
30078 +               goto outunlock;
30080         memcpy(oldname, dev->name, IFNAMSIZ);
30082         err = dev_get_valid_name(net, dev, newname);
30083 -       if (err < 0) {
30084 -               write_seqcount_end(&devnet_rename_seq);
30085 -               return err;
30086 -       }
30087 +       if (err < 0)
30088 +               goto outunlock;
30090         if (oldname[0] && !strchr(oldname, '%'))
30091                 netdev_info(dev, "renamed from %s\n", oldname);
30092 @@ -1179,11 +1178,12 @@
30093         if (ret) {
30094                 memcpy(dev->name, oldname, IFNAMSIZ);
30095                 dev->name_assign_type = old_assign_type;
30096 -               write_seqcount_end(&devnet_rename_seq);
30097 -               return ret;
30098 +               err = ret;
30099 +               goto outunlock;
30100         }
30102 -       write_seqcount_end(&devnet_rename_seq);
30103 +       __raw_write_seqcount_end(&devnet_rename_seq);
30104 +       mutex_unlock(&devnet_rename_mutex);
30106         netdev_adjacent_rename_links(dev, oldname);
30108 @@ -1204,7 +1204,8 @@
30109                 /* err >= 0 after dev_alloc_name() or stores the first errno */
30110                 if (err >= 0) {
30111                         err = ret;
30112 -                       write_seqcount_begin(&devnet_rename_seq);
30113 +                       mutex_lock(&devnet_rename_mutex);
30114 +                       __raw_write_seqcount_begin(&devnet_rename_seq);
30115                         memcpy(dev->name, oldname, IFNAMSIZ);
30116                         memcpy(oldname, newname, IFNAMSIZ);
30117                         dev->name_assign_type = old_assign_type;
30118 @@ -1217,6 +1218,11 @@
30119         }
30121         return err;
30123 +outunlock:
30124 +       __raw_write_seqcount_end(&devnet_rename_seq);
30125 +       mutex_unlock(&devnet_rename_mutex);
30126 +       return err;
30129  /**
30130 @@ -2246,6 +2252,7 @@
30131         sd->output_queue_tailp = &q->next_sched;
30132         raise_softirq_irqoff(NET_TX_SOFTIRQ);
30133         local_irq_restore(flags);
30134 +       preempt_check_resched_rt();
30137  void __netif_schedule(struct Qdisc *q)
30138 @@ -2327,6 +2334,7 @@
30139         __this_cpu_write(softnet_data.completion_queue, skb);
30140         raise_softirq_irqoff(NET_TX_SOFTIRQ);
30141         local_irq_restore(flags);
30142 +       preempt_check_resched_rt();
30144  EXPORT_SYMBOL(__dev_kfree_skb_irq);
30146 @@ -2884,7 +2892,11 @@
30147          * This permits __QDISC___STATE_RUNNING owner to get the lock more
30148          * often and dequeue packets faster.
30149          */
30150 +#ifdef CONFIG_PREEMPT_RT_FULL
30151 +       contended = true;
30152 +#else
30153         contended = qdisc_is_running(q);
30154 +#endif
30155         if (unlikely(contended))
30156                 spin_lock(&q->busylock);
30158 @@ -2944,9 +2956,44 @@
30159  #define skb_update_prio(skb)
30160  #endif
30162 +#ifdef CONFIG_PREEMPT_RT_FULL
30164 +static inline int xmit_rec_read(void)
30166 +       return current->xmit_recursion;
30169 +static inline void xmit_rec_inc(void)
30171 +       current->xmit_recursion++;
30174 +static inline void xmit_rec_dec(void)
30176 +       current->xmit_recursion--;
30179 +#else
30181  DEFINE_PER_CPU(int, xmit_recursion);
30182  EXPORT_SYMBOL(xmit_recursion);
30184 +static inline int xmit_rec_read(void)
30186 +       return __this_cpu_read(xmit_recursion);
30189 +static inline void xmit_rec_inc(void)
30191 +       __this_cpu_inc(xmit_recursion);
30194 +static inline void xmit_rec_dec(void)
30196 +       __this_cpu_dec(xmit_recursion);
30198 +#endif
30200  #define RECURSION_LIMIT 10
30202  /**
30203 @@ -3139,7 +3186,7 @@
30205                 if (txq->xmit_lock_owner != cpu) {
30207 -                       if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
30208 +                       if (xmit_rec_read() > RECURSION_LIMIT)
30209                                 goto recursion_alert;
30211                         skb = validate_xmit_skb(skb, dev);
30212 @@ -3149,9 +3196,9 @@
30213                         HARD_TX_LOCK(dev, txq, cpu);
30215                         if (!netif_xmit_stopped(txq)) {
30216 -                               __this_cpu_inc(xmit_recursion);
30217 +                               xmit_rec_inc();
30218                                 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
30219 -                               __this_cpu_dec(xmit_recursion);
30220 +                               xmit_rec_dec();
30221                                 if (dev_xmit_complete(rc)) {
30222                                         HARD_TX_UNLOCK(dev, txq);
30223                                         goto out;
30224 @@ -3525,6 +3572,7 @@
30225         rps_unlock(sd);
30227         local_irq_restore(flags);
30228 +       preempt_check_resched_rt();
30230         atomic_long_inc(&skb->dev->rx_dropped);
30231         kfree_skb(skb);
30232 @@ -3543,7 +3591,7 @@
30233                 struct rps_dev_flow voidflow, *rflow = &voidflow;
30234                 int cpu;
30236 -               preempt_disable();
30237 +               migrate_disable();
30238                 rcu_read_lock();
30240                 cpu = get_rps_cpu(skb->dev, skb, &rflow);
30241 @@ -3553,13 +3601,13 @@
30242                 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
30244                 rcu_read_unlock();
30245 -               preempt_enable();
30246 +               migrate_enable();
30247         } else
30248  #endif
30249         {
30250                 unsigned int qtail;
30251 -               ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
30252 -               put_cpu();
30253 +               ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
30254 +               put_cpu_light();
30255         }
30256         return ret;
30258 @@ -3593,16 +3641,44 @@
30260         trace_netif_rx_ni_entry(skb);
30262 -       preempt_disable();
30263 +       local_bh_disable();
30264         err = netif_rx_internal(skb);
30265 -       if (local_softirq_pending())
30266 -               do_softirq();
30267 -       preempt_enable();
30268 +       local_bh_enable();
30270         return err;
30272  EXPORT_SYMBOL(netif_rx_ni);
30274 +#ifdef CONFIG_PREEMPT_RT_FULL
30276 + * RT runs ksoftirqd as a real time thread and the root_lock is a
30277 + * "sleeping spinlock". If the trylock fails then we can go into an
30278 + * infinite loop when ksoftirqd preempted the task which actually
30279 + * holds the lock, because we requeue q and raise NET_TX softirq
30280 + * causing ksoftirqd to loop forever.
30281 + *
30282 + * It's safe to use spin_lock on RT here as softirqs run in thread
30283 + * context and cannot deadlock against the thread which is holding
30284 + * root_lock.
30285 + *
30286 + * On !RT the trylock might fail, but there we bail out from the
30287 + * softirq loop after 10 attempts which we can't do on RT. And the
30288 + * task holding root_lock cannot be preempted, so the only downside of
30289 + * that trylock is that we need 10 loops to decide that we should have
30290 + * given up in the first one :)
30291 + */
30292 +static inline int take_root_lock(spinlock_t *lock)
30294 +       spin_lock(lock);
30295 +       return 1;
30297 +#else
30298 +static inline int take_root_lock(spinlock_t *lock)
30300 +       return spin_trylock(lock);
30302 +#endif
30304  static void net_tx_action(struct softirq_action *h)
30306         struct softnet_data *sd = this_cpu_ptr(&softnet_data);
30307 @@ -3644,7 +3720,7 @@
30308                         head = head->next_sched;
30310                         root_lock = qdisc_lock(q);
30311 -                       if (spin_trylock(root_lock)) {
30312 +                       if (take_root_lock(root_lock)) {
30313                                 smp_mb__before_atomic();
30314                                 clear_bit(__QDISC_STATE_SCHED,
30315                                           &q->state);
30316 @@ -4066,7 +4142,7 @@
30317         skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
30318                 if (skb->dev == dev) {
30319                         __skb_unlink(skb, &sd->input_pkt_queue);
30320 -                       kfree_skb(skb);
30321 +                       __skb_queue_tail(&sd->tofree_queue, skb);
30322                         input_queue_head_incr(sd);
30323                 }
30324         }
30325 @@ -4075,10 +4151,13 @@
30326         skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
30327                 if (skb->dev == dev) {
30328                         __skb_unlink(skb, &sd->process_queue);
30329 -                       kfree_skb(skb);
30330 +                       __skb_queue_tail(&sd->tofree_queue, skb);
30331                         input_queue_head_incr(sd);
30332                 }
30333         }
30335 +       if (!skb_queue_empty(&sd->tofree_queue))
30336 +               raise_softirq_irqoff(NET_RX_SOFTIRQ);
30339  static int napi_gro_complete(struct sk_buff *skb)
30340 @@ -4535,6 +4614,7 @@
30341                 sd->rps_ipi_list = NULL;
30343                 local_irq_enable();
30344 +               preempt_check_resched_rt();
30346                 /* Send pending IPI's to kick RPS processing on remote cpus. */
30347                 while (remsd) {
30348 @@ -4548,6 +4628,7 @@
30349         } else
30350  #endif
30351                 local_irq_enable();
30352 +       preempt_check_resched_rt();
30355  static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
30356 @@ -4629,6 +4710,7 @@
30357         local_irq_save(flags);
30358         ____napi_schedule(this_cpu_ptr(&softnet_data), n);
30359         local_irq_restore(flags);
30360 +       preempt_check_resched_rt();
30362  EXPORT_SYMBOL(__napi_schedule);
30364 @@ -4905,7 +4987,7 @@
30365         list_splice_tail(&repoll, &list);
30366         list_splice(&list, &sd->poll_list);
30367         if (!list_empty(&sd->poll_list))
30368 -               __raise_softirq_irqoff(NET_RX_SOFTIRQ);
30369 +               __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
30371         net_rps_action_and_irq_enable(sd);
30373 @@ -7244,7 +7326,7 @@
30374  void synchronize_net(void)
30376         might_sleep();
30377 -       if (rtnl_is_locked())
30378 +       if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
30379                 synchronize_rcu_expedited();
30380         else
30381                 synchronize_rcu();
30382 @@ -7485,16 +7567,20 @@
30384         raise_softirq_irqoff(NET_TX_SOFTIRQ);
30385         local_irq_enable();
30386 +       preempt_check_resched_rt();
30388         /* Process offline CPU's input_pkt_queue */
30389         while ((skb = __skb_dequeue(&oldsd->process_queue))) {
30390                 netif_rx_ni(skb);
30391                 input_queue_head_incr(oldsd);
30392         }
30393 -       while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
30394 +       while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
30395                 netif_rx_ni(skb);
30396                 input_queue_head_incr(oldsd);
30397         }
30398 +       while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
30399 +               kfree_skb(skb);
30400 +       }
30402         return NOTIFY_OK;
30404 @@ -7796,8 +7882,9 @@
30405         for_each_possible_cpu(i) {
30406                 struct softnet_data *sd = &per_cpu(softnet_data, i);
30408 -               skb_queue_head_init(&sd->input_pkt_queue);
30409 -               skb_queue_head_init(&sd->process_queue);
30410 +               skb_queue_head_init_raw(&sd->input_pkt_queue);
30411 +               skb_queue_head_init_raw(&sd->process_queue);
30412 +               skb_queue_head_init_raw(&sd->tofree_queue);
30413                 INIT_LIST_HEAD(&sd->poll_list);
30414                 sd->output_queue_tailp = &sd->output_queue;
30415  #ifdef CONFIG_RPS
30416 diff -Nur linux-4.4.46.orig/net/core/skbuff.c linux-4.4.46/net/core/skbuff.c
30417 --- linux-4.4.46.orig/net/core/skbuff.c 2017-02-01 08:31:11.000000000 +0100
30418 +++ linux-4.4.46/net/core/skbuff.c      2017-02-03 17:18:10.955620139 +0100
30419 @@ -63,6 +63,7 @@
30420  #include <linux/errqueue.h>
30421  #include <linux/prefetch.h>
30422  #include <linux/if_vlan.h>
30423 +#include <linux/locallock.h>
30425  #include <net/protocol.h>
30426  #include <net/dst.h>
30427 @@ -351,6 +352,8 @@
30429  static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
30430  static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
30431 +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
30432 +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
30434  static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
30436 @@ -358,10 +361,10 @@
30437         unsigned long flags;
30438         void *data;
30440 -       local_irq_save(flags);
30441 +       local_lock_irqsave(netdev_alloc_lock, flags);
30442         nc = this_cpu_ptr(&netdev_alloc_cache);
30443         data = __alloc_page_frag(nc, fragsz, gfp_mask);
30444 -       local_irq_restore(flags);
30445 +       local_unlock_irqrestore(netdev_alloc_lock, flags);
30446         return data;
30449 @@ -380,9 +383,13 @@
30451  static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
30453 -       struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
30454 +       struct page_frag_cache *nc;
30455 +       void *data;
30457 -       return __alloc_page_frag(nc, fragsz, gfp_mask);
30458 +       nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30459 +       data = __alloc_page_frag(nc, fragsz, gfp_mask);
30460 +       put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30461 +       return data;
30464  void *napi_alloc_frag(unsigned int fragsz)
30465 @@ -429,13 +436,13 @@
30466         if (sk_memalloc_socks())
30467                 gfp_mask |= __GFP_MEMALLOC;
30469 -       local_irq_save(flags);
30470 +       local_lock_irqsave(netdev_alloc_lock, flags);
30472         nc = this_cpu_ptr(&netdev_alloc_cache);
30473         data = __alloc_page_frag(nc, len, gfp_mask);
30474         pfmemalloc = nc->pfmemalloc;
30476 -       local_irq_restore(flags);
30477 +       local_unlock_irqrestore(netdev_alloc_lock, flags);
30479         if (unlikely(!data))
30480                 return NULL;
30481 @@ -476,9 +483,10 @@
30482  struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
30483                                  gfp_t gfp_mask)
30485 -       struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
30486 +       struct page_frag_cache *nc;
30487         struct sk_buff *skb;
30488         void *data;
30489 +       bool pfmemalloc;
30491         len += NET_SKB_PAD + NET_IP_ALIGN;
30493 @@ -496,7 +504,11 @@
30494         if (sk_memalloc_socks())
30495                 gfp_mask |= __GFP_MEMALLOC;
30497 +       nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30498         data = __alloc_page_frag(nc, len, gfp_mask);
30499 +       pfmemalloc = nc->pfmemalloc;
30500 +       put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
30502         if (unlikely(!data))
30503                 return NULL;
30505 @@ -507,7 +519,7 @@
30506         }
30508         /* use OR instead of assignment to avoid clearing of bits in mask */
30509 -       if (nc->pfmemalloc)
30510 +       if (pfmemalloc)
30511                 skb->pfmemalloc = 1;
30512         skb->head_frag = 1;
30514 diff -Nur linux-4.4.46.orig/net/core/sock.c linux-4.4.46/net/core/sock.c
30515 --- linux-4.4.46.orig/net/core/sock.c   2017-02-01 08:31:11.000000000 +0100
30516 +++ linux-4.4.46/net/core/sock.c        2017-02-03 17:18:10.955620139 +0100
30517 @@ -2436,12 +2436,11 @@
30518         if (sk->sk_lock.owned)
30519                 __lock_sock(sk);
30520         sk->sk_lock.owned = 1;
30521 -       spin_unlock(&sk->sk_lock.slock);
30522 +       spin_unlock_bh(&sk->sk_lock.slock);
30523         /*
30524          * The sk_lock has mutex_lock() semantics here:
30525          */
30526         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
30527 -       local_bh_enable();
30529  EXPORT_SYMBOL(lock_sock_nested);
30531 diff -Nur linux-4.4.46.orig/net/ipv4/icmp.c linux-4.4.46/net/ipv4/icmp.c
30532 --- linux-4.4.46.orig/net/ipv4/icmp.c   2017-02-01 08:31:11.000000000 +0100
30533 +++ linux-4.4.46/net/ipv4/icmp.c        2017-02-03 17:18:10.955620139 +0100
30534 @@ -69,6 +69,7 @@
30535  #include <linux/jiffies.h>
30536  #include <linux/kernel.h>
30537  #include <linux/fcntl.h>
30538 +#include <linux/sysrq.h>
30539  #include <linux/socket.h>
30540  #include <linux/in.h>
30541  #include <linux/inet.h>
30542 @@ -77,6 +78,7 @@
30543  #include <linux/string.h>
30544  #include <linux/netfilter_ipv4.h>
30545  #include <linux/slab.h>
30546 +#include <linux/locallock.h>
30547  #include <net/snmp.h>
30548  #include <net/ip.h>
30549  #include <net/route.h>
30550 @@ -204,6 +206,8 @@
30551   *
30552   *     On SMP we have one ICMP socket per-cpu.
30553   */
30554 +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
30556  static struct sock *icmp_sk(struct net *net)
30558         return *this_cpu_ptr(net->ipv4.icmp_sk);
30559 @@ -215,12 +219,14 @@
30561         local_bh_disable();
30563 +       local_lock(icmp_sk_lock);
30564         sk = icmp_sk(net);
30566         if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
30567                 /* This can happen if the output path signals a
30568                  * dst_link_failure() for an outgoing ICMP packet.
30569                  */
30570 +               local_unlock(icmp_sk_lock);
30571                 local_bh_enable();
30572                 return NULL;
30573         }
30574 @@ -230,6 +236,7 @@
30575  static inline void icmp_xmit_unlock(struct sock *sk)
30577         spin_unlock_bh(&sk->sk_lock.slock);
30578 +       local_unlock(icmp_sk_lock);
30581  int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
30582 @@ -358,6 +365,7 @@
30583         struct sock *sk;
30584         struct sk_buff *skb;
30586 +       local_lock(icmp_sk_lock);
30587         sk = icmp_sk(dev_net((*rt)->dst.dev));
30588         if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
30589                            icmp_param->data_len+icmp_param->head_len,
30590 @@ -380,6 +388,7 @@
30591                 skb->ip_summed = CHECKSUM_NONE;
30592                 ip_push_pending_frames(sk, fl4);
30593         }
30594 +       local_unlock(icmp_sk_lock);
30597  /*
30598 @@ -891,6 +900,30 @@
30601  /*
30602 + * 32bit and 64bit have different timestamp length, so we check for
30603 + * the cookie at offset 20 and verify it is repeated at offset 50
30604 + */
30605 +#define CO_POS0                20
30606 +#define CO_POS1                50
30607 +#define CO_SIZE                sizeof(int)
30608 +#define ICMP_SYSRQ_SIZE        57
30611 + * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
30612 + * pattern and if it matches send the next byte as a trigger to sysrq.
30613 + */
30614 +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
30616 +       int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
30617 +       char *p = skb->data;
30619 +       if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
30620 +           !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
30621 +           p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
30622 +               handle_sysrq(p[CO_POS0 + CO_SIZE]);
30626   *     Handle ICMP_ECHO ("ping") requests.
30627   *
30628   *     RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
30629 @@ -917,6 +950,11 @@
30630                 icmp_param.data_len        = skb->len;
30631                 icmp_param.head_len        = sizeof(struct icmphdr);
30632                 icmp_reply(&icmp_param, skb);
30634 +               if (skb->len == ICMP_SYSRQ_SIZE &&
30635 +                   net->ipv4.sysctl_icmp_echo_sysrq) {
30636 +                       icmp_check_sysrq(net, skb);
30637 +               }
30638         }
30639         /* should there be an ICMP stat for ignored echos? */
30640         return true;
30641 diff -Nur linux-4.4.46.orig/net/ipv4/sysctl_net_ipv4.c linux-4.4.46/net/ipv4/sysctl_net_ipv4.c
30642 --- linux-4.4.46.orig/net/ipv4/sysctl_net_ipv4.c        2017-02-01 08:31:11.000000000 +0100
30643 +++ linux-4.4.46/net/ipv4/sysctl_net_ipv4.c     2017-02-03 17:18:10.955620139 +0100
30644 @@ -818,6 +818,13 @@
30645                 .proc_handler   = proc_dointvec
30646         },
30647         {
30648 +               .procname       = "icmp_echo_sysrq",
30649 +               .data           = &init_net.ipv4.sysctl_icmp_echo_sysrq,
30650 +               .maxlen         = sizeof(int),
30651 +               .mode           = 0644,
30652 +               .proc_handler   = proc_dointvec
30653 +       },
30654 +       {
30655                 .procname       = "icmp_ignore_bogus_error_responses",
30656                 .data           = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
30657                 .maxlen         = sizeof(int),
30658 diff -Nur linux-4.4.46.orig/net/ipv4/tcp_ipv4.c linux-4.4.46/net/ipv4/tcp_ipv4.c
30659 --- linux-4.4.46.orig/net/ipv4/tcp_ipv4.c       2017-02-01 08:31:11.000000000 +0100
30660 +++ linux-4.4.46/net/ipv4/tcp_ipv4.c    2017-02-03 17:18:10.955620139 +0100
30661 @@ -62,6 +62,7 @@
30662  #include <linux/init.h>
30663  #include <linux/times.h>
30664  #include <linux/slab.h>
30665 +#include <linux/locallock.h>
30667  #include <net/net_namespace.h>
30668  #include <net/icmp.h>
30669 @@ -566,6 +567,7 @@
30671  EXPORT_SYMBOL(tcp_v4_send_check);
30673 +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
30674  /*
30675   *     This routine will send an RST to the other tcp.
30676   *
30677 @@ -687,10 +689,13 @@
30678                 arg.bound_dev_if = sk->sk_bound_dev_if;
30680         arg.tos = ip_hdr(skb)->tos;
30682 +       local_lock(tcp_sk_lock);
30683         ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
30684                               skb, &TCP_SKB_CB(skb)->header.h4.opt,
30685                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
30686                               &arg, arg.iov[0].iov_len);
30687 +       local_unlock(tcp_sk_lock);
30689         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
30690         TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
30691 @@ -772,10 +777,12 @@
30692         if (oif)
30693                 arg.bound_dev_if = oif;
30694         arg.tos = tos;
30695 +       local_lock(tcp_sk_lock);
30696         ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
30697                               skb, &TCP_SKB_CB(skb)->header.h4.opt,
30698                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
30699                               &arg, arg.iov[0].iov_len);
30700 +       local_unlock(tcp_sk_lock);
30702         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
30704 diff -Nur linux-4.4.46.orig/net/mac80211/rx.c linux-4.4.46/net/mac80211/rx.c
30705 --- linux-4.4.46.orig/net/mac80211/rx.c 2017-02-01 08:31:11.000000000 +0100
30706 +++ linux-4.4.46/net/mac80211/rx.c      2017-02-03 17:18:10.955620139 +0100
30707 @@ -3580,7 +3580,7 @@
30708         struct ieee80211_supported_band *sband;
30709         struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
30711 -       WARN_ON_ONCE(softirq_count() == 0);
30712 +       WARN_ON_ONCE_NONRT(softirq_count() == 0);
30714         if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
30715                 goto drop;
30716 diff -Nur linux-4.4.46.orig/net/netfilter/core.c linux-4.4.46/net/netfilter/core.c
30717 --- linux-4.4.46.orig/net/netfilter/core.c      2017-02-01 08:31:11.000000000 +0100
30718 +++ linux-4.4.46/net/netfilter/core.c   2017-02-03 17:18:10.955620139 +0100
30719 @@ -22,11 +22,17 @@
30720  #include <linux/proc_fs.h>
30721  #include <linux/mutex.h>
30722  #include <linux/slab.h>
30723 +#include <linux/locallock.h>
30724  #include <net/net_namespace.h>
30725  #include <net/sock.h>
30727  #include "nf_internals.h"
30729 +#ifdef CONFIG_PREEMPT_RT_BASE
30730 +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
30731 +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
30732 +#endif
30734  static DEFINE_MUTEX(afinfo_mutex);
30736  const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
30737 diff -Nur linux-4.4.46.orig/net/packet/af_packet.c linux-4.4.46/net/packet/af_packet.c
30738 --- linux-4.4.46.orig/net/packet/af_packet.c    2017-02-01 08:31:11.000000000 +0100
30739 +++ linux-4.4.46/net/packet/af_packet.c 2017-02-03 17:18:10.959620294 +0100
30740 @@ -63,6 +63,7 @@
30741  #include <linux/if_packet.h>
30742  #include <linux/wireless.h>
30743  #include <linux/kernel.h>
30744 +#include <linux/delay.h>
30745  #include <linux/kmod.h>
30746  #include <linux/slab.h>
30747  #include <linux/vmalloc.h>
30748 @@ -694,7 +695,7 @@
30749         if (BLOCK_NUM_PKTS(pbd)) {
30750                 while (atomic_read(&pkc->blk_fill_in_prog)) {
30751                         /* Waiting for skb_copy_bits to finish... */
30752 -                       cpu_relax();
30753 +                       cpu_chill();
30754                 }
30755         }
30757 @@ -956,7 +957,7 @@
30758                 if (!(status & TP_STATUS_BLK_TMO)) {
30759                         while (atomic_read(&pkc->blk_fill_in_prog)) {
30760                                 /* Waiting for skb_copy_bits to finish... */
30761 -                               cpu_relax();
30762 +                               cpu_chill();
30763                         }
30764                 }
30765                 prb_close_block(pkc, pbd, po, status);
30766 diff -Nur linux-4.4.46.orig/net/rds/ib_rdma.c linux-4.4.46/net/rds/ib_rdma.c
30767 --- linux-4.4.46.orig/net/rds/ib_rdma.c 2017-02-01 08:31:11.000000000 +0100
30768 +++ linux-4.4.46/net/rds/ib_rdma.c      2017-02-03 17:18:10.959620294 +0100
30769 @@ -34,6 +34,7 @@
30770  #include <linux/slab.h>
30771  #include <linux/rculist.h>
30772  #include <linux/llist.h>
30773 +#include <linux/delay.h>
30775  #include "rds.h"
30776  #include "ib.h"
30777 @@ -313,7 +314,7 @@
30778         for_each_online_cpu(cpu) {
30779                 flag = &per_cpu(clean_list_grace, cpu);
30780                 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
30781 -                       cpu_relax();
30782 +                       cpu_chill();
30783         }
30786 diff -Nur linux-4.4.46.orig/net/sched/sch_generic.c linux-4.4.46/net/sched/sch_generic.c
30787 --- linux-4.4.46.orig/net/sched/sch_generic.c   2017-02-01 08:31:11.000000000 +0100
30788 +++ linux-4.4.46/net/sched/sch_generic.c        2017-02-03 17:18:10.959620294 +0100
30789 @@ -893,7 +893,7 @@
30790         /* Wait for outstanding qdisc_run calls. */
30791         list_for_each_entry(dev, head, close_list)
30792                 while (some_qdisc_is_busy(dev))
30793 -                       yield();
30794 +                       msleep(1);
30797  void dev_deactivate(struct net_device *dev)
30798 diff -Nur linux-4.4.46.orig/net/sunrpc/svc_xprt.c linux-4.4.46/net/sunrpc/svc_xprt.c
30799 --- linux-4.4.46.orig/net/sunrpc/svc_xprt.c     2017-02-01 08:31:11.000000000 +0100
30800 +++ linux-4.4.46/net/sunrpc/svc_xprt.c  2017-02-03 17:18:10.959620294 +0100
30801 @@ -340,7 +340,7 @@
30802                 goto out;
30803         }
30805 -       cpu = get_cpu();
30806 +       cpu = get_cpu_light();
30807         pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
30809         atomic_long_inc(&pool->sp_stats.packets);
30810 @@ -376,7 +376,7 @@
30812                 atomic_long_inc(&pool->sp_stats.threads_woken);
30813                 wake_up_process(rqstp->rq_task);
30814 -               put_cpu();
30815 +               put_cpu_light();
30816                 goto out;
30817         }
30818         rcu_read_unlock();
30819 @@ -397,7 +397,7 @@
30820                 goto redo_search;
30821         }
30822         rqstp = NULL;
30823 -       put_cpu();
30824 +       put_cpu_light();
30825  out:
30826         trace_svc_xprt_do_enqueue(xprt, rqstp);
30828 diff -Nur linux-4.4.46.orig/scripts/mkcompile_h linux-4.4.46/scripts/mkcompile_h
30829 --- linux-4.4.46.orig/scripts/mkcompile_h       2017-02-01 08:31:11.000000000 +0100
30830 +++ linux-4.4.46/scripts/mkcompile_h    2017-02-03 17:18:10.959620294 +0100
30831 @@ -4,7 +4,8 @@
30832  ARCH=$2
30833  SMP=$3
30834  PREEMPT=$4
30835 -CC=$5
30836 +RT=$5
30837 +CC=$6
30839  vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
30841 @@ -57,6 +58,7 @@
30842  CONFIG_FLAGS=""
30843  if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
30844  if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
30845 +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
30846  UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
30848  # Truncate to maximum length
30849 diff -Nur linux-4.4.46.orig/sound/core/pcm_native.c linux-4.4.46/sound/core/pcm_native.c
30850 --- linux-4.4.46.orig/sound/core/pcm_native.c   2017-02-01 08:31:11.000000000 +0100
30851 +++ linux-4.4.46/sound/core/pcm_native.c        2017-02-03 17:18:10.959620294 +0100
30852 @@ -135,7 +135,7 @@
30853  void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
30855         if (!substream->pcm->nonatomic)
30856 -               local_irq_disable();
30857 +               local_irq_disable_nort();
30858         snd_pcm_stream_lock(substream);
30860  EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
30861 @@ -150,7 +150,7 @@
30863         snd_pcm_stream_unlock(substream);
30864         if (!substream->pcm->nonatomic)
30865 -               local_irq_enable();
30866 +               local_irq_enable_nort();
30868  EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
30870 @@ -158,7 +158,7 @@
30872         unsigned long flags = 0;
30873         if (!substream->pcm->nonatomic)
30874 -               local_irq_save(flags);
30875 +               local_irq_save_nort(flags);
30876         snd_pcm_stream_lock(substream);
30877         return flags;
30879 @@ -176,7 +176,7 @@
30881         snd_pcm_stream_unlock(substream);
30882         if (!substream->pcm->nonatomic)
30883 -               local_irq_restore(flags);
30884 +               local_irq_restore_nort(flags);
30886  EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
30888 diff -Nur linux-4.4.46.orig/virt/kvm/async_pf.c linux-4.4.46/virt/kvm/async_pf.c
30889 --- linux-4.4.46.orig/virt/kvm/async_pf.c       2017-02-01 08:31:11.000000000 +0100
30890 +++ linux-4.4.46/virt/kvm/async_pf.c    2017-02-03 17:18:10.959620294 +0100
30891 @@ -98,8 +98,8 @@
30892          * This memory barrier pairs with prepare_to_wait's set_current_state()
30893          */
30894         smp_mb();
30895 -       if (waitqueue_active(&vcpu->wq))
30896 -               wake_up_interruptible(&vcpu->wq);
30897 +       if (swait_active(&vcpu->wq))
30898 +               swake_up(&vcpu->wq);
30900         mmput(mm);
30901         kvm_put_kvm(vcpu->kvm);
30902 diff -Nur linux-4.4.46.orig/virt/kvm/kvm_main.c linux-4.4.46/virt/kvm/kvm_main.c
30903 --- linux-4.4.46.orig/virt/kvm/kvm_main.c       2017-02-01 08:31:11.000000000 +0100
30904 +++ linux-4.4.46/virt/kvm/kvm_main.c    2017-02-03 17:18:10.959620294 +0100
30905 @@ -228,8 +228,7 @@
30906         vcpu->kvm = kvm;
30907         vcpu->vcpu_id = id;
30908         vcpu->pid = NULL;
30909 -       vcpu->halt_poll_ns = 0;
30910 -       init_waitqueue_head(&vcpu->wq);
30911 +       init_swait_queue_head(&vcpu->wq);
30912         kvm_async_pf_vcpu_init(vcpu);
30914         vcpu->pre_pcpu = -1;
30915 @@ -2005,7 +2004,7 @@
30916  void kvm_vcpu_block(struct kvm_vcpu *vcpu)
30918         ktime_t start, cur;
30919 -       DEFINE_WAIT(wait);
30920 +       DECLARE_SWAITQUEUE(wait);
30921         bool waited = false;
30922         u64 block_ns;
30924 @@ -2030,7 +2029,7 @@
30925         kvm_arch_vcpu_blocking(vcpu);
30927         for (;;) {
30928 -               prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
30929 +               prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
30931                 if (kvm_vcpu_check_block(vcpu) < 0)
30932                         break;
30933 @@ -2039,7 +2038,7 @@
30934                 schedule();
30935         }
30937 -       finish_wait(&vcpu->wq, &wait);
30938 +       finish_swait(&vcpu->wq, &wait);
30939         cur = ktime_get();
30941         kvm_arch_vcpu_unblocking(vcpu);
30942 @@ -2071,11 +2070,11 @@
30944         int me;
30945         int cpu = vcpu->cpu;
30946 -       wait_queue_head_t *wqp;
30947 +       struct swait_queue_head *wqp;
30949         wqp = kvm_arch_vcpu_wq(vcpu);
30950 -       if (waitqueue_active(wqp)) {
30951 -               wake_up_interruptible(wqp);
30952 +       if (swait_active(wqp)) {
30953 +               swake_up(wqp);
30954                 ++vcpu->stat.halt_wakeup;
30955         }
30957 @@ -2176,7 +2175,7 @@
30958                                 continue;
30959                         if (vcpu == me)
30960                                 continue;
30961 -                       if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
30962 +                       if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
30963                                 continue;
30964                         if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
30965                                 continue;