From ea1ece2ff99df47b62640d08c426506fb6ef8cc3 Mon Sep 17 00:00:00 2001 From: Oliver Pinter Date: Mon, 21 Apr 2008 17:17:06 +0200 Subject: [PATCH] v2.6.22.22-op1-rc1 add chlog-v2.6.22.22-op1-rc1 add patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 Signed-off-by: Oliver Pinter --- patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1 | 226 +++++++++++++++++ .../rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 | 270 +++++++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1 create mode 100644 patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 diff --git a/patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1 b/patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1 new file mode 100644 index 0000000..4c34f49 --- /dev/null +++ b/patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1 @@ -0,0 +1,226 @@ +commit 9dfbae6b401ff0c08c57860d3a78a1033c451e29 +Author: Oliver Pinter +Date: Mon Apr 21 17:08:48 2008 +0200 + + v2.6.22.22-op1-rc1 + + Signed-off-by: Oliver Pinter + +commit e4a59f83f2cf76176e3cc24351414cee77f45834 +Author: Patrick McHardy +Date: Sun Apr 6 23:43:18 2008 -0700 + + TCP: Fix shrinking windows with window scaling + + Upstream commit: 607bfbf2d55dd1cfe5368b41c2a81a8c9ccf4723 + + When selecting a new window, tcp_select_window() tries not to shrink + the offered window by using the maximum of the remaining offered window + size and the newly calculated window size. The newly calculated window + size is always a multiple of the window scaling factor, the remaining + window size however might not be since it depends on rcv_wup/rcv_nxt. + This means we're effectively shrinking the window when scaling it down. + + The dump below shows the problem (scaling factor 2^7): + + - Window size of 557 (71296) is advertised, up to 3111907257: + + IP 172.2.2.3.33000 > 172.2.2.2.33000: . ack 3111835961 win 557 <...> + + - New window size of 514 (65792) is advertised, up to 3111907217, 40 bytes + below the last end: + + IP 172.2.2.3.33000 > 172.2.2.2.33000: . 
3113575668:3113577116(1448) ack 3111841425 win 514 <...> + + The number 40 results from downscaling the remaining window: + + 3111907257 - 3111841425 = 65832 + 65832 / 2^7 = 514 + 65832 % 2^7 = 40 + + If the sender uses up the entire window before it is shrunk, this can have + chaotic effects on the connection. When sending ACKs, tcp_acceptable_seq() + will notice that the window has been shrunk since tcp_wnd_end() is before + tp->snd_nxt, which makes it choose tcp_wnd_end() as sequence number. + This will fail the receiver's checks in tcp_sequence() however since it + is before its tp->rcv_wup, making it respond with a dupack. + + If both sides are in this condition, this leads to a constant flood of + ACKs until the connection times out. + + Make sure the window is never shrunk by aligning the remaining window to + the window scaling factor. + + Signed-off-by: Patrick McHardy + Signed-off-by: David S. Miller + Signed-off-by: Chris Wright + Signed-off-by: Oliver Pinter + +commit dbf81e6fe7d8124f58e51dcba9e13faa63be6263 +Author: James Morris +Date: Fri Jul 13 11:44:32 2007 +0200 + + security: revalidate rw permissions for sys_splice and sys_vmsplice + + git id: 29ce20586be54ceba49c55ae049541398cd2c416 + + Revalidate read/write permissions for splice(2) and vmsplice(2), in case + security policy has changed since the files were opened. + + Acked-by: Stephen Smalley + Signed-off-by: James Morris + Signed-off-by: Jens Axboe + Backported-by: Oliver Pinter + Signed-off-by: Oliver Pinter + +commit 5794f1e90b2cecd56aff9d4e3f14cfe314eaf7e9 +Author: Jens Axboe +Date: Wed Feb 20 10:34:51 2008 +0100 + + splice: only return -EAGAIN if there's hope of more data + + sys_tee() currently is a bit eager in returning -EAGAIN, it may do so + even if we don't have a chance of any more data becoming available. So + improve the logic and only return -EAGAIN if we have an attached writer + to the input pipe. + + Reported by Johann Felix Soden and + Patrick McManus . 
+ + Tested-by: Johann Felix Soden + Signed-off-by: Jens Axboe + Signed-off-by: Oliver Pinter + +commit 0232995711395b1a4f1bc6af1bfc637ae8cd0d97 +Author: Davide Libenzi +Date: Thu Apr 10 21:29:19 2008 -0700 + + eventfd/kaio integration fix + + Jeff Roberson discovered a race when using kaio eventfd based notifications. + When it occurs it can lead to missed wakeups and hung userspace. + + This patch fixes the race by moving the notification inside the spinlocked + section of kaio. The operation is safe since eventfd spinlock and kaio one + are unrelated. + + Signed-off-by: Davide Libenzi + Cc: Zach Brown + Cc: Jeff Roberson + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + Signed-off-by: Oliver Pinter + +commit e2e9f307ca1d4becb3793742dde274fb89a19a2f +Author: Jeff Moyer +Date: Tue Oct 16 23:27:20 2007 -0700 + + aio: account I/O wait time properly + + Some months back I proposed changing the schedule() call in + read_events to an io_schedule(): + http://osdir.com/ml/linux.kernel.aio.general/2006-10/msg00024.html + This was rejected as there are AIO operations that do not initiate + disk I/O. I've had another look at the problem, and the only AIO + operation that will not initiate disk I/O is IOCB_CMD_NOOP. However, + this command isn't even wired up! + + Given that it doesn't work, and hasn't for *years*, I'm going to + suggest again that we do proper I/O accounting when using AIO. + + Signed-off-by: Jeff Moyer + Acked-by: Zach Brown + Cc: Benjamin LaHaise + Cc: Suparna Bhattacharya + Cc: Badari Pulavarty + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + Signed-off-by: Oliver Pinter + +commit ab5fba64e22453b4acb0272332b38409a6ba3010 +Author: Hugh Dickins +Date: Thu Apr 3 23:35:22 2008 +0100 + + splice: use mapping_gfp_mask + + The loop block driver is careful to mask __GFP_IO|__GFP_FS out of its + mapping_gfp_mask, to avoid hangs under memory pressure. But nowadays + it uses splice, usually going through __generic_file_splice_read. 
That + must use mapping_gfp_mask instead of GFP_KERNEL to avoid those hangs. + + Signed-off-by: Hugh Dickins + Cc: Jens Axboe + Cc: Andrew Morton + Signed-off-by: Linus Torvalds + Signed-off-by: Oliver Pinter + +commit 552a99092bd8a74ec90d40f71eae819337d6928b +Author: Nick Piggin +Date: Wed Feb 6 01:37:29 2008 -0800 + + inotify: remove debug code + + The inotify debugging code is supposed to verify that the + DCACHE_INOTIFY_PARENT_WATCHED scalability optimisation does not result in + notifications getting lost nor extra needless locking generated. + + Unfortunately there are also some races in the debugging code. And it isn't + very good at finding problems anyway. So remove it for now. + + Signed-off-by: Nick Piggin + Cc: Robert Love + Cc: John McCutchan + Cc: Jan Kara + Cc: Yan Zheng + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + Signed-off-by: Oliver Pinter + +commit 045916eb3ea9aec6d3baa18a6649eeb5d252f8bb +Author: Nick Piggin +Date: Wed Feb 6 01:37:28 2008 -0800 + + inotify: fix race + + There is a race between setting an inode's children's "parent watched" flag + when placing the first watch on a parent, and instantiating new children of + that parent: a child could miss having its flags set by + set_dentry_child_flags, but then inotify_d_instantiate might still see + !inotify_inode_watched. + + The solution is to set_dentry_child_flags after adding the watch. Locking is + taken care of, because both set_dentry_child_flags and inotify_d_instantiate + hold dcache_lock and child->d_locks. 
+ + Signed-off-by: Nick Piggin + Cc: Robert Love + Cc: John McCutchan + Cc: Jan Kara + Cc: Yan Zheng + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + Signed-off-by: Oliver Pinter + +commit 41fa66104561f5ba4d6ea671f908b3688daacdb3 +Author: Grant Grundler +Date: Sat Mar 8 18:33:16 2008 -0700 + + 2.6.25-rc4 de_stop_rxtx polling wrong + + backported to 2.6.22.y + + mainline: 69cac988f2d8506d0b479c5ae7903b9067d7641d + + This untested patch _should_ fix: + "(net de2104x) Kernel panic with de2104x tulip driver on boot" + http://bugzilla.kernel.org/show_bug.cgi?id=3156 + + But the bug submitter isn't responding. Same fix has been applied + to tulip.c (several years ago) and uli526x.c (Feb 2008) drivers. + + [ The panic reported in the bug report was removed in a recently + (march 2008) accepted patch from Ondrej Zary. ] + + Signed-off-by: Grant Grundler + Signed-off-by: Jeff Garzik + Signed-off-by: Oliver Pinter diff --git a/patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 b/patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 new file mode 100644 index 0000000..a42c095 --- /dev/null +++ b/patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 @@ -0,0 +1,270 @@ +diff --git a/Makefile b/Makefile +index b5f32ce..3b1515e 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 22 +-EXTRAVERSION = .21-op1 ++EXTRAVERSION = .22-op1-rc1 + NAME = Holy Dancing Manatees, Batman! 
+ + # *DOCUMENTATION* +diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c +index 8617298..e8fcce7 100644 +--- a/drivers/net/tulip/de2104x.c ++++ b/drivers/net/tulip/de2104x.c +@@ -843,7 +843,7 @@ static inline int de_is_running (struct de_private *de) + static void de_stop_rxtx (struct de_private *de) + { + u32 macmode; +- unsigned int work = 1000; ++ unsigned int i = 1300/100; + + macmode = dr32(MacMode); + if (macmode & RxTx) { +@@ -851,10 +851,14 @@ static void de_stop_rxtx (struct de_private *de) + dr32(MacMode); + } + +- while (--work > 0) { ++ /* wait until in-flight frame completes. ++ * Max time @ 10BT: 1500*8b/10Mbps == 1200us (+ 100us margin) ++ * Typically expect this loop to end in < 50 us on 100BT. ++ */ ++ while (--i) { + if (!de_is_running(de)) + return; +- cpu_relax(); ++ udelay(100); + } + + printk(KERN_WARNING "%s: timeout expired stopping DMA\n", de->dev->name); +diff --git a/fs/aio.c b/fs/aio.c +index dbe699e..b3419c5 100644 +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -303,7 +303,7 @@ static void wait_for_all_aios(struct kioctx *ctx) + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + while (ctx->reqs_active) { + spin_unlock_irq(&ctx->ctx_lock); +- schedule(); ++ io_schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + spin_lock_irq(&ctx->ctx_lock); + } +@@ -323,7 +323,7 @@ ssize_t fastcall wait_on_sync_kiocb(struct kiocb *iocb) + set_current_state(TASK_UNINTERRUPTIBLE); + if (!iocb->ki_users) + break; +- schedule(); ++ io_schedule(); + } + __set_current_state(TASK_RUNNING); + return iocb->ki_user_data; +@@ -946,14 +946,6 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) + return 1; + } + +- /* +- * Check if the user asked us to deliver the result through an +- * eventfd. The eventfd_signal() function is safe to be called +- * from IRQ context. +- */ +- if (!IS_ERR(iocb->ki_eventfd)) +- eventfd_signal(iocb->ki_eventfd, 1); +- + info = &ctx->ring_info; + + /* add a completion event to the ring buffer. 
+@@ -1002,6 +994,15 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) + kunmap_atomic(ring, KM_IRQ1); + + pr_debug("added to ring %p at [%lu]\n", iocb, tail); ++ ++ /* ++ * Check if the user asked us to deliver the result through an ++ * eventfd. The eventfd_signal() function is safe to be called ++ * from IRQ context. ++ */ ++ if (!IS_ERR(iocb->ki_eventfd)) ++ eventfd_signal(iocb->ki_eventfd, 1); ++ + put_rq: + /* everything turned out well, dispose of the aiocb. */ + ret = __aio_put_req(ctx, iocb); +@@ -1170,7 +1171,7 @@ retry: + ret = 0; + if (to.timed_out) /* Only check after read evt */ + break; +- schedule(); ++ io_schedule(); + if (signal_pending(tsk)) { + ret = -EINTR; + break; +diff --git a/fs/dcache.c b/fs/dcache.c +index 0e73aa0..c54dc50 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -1407,9 +1407,6 @@ void d_delete(struct dentry * dentry) + if (atomic_read(&dentry->d_count) == 1) { + dentry_iput(dentry); + fsnotify_nameremove(dentry, isdir); +- +- /* remove this and other inotify debug checks after 2.6.18 */ +- dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; + return; + } + +diff --git a/fs/inotify.c b/fs/inotify.c +index 7457501..8ee2b43 100644 +--- a/fs/inotify.c ++++ b/fs/inotify.c +@@ -168,20 +168,14 @@ static void set_dentry_child_flags(struct inode *inode, int watched) + struct dentry *child; + + list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { +- if (!child->d_inode) { +- WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); ++ if (!child->d_inode) + continue; +- } ++ + spin_lock(&child->d_lock); +- if (watched) { +- WARN_ON(child->d_flags & +- DCACHE_INOTIFY_PARENT_WATCHED); ++ if (watched) + child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; +- } else { +- WARN_ON(!(child->d_flags & +- DCACHE_INOTIFY_PARENT_WATCHED)); +- child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED; +- } ++ else ++ child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; + spin_unlock(&child->d_lock); + } + } +@@ -253,7 +247,6 @@ void 
inotify_d_instantiate(struct dentry *entry, struct inode *inode) + if (!inode) + return; + +- WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); + spin_lock(&entry->d_lock); + parent = entry->d_parent; + if (parent->d_inode && inotify_inode_watched(parent->d_inode)) +@@ -627,6 +620,7 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, + struct inode *inode, u32 mask) + { + int ret = 0; ++ int newly_watched; + + /* don't allow invalid bits: we don't want flags set */ + mask &= IN_ALL_EVENTS | IN_ONESHOT; +@@ -653,12 +647,18 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, + */ + watch->inode = igrab(inode); + +- if (!inotify_inode_watched(inode)) +- set_dentry_child_flags(inode, 1); +- + /* Add the watch to the handle's and the inode's list */ ++ newly_watched = !inotify_inode_watched(inode); + list_add(&watch->h_list, &ih->watches); + list_add(&watch->i_list, &inode->inotify_watches); ++ /* ++ * Set child flags _after_ adding the watch, so there is no race ++ * windows where newly instantiated children could miss their parent's ++ * watched flag. 
++ */ ++ if (newly_watched) ++ set_dentry_child_flags(inode, 1); ++ + out: + mutex_unlock(&ih->mutex); + mutex_unlock(&inode->inotify_mutex); +diff --git a/fs/splice.c b/fs/splice.c +index dbbe267..3da87fe 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + struct partial_page { + unsigned int offset; +@@ -331,7 +332,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, + break; + + error = add_to_page_cache_lru(page, mapping, index, +- GFP_KERNEL); ++ mapping_gfp_mask(mapping)); + if (unlikely(error)) { + page_cache_release(page); + if (error == -EEXIST) +@@ -932,6 +933,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + if (unlikely(ret < 0)) + return ret; + ++ ret = security_file_permission(out, MAY_WRITE); ++ if (unlikely(ret < 0)) ++ return ret; ++ + return out->f_op->splice_write(pipe, out, ppos, len, flags); + } + +@@ -954,6 +959,10 @@ static long do_splice_to(struct file *in, loff_t *ppos, + if (unlikely(ret < 0)) + return ret; + ++ ret = security_file_permission(in, MAY_READ); ++ if (unlikely(ret < 0)) ++ return ret; ++ + return in->f_op->splice_read(in, ppos, pipe, len, flags); + } + +@@ -1489,6 +1498,13 @@ static int link_pipe(struct pipe_inode_info *ipipe, + i++; + } while (len); + ++ /* ++ * return EAGAIN if we have the potential of some data in the ++ * future, otherwise just return 0 ++ */ ++ if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ++ ret = -EAGAIN; ++ + inode_double_unlock(ipipe->inode, opipe->inode); + + /* +@@ -1529,11 +1545,8 @@ static long do_tee(struct file *in, struct file *out, size_t len, + ret = link_ipipe_prep(ipipe, flags); + if (!ret) { + ret = link_opipe_prep(opipe, flags); +- if (!ret) { ++ if (!ret) + ret = link_pipe(ipipe, opipe, len, flags); +- if (!ret && (flags & SPLICE_F_NONBLOCK)) +- ret = -EAGAIN; +- } + } + } + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index eee57e6..de6e5df 100644 
+--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -246,7 +246,7 @@ static u16 tcp_select_window(struct sock *sk) + * + * Relax Will Robinson. + */ +- new_win = cur_win; ++ new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); + } + tp->rcv_wnd = new_win; + tp->rcv_wup = tp->rcv_nxt; -- 2.11.4.GIT