From ea1ece2ff99df47b62640d08c426506fb6ef8cc3 Mon Sep 17 00:00:00 2001
From: Oliver Pinter <oliver.pntr@gmail.com>
Date: Mon, 21 Apr 2008 17:17:06 +0200
Subject: [PATCH] v2.6.22.22-op1-rc1

add chlog-v2.6.22.22-op1-rc1
add patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1

Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
---
 patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1         | 226 +++++++++++++++++
 .../rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1     | 270 +++++++++++++++++++++
 2 files changed, 496 insertions(+)
 create mode 100644 patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1
 create mode 100644 patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1

diff --git a/patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1 b/patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1
new file mode 100644
index 0000000..4c34f49
--- /dev/null
+++ b/patch-2.6.22.y/rc/chlog-v2.6.22.22-op1-rc1
@@ -0,0 +1,226 @@
+commit 9dfbae6b401ff0c08c57860d3a78a1033c451e29
+Author: Oliver Pinter <oliver.pntr@gmail.com>
+Date:   Mon Apr 21 17:08:48 2008 +0200
+
+    v2.6.22.22-op1-rc1
+    
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit e4a59f83f2cf76176e3cc24351414cee77f45834
+Author: Patrick McHardy <kaber@trash.net>
+Date:   Sun Apr 6 23:43:18 2008 -0700
+
+    TCP: Fix shrinking windows with window scaling
+    
+    Upstream commit: 607bfbf2d55dd1cfe5368b41c2a81a8c9ccf4723
+    
+    When selecting a new window, tcp_select_window() tries not to shrink
+    the offered window by using the maximum of the remaining offered window
+    size and the newly calculated window size. The newly calculated window
+    size is always a multiple of the window scaling factor, the remaining
+    window size however might not be since it depends on rcv_wup/rcv_nxt.
+    This means we're effectively shrinking the window when scaling it down.
+    
+    The dump below shows the problem (scaling factor 2^7):
+    
+    - Window size of 557 (71296) is advertised, up to 3111907257:
+    
+    IP 172.2.2.3.33000 > 172.2.2.2.33000: . ack 3111835961 win 557 <...>
+    
+    - New window size of 514 (65792) is advertised, up to 3111907217, 40 bytes
+      below the last end:
+    
+    IP 172.2.2.3.33000 > 172.2.2.2.33000: . 3113575668:3113577116(1448) ack 3111841425 win 514 <...>
+    
+    The number 40 results from downscaling the remaining window:
+    
+    3111907257 - 3111841425 = 65832
+    65832 / 2^7 = 514
+    65832 % 2^7 = 40
+    
+    If the sender uses up the entire window before it is shrunk, this can have
+    chaotic effects on the connection. When sending ACKs, tcp_acceptable_seq()
+    will notice that the window has been shrunk since tcp_wnd_end() is before
+    tp->snd_nxt, which makes it choose tcp_wnd_end() as sequence number.
+    This will fail the receiver's checks in tcp_sequence() however since it
+    is before its tp->rcv_wup, making it respond with a dupack.
+    
+    If both sides are in this condition, this leads to a constant flood of
+    ACKs until the connection times out.
+    
+    Make sure the window is never shrunk by aligning the remaining window to
+    the window scaling factor.
+    
+    Signed-off-by: Patrick McHardy <kaber@trash.net>
+    Signed-off-by: David S. Miller <davem@davemloft.net>
+    Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit dbf81e6fe7d8124f58e51dcba9e13faa63be6263
+Author: James Morris <jmorris@namei.org>
+Date:   Fri Jul 13 11:44:32 2007 +0200
+
+    security: revalidate rw permissions for sys_splice and sys_vmsplice
+    
+    git id: 29ce20586be54ceba49c55ae049541398cd2c416
+    
+    Revalidate read/write permissions for splice(2) and vmsplice(2), in case
+    security policy has changed since the files were opened.
+    
+    Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
+    Signed-off-by: James Morris <jmorris@namei.org>
+    Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
+    Backported-by: Oliver Pinter <oliver.pntr@gmail.com>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit 5794f1e90b2cecd56aff9d4e3f14cfe314eaf7e9
+Author: Jens Axboe <jens.axboe@oracle.com>
+Date:   Wed Feb 20 10:34:51 2008 +0100
+
+    splice: only return -EAGAIN if there's hope of more data
+    
+    sys_tee() currently is a bit eager in returning -EAGAIN, it may do so
+    even if we don't have a chance of any more data becoming available. So
+    improve the logic and only return -EAGAIN if we have an attached writer
+    to the input pipe.
+    
+    Reported by Johann Felix Soden <johfel@gmx.de> and
+    Patrick McManus <mcmanus@ducksong.com>.
+    
+    Tested-by: Johann Felix Soden <johfel@users.sourceforge.net>
+    Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit 0232995711395b1a4f1bc6af1bfc637ae8cd0d97
+Author: Davide Libenzi <davidel@xmailserver.org>
+Date:   Thu Apr 10 21:29:19 2008 -0700
+
+    eventfd/kaio integration fix
+    
+    Jeff Roberson discovered a race when using kaio eventfd based notifications.
+    When it occurs it can lead to missed wakeups and hung userspace.
+    
+    This patch fixes the race by moving the notification inside the spinlocked
+    section of kaio.  The operation is safe since eventfd spinlock and kaio one
+    are unrelated.
+    
+    Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
+    Cc: Zach Brown <zach.brown@oracle.com>
+    Cc: Jeff Roberson <jroberson@chesapeake.net>
+    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit e2e9f307ca1d4becb3793742dde274fb89a19a2f
+Author: Jeff Moyer <jmoyer@redhat.com>
+Date:   Tue Oct 16 23:27:20 2007 -0700
+
+    aio: account I/O wait time properly
+    
+    Some months back I proposed changing the schedule() call in
+    read_events to an io_schedule():
+    	http://osdir.com/ml/linux.kernel.aio.general/2006-10/msg00024.html
+    This was rejected as there are AIO operations that do not initiate
+    disk I/O.  I've had another look at the problem, and the only AIO
+    operation that will not initiate disk I/O is IOCB_CMD_NOOP.  However,
+    this command isn't even wired up!
+    
+    Given that it doesn't work, and hasn't for *years*, I'm going to
+    suggest again that we do proper I/O accounting when using AIO.
+    
+    Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
+    Acked-by: Zach Brown <zach.brown@oracle.com>
+    Cc: Benjamin LaHaise <bcrl@kvack.org>
+    Cc: Suparna Bhattacharya <suparna@in.ibm.com>
+    Cc: Badari Pulavarty <pbadari@us.ibm.com>
+    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit ab5fba64e22453b4acb0272332b38409a6ba3010
+Author: Hugh Dickins <hugh@veritas.com>
+Date:   Thu Apr 3 23:35:22 2008 +0100
+
+    splice: use mapping_gfp_mask
+    
+    The loop block driver is careful to mask __GFP_IO|__GFP_FS out of its
+    mapping_gfp_mask, to avoid hangs under memory pressure.  But nowadays
+    it uses splice, usually going through __generic_file_splice_read.  That
+    must use mapping_gfp_mask instead of GFP_KERNEL to avoid those hangs.
+    
+    Signed-off-by: Hugh Dickins <hugh@veritas.com>
+    Cc: Jens Axboe <jens.axboe@oracle.com>
+    Cc: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit 552a99092bd8a74ec90d40f71eae819337d6928b
+Author: Nick Piggin <npiggin@suse.de>
+Date:   Wed Feb 6 01:37:29 2008 -0800
+
+    inotify: remove debug code
+    
+    The inotify debugging code is supposed to verify that the
+    DCACHE_INOTIFY_PARENT_WATCHED scalability optimisation does not result in
+    notifications getting lost nor extra needless locking generated.
+    
+    Unfortunately there are also some races in the debugging code.  And it isn't
+    very good at finding problems anyway.  So remove it for now.
+    
+    Signed-off-by: Nick Piggin <npiggin@suse.de>
+    Cc: Robert Love <rlove@google.com>
+    Cc: John McCutchan <ttb@tentacle.dhs.org>
+    Cc: Jan Kara <jack@ucw.cz>
+    Cc: Yan Zheng <yanzheng@21cn.com>
+    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit 045916eb3ea9aec6d3baa18a6649eeb5d252f8bb
+Author: Nick Piggin <npiggin@suse.de>
+Date:   Wed Feb 6 01:37:28 2008 -0800
+
+    inotify: fix race
+    
+    There is a race between setting an inode's children's "parent watched" flag
+    when placing the first watch on a parent, and instantiating new children of
+    that parent: a child could miss having its flags set by
+    set_dentry_child_flags, but then inotify_d_instantiate might still see
+    !inotify_inode_watched.
+    
+    The solution is to set_dentry_child_flags after adding the watch.  Locking is
+    taken care of, because both set_dentry_child_flags and inotify_d_instantiate
+    hold dcache_lock and child->d_locks.
+    
+    Signed-off-by: Nick Piggin <npiggin@suse.de>
+    Cc: Robert Love <rlove@google.com>
+    Cc: John McCutchan <ttb@tentacle.dhs.org>
+    Cc: Jan Kara <jack@ucw.cz>
+    Cc: Yan Zheng <yanzheng@21cn.com>
+    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
+
+commit 41fa66104561f5ba4d6ea671f908b3688daacdb3
+Author: Grant Grundler <grundler@parisc-linux.org>
+Date:   Sat Mar 8 18:33:16 2008 -0700
+
+    2.6.25-rc4 de_stop_rxtx polling wrong
+    
+    backported to 2.6.22.y
+    
+    mainline: 69cac988f2d8506d0b479c5ae7903b9067d7641d
+    
+    This untested patch _should_ fix:
+    	"(net de2104x) Kernel panic with de2104x tulip driver on boot"
+    	http://bugzilla.kernel.org/show_bug.cgi?id=3156
+    
+    But the bug submitter isn't responding.  Same fix has been applied
+    to tulip.c (several years ago) and uli526x.c (Feb 2008) drivers.
+    
+    [ The panic reported in the bug report was removed in a recently
+      (March 2008) accepted patch from Ondrej Zary. ]
+    
+    Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
+    Signed-off-by: Jeff Garzik <jeff@garzik.org>
+    Signed-off-by: Oliver Pinter <oliver.pntr@gmail.com>
diff --git a/patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1 b/patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1
new file mode 100644
index 0000000..a42c095
--- /dev/null
+++ b/patch-2.6.22.y/rc/patch-v2.6.22.21-op1-v2.6.22.22-op1-rc1
@@ -0,0 +1,270 @@
+diff --git a/Makefile b/Makefile
+index b5f32ce..3b1515e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 22
+-EXTRAVERSION = .21-op1
++EXTRAVERSION = .22-op1-rc1
+ NAME = Holy Dancing Manatees, Batman!
+ 
+ # *DOCUMENTATION*
+diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
+index 8617298..e8fcce7 100644
+--- a/drivers/net/tulip/de2104x.c
++++ b/drivers/net/tulip/de2104x.c
+@@ -843,7 +843,7 @@ static inline int de_is_running (struct de_private *de)
+ static void de_stop_rxtx (struct de_private *de)
+ {
+ 	u32 macmode;
+-	unsigned int work = 1000;
++	unsigned int i = 1300/100;
+ 
+ 	macmode = dr32(MacMode);
+ 	if (macmode & RxTx) {
+@@ -851,10 +851,14 @@ static void de_stop_rxtx (struct de_private *de)
+ 		dr32(MacMode);
+ 	}
+ 
+-	while (--work > 0) {
++	/* wait until in-flight frame completes.
++	 * Max time @ 10BT: 1500*8b/10Mbps == 1200us (+ 100us margin)
++	 * Typically expect this loop to end in < 50 us on 100BT.
++	 */
++	while (--i) {
+ 		if (!de_is_running(de))
+ 			return;
+-		cpu_relax();
++		udelay(100);
+ 	}
+ 
+ 	printk(KERN_WARNING "%s: timeout expired stopping DMA\n", de->dev->name);
+diff --git a/fs/aio.c b/fs/aio.c
+index dbe699e..b3419c5 100644
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -303,7 +303,7 @@ static void wait_for_all_aios(struct kioctx *ctx)
+ 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ 	while (ctx->reqs_active) {
+ 		spin_unlock_irq(&ctx->ctx_lock);
+-		schedule();
++		io_schedule();
+ 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ 		spin_lock_irq(&ctx->ctx_lock);
+ 	}
+@@ -323,7 +323,7 @@ ssize_t fastcall wait_on_sync_kiocb(struct kiocb *iocb)
+ 		set_current_state(TASK_UNINTERRUPTIBLE);
+ 		if (!iocb->ki_users)
+ 			break;
+-		schedule();
++		io_schedule();
+ 	}
+ 	__set_current_state(TASK_RUNNING);
+ 	return iocb->ki_user_data;
+@@ -946,14 +946,6 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
+ 		return 1;
+ 	}
+ 
+-	/*
+-	 * Check if the user asked us to deliver the result through an
+-	 * eventfd. The eventfd_signal() function is safe to be called
+-	 * from IRQ context.
+-	 */
+-	if (!IS_ERR(iocb->ki_eventfd))
+-		eventfd_signal(iocb->ki_eventfd, 1);
+-
+ 	info = &ctx->ring_info;
+ 
+ 	/* add a completion event to the ring buffer.
+@@ -1002,6 +994,15 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
+ 	kunmap_atomic(ring, KM_IRQ1);
+ 
+ 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
++
++	/*
++	 * Check if the user asked us to deliver the result through an
++	 * eventfd. The eventfd_signal() function is safe to be called
++	 * from IRQ context.
++	 */
++	if (!IS_ERR(iocb->ki_eventfd))
++		eventfd_signal(iocb->ki_eventfd, 1);
++
+ put_rq:
+ 	/* everything turned out well, dispose of the aiocb. */
+ 	ret = __aio_put_req(ctx, iocb);
+@@ -1170,7 +1171,7 @@ retry:
+ 			ret = 0;
+ 			if (to.timed_out)	/* Only check after read evt */
+ 				break;
+-			schedule();
++			io_schedule();
+ 			if (signal_pending(tsk)) {
+ 				ret = -EINTR;
+ 				break;
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 0e73aa0..c54dc50 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -1407,9 +1407,6 @@ void d_delete(struct dentry * dentry)
+ 	if (atomic_read(&dentry->d_count) == 1) {
+ 		dentry_iput(dentry);
+ 		fsnotify_nameremove(dentry, isdir);
+-
+-		/* remove this and other inotify debug checks after 2.6.18 */
+-		dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
+ 		return;
+ 	}
+ 
+diff --git a/fs/inotify.c b/fs/inotify.c
+index 7457501..8ee2b43 100644
+--- a/fs/inotify.c
++++ b/fs/inotify.c
+@@ -168,20 +168,14 @@ static void set_dentry_child_flags(struct inode *inode, int watched)
+ 		struct dentry *child;
+ 
+ 		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+-			if (!child->d_inode) {
+-				WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
++			if (!child->d_inode)
+ 				continue;
+-			}
++
+ 			spin_lock(&child->d_lock);
+-			if (watched) {
+-				WARN_ON(child->d_flags &
+-						DCACHE_INOTIFY_PARENT_WATCHED);
++			if (watched)
+ 				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
+-			} else {
+-				WARN_ON(!(child->d_flags &
+-					DCACHE_INOTIFY_PARENT_WATCHED));
+-				child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
+-			}
++			else
++				child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
+ 			spin_unlock(&child->d_lock);
+ 		}
+ 	}
+@@ -253,7 +247,6 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
+ 	if (!inode)
+ 		return;
+ 
+-	WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
+ 	spin_lock(&entry->d_lock);
+ 	parent = entry->d_parent;
+ 	if (parent->d_inode && inotify_inode_watched(parent->d_inode))
+@@ -627,6 +620,7 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
+ 		      struct inode *inode, u32 mask)
+ {
+ 	int ret = 0;
++	int newly_watched;
+ 
+ 	/* don't allow invalid bits: we don't want flags set */
+ 	mask &= IN_ALL_EVENTS | IN_ONESHOT;
+@@ -653,12 +647,18 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
+ 	 */
+ 	watch->inode = igrab(inode);
+ 
+-	if (!inotify_inode_watched(inode))
+-		set_dentry_child_flags(inode, 1);
+-
+ 	/* Add the watch to the handle's and the inode's list */
++	newly_watched = !inotify_inode_watched(inode);
+ 	list_add(&watch->h_list, &ih->watches);
+ 	list_add(&watch->i_list, &inode->inotify_watches);
++	/*
++	 * Set child flags _after_ adding the watch, so there is no race
++	 * windows where newly instantiated children could miss their parent's
++	 * watched flag.
++	 */
++	if (newly_watched)
++		set_dentry_child_flags(inode, 1);
++
+ out:
+ 	mutex_unlock(&ih->mutex);
+ 	mutex_unlock(&inode->inotify_mutex);
+diff --git a/fs/splice.c b/fs/splice.c
+index dbbe267..3da87fe 100644
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -28,6 +28,7 @@
+ #include <linux/module.h>
+ #include <linux/syscalls.h>
+ #include <linux/uio.h>
++#include <linux/security.h>
+ 
+ struct partial_page {
+ 	unsigned int offset;
+@@ -331,7 +332,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
+ 				break;
+ 
+ 			error = add_to_page_cache_lru(page, mapping, index,
+-					      GFP_KERNEL);
++						mapping_gfp_mask(mapping));
+ 			if (unlikely(error)) {
+ 				page_cache_release(page);
+ 				if (error == -EEXIST)
+@@ -932,6 +933,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
+ 	if (unlikely(ret < 0))
+ 		return ret;
+ 
++	ret = security_file_permission(out, MAY_WRITE);
++	if (unlikely(ret < 0))
++		return ret;
++
+ 	return out->f_op->splice_write(pipe, out, ppos, len, flags);
+ }
+ 
+@@ -954,6 +959,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
+ 	if (unlikely(ret < 0))
+ 		return ret;
+ 
++	ret = security_file_permission(in, MAY_READ);
++	if (unlikely(ret < 0))
++		return ret;
++
+ 	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ }
+ 
+@@ -1489,6 +1498,13 @@ static int link_pipe(struct pipe_inode_info *ipipe,
+ 		i++;
+ 	} while (len);
+ 
++	/*
++	 * return EAGAIN if we have the potential of some data in the
++	 * future, otherwise just return 0
++	 */
++	if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
++		ret = -EAGAIN;
++
+ 	inode_double_unlock(ipipe->inode, opipe->inode);
+ 
+ 	/*
+@@ -1529,11 +1545,8 @@ static long do_tee(struct file *in, struct file *out, size_t len,
+ 		ret = link_ipipe_prep(ipipe, flags);
+ 		if (!ret) {
+ 			ret = link_opipe_prep(opipe, flags);
+-			if (!ret) {
++			if (!ret)
+ 				ret = link_pipe(ipipe, opipe, len, flags);
+-				if (!ret && (flags & SPLICE_F_NONBLOCK))
+-					ret = -EAGAIN;
+-			}
+ 		}
+ 	}
+ 
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index eee57e6..de6e5df 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -246,7 +246,7 @@ static u16 tcp_select_window(struct sock *sk)
+ 		 *
+ 		 * Relax Will Robinson.
+ 		 */
+-		new_win = cur_win;
++		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
+ 	}
+ 	tp->rcv_wnd = new_win;
+ 	tp->rcv_wup = tp->rcv_nxt;
-- 
2.11.4.GIT