From cbf5d468756d4db31bdd5c79b37538a8495222d6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:25:38 -0500 Subject: [PATCH] Linux-2.3.7.. Let's be careful out there.. The new and much improved fully page-cache based filesystem code is now apparently stable, and works wonderfully well performancewise. We fixed all known issues with the IO subsystem: it scales well in SMP, and it avoids unnecessary copies and unnecessary temporary buffers for write-out. The shared mapping code in particular is much cleaner and also a _lot_ faster. In short, it's perfect. And we want as many people as possible out there testing out the new cool code, and bask in the success stories.. HOWEVER. _Just_ in case something goes wrong [ extremely unlikely of course. Sure. Sue me ], we want to indemnify ourselves. There just might be a bug hiding there somewhere, and it might eat your filesystem while laughing in glee over you being naive and testing new code. So you have been warned. In particular, there's some indication that it might have problems on sparc still (and/or other architectures), possibly due to the ext2fs byte order cleanups that have also been done in order to reach the afore-mentioned state of perfection. I'd be especially interested in people running databases on top of Linux: Solid server in particular is very fsync-happy, and that's one of the operations that have been speeded up by orders of magnitude. 
Linus --- arch/i386/kernel/mca.c | 1 - drivers/pci/pci.c | 24 ++- drivers/usb/acm.c | 2 +- drivers/usb/audio.c | 2 +- drivers/usb/cpia.c | 2 +- drivers/usb/hub.c | 2 +- drivers/usb/keyboard.c | 2 +- drivers/usb/mouse.c | 2 +- drivers/usb/ohci-hcd.c | 2 +- drivers/usb/ohci.c | 4 +- drivers/usb/uhci-debug.c | 2 +- drivers/usb/uhci.c | 39 +++-- drivers/usb/usb.h | 4 +- drivers/usb/usb_scsi.c | 402 ++++++++++++++++++++++++++++++++++++------- drivers/usb/usb_scsi_debug.c | 2 +- fs/affs/dir.c | 1 - fs/affs/file.c | 2 - fs/autofs/dir.c | 1 - fs/autofs/root.c | 1 - fs/autofs/symlink.c | 1 - fs/bad_inode.c | 4 +- fs/block_dev.c | 1 + fs/buffer.c | 273 ++++++++++++++++++----------- fs/devices.c | 7 +- fs/devpts/root.c | 1 - fs/ext2/dir.c | 6 +- fs/ext2/file.c | 18 +- fs/ext2/inode.c | 62 ++++--- fs/ext2/symlink.c | 6 +- fs/fifo.c | 7 +- fs/hfs/dir_nat.c | 2 - fs/hfs/file.c | 1 - fs/hfs/file_cap.c | 1 - fs/hfs/file_hdr.c | 1 - fs/hpfs/inode.c | 3 - fs/isofs/file.c | 5 +- fs/isofs/inode.c | 12 +- fs/minix/file.c | 3 +- fs/msdos/namei.c | 1 - fs/ncpfs/dir.c | 1 - fs/nfs/dir.c | 4 +- fs/nfs/file.c | 5 +- fs/nfs/read.c | 3 + fs/nfs/symlink.c | 7 +- fs/nfs/write.c | 3 + fs/ntfs/fs.c | 3 - fs/pipe.c | 7 +- fs/proc/array.c | 14 +- fs/proc/base.c | 7 +- fs/proc/fd.c | 7 +- fs/proc/generic.c | 38 ++-- fs/proc/kmsg.c | 7 +- fs/proc/link.c | 7 +- fs/proc/mem.c | 7 +- fs/proc/net.c | 7 +- fs/proc/omirr.c | 38 ++-- fs/proc/proc_devtree.c | 6 +- fs/proc/root.c | 42 +++-- fs/proc/scsi.c | 37 ++-- fs/proc/sysvipc.c | 37 ++-- fs/smbfs/dir.c | 1 - fs/smbfs/file.c | 1 - fs/sysv/file.c | 3 +- fs/ufs/file.c | 3 +- fs/umsdos/dir.c | 1 - fs/umsdos/rdir.c | 1 - fs/umsdos/symlink.c | 1 - include/linux/fs.h | 56 ++++-- include/linux/swap.h | 1 + kernel/ksyms.c | 1 - kernel/sysctl.c | 7 +- mm/filemap.c | 162 ++++++++++------- mm/swap_state.c | 29 ++-- 73 files changed, 989 insertions(+), 477 deletions(-) diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index 9c6948c60..ae1980a42 100644 --- 
a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -148,7 +148,6 @@ static struct inode_operations proc_mca_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; #endif diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index bd385ccf8..9f5ecd98d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -47,13 +47,23 @@ pci_find_slot(unsigned int bus, unsigned int devfn) struct pci_dev * pci_find_device(unsigned int vendor, unsigned int device, struct pci_dev *from) { - if (!from) - from = pci_devices; - else - from = from->next; - while (from && (from->vendor != vendor && vendor != PCI_ANY_ID || from->device != device && device != PCI_ANY_ID)) - from = from->next; - return from; + struct pci_dev *next; + + next = pci_devices; + if (from) + next = from->next; + + while (next) { + struct pci_dev *dev = next; + next = next->next; + if (vendor != PCI_ANY_ID && dev->vendor != vendor) + continue; + if (device != PCI_ANY_ID && dev->device != device) + continue; + + return dev; + } + return NULL; } diff --git a/drivers/usb/acm.c b/drivers/usb/acm.c index d4796e28c..10c837d5a 100644 --- a/drivers/usb/acm.c +++ b/drivers/usb/acm.c @@ -50,7 +50,7 @@ static struct acm_state static_acm_state; spinlock_t usb_acm_lock = SPIN_LOCK_UNLOCKED; -static int acm_irq(int state, void *__buffer, void *dev_id) +static int acm_irq(int state, void *__buffer, int len, void *dev_id) { // unsigned char *data = __buffer; struct acm_state *acm = &static_acm_state; diff --git a/drivers/usb/audio.c b/drivers/usb/audio.c index 45a276772..9743ec89e 100644 --- a/drivers/usb/audio.c +++ b/drivers/usb/audio.c @@ -27,7 +27,7 @@ static struct usb_driver usb_audio_driver = }; -static int usb_audio_irq(int state, void *buffer, void *dev_id) +static int usb_audio_irq(int state, void *buffer, int len, void *dev_id) { struct usb_audio *aud = (struct usb_audio*) dev_id; return 1; diff --git a/drivers/usb/cpia.c 
b/drivers/usb/cpia.c index 87e1e4254..2402d3425 100644 --- a/drivers/usb/cpia.c +++ b/drivers/usb/cpia.c @@ -451,7 +451,7 @@ printk("copying\n"); } } -static int cpia_isoc_irq(int status, void *__buffer, void *dev_id) +static int cpia_isoc_irq(int status, void *__buffer, int len, void *dev_id) { struct usb_cpia *cpia = dev_id; struct usb_device *dev = cpia->dev; diff --git a/drivers/usb/hub.c b/drivers/usb/hub.c index 1cd7d7ccb..0a1ec1f01 100644 --- a/drivers/usb/hub.c +++ b/drivers/usb/hub.c @@ -33,7 +33,7 @@ static int khubd_pid = 0; * the low-level driver that it wants to be re-activated, * or zero to say "I'm done". */ -static int hub_irq(int status, void *__buffer, void *dev_id) +static int hub_irq(int status, void *__buffer, int len, void *dev_id) { struct usb_hub *hub = dev_id; unsigned long flags; diff --git a/drivers/usb/keyboard.c b/drivers/usb/keyboard.c index 5d93a5a84..e87519d9f 100644 --- a/drivers/usb/keyboard.c +++ b/drivers/usb/keyboard.c @@ -92,7 +92,7 @@ usb_kbd_repeat(unsigned long dev_id) } static int -usb_kbd_irq(int state, void *buffer, void *dev_id) +usb_kbd_irq(int state, void *buffer, int len, void *dev_id) { struct usb_keyboard *kbd = (struct usb_keyboard*) dev_id; unsigned long *down = (unsigned long*) buffer; diff --git a/drivers/usb/mouse.c b/drivers/usb/mouse.c index f094c0b0d..a79c10a07 100644 --- a/drivers/usb/mouse.c +++ b/drivers/usb/mouse.c @@ -60,7 +60,7 @@ static struct mouse_state static_mouse_state; spinlock_t usb_mouse_lock = SPIN_LOCK_UNLOCKED; -static int mouse_irq(int state, void *__buffer, void *dev_id) +static int mouse_irq(int state, void *__buffer, int len, void *dev_id) { signed char *data = __buffer; /* finding the mouse is easy when there's only one */ diff --git a/drivers/usb/ohci-hcd.c b/drivers/usb/ohci-hcd.c index 820efc5dc..8db61e08e 100644 --- a/drivers/usb/ohci-hcd.c +++ b/drivers/usb/ohci-hcd.c @@ -102,7 +102,7 @@ static int sohci_int_handler(void * ohci_in, unsigned int ep_addr, int ctrl_len, OHCI_DEBUG( 
for(i=0; i < data_len; i++ ) printk(" %02x", ((__u8 *) data)[i]);) OHCI_DEBUG( printk(" ret_status: %x\n", status); }) - ret = handler(cc_to_status[status & 0xf], data, dev_id); + ret = handler(cc_to_status[status & 0xf], data, data_len, dev_id); if(ret == 0) return 0; /* 0 .. do not requeue */ if(status > 0) return -1; /* error occured do not requeue ? */ ohci_trans_req(ohci, ep_addr, 0, NULL, data, 8, (__OHCI_BAG) handler, (__OHCI_BAG) dev_id); /* requeue int request */ diff --git a/drivers/usb/ohci.c b/drivers/usb/ohci.c index 22b46a396..48191e11b 100644 --- a/drivers/usb/ohci.c +++ b/drivers/usb/ohci.c @@ -777,7 +777,7 @@ static DECLARE_WAIT_QUEUE_HEAD(control_wakeup); * * This function is called from the interrupt handler. */ -static int ohci_control_completed(int stats, void *buffer, void *dev_id) +static int ohci_control_completed(int stats, void *buffer, int len, void *dev_id) { /* pass the TDs completion status back to control_msg */ if (dev_id) { @@ -1456,7 +1456,7 @@ static void ohci_reap_donelist(struct ohci *ohci) /* Check if TD should be re-queued */ if ((td->completed != NULL) && - (td->completed(cc, td->data, td->dev_id))) { + (td->completed(cc, td->data, -1 /* XXX */, td->dev_id))) { /* Mark the TD as active again: * Set the not accessed condition code * Reset the Error count diff --git a/drivers/usb/uhci-debug.c b/drivers/usb/uhci-debug.c index 7c577a58f..32549763e 100644 --- a/drivers/usb/uhci-debug.c +++ b/drivers/usb/uhci-debug.c @@ -131,7 +131,7 @@ void show_queue(struct uhci_qh *qh) #if 0 printk(" link = %p, element = %p\n", qh->link, qh->element); #endif - if(!qh->element) { + if(!(qh->element & ~0xF)) { printk(" td 0 = NULL\n"); return; } diff --git a/drivers/usb/uhci.c b/drivers/usb/uhci.c index 2f8010ed1..c03ce5adf 100644 --- a/drivers/usb/uhci.c +++ b/drivers/usb/uhci.c @@ -126,7 +126,7 @@ static int uhci_td_result(struct uhci_device *dev, struct uhci_td *td, unsigned tmp = td->first; printk("uhci_td_result() failed with status %x\n", 
status); - show_status(dev->uhci); + //show_status(dev->uhci); do { show_td(tmp); if ((tmp->link & 1) || (tmp->link & 2)) @@ -422,7 +422,7 @@ static int uhci_remove_irq(struct usb_device *usb_dev, unsigned int pipe, usb_de /* notify removal */ - td->completed(USB_ST_REMOVED, NULL, td->dev_id); + td->completed(USB_ST_REMOVED, NULL, 0, td->dev_id); /* this is DANGEROUS - not sure whether this is right */ @@ -645,7 +645,7 @@ void uhci_delete_isochronous(struct usb_device *usb_dev, void *_isodesc) */ static DECLARE_WAIT_QUEUE_HEAD(control_wakeup); -static int uhci_control_completed(int status, void *buffer, void *dev_id) +static int uhci_control_completed(int status, void *buffer, int len, void *dev_id) { wake_up(&control_wakeup); return 0; /* Don't re-instate */ @@ -692,7 +692,7 @@ static int uhci_run_control(struct uhci_device *dev, struct uhci_td *first, stru // show_status(dev->uhci); // show_queues(dev->uhci); - schedule_timeout(HZ/10); + schedule_timeout(HZ*5); // control should be empty here... // show_status(dev->uhci); @@ -736,8 +736,7 @@ static int uhci_run_control(struct uhci_device *dev, struct uhci_td *first, stru * information, that's just ridiculously high. Most * control messages have just a few bytes of data. 
*/ -static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, - devrequest *cmd, void *data, int len) +static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, void *cmd, void *data, int len) { struct uhci_device *dev = usb_to_uhci(usb_dev); struct uhci_td *first, *td, *prevtd; @@ -805,17 +804,18 @@ static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, } /* - * Build the final TD for control status + * Build the final TD for control status */ destination ^= (0xE1 ^ 0x69); /* OUT -> IN */ destination |= 1 << 19; /* End in Data1 */ - td->link = 1; /* Terminate */ - td->status = status | (1 << 24); /* IOC */ + td->backptr = &prevtd->link; + td->status = (status /* & ~(3 << 27) */) | (1 << 24); /* no limit on final packet */ td->info = destination | (0x7ff << 21); /* 0 bytes of data */ td->buffer = 0; td->first = first; - td->backptr = &prevtd->link; + td->link = 1; /* Terminate */ + /* Start it up.. */ ret = uhci_run_control(dev, first, td); @@ -841,7 +841,7 @@ static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, } if (uhci_debug && ret) { - __u8 *p = (__u8 *) cmd; + __u8 *p = cmd; printk("Failed cmd - %02X %02X %02X %02X %02X %02X %02X %02X\n", p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); @@ -860,7 +860,7 @@ static int uhci_control_msg(struct usb_device *usb_dev, unsigned int pipe, */ static DECLARE_WAIT_QUEUE_HEAD(bulk_wakeup); -static int uhci_bulk_completed(int status, void *buffer, void *dev_id) +static int uhci_bulk_completed(int status, void *buffer, int len, void *dev_id) { wake_up(&bulk_wakeup); return 0; /* Don't re-instate */ @@ -908,10 +908,11 @@ static int uhci_run_bulk(struct uhci_device *dev, struct uhci_td *first, struct // show_status(dev->uhci); // show_queues(dev->uhci); - schedule_timeout(HZ/10); + schedule_timeout(HZ*5); // show_status(dev->uhci); // show_queues(dev->uhci); + //show_queue(first->qh); remove_wait_queue(&bulk_wakeup, &wait); /* Clean up in case it 
failed.. */ @@ -1243,6 +1244,7 @@ static void uhci_interrupt_notify(struct uhci *uhci) { struct list_head *head = &uhci->interrupt_list; struct list_head *tmp; + int status; spin_lock(&irqlist_lock); tmp = head->next; @@ -1252,12 +1254,14 @@ static void uhci_interrupt_notify(struct uhci *uhci) next = tmp->next; - if (!(td->status & (1 << 23))) { /* No longer active? */ + if (!((status = td->status) & (1 << 23)) || /* No longer active? */ + ((td->qh->element & ~15) && + !((status = uhci_link_to_td(td->qh->element)->status) & (1 <<23)) && + (status & 0x760000) /* is in error state (Stall, db, babble, timeout, bitstuff) */)) { /* remove from IRQ list */ __list_del(tmp->prev, next); INIT_LIST_HEAD(tmp); - if (td->completed(uhci_map_status((td->status & 0xff)>> 16, 0), - bus_to_virt(td->buffer), td->dev_id)) { + if (td->completed(uhci_map_status(status, 0), bus_to_virt(td->buffer), -1, td->dev_id)) { list_add(&td->irq_list, &uhci->interrupt_list); if (!(td->status & (1 << 25))) { @@ -1284,7 +1288,7 @@ static void uhci_interrupt_notify(struct uhci *uhci) /* If completed wants to not reactivate, then it's */ /* responsible for free'ing the TD's and QH's */ /* or another function (such as run_control) */ - } + } tmp = next; } spin_unlock(&irqlist_lock); @@ -1564,6 +1568,7 @@ static int uhci_control_thread(void * __uhci) { struct uhci *uhci = (struct uhci *)__uhci; struct uhci_device * root_hub =usb_to_uhci(uhci->bus->root_hub); + lock_kernel(); request_region(uhci->io_addr, 32, "usb-uhci"); diff --git a/drivers/usb/usb.h b/drivers/usb/usb.h index 63ebeffb9..a6bf78e4a 100644 --- a/drivers/usb/usb.h +++ b/drivers/usb/usb.h @@ -242,10 +242,12 @@ struct usb_driver { * until we come up with a common meaning. * void *buffer - This is a pointer to the data used in this * USB transfer. + * int length - This is the number of bytes transferred in or out + * of the buffer by this transfer. 
(-1 = unknown/unsupported) * void *dev_id - This is a user defined pointer set when the IRQ * is requested that is passed back. */ -typedef int (*usb_device_irq)(int, void *, void *); +typedef int (*usb_device_irq)(int, void *, int, void *); struct usb_operations { struct usb_device *(*allocate)(struct usb_device *); diff --git a/drivers/usb/usb_scsi.c b/drivers/usb/usb_scsi.c index 655045bea..1a3e16b25 100644 --- a/drivers/usb/usb_scsi.c +++ b/drivers/usb/usb_scsi.c @@ -74,7 +74,9 @@ struct us_data { __u8 ep_int; /* interrupt . */ __u8 subclass; /* as in overview */ __u8 protocol; /* .............. */ + __u8 attention_done; /* force attention on first command */ int (*pop)(Scsi_Cmnd *); /* protocol specific do cmd */ + int (*pop_reset)(struct us_data *); /* ................. device reset */ GUID(guid); /* unique dev id */ struct Scsi_Host *host; /* our dummy host data */ Scsi_Host_Template *htmplt; /* own host template */ @@ -142,6 +144,9 @@ static int us_one_transfer(struct us_data *us, int pipe, char *buf, int length) /* we want to retry if the device reported NAK */ if (result == USB_ST_TIMEOUT) { + if (partial != this_xfer) { + return 0; /* I do not like this */ + } if (!maxtry--) break; this_xfer -= partial; @@ -150,6 +155,11 @@ static int us_one_transfer(struct us_data *us, int pipe, char *buf, int length) /* short data - assume end */ result = USB_ST_DATAUNDERRUN; break; + } else if (result == USB_ST_STALL && us->protocol == US_PR_CB) { + if (!maxtry--) + break; + this_xfer -= partial; + buf += partial; } else break; } while ( this_xfer ); @@ -216,27 +226,57 @@ static unsigned int us_transfer_length(Scsi_Cmnd *srb) } -static int pop_CBI_irq(int state, void *buffer, void *dev_id) +static int pop_CBI_irq(int state, void *buffer, int len, void *dev_id) { struct us_data *us = (struct us_data *)dev_id; if (state != USB_ST_REMOVED) { us->ip_data = *(__u16 *)buffer; - us->ip_wanted = 0; + US_DEBUGP("Interrupt Status %x\n", us->ip_data); } - wake_up(&us->ip_waitq); 
+ if (us->ip_wanted) + wake_up(&us->ip_waitq); + us->ip_wanted = 0; /* we dont want another interrupt */ return 0; } + +static int pop_CB_reset(struct us_data *us) +{ + unsigned char cmd[12]; + devrequest dr; + int result; + + dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE; + dr.request = US_CBI_ADSC; + dr.value = 0; + dr.index = us->pusb_dev->ifnum; + dr.length = 12; + memset(cmd, -1, sizeof(cmd)); + cmd[0] = SEND_DIAGNOSTIC; + cmd[1] = 4; + us->pusb_dev->bus->op->control_msg(us->pusb_dev, + usb_sndctrlpipe(us->pusb_dev,0), + &dr, cmd, 12); + + usb_clear_halt(us->pusb_dev, us->ep_in | 0x80); + usb_clear_halt(us->pusb_dev, us->ep_out); + + /* long wait for reset */ + + schedule_timeout(HZ*5); + return 0; +} + static int pop_CB_command(Scsi_Cmnd *srb) { struct us_data *us = (struct us_data *)srb->host_scribble; devrequest dr; unsigned char cmd[16]; int result; - int retry = 1; + int retry = 5; int done_start = 0; while (retry--) { @@ -279,7 +319,8 @@ static int pop_CB_command(Scsi_Cmnd *srb) result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, usb_sndctrlpipe(us->pusb_dev,0), &dr, cmd, us->fixedlength); - if (!done_start && us->subclass == US_SC_UFI && cmd[0] == TEST_UNIT_READY && result) { + if (!done_start && (us->subclass == US_SC_UFI /*|| us->subclass == US_SC_8070*/) + && cmd[0] == TEST_UNIT_READY && result) { /* as per spec try a start command, wait and retry */ done_start++; @@ -302,35 +343,47 @@ static int pop_CB_command(Scsi_Cmnd *srb) return result; } -/* Protocol command handlers */ +/* + * Control/Bulk status handler + */ -static int pop_CBI(Scsi_Cmnd *srb) +static int pop_CB_status(Scsi_Cmnd *srb) { struct us_data *us = (struct us_data *)srb->host_scribble; int result; + __u8 status[2]; + devrequest dr; + int retry = 5; - /* run the command */ - - if ((result = pop_CB_command(srb))) { - US_DEBUGP("CBI command %x\n", result); - if (result == USB_ST_STALL || result == USB_ST_TIMEOUT) - return (DID_OK << 16) | 2; - return DID_ABORT << 16; - } - - 
/* transfer the data */ - - if (us_transfer_length(srb)) { - result = us_transfer(srb, US_DIRECTION(srb->cmnd[0])); - if (result && result != USB_ST_DATAUNDERRUN) { - US_DEBUGP("CBI transfer %x\n", result); + switch (us->protocol) { + case US_PR_CB: + /* get from control */ + + while (retry--) { + dr.requesttype = 0x80 | USB_TYPE_STANDARD | USB_RT_DEVICE; + dr.request = USB_REQ_GET_STATUS; + dr.index = 0; + dr.value = 0; + dr.length = 2; + result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, + usb_rcvctrlpipe(us->pusb_dev,0), + &dr, status, sizeof(status)); + if (result != USB_ST_TIMEOUT) + break; + } + if (result) { + US_DEBUGP("Bad AP status request %d\n", result); return DID_ABORT << 16; } - } - - /* get status */ + US_DEBUGP("Got AP status %x %x\n", status[0], status[1]); + if (srb->cmnd[0] != REQUEST_SENSE && srb->cmnd[0] != INQUIRY && + ( (status[0] & ~3) || status[1])) + return (DID_OK << 16) | 2; + else + return DID_OK << 16; + break; - if (us->protocol == US_PR_CBI) { + case US_PR_CBI: /* get from interrupt pipe */ /* add interrupt transfer, marked for removal */ @@ -367,12 +420,48 @@ static int pop_CBI(Scsi_Cmnd *srb) return DID_ABORT << 16; } return (DID_OK << 16) + ((us->ip_data & 0x300) ? 2 : 0); - } else { - /* get from where? 
*/ } return DID_ERROR << 16; } +/* Protocol command handlers */ + +static int pop_CBI(Scsi_Cmnd *srb) +{ + struct us_data *us = (struct us_data *)srb->host_scribble; + int result; + + /* run the command */ + + if ((result = pop_CB_command(srb))) { + US_DEBUGP("CBI command %x\n", result); + if (result == USB_ST_STALL || result == USB_ST_TIMEOUT) { + return (DID_OK << 16) | 2; + } + return DID_ABORT << 16; + } + + /* transfer the data */ + + if (us_transfer_length(srb)) { + result = us_transfer(srb, US_DIRECTION(srb->cmnd[0])); + if (result && result != USB_ST_DATAUNDERRUN) { + US_DEBUGP("CBI transfer %x\n", result); + return DID_ABORT << 16; + } else if (result == USB_ST_DATAUNDERRUN) { + return DID_OK << 16; + } + } else { + if (!result) { + return DID_OK << 16; + } + } + + /* get status */ + + return pop_CB_status(srb); +} + static int pop_Bulk_reset(struct us_data *us) { devrequest dr; @@ -380,21 +469,20 @@ static int pop_Bulk_reset(struct us_data *us) dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE; dr.request = US_BULK_RESET; - dr.value = US_BULK_RESET_SOFT; + dr.value = US_BULK_RESET_HARD; dr.index = 0; dr.length = 0; - US_DEBUGP("Bulk soft reset\n"); result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, usb_sndctrlpipe(us->pusb_dev,0), &dr, NULL, 0); - if (result) { - US_DEBUGP("Bulk soft reset failed %d\n", result); - dr.value = US_BULK_RESET_HARD; - result = us->pusb_dev->bus->op->control_msg(us->pusb_dev, usb_sndctrlpipe(us->pusb_dev,0), &dr, NULL, 0); - if (result) - US_DEBUGP("Bulk hard reset failed %d\n", result); - } + if (result) + US_DEBUGP("Bulk hard reset failed %d\n", result); usb_clear_halt(us->pusb_dev, us->ep_in | 0x80); usb_clear_halt(us->pusb_dev, us->ep_out); + + /* long wait for reset */ + + schedule_timeout(HZ*5); + return result; } /* @@ -453,8 +541,6 @@ static int pop_Bulk(Scsi_Cmnd *srb) stall = 0; do { - //usb_settoggle(us->pusb_dev, us->ep_in, 0); /* AAARgh!! 
*/ - US_DEBUGP("Toggle is %d\n", usb_gettoggle(us->pusb_dev, us->ep_in)); result = us->pusb_dev->bus->op->bulk_msg(us->pusb_dev, usb_rcvbulkpipe(us->pusb_dev, us->ep_in), &bcs, US_BULK_CS_WRAP_LEN, &partial); @@ -564,6 +650,9 @@ static int us_queuecommand( Scsi_Cmnd *srb , void (*done)(Scsi_Cmnd *)) struct us_data *us = (struct us_data *)srb->host->hostdata[0]; US_DEBUGP("Command wakeup\n"); + if (us->srb) { + /* busy */ + } srb->host_scribble = (unsigned char *)us; us->srb = srb; srb->scsi_done = done; @@ -581,9 +670,12 @@ static int us_abort( Scsi_Cmnd *srb ) return 0; } -static int us_device_reset( Scsi_Cmnd *srb ) +static int us_bus_reset( Scsi_Cmnd *srb ) { - return 0; + struct us_data *us = (struct us_data *)srb->host->hostdata[0]; + + us->pop_reset(us); + return SUCCESS; } static int us_host_reset( Scsi_Cmnd *srb ) @@ -591,10 +683,6 @@ static int us_host_reset( Scsi_Cmnd *srb ) return 0; } -static int us_bus_reset( Scsi_Cmnd *srb ) -{ - return 0; -} #undef SPRINTF #define SPRINTF(args...) 
{ if (pos < (buffer + length)) pos += sprintf (pos, ## args); } @@ -623,9 +711,9 @@ int usb_scsi_proc_info (char *buffer, char **start, off_t offset, int length, in if (inout) return length; - if (!(vendor = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iManufacturer))) + if (!us->pusb_dev || !(vendor = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iManufacturer))) vendor = "?"; - if (!(product = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iProduct))) + if (!us->pusb_dev || !(product = usb_string(us->pusb_dev, us->pusb_dev->descriptor.iProduct))) product = "?"; switch (us->protocol) { @@ -677,7 +765,7 @@ static Scsi_Host_Template my_host_template = { us_queuecommand, NULL, /* eh_strategy */ us_abort, - us_device_reset, + us_bus_reset, us_bus_reset, us_host_reset, NULL, /* abort */ @@ -695,6 +783,25 @@ static Scsi_Host_Template my_host_template = { TRUE /* emulated */ }; +static unsigned char sense_notready[] = { + 0x70, /* current error */ + 0x00, + 0x02, /* not ready */ + 0x00, + 0x00, + 10, /* additional length */ + 0x00, + 0x00, + 0x00, + 0x00, + 0x04, /* not ready */ + 0x03, /* manual intervention */ + 0x00, + 0x00, + 0x00, + 0x00 +}; + static int usbscsi_control_thread(void * __us) { struct us_data *us = (struct us_data *)__us; @@ -710,7 +817,7 @@ static int usbscsi_control_thread(void * __us) exit_files(current); //exit_fs(current); - sprintf(current->comm, "usbscsi%d", us->host_no); + sprintf(current->comm, "usbscsi%d", us->host_number); unlock_kernel(); @@ -727,18 +834,160 @@ static int usbscsi_control_thread(void * __us) switch (action) { case US_ACT_COMMAND : - if (!us->pusb_dev || us->srb->target || us->srb->lun) { + if (us->srb->target || us->srb->lun) { /* bad device */ US_DEBUGP( "Bad device number (%d/%d) or dev %x\n", us->srb->target, us->srb->lun, (unsigned int)us->pusb_dev); us->srb->result = DID_BAD_TARGET << 16; + } else if (!us->pusb_dev) { + + /* our device has gone - pretend not ready */ + + if (us->srb->cmnd[0] == 
REQUEST_SENSE) { + memcpy(us->srb->request_buffer, sense_notready, sizeof(sense_notready)); + us->srb->result = DID_OK << 16; + } else { + us->srb->result = (DID_OK << 16) | 2; + } } else { US_DEBUG(us_show_command(us->srb)); + + /* check for variable length - do properly if so */ + if (us->filter && us->filter->command) us->srb->result = us->filter->command(us->fdata, us->srb); - else + else if (us->srb->cmnd[0] == START_STOP && + us->pusb_dev->descriptor.idProduct == 0x0001 && + us->pusb_dev->descriptor.idVendor == 0x04e6) + us->srb->result = DID_OK << 16; + else { + unsigned int savelen = us->srb->request_bufflen; + unsigned int saveallocation; + + switch (us->srb->cmnd[0]) { + case REQUEST_SENSE: + if (us->srb->request_bufflen > 18) + us->srb->request_bufflen = 18; + else + break; + saveallocation = us->srb->cmnd[4]; + us->srb->cmnd[4] = 18; + break; + + case INQUIRY: + if (us->srb->request_bufflen > 36) + us->srb->request_bufflen = 36; + else + break; + saveallocation = us->srb->cmnd[4]; + us->srb->cmnd[4] = 36; + break; + + case MODE_SENSE: + if (us->srb->request_bufflen > 4) + us->srb->request_bufflen = 4; + else + break; + saveallocation = us->srb->cmnd[4]; + us->srb->cmnd[4] = 4; + break; + + case LOG_SENSE: + case MODE_SENSE_10: + if (us->srb->request_bufflen > 8) + us->srb->request_bufflen = 8; + else + break; + saveallocation = (us->srb->cmnd[7] << 8) | us->srb->cmnd[8]; + us->srb->cmnd[7] = 0; + us->srb->cmnd[8] = 8; + break; + + default: + break; + } us->srb->result = us->pop(us->srb); + + if (savelen != us->srb->request_bufflen && + us->srb->result == (DID_OK << 16)) { + unsigned char *p = (unsigned char *)us->srb->request_buffer; + unsigned int length; + + /* set correct length and retry */ + switch (us->srb->cmnd[0]) { + case REQUEST_SENSE: + /* simply return 18 bytes */ + p[7] = 10; + length = us->srb->request_bufflen;; + break; + + case INQUIRY: + length = p[4] + 5 > savelen ? 
savelen : p[4] + 5; + us->srb->cmnd[4] = length; + break; + + case MODE_SENSE: + length = p[0] + 4 > savelen ? savelen : p[0] + 4; + us->srb->cmnd[4] = 4; + break; + + case LOG_SENSE: + length = ((p[2] << 8) + p[3]) + 4 > savelen ? savelen : ((p[2] << 8) + p[3]) + 4; + us->srb->cmnd[7] = length >> 8; + us->srb->cmnd[8] = length; + break; + + case MODE_SENSE_10: + length = ((p[0] << 8) + p[1]) + 8 > savelen ? savelen : ((p[0] << 8) + p[1]) + 8; + us->srb->cmnd[7] = length >> 8; + us->srb->cmnd[8] = length; + break; + } + + US_DEBUGP("Old/New length = %d/%d\n", savelen, length); + + if (us->srb->request_bufflen != length) { + us->srb->request_bufflen = length; + us->srb->result = us->pop(us->srb); + } + /* reset back to original values */ + + us->srb->request_bufflen = savelen; + switch (us->srb->cmnd[0]) { + case REQUEST_SENSE: + case INQUIRY: + case MODE_SENSE: + us->srb->cmnd[4] = saveallocation; + break; + + case LOG_SENSE: + case MODE_SENSE_10: + us->srb->cmnd[7] = saveallocation >> 8; + us->srb->cmnd[8] = saveallocation; + break; + } + } + /* force attention on first command */ + if (!us->attention_done) { + if (us->srb->cmnd[0] == REQUEST_SENSE) { + if (us->srb->result == (DID_OK << 16)) { + unsigned char *p = (unsigned char *)us->srb->request_buffer; + + us->attention_done = 1; + if ((p[2] & 0x0f) != UNIT_ATTENTION) { + p[2] = UNIT_ATTENTION; + p[12] = 0x29; /* power on, reset or bus-reset */ + p[13] = 0; + } + } + } else if (us->srb->cmnd[0] != INQUIRY && + us->srb->result == (DID_OK << 16)) { + us->srb->result |= 2; /* force check condition */ + } + } + } } us->srb->scsi_done(us->srb); + us->srb = NULL; break; case US_ACT_ABORT : @@ -820,7 +1069,7 @@ static int scsi_probe(struct usb_device *dev) if (dev->descriptor.idVendor == 0x04e6 && dev->descriptor.idProduct == 0x0001) { /* shuttle E-USB */ - protocol = US_PR_ZIP; + protocol = US_PR_CB; subclass = US_SC_8070; /* an assumption */ } else if (dev->descriptor.bDeviceClass != 0 || 
dev->config->altsetting->interface->bInterfaceClass != 8 || @@ -835,11 +1084,15 @@ static int scsi_probe(struct usb_device *dev) usb_string(dev, dev->descriptor.iSerialNumber) ) { make_guid(guid, dev->descriptor.idVendor, dev->descriptor.idProduct, usb_string(dev, dev->descriptor.iSerialNumber)); - for (ss = us_list; ss; ss = ss->next) { - if (GUID_EQUAL(guid, ss->guid)) { - US_DEBUGP("Found existing GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); - break; - } + } else { + make_guid(guid, dev->descriptor.idVendor, dev->descriptor.idProduct, + "0"); + } + for (ss = us_list; ss; ss = ss->next) { + if (!ss->pusb_dev && GUID_EQUAL(guid, ss->guid)) { + US_DEBUGP("Found existing GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); + flags = ss->flags; + break; } } } @@ -865,6 +1118,7 @@ static int scsi_probe(struct usb_device *dev) ss->subclass = interface->bInterfaceSubClass; ss->protocol = interface->bInterfaceProtocol; } + ss->attention_done = 0; /* set the protocol op */ @@ -873,16 +1127,19 @@ static int scsi_probe(struct usb_device *dev) case US_PR_CB: US_DEBUGPX("Control/Bulk\n"); ss->pop = pop_CBI; + ss->pop_reset = pop_CB_reset; break; case US_PR_CBI: US_DEBUGPX("Control/Bulk/Interrupt\n"); ss->pop = pop_CBI; + ss->pop_reset = pop_CB_reset; break; default: US_DEBUGPX("Bulk\n"); ss->pop = pop_Bulk; + ss->pop_reset = pop_Bulk_reset; break; } @@ -907,6 +1164,7 @@ static int scsi_probe(struct usb_device *dev) /* exit if strange looking */ if (usb_set_configuration(dev, dev->config[0].bConfigurationValue) || + usb_set_interface(dev, interface->bInterfaceNumber, 0) || !ss->ep_in || !ss->ep_out || (ss->protocol == US_PR_CBI && ss->ep_int == 0)) { US_DEBUGP("Problems with device\n"); if (ss->host) { @@ -933,13 +1191,8 @@ static int scsi_probe(struct usb_device *dev) /* make unique id if possible */ - if (dev->descriptor.iSerialNumber && - usb_string(dev, dev->descriptor.iSerialNumber) ) { - make_guid(ss->guid, dev->descriptor.idVendor, dev->descriptor.idProduct, - usb_string(dev, 
dev->descriptor.iSerialNumber)); - } - US_DEBUGP("New GUID " GUID_FORMAT "\n", GUID_ARGS(guid)); + memcpy(ss->guid, guid, sizeof(guid)); /* set class specific stuff */ @@ -986,9 +1239,30 @@ static int scsi_probe(struct usb_device *dev) (struct us_data *)htmplt->proc_dir = ss; - if (ss->protocol == US_PR_CBI) + + if (dev->descriptor.idVendor == 0x04e6 && + dev->descriptor.idProduct == 0x0001) { + devrequest dr; + __u8 qstat[2]; + + /* shuttle E-USB */ + dr.requesttype = 0xC0; + dr.request = 1; + dr.index = 0; + dr.value = 0; + dr.length = 0; + ss->pusb_dev->bus->op->control_msg(ss->pusb_dev, usb_rcvctrlpipe(dev,0), &dr, qstat, 2); + US_DEBUGP("C0 status %x %x\n", qstat[0], qstat[1]); + init_waitqueue_head(&ss->ip_waitq); + ss->pusb_dev->bus->op->request_irq(ss->pusb_dev, + usb_rcvctrlpipe(ss->pusb_dev, ss->ep_int), + pop_CBI_irq, 0, (void *)ss); + interruptible_sleep_on_timeout(&ss->ip_waitq, HZ*5); + + } else if (ss->protocol == US_PR_CBI) init_waitqueue_head(&ss->ip_waitq); + /* start up our thread */ { diff --git a/drivers/usb/usb_scsi_debug.c b/drivers/usb/usb_scsi_debug.c index 2ca847c08..634f4c0f6 100644 --- a/drivers/usb/usb_scsi_debug.c +++ b/drivers/usb/usb_scsi_debug.c @@ -95,7 +95,7 @@ void us_show_command(Scsi_Cmnd *srb) case READ_ELEMENT_STATUS: what = "READ_ELEMENT_STATUS"; break; case SEND_VOLUME_TAG: what = "SEND_VOLUME_TAG"; break; case WRITE_LONG_2: what = "WRITE_LONG_2"; break; - default: what = "??"; break; + default: break; } printk(KERN_DEBUG USB_SCSI "Command %s (%d bytes)\n", what, srb->cmd_len); printk(KERN_DEBUG USB_SCSI " %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 3a1c78ef0..ee08ff451 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -63,7 +63,6 @@ struct inode_operations affs_dir_inode_operations = { NULL, /* truncate */ NULL, /* permissions */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 
1961b4ec3..bb1ce69c8 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -80,7 +80,6 @@ struct inode_operations affs_file_inode_operations = { affs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; @@ -121,7 +120,6 @@ struct inode_operations affs_file_inode_operations_ofs = { affs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/autofs/dir.c b/fs/autofs/dir.c index 425df6577..f6ccf8419 100644 --- a/fs/autofs/dir.c +++ b/fs/autofs/dir.c @@ -79,7 +79,6 @@ struct inode_operations autofs_dir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/autofs/root.c b/fs/autofs/root.c index c1b57ec6e..011e3286f 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -60,7 +60,6 @@ struct inode_operations autofs_root_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index a4cb5154f..0e46db365 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -55,6 +55,5 @@ struct inode_operations autofs_symlink_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 33560caa4..89711607b 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -60,13 +60,13 @@ struct inode_operations bad_inode_ops = EIO_ERROR, /* rename */ EIO_ERROR, /* readlink */ bad_follow_link, /* follow_link */ + EIO_ERROR, /* bmap */ EIO_ERROR, /* readpage */ EIO_ERROR, /* writepage */ - EIO_ERROR, /* bmap */ + EIO_ERROR, /* flushpage */ EIO_ERROR, /* truncate */ EIO_ERROR, /* permission */ EIO_ERROR, /* smap */ - EIO_ERROR, /* update_page */ EIO_ERROR /* revalidate */ }; diff --git a/fs/block_dev.c b/fs/block_dev.c 
index 13b3f534d..664522ab8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -124,6 +124,7 @@ ssize_t block_write(struct file * filp, const char * buf, } buffercount=0; } + balance_dirty(dev); if(write_error) break; } diff --git a/fs/buffer.c b/fs/buffer.c index b6474f451..75f6486a0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -24,6 +24,8 @@ * - RMK */ +#include +#include #include #include #include @@ -771,25 +773,44 @@ static void file_buffer(struct buffer_head *bh, int list) /* * if a new dirty buffer is created we need to balance bdflush. + * + * in the future we might want to make bdflush aware of different + * pressures on different devices - thus the (currently unused) + * 'dev' parameter. */ -static inline void balance_dirty (kdev_t dev) +void balance_dirty(kdev_t dev) { - int too_many = (nr_buffers * bdf_prm.b_un.nfract/100); - - /* This buffer is dirty, maybe we need to start flushing. - * If too high a percentage of the buffers are dirty... - */ - if (nr_buffers_type[BUF_DIRTY] > too_many) { - wakeup_bdflush(1); + int dirty = nr_buffers_type[BUF_DIRTY]; + int ndirty = bdf_prm.b_un.ndirty; + + if (dirty > ndirty) { + int wait = 0; + if (dirty > 2*ndirty) + wait = 1; + wakeup_bdflush(wait); } +} - /* If this is a loop device, and - * more than half of the buffers are dirty... - * (Prevents no-free-buffers deadlock with loop device.) 
- */ - if (MAJOR(dev) == LOOP_MAJOR && - nr_buffers_type[BUF_DIRTY]*2>nr_buffers) - wakeup_bdflush(1); +atomic_t too_many_dirty_buffers; + +static inline void __mark_dirty(struct buffer_head *bh, int flag) +{ + set_writetime(bh, flag); + refile_buffer(bh); + if (atomic_read(&too_many_dirty_buffers)) + balance_dirty(bh->b_dev); +} + +void __mark_buffer_dirty(struct buffer_head *bh, int flag) +{ + __mark_dirty(bh, flag); +} + +void __atomic_mark_buffer_dirty(struct buffer_head *bh, int flag) +{ + lock_kernel(); + __mark_dirty(bh, flag); + unlock_kernel(); } /* @@ -800,21 +821,19 @@ void refile_buffer(struct buffer_head * buf) { int dispose; - if(buf->b_dev == B_FREE) { + if (buf->b_dev == B_FREE) { printk("Attempt to refile free buffer\n"); return; } + + dispose = BUF_CLEAN; + if (buffer_locked(buf)) + dispose = BUF_LOCKED; if (buffer_dirty(buf)) dispose = BUF_DIRTY; - else if (buffer_locked(buf)) - dispose = BUF_LOCKED; - else - dispose = BUF_CLEAN; - if(dispose != buf->b_list) { + + if (dispose != buf->b_list) file_buffer(buf, dispose); - if (dispose == BUF_DIRTY) - balance_dirty(buf->b_dev); - } } /* @@ -1239,7 +1258,9 @@ static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], * They show up in the buffer hash table and are registered in * page->buffers. */ + lock_kernel(); head = create_buffers(page_address(page), size, 1); + unlock_kernel(); if (page->buffers) BUG(); if (!head) @@ -1275,7 +1296,7 @@ static int create_page_buffers (int rw, struct page *page, kdev_t dev, int b[], * we have truncated the file and are going to free the * blocks on-disk.. 
*/ -int generic_block_flushpage(struct inode *inode, struct page *page, unsigned long offset) +int block_flushpage(struct inode *inode, struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; unsigned int curr_off = 0; @@ -1284,6 +1305,7 @@ int generic_block_flushpage(struct inode *inode, struct page *page, unsigned lon BUG(); if (!page->buffers) return 0; + lock_kernel(); head = page->buffers; bh = head; @@ -1311,21 +1333,25 @@ int generic_block_flushpage(struct inode *inode, struct page *page, unsigned lon /* * subtle. We release buffer-heads only if this is - * the 'final' flushpage. We invalidate the bmap - * cached value in all cases. + * the 'final' flushpage. We have invalidated the bmap + * cached value unconditionally, so real IO is not + * possible anymore. */ if (!offset) try_to_free_buffers(page); + unlock_kernel(); return 0; } -static inline void create_empty_buffers (struct page *page, +static void create_empty_buffers (struct page *page, struct inode *inode, unsigned long blocksize) { struct buffer_head *bh, *head, *tail; + lock_kernel(); head = create_buffers(page_address(page), blocksize, 1); + unlock_kernel(); if (page->buffers) BUG(); @@ -1341,6 +1367,10 @@ static inline void create_empty_buffers (struct page *page, get_page(page); } +/* + * block_write_full_page() is SMP-safe - currently it's still + * being called with the kernel lock held, but the code is ready. 
+ */ int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block) { struct dentry *dentry = file->f_dentry; @@ -1381,12 +1411,13 @@ int block_write_full_page (struct file *file, struct page *page, fs_getblock_t f bh->b_state = (1<b_end_io = end_buffer_io_sync; set_bit(BH_Uptodate, &bh->b_state); } - mark_buffer_dirty(bh, 0); + atomic_mark_buffer_dirty(bh,0); bh = bh->b_this_page; block++; @@ -1399,12 +1430,12 @@ out: return err; } -int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block) +int block_write_partial_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; unsigned long block; - int err, created; + int err, created, partial; unsigned long blocksize, start_block, end_block; unsigned long start_offset, start_bytes, end_bytes; unsigned long bbits, phys, blocks, i, len; @@ -1412,7 +1443,6 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of char * target_buf; target_buf = (char *)page_address(page) + offset; - lock_kernel(); if (!PageLocked(page)) BUG(); @@ -1449,40 +1479,16 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of i = 0; bh = head; + partial = 0; do { if (!bh) BUG(); if ((i < start_block) || (i > end_block)) { + if (!buffer_uptodate(bh)) + partial = 1; goto skip; } - unlock_kernel(); - - err = -EFAULT; - if (start_offset) { - len = start_bytes; - start_offset = 0; - } else - if (end_bytes && (i == end_block)) { - len = end_bytes; - end_bytes = 0; - } else { - /* - * Overwritten block. 
- */ - len = blocksize; - } - if (copy_from_user(target_buf, buf, len)) - goto out_nolock; - target_buf += len; - buf += len; - - /* - * we dirty buffers only after copying the data into - * the page - this way we can dirty the buffer even if - * the bh is still doing IO. - */ - lock_kernel(); if (!bh->b_blocknr) { err = -EIO; down(&inode->i_sem); @@ -1496,12 +1502,16 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of /* * if partially written block which has contents on * disk, then we have to read it first. + * We also rely on the fact that filesystem holes + * cannot be written. */ if (!created && (start_offset || (end_bytes && (i == end_block)))) { bh->b_state = 0; ll_rw_block(READ, 1, &bh); + lock_kernel(); wait_on_buffer(bh); + unlock_kernel(); err = -EIO; if (!buffer_uptodate(bh)) goto out; @@ -1515,19 +1525,48 @@ int block_write_one_page (struct file *file, struct page *page, unsigned long of bh->b_end_io = end_buffer_io_sync; set_bit(BH_Uptodate, &bh->b_state); } - mark_buffer_dirty(bh, 0); + + err = -EFAULT; + if (start_offset) { + len = start_bytes; + start_offset = 0; + } else + if (end_bytes && (i == end_block)) { + len = end_bytes; + end_bytes = 0; + } else { + /* + * Overwritten block. + */ + len = blocksize; + } + if (copy_from_user(target_buf, buf, len)) + goto out; + target_buf += len; + buf += len; + + /* + * we dirty buffers only after copying the data into + * the page - this way we can dirty the buffer even if + * the bh is still doing IO. + */ + atomic_mark_buffer_dirty(bh,0); skip: i++; block++; bh = bh->b_this_page; } while (bh != head); - unlock_kernel(); - SetPageUptodate(page); + /* + * is this a partial write that happened to make all buffers + * uptodate then we can optimize away a bogus readpage() for + * the next read(). Here we 'discover' wether the page went + * uptodate as a result of this (potentially partial) write. 
+ */ + if (!partial) + SetPageUptodate(page); return bytes; out: - unlock_kernel(); -out_nolock: ClearPageUptodate(page); return err; } @@ -1537,11 +1576,14 @@ out_nolock: * This function expects the page to be locked and may return * before I/O is complete. You then have to check page->locked, * page->uptodate, and maybe wait on page->wait. + * + * brw_page() is SMP-safe, although it's being called with the + * kernel lock held - but the code is ready. */ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) { struct buffer_head *head, *bh, *arr[MAX_BUF_PER_PAGE]; - int nr, fresh, block; + int nr, fresh /* temporary debugging flag */, block; if (!PageLocked(page)) panic("brw_page: page not locked for I/O"); @@ -1590,7 +1632,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) BUG(); } set_bit(BH_Uptodate, &bh->b_state); - mark_buffer_dirty(bh, 0); + atomic_mark_buffer_dirty(bh, 0); arr[nr++] = bh; } bh = bh->b_this_page; @@ -1600,20 +1642,15 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap) if ((rw == READ) && nr) { if (Page_Uptodate(page)) BUG(); - unlock_kernel(); ll_rw_block(rw, nr, arr); - lock_kernel(); } else { if (!nr && rw == READ) { SetPageUptodate(page); page->owner = (int)current; UnlockPage(page); } - if (nr && (rw == WRITE)) { - unlock_kernel(); + if (nr && (rw == WRITE)) ll_rw_block(rw, nr, arr); - lock_kernel(); - } } return 0; } @@ -1648,33 +1685,70 @@ void mark_buffer_uptodate(struct buffer_head * bh, int on) * mark_buffer_uptodate() functions propagate buffer state into the * page struct once IO has completed. 
*/ -int generic_readpage(struct file * file, struct page * page) +int block_read_full_page(struct file * file, struct page * page) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - unsigned long block; - int *p, nr[PAGE_SIZE/512]; - int i; + unsigned long iblock, phys_block; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + unsigned int blocksize, blocks; + int nr; - if (page->buffers) { - printk("hm, no brw_page(%p) because IO already started.\n", - page); - goto out; - } + if (!PageLocked(page)) + PAGE_BUG(page); + blocksize = inode->i_sb->s_blocksize; + if (!page->buffers) + create_empty_buffers(page, inode, blocksize); + head = page->buffers; - i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; - block = page->offset >> inode->i_sb->s_blocksize_bits; - p = nr; + blocks = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; + iblock = page->offset >> inode->i_sb->s_blocksize_bits; + page->owner = -1; + head = page->buffers; + bh = head; + nr = 0; do { - *p = inode->i_op->bmap(inode, block); - i--; - block++; - p++; - } while (i > 0); + phys_block = bh->b_blocknr; + /* + * important, we have to retry buffers that already have + * their bnr cached but had an IO error! + */ + if (!buffer_uptodate(bh)) { + phys_block = inode->i_op->bmap(inode, iblock); + /* + * this is safe to do because we hold the page lock: + */ + if (phys_block) { + init_buffer(bh, inode->i_dev, phys_block, + end_buffer_io_async, NULL); + arr[nr] = bh; + nr++; + } else { + /* + * filesystem 'hole' represents zero-contents: + */ + memset(bh->b_data, 0, blocksize); + set_bit(BH_Uptodate, &bh->b_state); + } + } + iblock++; + bh = bh->b_this_page; + } while (bh != head); - /* IO start */ - brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1); -out: + ++current->maj_flt; + if (nr) { + if (Page_Uptodate(page)) + BUG(); + ll_rw_block(READ, nr, arr); + } else { + /* + * all buffers are uptodate - we can set the page + * uptodate as well. 
+ */ + SetPageUptodate(page); + page->owner = (int)current; + UnlockPage(page); + } return 0; } @@ -1753,8 +1827,6 @@ int try_to_free_buffers(struct page * page) tmp = tmp->b_this_page; if (!buffer_busy(p)) continue; - - wakeup_bdflush(0); return 0; } while (tmp != bh); @@ -2151,9 +2223,14 @@ int bdflush(void * unused) run_task_queue(&tq_disk); wake_up(&bdflush_done); - /* If there are still a lot of dirty buffers around, skip the sleep - and flush some more */ - if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { + /* + * If there are still a lot of dirty buffers around, + * skip the sleep and flush some more + */ + if ((ndirty == 0) || (nr_buffers_type[BUF_DIRTY] <= + nr_buffers * bdf_prm.b_un.nfract/100)) { + + atomic_set(&too_many_dirty_buffers, 0); spin_lock_irq(¤t->sigmask_lock); flush_signals(current); spin_unlock_irq(¤t->sigmask_lock); diff --git a/fs/devices.c b/fs/devices.c index 8d9200f87..934fe290f 100644 --- a/fs/devices.c +++ b/fs/devices.c @@ -277,11 +277,14 @@ struct inode_operations blkdev_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* diff --git a/fs/devpts/root.c b/fs/devpts/root.c index c284f1d97..c1c1a6000 100644 --- a/fs/devpts/root.c +++ b/fs/devpts/root.c @@ -57,7 +57,6 @@ struct inode_operations devpts_root_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index a6753d276..59f068b5e 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -67,12 +67,14 @@ struct inode_operations ext2_dir_inode_operations = { ext2_rename, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ 
- NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ ext2_permission, /* permission */ - NULL /* smap */ + NULL, /* smap */ + NULL /* revalidate */ }; int ext2_check_dir_entry (const char * function, struct inode * dir, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 806859ba0..c90419ce3 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -113,7 +113,7 @@ static int ext2_writepage (struct file * file, struct page * page) static long ext2_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf) { - return block_write_one_page(file, page, offset, bytes, buf, ext2_getblk_block); + return block_write_partial_page(file, page, offset, bytes, buf, ext2_getblk_block); } /* @@ -122,7 +122,14 @@ static long ext2_write_one_page (struct file *file, struct page *page, unsigned static ssize_t ext2_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { - return generic_file_write(file, buf, count, ppos, ext2_write_one_page); + ssize_t retval = generic_file_write(file, buf, count, ppos, ext2_write_one_page); + if (retval > 0) { + struct inode *inode = file->f_dentry->d_inode; + remove_suid(inode); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + mark_inode_dirty(inode); + } + return retval; } /* @@ -188,13 +195,12 @@ struct inode_operations ext2_file_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - generic_readpage, /* readpage */ - ext2_writepage, /* writepage */ ext2_bmap, /* bmap */ + block_read_full_page, /* readpage */ + ext2_writepage, /* writepage */ + block_flushpage, /* flushpage */ ext2_truncate, /* truncate */ ext2_permission, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1258a39b6..02fb5b7b7 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include static 
int ext2_update_inode(struct inode * inode, int do_sync); @@ -131,58 +132,66 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) int ext2_bmap (struct inode * inode, int block) { - int i; + int i, ret; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); + ret = 0; + lock_kernel(); if (block < 0) { ext2_warning (inode->i_sb, "ext2_bmap", "block < 0"); - return 0; + goto out; } if (block >= EXT2_NDIR_BLOCKS + addr_per_block + (1 << (addr_per_block_bits * 2)) + ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) { ext2_warning (inode->i_sb, "ext2_bmap", "block > big"); - return 0; + goto out; + } + if (block < EXT2_NDIR_BLOCKS) { + ret = inode_bmap (inode, block); + goto out; } - if (block < EXT2_NDIR_BLOCKS) - return inode_bmap (inode, block); block -= EXT2_NDIR_BLOCKS; if (block < addr_per_block) { i = inode_bmap (inode, EXT2_IND_BLOCK); if (!i) - return 0; - return block_bmap (bread (inode->i_dev, i, + goto out; + ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block); + goto out; } block -= addr_per_block; if (block < (1 << (addr_per_block_bits * 2))) { i = inode_bmap (inode, EXT2_DIND_BLOCK); if (!i) - return 0; + goto out; i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block >> addr_per_block_bits); if (!i) - return 0; - return block_bmap (bread (inode->i_dev, i, + goto out; + ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), - block & (addr_per_block - 1)); + block & (addr_per_block - 1)); } block -= (1 << (addr_per_block_bits * 2)); i = inode_bmap (inode, EXT2_TIND_BLOCK); if (!i) - return 0; + goto out; i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block >> (addr_per_block_bits * 2)); if (!i) - return 0; + goto out; i = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), (block >> addr_per_block_bits) & (addr_per_block - 1)); if (!i) - return 0; - return 
block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), + goto out; + ret = block_bmap (bread (inode->i_dev, i, inode->i_sb->s_blocksize), block & (addr_per_block - 1)); +out: + unlock_kernel(); + return ret; } int ext2_bmap_create (struct inode * inode, int block) @@ -461,18 +470,20 @@ int ext2_getblk_block (struct inode * inode, long block, unsigned long b; unsigned long addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); int addr_per_block_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); - int phys_block; + int phys_block, ret; + lock_kernel(); + ret = 0; *err = -EIO; if (block < 0) { ext2_warning (inode->i_sb, "ext2_getblk", "block < 0"); - return 0; + goto abort; } if (block > EXT2_NDIR_BLOCKS + addr_per_block + (1 << (addr_per_block_bits * 2)) + ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) { ext2_warning (inode->i_sb, "ext2_getblk", "block > big"); - return 0; + goto abort; } /* * If this is a sequential block allocation, set the next_alloc_block @@ -527,13 +538,14 @@ int ext2_getblk_block (struct inode * inode, long block, inode->i_sb->s_blocksize, b, err, 0, &phys_block, created); out: - if (!phys_block) { - return 0; - } - if (*err) { - return 0; - } - return phys_block; + if (!phys_block) + goto abort; + if (*err) + goto abort; + ret = phys_block; +abort: + unlock_kernel(); + return ret; } struct buffer_head * ext2_getblk (struct inode * inode, long block, diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 826cb4176..b0ebcb91b 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -43,12 +43,14 @@ struct inode_operations ext2_symlink_inode_operations = { NULL, /* rename */ ext2_readlink, /* readlink */ ext2_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL /* smap */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct dentry * ext2_follow_link(struct dentry * dentry, diff --git 
a/fs/fifo.c b/fs/fifo.c index ecb27722e..e18183fc9 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -179,11 +179,14 @@ struct inode_operations fifo_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/hfs/dir_nat.c b/fs/hfs/dir_nat.c index be6974b66..21d4ca9af 100644 --- a/fs/hfs/dir_nat.c +++ b/fs/hfs/dir_nat.c @@ -99,7 +99,6 @@ struct inode_operations hfs_nat_ndir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; @@ -122,7 +121,6 @@ struct inode_operations hfs_nat_hdir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/hfs/file.c b/fs/hfs/file.c index 00bebd017..d3796e275 100644 --- a/fs/hfs/file.c +++ b/fs/hfs/file.c @@ -69,7 +69,6 @@ struct inode_operations hfs_file_inode_operations = { hfs_file_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/hfs/file_cap.c b/fs/hfs/file_cap.c index b3a58912c..789073d19 100644 --- a/fs/hfs/file_cap.c +++ b/fs/hfs/file_cap.c @@ -83,7 +83,6 @@ struct inode_operations hfs_cap_info_inode_operations = { cap_info_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidata */ }; diff --git a/fs/hfs/file_hdr.c b/fs/hfs/file_hdr.c index c1e1534b0..d112b3498 100644 --- a/fs/hfs/file_hdr.c +++ b/fs/hfs/file_hdr.c @@ -85,7 +85,6 @@ struct inode_operations hfs_hdr_inode_operations = { hdr_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 99bfa1004..17984d667 100644 --- 
a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -48,7 +48,6 @@ static const struct inode_operations hpfs_file_iops = &hpfs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -91,7 +90,6 @@ static const struct inode_operations hpfs_dir_iops = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -115,7 +113,6 @@ const struct inode_operations hpfs_symlink_iops = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/isofs/file.c b/fs/isofs/file.c index e2b4405d9..ce85b367a 100644 --- a/fs/isofs/file.c +++ b/fs/isofs/file.c @@ -48,9 +48,10 @@ struct inode_operations isofs_file_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - generic_readpage, /* readpage */ - NULL, /* writepage */ isofs_bmap, /* bmap */ + block_read_full_page, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ NULL, /* truncate */ NULL /* permission */ }; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 1d88aaea8..01d37a849 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -909,7 +910,7 @@ int isofs_statfs (struct super_block *sb, struct statfs *buf, int bufsiz) return copy_to_user(buf, &tmp, bufsiz) ? 
-EFAULT : 0; } -int isofs_bmap(struct inode * inode,int block) +static int do_isofs_bmap(struct inode * inode,int block) { off_t b_off, offset, size; struct inode *ino; @@ -991,6 +992,15 @@ int isofs_bmap(struct inode * inode,int block) return (b_off - offset + firstext) >> ISOFS_BUFFER_BITS(inode); } +int isofs_bmap(struct inode * inode,int block) +{ + int retval; + + lock_kernel(); + retval = do_isofs_bmap(inode, block); + unlock_kernel(); + return retval; +} static void test_and_set_uid(uid_t *p, uid_t value) { diff --git a/fs/minix/file.c b/fs/minix/file.c index 55ed5fd5d..d6b7ecb17 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -110,7 +110,6 @@ struct inode_operations minix_file_inode_operations = { minix_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ + block_flushpage, /* flushpage */ }; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index be1c34dac..8175724d7 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -633,7 +633,6 @@ struct inode_operations msdos_dir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0aa50559b..680f011a1 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -98,7 +98,6 @@ struct inode_operations ncp_dir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6f600dd5d..d41505862 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -78,13 +78,13 @@ struct inode_operations nfs_dir_inode_operations = { nfs_rename, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ 
nfs_revalidate, /* revalidate */ }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f10ce96ed..d3066f4cd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -71,15 +71,14 @@ struct inode_operations nfs_file_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ nfs_readpage, /* readpage */ nfs_writepage, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ nfs_revalidate, /* revalidate */ - NULL, /* flushpage */ }; /* Hack for future NFS swap support */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c0a7adaee..843f6b23e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -222,6 +223,7 @@ nfs_readpage(struct file *file, struct page *page) struct inode *inode = dentry->d_inode; int error; + lock_kernel(); dprintk("NFS: nfs_readpage (%p %ld@%ld)\n", page, PAGE_SIZE, page->offset); get_page(page); @@ -254,5 +256,6 @@ out_error: out_free: free_page(page_address(page)); out: + unlock_kernel(); return error; } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index b2ac2f6f8..c6fc4d685 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -43,11 +43,14 @@ struct inode_operations nfs_symlink_inode_operations = { NULL, /* rename */ nfs_readlink, /* readlink */ nfs_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* Symlink caching in the page cache is even more simplistic diff --git a/fs/nfs/write.c b/fs/nfs/write.c index de5ab535d..911a5261e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFS_PARANOIA 1 #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -93,6 +94,7 @@ nfs_writepage_sync(struct dentry 
*dentry, struct inode *inode, u8 *buffer; struct nfs_fattr fattr; + lock_kernel(); dprintk("NFS: nfs_writepage_sync(%s/%s %d@%ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, count, page->offset + offset); @@ -153,6 +155,7 @@ io_error: inode->i_ino, fattr.fileid); } + unlock_kernel(); return written? written : result; } diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index a43e071fe..d9430c2cc 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -445,7 +445,6 @@ static struct inode_operations ntfs_inode_operations_nobmap = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -628,7 +627,6 @@ static struct inode_operations ntfs_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; @@ -677,7 +675,6 @@ static struct inode_operations ntfs_dir_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/pipe.c b/fs/pipe.c index dd4f6cd19..9830418cc 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -461,11 +461,14 @@ struct inode_operations pipe_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; int do_pipe(int *fd) diff --git a/fs/proc/array.c b/fs/proc/array.c index d2ec8eae6..7f4aca723 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -1519,11 +1519,14 @@ struct inode_operations proc_array_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static 
ssize_t arraylong_read(struct file * file, char * buf, @@ -1567,9 +1570,12 @@ struct inode_operations proc_arraylong_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/base.c b/fs/proc/base.c index c9b2d8649..8579dd8c5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -45,11 +45,14 @@ static struct inode_operations proc_base_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1defdbae1..2bbb51d28 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -51,11 +51,14 @@ struct inode_operations proc_fd_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_permission /* permission */ + proc_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 54b16f84b..4e59fed73 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -51,20 +51,23 @@ struct inode_operations proc_file_inode_operations = { &proc_file_operations, /* default proc file-ops */ NULL, /* create */ NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* 
permission */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -83,11 +86,14 @@ struct inode_operations proc_net_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index ba78768b6..3cfccab96 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -72,9 +72,12 @@ struct inode_operations proc_kmsg_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/link.c b/fs/proc/link.c index 9df4de674..3a5639825 100644 --- a/fs/proc/link.c +++ b/fs/proc/link.c @@ -49,11 +49,14 @@ struct inode_operations proc_link_inode_operations = { NULL, /* rename */ proc_readlink, /* readlink */ proc_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_permission /* permission */ + proc_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct dentry * proc_follow_link(struct dentry *dentry, diff --git a/fs/proc/mem.c b/fs/proc/mem.c index df0447383..0e89f7645 100644 --- a/fs/proc/mem.c +++ b/fs/proc/mem.c @@ -336,9 +336,12 @@ struct inode_operations 
proc_mem_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_permission /* permission */ + proc_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/net.c b/fs/proc/net.c index a6d8c5616..1ad226de0 100644 --- a/fs/proc/net.c +++ b/fs/proc/net.c @@ -113,9 +113,12 @@ struct inode_operations proc_net_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/omirr.c b/fs/proc/omirr.c index dbf2b32b9..562aa11c5 100644 --- a/fs/proc/omirr.c +++ b/fs/proc/omirr.c @@ -277,22 +277,24 @@ static struct file_operations omirr_operations = { }; struct inode_operations proc_omirr_inode_operations = { - &omirr_operations, - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL, /* permission */ - NULL /* smap */ + &omirr_operations, + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index cd4aca324..594f00858 100644 --- 
a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -57,12 +57,14 @@ struct inode_operations devtree_symlink_inode_operations = { NULL, /* rename */ devtree_readlink, /* readlink */ devtree_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL /* smap */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct dentry *devtree_follow_link(struct dentry *dentry, diff --git a/fs/proc/root.c b/fs/proc/root.c index 79622b022..62f016221 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -71,11 +71,14 @@ struct inode_operations proc_dir_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -94,11 +97,14 @@ struct inode_operations proc_dyna_dir_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -136,11 +142,14 @@ static struct inode_operations proc_root_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; /* @@ -293,11 +302,14 @@ struct inode_operations proc_openprom_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - 
NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; struct proc_dir_entry proc_openprom = { @@ -478,11 +490,14 @@ static struct inode_operations proc_self_inode_operations = { NULL, /* rename */ proc_self_readlink, /* readlink */ proc_self_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct inode_operations proc_link_inode_operations = { @@ -498,11 +513,14 @@ static struct inode_operations proc_link_inode_operations = { NULL, /* rename */ proc_readlink, /* readlink */ proc_follow_link, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - NULL /* permission */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; static struct proc_dir_entry proc_root_loadavg = { diff --git a/fs/proc/scsi.c b/fs/proc/scsi.c index 6f3ad0770..ae2679b6d 100644 --- a/fs/proc/scsi.c +++ b/fs/proc/scsi.c @@ -59,23 +59,26 @@ static struct file_operations proc_scsi_operations = { * proc directories can do almost nothing.. 
*/ struct inode_operations proc_scsi_inode_operations = { - &proc_scsi_operations, /* default scsi directory file-ops */ - NULL, /* create */ - proc_lookup, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ +&proc_scsi_operations, /* default scsi directory file-ops */ + NULL, /* create */ + proc_lookup, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; int get_not_present_info(char *buffer, char **start, off_t offset, int length) diff --git a/fs/proc/sysvipc.c b/fs/proc/sysvipc.c index eab3e3186..c6e32894d 100644 --- a/fs/proc/sysvipc.c +++ b/fs/proc/sysvipc.c @@ -118,21 +118,24 @@ static struct file_operations proc_sysvipc_operations = { * proc directories can do almost nothing.. 
*/ struct inode_operations proc_sysvipc_inode_operations = { - &proc_sysvipc_operations, /* default net file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ + &proc_sysvipc_operations, /* default net file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* bmap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* flushpage */ + NULL, /* truncate */ + NULL, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index b820642fe..870ebac74 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -65,7 +65,6 @@ struct inode_operations smb_dir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ smb_revalidate_inode, /* revalidate */ }; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 1a4a0add1..cfb2d82da 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -410,6 +410,5 @@ struct inode_operations smb_file_inode_operations = NULL, /* truncate */ smb_file_permission, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ smb_revalidate_inode, /* revalidate */ }; diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 9e806e4d1..19443f289 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -119,7 +119,6 @@ struct inode_operations sysv_file_inode_operations = { sysv_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ + block_flushpage, /* flushpage */ }; diff --git 
a/fs/ufs/file.c b/fs/ufs/file.c index 57db16bae..9e027cfc3 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -182,7 +182,6 @@ struct inode_operations ufs_file_inode_operations = { ufs_truncate, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ - generic_block_flushpage,/* flushpage */ + block_flushpage, /* flushpage */ }; diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index a780a9587..0f26103c4 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -838,6 +838,5 @@ struct inode_operations umsdos_dir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c index 3f5d10953..7951bb8f8 100644 --- a/fs/umsdos/rdir.c +++ b/fs/umsdos/rdir.c @@ -253,6 +253,5 @@ struct inode_operations umsdos_rdir_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL, /* revalidate */ }; diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c index 4b3678a22..97ea2da41 100644 --- a/fs/umsdos/symlink.c +++ b/fs/umsdos/symlink.c @@ -141,7 +141,6 @@ struct inode_operations umsdos_symlink_inode_operations = NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; diff --git a/include/linux/fs.h b/include/linux/fs.h index a613816aa..fd67e059a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -596,15 +596,23 @@ struct inode_operations { struct inode *, struct dentry *); int (*readlink) (struct dentry *, char *,int); struct dentry * (*follow_link) (struct dentry *, struct dentry *, unsigned int); + /* + * the order of these functions within the VFS template has been + * changed because SMP locking has changed: from now on all bmap, + * readpage, writepage and flushpage functions are supposed to do + * whatever locking they need to get proper SMP operation - for + * now in most cases this means a 
lock/unlock_kernel at entry/exit. + * [The new order is also slightly more logical :)] + */ + int (*bmap) (struct inode *,int); int (*readpage) (struct file *, struct page *); int (*writepage) (struct file *, struct page *); - int (*bmap) (struct inode *,int); + int (*flushpage) (struct inode *, struct page *, unsigned long); + void (*truncate) (struct inode *); int (*permission) (struct inode *, int); int (*smap) (struct inode *,int); - int (*updatepage) (struct file *, struct page *, unsigned long, unsigned int); int (*revalidate) (struct dentry *); - int (*flushpage) (struct inode *, struct page *, unsigned long); }; struct super_operations { @@ -745,21 +753,36 @@ void mark_buffer_uptodate(struct buffer_head *, int); extern inline void mark_buffer_clean(struct buffer_head * bh) { - if (test_and_clear_bit(BH_Dirty, &bh->b_state)) { - if (bh->b_list == BUF_DIRTY) - refile_buffer(bh); - } + if (test_and_clear_bit(BH_Dirty, &bh->b_state)) + refile_buffer(bh); } +extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh, int flag)); +extern void FASTCALL(__atomic_mark_buffer_dirty(struct buffer_head *bh, int flag)); + +#define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state) + extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag) { - if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { - set_writetime(bh, flag); - if (bh->b_list != BUF_DIRTY) - refile_buffer(bh); - } + if (!atomic_set_buffer_dirty(bh)) + __mark_buffer_dirty(bh, flag); +} + +/* + * SMP-safe version of the above - does synchronization with + * other users of buffer-cache data structures. + * + * since we test-set the dirty bit in a CPU-atomic way we also + * have optimized the common 'redirtying' case away completely. 
+ */ +extern inline void atomic_mark_buffer_dirty(struct buffer_head * bh, int flag) +{ + if (!atomic_set_buffer_dirty(bh)) + __atomic_mark_buffer_dirty(bh, flag); } + +extern void balance_dirty(kdev_t); extern int check_disk_change(kdev_t); extern int invalidate_inodes(struct super_block *); extern void invalidate_inode_pages(struct inode *); @@ -850,14 +873,15 @@ extern int brw_page(int, struct page *, kdev_t, int [], int, int); typedef long (*writepage_t)(struct file *, struct page *, unsigned long, unsigned long, const char *); typedef int (*fs_getblock_t)(struct inode *, long, int, int *, int *); +/* Generic buffer handling for block filesystems.. */ +extern int block_read_full_page(struct file *, struct page *); +extern int block_write_full_page (struct file *, struct page *, fs_getblock_t); +extern int block_write_partial_page (struct file *, struct page *, unsigned long, unsigned long, const char *, fs_getblock_t); +extern int block_flushpage(struct inode *, struct page *, unsigned long); -extern int generic_readpage(struct file *, struct page *); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *, writepage_t); -extern int generic_block_flushpage(struct inode *, struct page *, unsigned long); -extern int block_write_one_page (struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf, fs_getblock_t fs_get_block); -extern int block_write_full_page (struct file *file, struct page *page, fs_getblock_t fs_get_block); extern struct super_block *get_super(kdev_t); diff --git a/include/linux/swap.h b/include/linux/swap.h index f0ba31405..c06ddba63 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -107,6 +107,7 @@ extern int FASTCALL(swap_count(unsigned long)); /* * Make these inline later once they are working properly. 
*/ +extern void __delete_from_swap_cache(struct page *page); extern void delete_from_swap_cache(struct page *page); extern void free_page_and_swap_cache(unsigned long addr); diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 03c516710..a57d67d8b 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -167,7 +167,6 @@ EXPORT_SYMBOL(add_blkdev_randomness); EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(generic_file_write); EXPORT_SYMBOL(generic_file_mmap); -EXPORT_SYMBOL(generic_readpage); EXPORT_SYMBOL(file_lock_table); EXPORT_SYMBOL(posix_lock_file); EXPORT_SYMBOL(posix_test_lock); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ed9824136..2d4fb7169 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -121,11 +121,14 @@ struct inode_operations proc_sys_inode_operations = NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + NULL, /* flushpage */ NULL, /* truncate */ - proc_sys_permission + proc_sys_permission, /* permission */ + NULL, /* smap */ + NULL /* revalidate */ }; extern struct proc_dir_entry proc_sys_root; diff --git a/mm/filemap.c b/mm/filemap.c index c0a17a00f..4e885758f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -228,12 +228,14 @@ repeat: spin_unlock(&pagecache_lock); } +extern atomic_t too_many_dirty_buffers; + int shrink_mmap(int priority, int gfp_mask) { static unsigned long clock = 0; unsigned long limit = num_physpages; struct page * page; - int count, err; + int count, users; count = limit >> priority; @@ -262,24 +264,64 @@ int shrink_mmap(int priority, int gfp_mask) if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) continue; - if (PageLocked(page)) + /* + * Some common cases that we just short-circuit without + * getting the locks - we need to re-check this once we + * have the lock, but that's fine. 
+ */ + users = page_count(page); + if (!users) continue; + if (!page->buffers) { + if (!page->inode) + continue; + if (users > 1) + continue; + } + + /* + * ok, now the page looks interesting. Re-check things + * and keep the lock. + */ + spin_lock(&pagecache_lock); + if (!page->inode && !page->buffers) { + spin_unlock(&pagecache_lock); + continue; + } + if (!page_count(page)) { +// BUG(); + spin_unlock(&pagecache_lock); + continue; + } + get_page(page); + if (TryLockPage(page)) { + spin_unlock(&pagecache_lock); + goto put_continue; + } + + /* + * we keep pagecache_lock locked and unlock it in + * each branch, so that the page->inode case doesnt + * have to re-grab it. Here comes the 'real' logic + * to free memory: + */ /* Is it a buffer page? */ if (page->buffers) { - if (TryLockPage(page)) - continue; - err = try_to_free_buffers(page); - UnlockPage(page); - - if (!err) - continue; - goto out; + kdev_t dev = page->buffers->b_dev; + spin_unlock(&pagecache_lock); + if (try_to_free_buffers(page)) + goto made_progress; + if (!atomic_read(&too_many_dirty_buffers)) { + atomic_set(&too_many_dirty_buffers, 1); + balance_dirty(dev); + } + goto unlock_continue; } /* We can't free pages unless there's just one user */ - if (page_count(page) != 1) - continue; + if (page_count(page) != 2) + goto spin_unlock_continue; count--; @@ -289,44 +331,36 @@ int shrink_mmap(int priority, int gfp_mask) * were to be marked referenced.. */ if (PageSwapCache(page)) { - if (referenced && swap_count(page->offset) != 1) - continue; - delete_from_swap_cache(page); - err = 1; - goto out; + spin_unlock(&pagecache_lock); + if (referenced && swap_count(page->offset) != 2) + goto unlock_continue; + __delete_from_swap_cache(page); + page_cache_release(page); + goto made_progress; } - if (referenced) - continue; - /* is it a page-cache page? 
*/ - spin_lock(&pagecache_lock); - if (page->inode) { - if (pgcache_under_min()) - goto unlock_continue; - if (TryLockPage(page)) - goto unlock_continue; - - if (page_count(page) == 1) { - remove_page_from_inode_queue(page); - remove_page_from_hash_queue(page); - page->inode = NULL; - } + if (!referenced && page->inode && !pgcache_under_min()) { + remove_page_from_inode_queue(page); + remove_page_from_hash_queue(page); + page->inode = NULL; spin_unlock(&pagecache_lock); - UnlockPage(page); page_cache_release(page); - err = 1; - goto out; -unlock_continue: - spin_unlock(&pagecache_lock); - continue; + goto made_progress; } +spin_unlock_continue: spin_unlock(&pagecache_lock); +unlock_continue: + UnlockPage(page); +put_continue: + put_page(page); } while (count > 0); - err = 0; -out: - return err; + return 0; +made_progress: + UnlockPage(page); + put_page(page); + return 1; } static inline struct page * __find_page_nolock(struct inode * inode, unsigned long offset, struct page *page) @@ -499,9 +533,7 @@ static unsigned long try_to_read_ahead(struct file * file, * We do not have to check the return value here * because it's a readahead. */ - lock_kernel(); inode->i_op->readpage(file, page); - unlock_kernel(); page_cache = 0; page_cache_release(page); } @@ -522,15 +554,13 @@ void ___wait_on_page(struct page *page) DECLARE_WAITQUEUE(wait, tsk); add_wait_queue(&page->wait, &wait); - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - if (PageLocked(page)) { - do { - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - schedule(); - } while (PageLocked(page)); - } + do { + tsk->state = TASK_UNINTERRUPTIBLE; + run_task_queue(&tq_disk); + if (!PageLocked(page)) + break; + schedule(); + } while (PageLocked(page)); tsk->state = TASK_RUNNING; remove_wait_queue(&page->wait, &wait); } @@ -1039,11 +1069,9 @@ page_not_up_to_date: goto page_ok; } -read_page: +readpage: /* ... and start the actual read. The read will unlock the page. 
*/ - lock_kernel(); error = inode->i_op->readpage(filp, page); - unlock_kernel(); if (!error) { if (Page_Uptodate(page)) @@ -1095,7 +1123,7 @@ no_cached_page: spin_unlock(&pagecache_lock); page_cache = 0; - goto read_page; + goto readpage; } *ppos = pos; @@ -1221,6 +1249,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou if (retval) goto fput_out; + unlock_kernel(); retval = 0; if (count) { read_descriptor_t desc; @@ -1230,7 +1259,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou ppos = &in_file->f_pos; if (offset) { if (get_user(pos, offset)) - goto fput_out; + goto fput_out_lock; ppos = &pos; } @@ -1247,7 +1276,8 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou put_user(pos, offset); } - +fput_out_lock: + lock_kernel(); fput_out: fput(out_file); fput_in: @@ -1283,6 +1313,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) goto no_page_nolock; + unlock_kernel(); /* @@ -1306,10 +1337,12 @@ found_page: goto failure; } - wait_on_page(page); - - if (!Page_Uptodate(page)) - PAGE_BUG(page); + if (!Page_Uptodate(page)) { + lock_page(page); + if (!Page_Uptodate(page)) + goto page_not_uptodate; + UnlockPage(page); + } success: /* @@ -1377,9 +1410,8 @@ no_cached_page: */ new_page = 0; - lock_kernel(); +page_not_uptodate: error = inode->i_op->readpage(file, page); - unlock_kernel(); if (!error) { wait_on_page(page); @@ -1398,9 +1430,7 @@ page_read_error: if (!PageLocked(page)) PAGE_BUG(page); ClearPageError(page); - lock_kernel(); error = inode->i_op->readpage(file, page); - unlock_kernel(); if (error) goto failure; wait_on_page(page); diff --git a/mm/swap_state.c b/mm/swap_state.c index 1b33794fd..21723c1db 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,15 +39,14 @@ 
static struct inode_operations swapper_inode_operations = { NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ + NULL, /* bmap */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* bmap */ + block_flushpage, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ - NULL, /* revalidate */ - generic_block_flushpage, /* flushpage */ + NULL /* revalidate */ }; struct inode swapper_inode = { i_op: &swapper_inode_operations }; @@ -221,17 +220,14 @@ static inline void remove_from_swap_cache(struct page *page) remove_inode_page(page); } - /* * This must be called only on pages that have * been verified to be in the swap cache. */ -void delete_from_swap_cache(struct page *page) +void __delete_from_swap_cache(struct page *page) { long entry = page->offset; - lock_page(page); - #ifdef SWAP_CACHE_INFO swap_cache_del_total++; #endif @@ -241,9 +237,21 @@ void delete_from_swap_cache(struct page *page) page_address(page), page_count(page), entry); #endif remove_from_swap_cache (page); + swap_free (entry); +} + +/* + * This must be called only on pages that have + * been verified to be in the swap cache. + */ +void delete_from_swap_cache(struct page *page) +{ + lock_page(page); + + __delete_from_swap_cache(page); + UnlockPage(page); page_cache_release(page); - swap_free (entry); } /* @@ -258,9 +266,8 @@ void free_page_and_swap_cache(unsigned long addr) /* * If we are the only user, then free up the swap cache. */ - if (PageSwapCache(page) && !is_page_shared(page)) { + if (PageSwapCache(page) && !is_page_shared(page)) delete_from_swap_cache(page); - } __free_page(page); } -- 2.11.4.GIT