diff --git a/lustre/kernel_patches/patches/cifs_2.6.5.patch b/lustre/kernel_patches/patches/cifs_2.6.5.patch new file mode 100644 index 0000000000000000000000000000000000000000..35db137e0d5156da14a7ec6b937d5cf3edc7e6a6 --- /dev/null +++ b/lustre/kernel_patches/patches/cifs_2.6.5.patch @@ -0,0 +1,65 @@ +#EJF i took out part of this change set + + +# This is a BitKeeper generated diff -Nru style patch. +# +# ChangeSet +# 2004/01/30 17:20:19-06:00 stevef@stevef95.austin.ibm.com +# Relax requested CIFS permissions on open to simply request GENERIC_READ and GENERIC_WRITE (instead of GENERIC_ALL which +# can unnecessarily conflict with share permissions by asking implicitly for take ownership and other unneeded flags) +# +# fs/cifs/dir.c +# 2004/01/30 17:20:13-06:00 stevef@stevef95.austin.ibm.com +7 -3 +# Relax requested CIFS permissions on open to simply request GENERIC_READ and GENERIC_WRITE (instead of GENERIC_ALL which +# can unnecessarily conflict with share permissions by asking implicitly for take ownership and other unneeded flags) +# +# fs/cifs/file.c +# 2004/01/30 17:20:13-06:00 stevef@stevef95.austin.ibm.com +12 -4 +# Relax requested CIFS permissions on open to simply request GENERIC_READ and GENERIC_WRITE (instead of GENERIC_ALL which +# can unnecessarily conflict with share permissions by asking implicitly for take ownership and other unneeded flags) +# +diff -Nru a/fs/cifs/dir.c b/fs/cifs/dir.c +--- a/fs/cifs/dir.c Mon May 3 16:32:08 2004 ++++ b/fs/cifs/dir.c Mon May 3 16:32:08 2004 +@@ -125,7 +125,7 @@ + int rc = -ENOENT; + int xid; + int oplock = 0; +- int desiredAccess = GENERIC_ALL; ++ int desiredAccess = GENERIC_READ | GENERIC_WRITE; + __u16 fileHandle; + struct cifs_sb_info *cifs_sb; + struct cifsTconInfo *pTcon; +@@ -150,8 +150,12 @@ + desiredAccess = GENERIC_READ; + else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) + desiredAccess = GENERIC_WRITE; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) +- desiredAccess = GENERIC_ALL; ++ 
else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { ++ /* GENERIC_ALL is too much permission to request */ ++ /* can cause unnecessary access denied on create */ ++ /* desiredAccess = GENERIC_ALL; */ ++ desiredAccess = GENERIC_READ | GENERIC_WRITE; ++ } + + if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; +diff -Nru a/fs/cifs/file.c b/fs/cifs/file.c +--- a/fs/cifs/file.c Mon May 3 16:32:08 2004 ++++ b/fs/cifs/file.c Mon May 3 16:32:08 2004 +@@ -87,8 +87,12 @@ + desiredAccess = GENERIC_READ; + else if ((file->f_flags & O_ACCMODE) == O_WRONLY) + desiredAccess = GENERIC_WRITE; +- else if ((file->f_flags & O_ACCMODE) == O_RDWR) +- desiredAccess = GENERIC_ALL; ++ else if ((file->f_flags & O_ACCMODE) == O_RDWR) { ++ /* GENERIC_ALL is too much permission to request */ ++ /* can cause unnecessary access denied on create */ ++ /* desiredAccess = GENERIC_ALL; */ ++ desiredAccess = GENERIC_READ | GENERIC_WRITE; ++ } + + /********************************************************************* + * open flag mapping table: diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-vanilla-2.6.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-vanilla-2.6.patch new file mode 100644 index 0000000000000000000000000000000000000000..5bcd4f024678961ea0d336f053b02177b05e040f --- /dev/null +++ b/lustre/kernel_patches/patches/nfs-cifs-intent-vanilla-2.6.patch @@ -0,0 +1,116 @@ +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +Index: linux-2.6.4-51.0/fs/nfs/dir.c +=================================================================== +--- linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 17:09:16.000000000 -0400 ++++ linux-2.6.4-51.0/fs/nfs/dir.c 2004-04-05 17:09:23.000000000 -0400 +@@ -782,7 +782,7 @@ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? 
*/ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -803,7 +803,7 @@ + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) ++ if (nd->intent.it_flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ +@@ -811,7 +811,7 @@ + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + +- if (nd->intent.open.flags & O_CREAT) { ++ if (nd->intent.it_flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); +@@ -827,7 +827,7 @@ + break; + /* This turned out not to be a regular file */ + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ +@@ -861,7 +861,7 @@ + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) +Index: linux-2.6.4-51.0/fs/nfs/nfs4proc.c +=================================================================== +--- linux-2.6.4-51.0.orig/fs/nfs/nfs4proc.c 2004-04-05 12:41:59.000000000 -0400 ++++ linux-2.6.4-51.0/fs/nfs/nfs4proc.c 2004-04-05 17:09:23.000000000 -0400 +@@ -792,17 +792,17 @@ + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.it_create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.it_flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); +- state = 
nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; +Index: linux-2.6.4-51.0/fs/cifs/dir.c +=================================================================== +--- linux-2.6.4-51.0.orig/fs/cifs/dir.c 2004-04-05 12:41:59.000000000 -0400 ++++ linux-2.6.4-51.0/fs/cifs/dir.c 2004-04-05 17:13:47.000000000 -0400 +@@ -146,18 +146,18 @@ + if(nd) { + cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path)); + +- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) ++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) + desiredAccess = GENERIC_READ; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) + desiredAccess = GENERIC_WRITE; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) + desiredAccess = GENERIC_ALL; + +- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; +- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) ++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; +- else if((nd->intent.open.flags & O_CREAT) == O_CREAT) ++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else { + cFYI(1,("Create flag not set in create function")); +@@ -319,7 +319,7 @@ + parent_dir_inode, direntry->d_name.name, direntry)); + + if(nd) { /* BB removeme */ +- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags)); ++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags)); + } /* BB removeme BB */ + /* BB Add 
check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */ + diff --git a/lustre/kernel_patches/patches/o_direct-2.6.5.patch b/lustre/kernel_patches/patches/o_direct-2.6.5.patch new file mode 100644 index 0000000000000000000000000000000000000000..76a9399eece70e268d537e850b60e3692e950cc9 --- /dev/null +++ b/lustre/kernel_patches/patches/o_direct-2.6.5.patch @@ -0,0 +1,500 @@ +# This is a BitKeeper generated diff -Nru style patch. +# +# ChangeSet +# 2004/04/12 13:09:10-07:00 akpm@osdl.org +# [PATCH] O_DIRECT data exposure fixes +# +# From: Badari Pulavarty, Suparna Bhattacharya, Andrew Morton +# +# Forward port of Stephen Tweedie's DIO fixes from 2.4, to fix various DIO vs +# buffered IO exposures involving races causing: +# +# (a) stale data from uninstantiated blocks to be read, e.g. +# +# - O_DIRECT reads against buffered writes to a sparse region +# +# - O_DIRECT writes to a sparse region against buffered reads +# +# (b) potential data corruption with +# +# - O_DIRECT IOs against truncate +# +# due to writes to truncated blocks (which may have been reallocated to +# another file). +# +# Summary of fixes: +# +# 1) All the changes affect only regular files. RAW/O_DIRECT on block are +# unaffected. +# +# 2) The DIO code will not fill in sparse regions on a write. Instead +# -ENOTBLK is returned and the generic file write code would fallthrough to +# buffered IO in this case followed by writing through the pages to disk +# using filemap_fdatawrite/wait. +# +# 3) i_sem is held during both DIO reads and writes. For reads, and writes +# to already allocated blocks, it is released right after IO is issued, +# while for writes to newly allocated blocks (e.g file extending writes and +# hole overwrites) it is held all the way through until IO completes (and +# data is committed to disk). +# +# 4) filemap_fdatawrite/wait are called under i_sem to synchronize buffered +# pages to disk blocks before issuing DIO. 
+# +# 5) A new rwsem (i_alloc_sem) is held in shared mode all the while a DIO +# (read or write) is in progress, and in exclusive mode by truncate to guard +# against deallocation of data blocks during DIO. +# +# 6) All this new locking has been pushed down into blockdev_direct_IO to +# avoid interfering with NFS direct IO. The locks are taken in the order +# i_sem followed by i_alloc_sem. While i_sem may be released after IO +# submission in some cases, i_alloc_sem is held through until dio_complete +# (in the case of AIO-DIO this happens through the IO completion callback). +# +# 7) i_sem and i_alloc_sem are not held for the _nolock versions of write +# routines, as used by blockdev and XFS. Filesystems can specify the +# needs_special_locking parameter to __blockdev_direct_IO from their direct +# IO address space op accordingly. +# +# Note from Badari: +# Here is the locking (when needs_special_locking is true): +# +# (1) generic_file_*_write() holds i_sem (as before) and calls +# ->direct_IO(). blockdev_direct_IO gets i_alloc_sem and calls +# direct_io_worker(). +# +# (2) generic_file_*_read() does not hold any locks. blockdev_direct_IO() +# gets i_sem and then i_alloc_sem and calls direct_io_worker() to do the +# work. +# +# (3) direct_io_worker() does the work and drops i_sem after submitting IOs +# if appropriate and drops i_alloc_sem after completing IOs. 
+# +# fs/direct-io.c +# 2004/04/12 10:54:33-07:00 akpm@osdl.org +80 -13 +# O_DIRECT data exposure fixes +# +# fs/inode.c +# 2004/04/12 10:54:33-07:00 akpm@osdl.org +1 -0 +# O_DIRECT data exposure fixes +# +# fs/open.c +# 2004/04/12 10:54:33-07:00 akpm@osdl.org +2 -0 +# O_DIRECT data exposure fixes +# +# fs/xfs/linux/xfs_aops.c +# 2004/04/12 10:54:33-07:00 akpm@osdl.org +2 -1 +# O_DIRECT data exposure fixes +# +# include/linux/fs.h +# 2004/04/12 10:54:33-07:00 akpm@osdl.org +28 -3 +# O_DIRECT data exposure fixes +# +# mm/filemap.c +# 2004/04/12 10:54:33-07:00 akpm@osdl.org +41 -12 +# O_DIRECT data exposure fixes +# +diff -Nru a/fs/direct-io.c b/fs/direct-io.c +--- a/fs/direct-io.c Mon May 3 16:20:32 2004 ++++ b/fs/direct-io.c Mon May 3 16:20:32 2004 +@@ -52,6 +52,10 @@ + * + * If blkfactor is zero then the user's request was aligned to the filesystem's + * blocksize. ++ * ++ * needs_locking is set for regular files on direct-IO-naive filesystems. It ++ * determines whether we need to do the fancy locking which prevents direct-IO ++ * from being able to read uninitialised disk blocks. 
+ */ + + struct dio { +@@ -59,6 +63,7 @@ + struct bio *bio; /* bio under assembly */ + struct inode *inode; + int rw; ++ int needs_locking; /* doesn't change */ + unsigned blkbits; /* doesn't change */ + unsigned blkfactor; /* When we're using an alignment which + is finer than the filesystem's soft +@@ -206,6 +211,8 @@ + { + if (dio->end_io) + dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private); ++ if (dio->needs_locking) ++ up_read(&dio->inode->i_alloc_sem); + } + + /* +@@ -449,6 +456,7 @@ + unsigned long fs_count; /* Number of filesystem-sized blocks */ + unsigned long dio_count;/* Number of dio_block-sized blocks */ + unsigned long blkmask; ++ int beyond_eof = 0; + + /* + * If there was a memory error and we've overwritten all the +@@ -466,8 +474,19 @@ + if (dio_count & blkmask) + fs_count++; + ++ if (dio->needs_locking) { ++ if (dio->block_in_file >= (i_size_read(dio->inode) >> ++ dio->blkbits)) ++ beyond_eof = 1; ++ } ++ /* ++ * For writes inside i_size we forbid block creations: only ++ * overwrites are permitted. We fall back to buffered writes ++ * at a higher level for inside-i_size block-instantiating ++ * writes. 
++ */ + ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count, +- map_bh, dio->rw == WRITE); ++ map_bh, (dio->rw == WRITE) && beyond_eof); + } + return ret; + } +@@ -774,6 +793,10 @@ + if (!buffer_mapped(map_bh)) { + char *kaddr; + ++ /* AKPM: eargh, -ENOTBLK is a hack */ ++ if (dio->rw == WRITE) ++ return -ENOTBLK; ++ + if (dio->block_in_file >= + i_size_read(dio->inode)>>blkbits) { + /* We hit eof */ +@@ -839,21 +862,21 @@ + return ret; + } + ++/* ++ * Releases both i_sem and i_alloc_sem ++ */ + static int + direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, + const struct iovec *iov, loff_t offset, unsigned long nr_segs, +- unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io) ++ unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io, ++ struct dio *dio) + { + unsigned long user_addr; + int seg; + int ret = 0; + int ret2; +- struct dio *dio; + size_t bytes; + +- dio = kmalloc(sizeof(*dio), GFP_KERNEL); +- if (!dio) +- return -ENOMEM; + dio->is_async = !is_sync_kiocb(iocb); + + dio->bio = NULL; +@@ -864,7 +887,6 @@ + dio->start_zero_done = 0; + dio->block_in_file = offset >> blkbits; + dio->blocks_available = 0; +- + dio->cur_page = NULL; + + dio->boundary = 0; +@@ -953,6 +975,13 @@ + dio_cleanup(dio); + + /* ++ * All new block allocations have been performed. We can let i_sem ++ * go now. ++ */ ++ if (dio->needs_locking) ++ up(&dio->inode->i_sem); ++ ++ /* + * OK, all BIOs are submitted, so we can decrement bio_count to truly + * reflect the number of to-be-processed BIOs. + */ +@@ -987,11 +1016,17 @@ + + /* + * This is a library function for use by filesystem drivers. ++ * ++ * For writes to S_ISREG files, we are called under i_sem and return with i_sem ++ * held, even though it is internally dropped. ++ * ++ * For writes to S_ISBLK files, i_sem is not held on entry; it is never taken. 
+ */ + int +-blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ++__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, +- unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io) ++ unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, ++ int needs_special_locking) + { + int seg; + size_t size; +@@ -1000,6 +1035,8 @@ + unsigned bdev_blkbits = 0; + unsigned blocksize_mask = (1 << blkbits) - 1; + ssize_t retval = -EINVAL; ++ struct dio *dio; ++ int needs_locking; + + if (bdev) + bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); +@@ -1025,10 +1062,40 @@ + } + } + +- retval = direct_io_worker(rw, iocb, inode, iov, offset, +- nr_segs, blkbits, get_blocks, end_io); ++ dio = kmalloc(sizeof(*dio), GFP_KERNEL); ++ retval = -ENOMEM; ++ if (!dio) ++ goto out; ++ ++ /* ++ * For regular files, ++ * readers need to grab i_sem and i_alloc_sem ++ * writers need to grab i_alloc_sem only (i_sem is already held) ++ */ ++ needs_locking = 0; ++ if (S_ISREG(inode->i_mode) && needs_special_locking) { ++ needs_locking = 1; ++ if (rw == READ) { ++ struct address_space *mapping; ++ ++ mapping = iocb->ki_filp->f_mapping; ++ down(&inode->i_sem); ++ retval = filemap_write_and_wait(mapping); ++ if (retval) { ++ up(&inode->i_sem); ++ kfree(dio); ++ goto out; ++ } ++ } ++ down_read(&inode->i_alloc_sem); ++ } ++ dio->needs_locking = needs_locking; ++ ++ retval = direct_io_worker(rw, iocb, inode, iov, offset, ++ nr_segs, blkbits, get_blocks, end_io, dio); ++ if (needs_locking && rw == WRITE) ++ down(&inode->i_sem); + out: + return retval; + } +- +-EXPORT_SYMBOL(blockdev_direct_IO); ++EXPORT_SYMBOL(__blockdev_direct_IO); +diff -Nru a/fs/inode.c b/fs/inode.c +--- a/fs/inode.c Mon May 3 16:20:32 2004 ++++ b/fs/inode.c Mon May 3 16:20:32 2004 +@@ -185,6 +185,7 @@ + INIT_LIST_HEAD(&inode->i_dentry); + INIT_LIST_HEAD(&inode->i_devices); + sema_init(&inode->i_sem, 1); ++ 
init_rwsem(&inode->i_alloc_sem); + INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); + spin_lock_init(&inode->i_data.page_lock); + init_MUTEX(&inode->i_data.i_shared_sem); +diff -Nru a/fs/open.c b/fs/open.c +--- a/fs/open.c Mon May 3 16:20:32 2004 ++++ b/fs/open.c Mon May 3 16:20:32 2004 +@@ -192,7 +192,9 @@ + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + down(&dentry->d_inode->i_sem); ++ down_write(&dentry->d_inode->i_alloc_sem); + err = notify_change(dentry, &newattrs); ++ up_write(&dentry->d_inode->i_alloc_sem); + up(&dentry->d_inode->i_sem); + return err; + } +diff -Nru a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c +--- a/fs/xfs/linux/xfs_aops.c Mon May 3 16:20:32 2004 ++++ b/fs/xfs/linux/xfs_aops.c Mon May 3 16:20:32 2004 +@@ -1032,7 +1032,8 @@ + if (error) + return -error; + +- return blockdev_direct_IO(rw, iocb, inode, iomap.iomap_target->pbr_bdev, ++ return blockdev_direct_IO_no_locking(rw, iocb, inode, ++ iomap.iomap_target->pbr_bdev, + iov, offset, nr_segs, + linvfs_get_blocks_direct, + linvfs_unwritten_convert_direct); +diff -Nru a/include/linux/fs.h b/include/linux/fs.h +--- a/include/linux/fs.h Mon May 3 16:20:32 2004 ++++ b/include/linux/fs.h Mon May 3 16:20:32 2004 +@@ -397,6 +397,7 @@ + unsigned short i_bytes; + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + struct semaphore i_sem; ++ struct rw_semaphore i_alloc_sem; + struct inode_operations *i_op; + struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + struct super_block *i_sb; +@@ -1235,6 +1236,7 @@ + extern int filemap_fdatawrite(struct address_space *); + extern int filemap_flush(struct address_space *); + extern int filemap_fdatawait(struct address_space *); ++extern int filemap_write_and_wait(struct address_space *mapping); + extern void sync_supers(void); + extern void sync_filesystems(int wait); + extern void emergency_sync(void); +@@ -1347,9 +1349,6 @@ + file_ra_state_init(struct file_ra_state *ra, struct address_space 
*mapping); + extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs); +-extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, +- struct block_device *bdev, const struct iovec *iov, loff_t offset, +- unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io); + extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos); + ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, +@@ -1369,6 +1368,32 @@ + ppos, + desc, + actor); ++} ++ ++int __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ++ struct block_device *bdev, const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, ++ int needs_special_locking); ++ ++/* ++ * For filesystems which need locking between buffered and direct access ++ */ ++static inline int blockdev_direct_IO(int rw, struct kiocb *iocb, ++ struct inode *inode, struct block_device *bdev, const struct iovec *iov, ++ loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, ++ dio_iodone_t end_io) ++{ ++ return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, ++ nr_segs, get_blocks, end_io, 1); ++} ++ ++static inline int blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, ++ struct inode *inode, struct block_device *bdev, const struct iovec *iov, ++ loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, ++ dio_iodone_t end_io) ++{ ++ return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, ++ nr_segs, get_blocks, end_io, 0); + } + + extern struct file_operations generic_ro_fops; +diff -Nru a/mm/filemap.c b/mm/filemap.c +--- a/mm/filemap.c Mon May 3 16:20:32 2004 ++++ b/mm/filemap.c Mon May 3 16:20:32 2004 +@@ -73,6 +73,9 @@ + * ->mmap_sem + * ->i_sem (msync) + * ++ * ->i_sem ++ * ->i_alloc_sem (various) ++ * + * ->inode_lock + * ->sb_lock (fs/fs-writeback.c) + 
* ->mapping->page_lock (__sync_single_inode) +@@ -228,6 +231,18 @@ + + EXPORT_SYMBOL(filemap_fdatawait); + ++int filemap_write_and_wait(struct address_space *mapping) ++{ ++ int retval = 0; ++ ++ if (mapping->nrpages) { ++ retval = filemap_fdatawrite(mapping); ++ if (retval == 0) ++ retval = filemap_fdatawait(mapping); ++ } ++ return retval; ++} ++ + /* + * This adds a page to the page cache, starting out as locked, unreferenced, + * not uptodate and with no errors. +@@ -1716,6 +1731,7 @@ + + /* + * Write to a file through the page cache. ++ * Called under i_sem for S_ISREG files. + * + * We put everything into the page cache prior to writing it. This is not a + * problem when writing full pages. With partial pages, however, we first have +@@ -1806,12 +1822,19 @@ + /* + * Sync the fs metadata but not the minor inode changes and + * of course not the data as we did direct DMA for the IO. ++ * i_sem is held, which protects generic_osync_inode() from ++ * livelocking. + */ + if (written >= 0 && file->f_flags & O_SYNC) + status = generic_osync_inode(inode, mapping, OSYNC_METADATA); + if (written >= 0 && !is_sync_kiocb(iocb)) + written = -EIOCBQUEUED; +- goto out_status; ++ if (written != -ENOTBLK) ++ goto out_status; ++ /* ++ * direct-io write to a hole: fall through to buffered I/O ++ */ ++ written = 0; + } + + buf = iov->iov_base; +@@ -1900,6 +1923,14 @@ + OSYNC_METADATA|OSYNC_DATA); + } + ++ /* ++ * If we get here for O_DIRECT writes then we must have fallen through ++ * to buffered writes (block instantiation inside i_size). So we sync ++ * the file data here, to try to honour O_DIRECT expectations. ++ */ ++ if (unlikely(file->f_flags & O_DIRECT) && written) ++ status = filemap_write_and_wait(mapping); ++ + out_status: + err = written ? 
written : status; + out: +@@ -1991,6 +2022,9 @@ + + EXPORT_SYMBOL(generic_file_writev); + ++/* ++ * Called under i_sem for writes to S_ISREG files ++ */ + ssize_t + generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +@@ -1999,18 +2033,13 @@ + struct address_space *mapping = file->f_mapping; + ssize_t retval; + +- if (mapping->nrpages) { +- retval = filemap_fdatawrite(mapping); +- if (retval == 0) +- retval = filemap_fdatawait(mapping); +- if (retval) +- goto out; ++ retval = filemap_write_and_wait(mapping); ++ if (retval == 0) { ++ retval = mapping->a_ops->direct_IO(rw, iocb, iov, ++ offset, nr_segs); ++ if (rw == WRITE && mapping->nrpages) ++ invalidate_inode_pages2(mapping); + } +- +- retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs); +- if (rw == WRITE && mapping->nrpages) +- invalidate_inode_pages2(mapping); +-out: + return retval; + } + diff --git a/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch b/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch new file mode 100644 index 0000000000000000000000000000000000000000..9cdd51a4b0d6ddcdf65d0e42e622877d955109c2 --- /dev/null +++ b/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch @@ -0,0 +1,31 @@ + include/linux/fs.h | 1 + + mm/filemap.c | 3 +++ + 2 files changed, 4 insertions(+) + +Index: linux-2.6.4-30.1/include/linux/fs.h +=================================================================== +--- linux-2.6.4-30.1.orig/include/linux/fs.h 2004-04-02 03:20:19.000000000 -0500 ++++ linux-2.6.4-30.1/include/linux/fs.h 2004-04-02 03:20:19.000000000 -0500 +@@ -320,6 +320,7 @@ + int (*releasepage) (struct page *, int); + int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, + loff_t offset, unsigned long nr_segs); ++ void (*removepage)(struct page *); /* called when page gets removed from the inode */ + }; + + struct backing_dev_info; +Index: linux-2.6.4-30.1/mm/filemap.c 
+=================================================================== +--- linux-2.6.4-30.1.orig/mm/filemap.c 2004-04-02 03:19:42.000000000 -0500 ++++ linux-2.6.4-30.1/mm/filemap.c 2004-04-02 03:23:10.000000000 -0500 +@@ -102,6 +102,9 @@ + { + struct address_space *mapping = page->mapping; + ++ if (mapping->a_ops->removepage) ++ mapping->a_ops->removepage(page); ++ + radix_tree_delete(&mapping->page_tree, page->index); + list_del(&page->list); + page->mapping = NULL; + diff --git a/lustre/kernel_patches/series/vanilla-2.6.5 b/lustre/kernel_patches/series/vanilla-2.6.5 new file mode 100644 index 0000000000000000000000000000000000000000..7a85e720122f4ffc01e01f2b263c535745f0e6b1 --- /dev/null +++ b/lustre/kernel_patches/series/vanilla-2.6.5 @@ -0,0 +1,14 @@ +lustre_version.patch +o_direct-2.6.5.patch +vfs_intent-2.6-suse.patch +vfs_nointent-2.6-suse.patch +vfs_races-2.6-suse.patch +ext3-wantedi-misc-2.6-suse.patch +nfs-cifs-intent-vanilla-2.6.patch +iopen-misc-2.6-suse.patch +export-truncate-2.6-suse.patch +export_symbols-2.6-suse.patch +removepage-vanilla-2.6.5.patch +dev_read_only-2.6-suse.patch +export-2.6-suse.patch +header-guards-2.6-suse.patch