diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch index 1da5f7c8954561459abaedb189a6e7a9586116c5..ed392a63aa7e03849b9eeaa3d7d09d7e67737b68 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch @@ -1,8 +1,8 @@ -Index: linux-2.4.20/fs/ext3/extents.c +Index: linux-2.4.24/fs/ext3/extents.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.20/fs/ext3/extents.c 2004-01-24 14:19:29.000000000 +0300 -@@ -0,0 +1,2224 @@ +--- linux-2.4.24.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.24/fs/ext3/extents.c 2004-01-26 20:52:25.000000000 +0300 +@@ -0,0 +1,2260 @@ +/* + * Copyright (C) 2003 Alex Tomas <alex@clusterfs.com> + * @@ -268,6 +268,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extent_idx *ix; + int l = 0, k, r; + ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + EXT_ASSERT(eh->e_num <= eh->e_max); + EXT_ASSERT(eh->e_num > 0); + @@ -303,6 +304,12 @@ Index: linux-2.4.20/fs/ext3/extents.c + + chix = ix = EXT_FIRST_INDEX(eh); + for (k = 0; k < eh->e_num; k++, ix++) { ++ if (k != 0 && ix->e_block <= ix[-1].e_block) { ++ printk("k=%d, ix=0x%p, first=0x%p\n", k, ++ ix, EXT_FIRST_INDEX(eh)); ++ printk("%u <= %u\n", ++ ix->e_block,ix[-1].e_block); ++ } + EXT_ASSERT(k == 0 || ix->e_block > ix[-1].e_block); + if (block < ix->e_block) + break; @@ -325,6 +332,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extent *ex; + int l = 0, k, r; + ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + EXT_ASSERT(eh->e_num <= eh->e_max); + + if (eh->e_num == 0) { @@ -388,6 +396,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + eh = EXT_ROOT_HDR(tree); + eh->e_depth = 0; + eh->e_num = 0; ++ eh->e_magic = EXT3_EXT_MAGIC; + eh->e_max = ext3_ext_space_root(tree); + ext3_ext_mark_root_dirty(handle, tree); + 
return 0; @@ -408,6 +417,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + eh = EXT_ROOT_HDR(tree); + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->e_max); ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + EXT_ASSERT(i == 0 || eh->e_num > 0); + + /* account possible depth increase */ @@ -500,6 +510,9 @@ Index: linux-2.4.20/fs/ext3/extents.c + ix->e_leaf = ptr; + curp->p_hdr->e_num++; + ++ EXT_ASSERT(curp->p_hdr->e_num <= curp->p_hdr->e_max); ++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); ++ + err = ext3_ext_dirty(handle, tree, curp); + ext3_std_error(tree->inode->i_sb, err); + @@ -534,7 +547,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + + /* if current leaf will be splitted, then we should use + * border from split point */ -+ ++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].e_block; + ext_debug(tree, "leaf will be splitted." @@ -589,6 +602,8 @@ Index: linux-2.4.20/fs/ext3/extents.c + neh = EXT_BLOCK_HDR(bh); + neh->e_num = 0; + neh->e_max = ext3_ext_space_block(tree); ++ neh->e_magic = EXT3_EXT_MAGIC; ++ neh->e_depth = 0; + ex = EXT_FIRST_EXTENT(neh); + + /* move remain of path[depth] to the new leaf */ @@ -649,33 +664,33 @@ Index: linux-2.4.20/fs/ext3/extents.c + + neh = EXT_BLOCK_HDR(bh); + neh->e_num = 1; ++ neh->e_magic = EXT3_EXT_MAGIC; + neh->e_max = ext3_ext_space_block_idx(tree); ++ neh->e_depth = depth - i; + fidx = EXT_FIRST_INDEX(neh); + fidx->e_block = border; + fidx->e_leaf = oldblock; + -+ ext_debug(tree, "int.index at %d (block %u): %d -> %d\n", -+ i, (unsigned) newblock, -+ (int) border, -+ (int) oldblock); ++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", ++ i, newblock, border, oldblock); + /* copy indexes */ + m = 0; + path[i].p_idx++; ++ + ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == + EXT_LAST_INDEX(path[i].p_hdr)); -+ while 
(path[i].p_idx <= -+ EXT_MAX_INDEX(path[i].p_hdr)) { ++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { + ext_debug(tree, "%d: move %d:%d in new index\n", + i, path[i].p_idx->e_block, + path[i].p_idx->e_leaf); + memmove(++fidx, path[i].p_idx++, + sizeof(struct ext3_extent_idx)); + neh->e_num++; ++ EXT_ASSERT(neh->e_num <= neh->e_max); + m++; + } -+ + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + @@ -734,12 +749,12 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ -+ struct buffer_head *bh; + struct ext3_ext_path *curp = path; + struct ext3_extent_header *neh; + struct ext3_extent_idx *fidx; -+ int len, err = 0; ++ struct buffer_head *bh; + unsigned long newblock; ++ int err = 0; + + newblock = ext3_ext_new_block(handle, tree, path, newext, &err); + if (newblock == 0) @@ -759,14 +774,17 @@ Index: linux-2.4.20/fs/ext3/extents.c + } + + /* move top-level index/leaf into new block */ -+ len = sizeof(struct ext3_extent_header) + -+ sizeof(struct ext3_extent) * curp->p_hdr->e_max; -+ EXT_ASSERT(len >= 0 && len < 4096); -+ memmove(bh->b_data, curp->p_hdr, len); ++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); + + /* set size of new block */ + neh = EXT_BLOCK_HDR(bh); -+ neh->e_max = ext3_ext_space_block(tree); ++ /* old root could have indexes or leaves ++ * so calculate e_max right way */ ++ if (EXT_DEPTH(tree)) ++ neh->e_max = ext3_ext_space_block_idx(tree); ++ else ++ neh->e_max = ext3_ext_space_block(tree); ++ neh->e_magic = EXT3_EXT_MAGIC; + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + @@ -777,9 +795,11 @@ Index: linux-2.4.20/fs/ext3/extents.c + if ((err = ext3_ext_get_access(handle, tree, curp))) + goto out; + ++ curp->p_hdr->e_magic = EXT3_EXT_MAGIC; + curp->p_hdr->e_max = ext3_ext_space_root_idx(tree); + curp->p_hdr->e_num = 1; + curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); ++ /* FIXME: it works, but actually path[0] can be index */ + curp->p_idx->e_block = 
EXT_FIRST_EXTENT(path[0].p_hdr)->e_block; + curp->p_idx->e_leaf = newblock; + @@ -839,7 +859,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + path = ext3_ext_find_extent(tree, newext->e_block, path); + if (IS_ERR(path)) + err = PTR_ERR(path); -+ ++ + /* + * only first (depth 0 -> 1) produces free space + * in all other cases we have to split growed tree @@ -1003,12 +1023,11 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ -+ int depth, len; + struct ext3_extent_header * eh; -+ struct ext3_extent *ex; ++ struct ext3_extent *ex, *fex; + struct ext3_extent *nearex; /* nearest extent */ + struct ext3_ext_path *npath = NULL; -+ int err; ++ int depth, len, err, next; + + depth = EXT_DEPTH(tree); + ex = path[depth].p_ext; @@ -1028,36 +1047,41 @@ Index: linux-2.4.20/fs/ext3/extents.c +repeat: + depth = EXT_DEPTH(tree); + eh = path[depth].p_hdr; -+ if (eh->e_num == eh->e_max) { -+ /* probably next leaf has space for us? */ -+ int next = ext3_ext_next_leaf_block(tree, path); -+ if (next != 0xffffffff) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->e_num < eh->e_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->e_num); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->e_num, eh->e_max); ++ if (eh->e_num < eh->e_max) ++ goto has_space; ++ ++ /* probably next leaf has space for us? 
*/ ++ fex = EXT_LAST_EXTENT(eh); ++ next = ext3_ext_next_leaf_block(tree, path); ++ if (newext->e_block > fex->e_block && next != 0xffffffff) { ++ ext_debug(tree, "next leaf block - %d\n", next); ++ EXT_ASSERT(!npath); ++ npath = ext3_ext_find_extent(tree, next, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ EXT_ASSERT(npath->p_depth == path->p_depth); ++ eh = npath[depth].p_hdr; ++ if (eh->e_num < eh->e_max) { ++ ext_debug(tree, "next leaf isnt full(%d)\n", ++ eh->e_num); ++ path = npath; ++ goto repeat; + } -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ goto repeat; ++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", ++ eh->e_num, eh->e_max); + } + ++ /* ++ * there is no free space in found leaf ++ * we're gonna add new leaf in the tree ++ */ ++ err = ext3_ext_create_new_leaf(handle, tree, path, newext); ++ if (err) ++ goto cleanup; ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ ++has_space: + nearex = path[depth].p_ext; + + if ((err = ext3_ext_get_access(handle, tree, path + depth))) @@ -1091,21 +1115,20 @@ Index: linux-2.4.20/fs/ext3/extents.c + "move %d from 0x%p to 0x%p\n", + newext->e_block, newext->e_start, newext->e_num, + nearex, len, nearex + 1, nearex + 2); -+ + memmove(nearex + 1, nearex, len); + path[depth].p_ext = nearex; + } + -+ if (!err) { -+ eh->e_num++; -+ nearex = path[depth].p_ext; -+ nearex->e_block = newext->e_block; -+ nearex->e_start = newext->e_start; -+ nearex->e_num = newext->e_num; ++ eh->e_num++; ++ nearex = path[depth].p_ext; ++ nearex->e_block = newext->e_block; ++ nearex->e_start = newext->e_start; ++ nearex->e_num = newext->e_num; + -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); -+ } ++ /* time to correct all indexes above */ ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ if (err) ++ goto cleanup; + + 
err = ext3_ext_dirty(handle, tree, path + depth); + @@ -1241,6 +1264,9 @@ Index: linux-2.4.20/fs/ext3/extents.c + int depth = EXT_DEPTH(tree); + struct ext3_extent *ex, gex; + ++ if (!tree->cex) ++ return; ++ + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ @@ -1291,7 +1317,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + ex->e_block = cex->e_block; + ex->e_start = cex->e_start; + ex->e_num = cex->e_num; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu(gap)\n", ++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->e_block, + (unsigned long) ex->e_num, @@ -1437,6 +1463,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + eh = path[depth].p_hdr; + EXT_ASSERT(eh); + EXT_ASSERT(eh->e_num <= eh->e_max); ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + + /* find where to start removing */ + le = ex = EXT_LAST_EXTENT(eh); @@ -1638,6 +1665,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + } + + EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max); ++ EXT_ASSERT(path[i].p_hdr->e_magic == EXT3_EXT_MAGIC); + + if (!path[i].p_idx) { + /* this level hasn't touched yet */ @@ -1712,8 +1740,16 @@ Index: linux-2.4.20/fs/ext3/extents.c + * possible initialization would be here + */ + -+ if (test_opt(sb, EXTENTS)) -+ printk("EXT3-fs: file extents enabled\n"); ++ if (test_opt(sb, EXTENTS)) { ++ printk("EXT3-fs: file extents enabled"); ++#ifdef AGRESSIVE_TEST ++ printk(", agressive tests"); ++#endif ++#ifdef CHECK_BINSEARCH ++ printk(", check binsearch"); ++#endif ++ printk("\n"); ++ } +} + +/* @@ -1864,7 +1900,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + tree->remove_extent_credits = ext3_remove_blocks_credits; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = NULL; /* FIXME: add cache store later */ ++ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; +} + +#if 0 @@ -2227,11 +2263,11 @@ Index: linux-2.4.20/fs/ext3/extents.c + return err; +} + 
-Index: linux-2.4.20/fs/ext3/ialloc.c +Index: linux-2.4.24/fs/ext3/ialloc.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/ialloc.c 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/fs/ext3/ialloc.c 2004-01-24 00:45:20.000000000 +0300 -@@ -593,11 +593,13 @@ +--- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/ialloc.c 2004-01-24 16:58:08.000000000 +0300 +@@ -592,11 +592,13 @@ iloc.bh = NULL; goto fail; } @@ -2247,10 +2283,10 @@ Index: linux-2.4.20/fs/ext3/ialloc.c unlock_super (sb); if(DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); -Index: linux-2.4.20/fs/ext3/inode.c +Index: linux-2.4.24/fs/ext3/inode.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/inode.c 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/fs/ext3/inode.c 2004-01-24 04:34:04.000000000 +0300 +--- linux-2.4.24.orig/fs/ext3/inode.c 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/inode.c 2004-01-24 16:58:08.000000000 +0300 @@ -848,6 +848,15 @@ goto reread; } @@ -2304,7 +2340,7 @@ Index: linux-2.4.20/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ -@@ -2537,6 +2549,9 @@ +@@ -2536,6 +2548,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; int ret; @@ -2314,7 +2350,7 @@ Index: linux-2.4.20/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -@@ -2973,7 +2988,7 @@ +@@ -2972,7 +2987,7 @@ /* alloc blocks one by one */ for (i = 0; i < nblocks; i++) { @@ -2323,7 +2359,7 @@ Index: linux-2.4.20/fs/ext3/inode.c &bh_tmp, 1); if (ret) break; -@@ -3049,7 +3064,7 @@ +@@ -3048,7 +3063,7 @@ if (blocks[i] != 0) continue; @@ -2332,10 +2368,10 @@ Index: linux-2.4.20/fs/ext3/inode.c if (rc) { printk(KERN_INFO "ext3_map_inode_page: error %d " "allocating block %ld\n", rc, iblock); -Index: linux-2.4.20/fs/ext3/Makefile +Index: linux-2.4.24/fs/ext3/Makefile =================================================================== ---- linux-2.4.20.orig/fs/ext3/Makefile 2004-01-23 19:00:42.000000000 +0300 -+++ linux-2.4.20/fs/ext3/Makefile 2004-01-24 00:45:20.000000000 +0300 +--- linux-2.4.24.orig/fs/ext3/Makefile 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/Makefile 2004-01-24 16:58:08.000000000 +0300 @@ -13,7 +13,7 @@ obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ @@ -2345,11 +2381,11 @@ Index: linux-2.4.20/fs/ext3/Makefile obj-m := $(O_TARGET) export-objs += xattr.o -Index: linux-2.4.20/fs/ext3/super.c +Index: linux-2.4.24/fs/ext3/super.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/super.c 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/fs/ext3/super.c 2004-01-24 04:30:14.000000000 +0300 -@@ -623,6 +623,7 @@ +--- linux-2.4.24.orig/fs/ext3/super.c 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/super.c 2004-01-24 16:58:08.000000000 +0300 +@@ -530,6 +530,7 @@ int i; J_ASSERT(sbi->s_delete_inodes == 0); @@ -2357,7 +2393,7 @@ Index: linux-2.4.20/fs/ext3/super.c ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -796,6 +797,10 @@ +@@ -702,6 +703,10 @@ return 0; } } @@ -2368,7 +2404,7 @@ Index: linux-2.4.20/fs/ext3/super.c 
else if (!strcmp (this_char, "grpid") || !strcmp (this_char, "bsdgroups")) set_opt (*mount_options, GRPID); -@@ -1485,6 +1490,8 @@ +@@ -1392,6 +1397,8 @@ test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": "writeback"); @@ -2377,11 +2413,11 @@ Index: linux-2.4.20/fs/ext3/super.c return sb; failed_mount3: -Index: linux-2.4.20/fs/ext3/ioctl.c +Index: linux-2.4.24/fs/ext3/ioctl.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/ioctl.c 2004-01-13 17:00:09.000000000 +0300 -+++ linux-2.4.20/fs/ext3/ioctl.c 2004-01-24 14:54:31.000000000 +0300 -@@ -189,6 +189,10 @@ +--- linux-2.4.24.orig/fs/ext3/ioctl.c 2004-01-14 02:58:42.000000000 +0300 ++++ linux-2.4.24/fs/ext3/ioctl.c 2004-01-24 16:58:08.000000000 +0300 +@@ -174,6 +174,10 @@ return ret; } #endif @@ -2392,10 +2428,10 @@ Index: linux-2.4.20/fs/ext3/ioctl.c default: return -ENOTTY; } -Index: linux-2.4.20/include/linux/ext3_fs.h +Index: linux-2.4.24/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.20.orig/include/linux/ext3_fs.h 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/include/linux/ext3_fs.h 2004-01-24 01:28:06.000000000 +0300 +--- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_fs.h 2004-01-24 16:58:08.000000000 +0300 @@ -184,6 +184,7 @@ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ @@ -2423,7 +2459,7 @@ Index: linux-2.4.20/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -@@ -687,6 +693,7 @@ +@@ -688,6 +694,7 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); /* inode.c */ @@ -2431,7 +2467,7 @@ Index: linux-2.4.20/include/linux/ext3_fs.h extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); extern struct 
buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -767,6 +774,14 @@ +@@ -769,6 +776,14 @@ extern struct inode_operations ext3_symlink_inode_operations; extern struct inode_operations ext3_fast_symlink_inode_operations; @@ -2446,11 +2482,11 @@ Index: linux-2.4.20/include/linux/ext3_fs.h #endif /* __KERNEL__ */ -Index: linux-2.4.20/include/linux/ext3_extents.h +Index: linux-2.4.24/include/linux/ext3_extents.h =================================================================== ---- linux-2.4.20.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.20/include/linux/ext3_extents.h 2004-01-24 15:15:11.000000000 +0300 -@@ -0,0 +1,207 @@ +--- linux-2.4.24.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_extents.h 2004-01-24 19:28:54.000000000 +0300 +@@ -0,0 +1,212 @@ +/* + * Copyright (C) 2003 Alex Tomas <alex@clusterfs.com> + * @@ -2468,6 +2504,8 @@ Index: linux-2.4.20/include/linux/ext3_extents.h + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + ++#ifndef _LINUX_EXT3_EXTENTS ++#define _LINUX_EXT3_EXTENTS + +/* + * with AGRESSIVE_TEST defined capacity of index/leaf blocks @@ -2544,6 +2582,8 @@ Index: linux-2.4.20/include/linux/ext3_extents.h + __u16 e_depth; /* has tree real underlaying blocks? 
*/ +}; + ++#define EXT3_EXT_MAGIC 0xf301 ++ +/* + * array of ext3_ext_path contains path to some extent + * creation/lookup routines use it for traversal/splitting/etc @@ -2657,4 +2697,18 @@ Index: linux-2.4.20/include/linux/ext3_extents.h +extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); +extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); + ++#endif /* _LINUX_EXT3_EXTENTS */ + +Index: linux-2.4.24/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-01-24 19:30:22.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-01-24 19:54:39.000000000 +0300 +@@ -76,6 +76,8 @@ + * by other means, so we have truncate_sem. + */ + struct rw_semaphore truncate_sem; ++ ++ __u32 i_cached_extent[3]; + }; + + #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch new file mode 100644 index 0000000000000000000000000000000000000000..ee68dc0128aa15ed595447adebf9d4d9f53f3796 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch @@ -0,0 +1,2723 @@ +Index: linux-2.4.21-suse2/fs/ext3/extents.c +=================================================================== +--- linux-2.4.21-suse2.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/extents.c 2004-01-26 22:51:30.000000000 +0300 +@@ -0,0 +1,2261 @@ ++/* ++ * Copyright (C) 2003 Alex Tomas <alex@clusterfs.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++/* ++ * Extents support for EXT3 ++ * ++ * TODO: ++ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() ++ * - ext3_ext_calc_credits() could take 'mergable' into account ++ * - ext3*_error() should be used in some situations ++ * - find_goal() [to be tested and improved] ++ * - smart tree reduction ++ * - arch-independence ++ * common on-disk format for big/little-endian arch ++ */ ++ ++#include <linux/module.h> ++#include <linux/fs.h> ++#include <linux/time.h> ++#include <linux/ext3_jbd.h> ++#include <linux/jbd.h> ++#include <linux/smp_lock.h> ++#include <linux/highuid.h> ++#include <linux/pagemap.h> ++#include <linux/quotaops.h> ++#include <linux/string.h> ++#include <linux/slab.h> ++#include <linux/locks.h> ++#include <linux/ext3_extents.h> ++#include <asm/uaccess.h> ++ ++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) ++{ ++ int err; ++ ++ if (handle->h_buffer_credits > needed) ++ return handle; ++ if (!ext3_journal_extend(handle, needed)) ++ return handle; ++ err = ext3_journal_restart(handle, needed); ++ ++ return handle; ++} ++ ++static int inline ++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->get_write_access) ++ return tree->get_write_access(h,tree->buffer); ++ else ++ return 0; ++} ++ ++static int inline ++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->mark_buffer_dirty) ++ return tree->mark_buffer_dirty(h,tree->buffer); ++ else ++ return 0; ++} ++ ++/* ++ * could 
return: ++ * - EROFS ++ * - ENOMEM ++ */ ++static int ext3_ext_get_access(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ if (path->p_bh) { ++ /* path points to block */ ++ return ext3_journal_get_write_access(handle, path->p_bh); ++ } ++ ++ /* path points to leaf/index in inode body */ ++ return ext3_ext_get_access_for_root(handle, tree); ++} ++ ++/* ++ * could return: ++ * - EROFS ++ * - ENOMEM ++ * - EIO ++ */ ++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ if (path->p_bh) { ++ /* path points to block */ ++ return ext3_journal_dirty_metadata(handle, path->p_bh); ++ } ++ ++ /* path points to leaf/index in inode body */ ++ return ext3_ext_mark_root_dirty(handle, tree); ++} ++ ++static int inline ++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, struct ext3_extent *ex, ++ int *err) ++{ ++ int goal, depth, newblock; ++ struct inode *inode; ++ ++ EXT_ASSERT(tree); ++ if (tree->new_block) ++ return tree->new_block(handle, tree, path, ex, err); ++ ++ inode = tree->inode; ++ depth = EXT_DEPTH(tree); ++ if (path && depth > 0) { ++ goal = path[depth-1].p_block; ++ } else { ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; ++ ++ bg_start = (ei->i_block_group * ++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ goal = bg_start + colour; ++ } ++ ++ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); ++ return newblock; ++} ++ ++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 6; ++#endif ++ return size; ++} ++ ++static inline int 
ext3_ext_space_block_idx(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 5; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 3; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->buffer_len - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 4; ++#endif ++ return size; ++} ++ ++static void ext3_ext_show_path(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int k, l = path->p_depth; ++ ++ ext_debug(tree, "path:"); ++ for (k = 0; k <= l; k++, path++) { ++ if (path->p_idx) { ++ ext_debug(tree, " %d->%d", path->p_idx->e_block, ++ path->p_idx->e_leaf); ++ } else if (path->p_ext) { ++ ext_debug(tree, " %d:%d:%d", ++ path->p_ext->e_block, ++ path->p_ext->e_num, ++ path->p_ext->e_start); ++ } else ++ ext_debug(tree, " []"); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *eh; ++ struct ext3_extent *ex; ++ int i; ++ ++ if (!path) ++ return; ++ ++ eh = path[depth].p_hdr; ++ ex = EXT_FIRST_EXTENT(eh); ++ ++ for (i = 0; i < eh->e_num; i++, ex++) { ++ ext_debug(tree, "%d:%d:%d ", ++ ex->e_block, ex->e_num, ex->e_start); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_drop_refs(struct ext3_ext_path *path) ++{ ++ int depth = path->p_depth; ++ int i; ++ ++ for (i = 0; i <= depth; i++, path++) ++ if (path->p_bh) { ++ brelse(path->p_bh); 
++ path->p_bh = NULL; ++ } ++} ++ ++/* ++ * binary search for closest index by given block ++ */ ++static inline void ++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent_idx *ix; ++ int l = 0, k, r; ++ ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->e_num <= eh->e_max); ++ EXT_ASSERT(eh->e_num > 0); ++ ++ ext_debug(tree, "binsearch for %d(idx): ", block); ++ ++ path->p_idx = ix = EXT_FIRST_INDEX(eh); ++ ++ r = k = eh->e_num; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ix[l + k].e_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } ++ ++ ix += l; ++ path->p_idx = ix; ++ ext_debug(tree, " -> %d->%d ", path->p_idx->e_block, path->p_idx->e_leaf); ++ ++ while (l++ < r) { ++ if (block < ix->e_block) ++ break; ++ path->p_idx = ix++; ++ } ++ ext_debug(tree, " -> %d->%d\n", path->p_idx->e_block, ++ path->p_idx->e_leaf); ++ ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent_idx *chix; ++ ++ chix = ix = EXT_FIRST_INDEX(eh); ++ for (k = 0; k < eh->e_num; k++, ix++) { ++ if (k != 0 && ix->e_block <= ix[-1].e_block) { ++ printk("k=%d, ix=0x%p, first=0x%p\n", k, ++ ix, EXT_FIRST_INDEX(eh)); ++ printk("%u <= %u\n", ++ ix->e_block,ix[-1].e_block); ++ } ++ EXT_ASSERT(k == 0 || ix->e_block > ix[-1].e_block); ++ if (block < ix->e_block) ++ break; ++ chix = ix; ++ } ++ EXT_ASSERT(chix == path->p_idx); ++ } ++#endif ++ ++} ++ ++/* ++ * binary search for closest extent by given block ++ */ ++static inline void ++ext3_ext_binsearch(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent *ex; ++ int l = 0, k, r; ++ ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->e_num <= eh->e_max); ++ ++ if (eh->e_num == 0) { ++ /* ++ * this leaf is empty yet: ++ * we get such a leaf in split/add case ++ */ ++ return; ++ } 
++ ++ ext_debug(tree, "binsearch for %d: ", block); ++ ++ path->p_ext = ex = EXT_FIRST_EXTENT(eh); ++ ++ r = k = eh->e_num; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ex[l + k].e_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } ++ ++ ex += l; ++ path->p_ext = ex; ++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->e_block, ++ path->p_ext->e_start, path->p_ext->e_num); ++ ++ while (l++ < r) { ++ if (block < ex->e_block) ++ break; ++ path->p_ext = ex++; ++ } ++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->e_block, ++ path->p_ext->e_start, path->p_ext->e_num); ++ ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent *chex; ++ ++ chex = ex = EXT_FIRST_EXTENT(eh); ++ for (k = 0; k < eh->e_num; k++, ex++) { ++ EXT_ASSERT(k == 0 || ex->e_block > ex[-1].e_block); ++ if (block < ex->e_block) ++ break; ++ chex = ex; ++ } ++ EXT_ASSERT(chex == path->p_ext); ++ } ++#endif ++ ++} ++ ++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *eh; ++ ++ BUG_ON(tree->buffer_len == 0); ++ ext3_ext_get_access_for_root(handle, tree); ++ eh = EXT_ROOT_HDR(tree); ++ eh->e_depth = 0; ++ eh->e_num = 0; ++ eh->e_magic = EXT3_EXT_MAGIC; ++ eh->e_max = ext3_ext_space_root(tree); ++ ext3_ext_mark_root_dirty(handle, tree); ++ return 0; ++} ++ ++struct ext3_ext_path * ++ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_extent_header *eh; ++ struct buffer_head *bh; ++ int depth, i, ppos = 0; ++ ++ EXT_ASSERT(tree); ++ EXT_ASSERT(tree->inode); ++ EXT_ASSERT(tree->root); ++ ++ eh = EXT_ROOT_HDR(tree); ++ i = depth = EXT_DEPTH(tree); ++ EXT_ASSERT(eh->e_max); ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(i == 0 || eh->e_num > 0); ++ ++ /* account possible depth increase */ ++ if (!path) { ++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), ++ GFP_NOFS); ++ if (!path) ++ return ERR_PTR(-ENOMEM); ++ } ++ memset(path, 0, 
sizeof(struct ext3_ext_path) * (depth + 1)); ++ path[0].p_hdr = eh; ++ ++ /* walk through the tree */ ++ while (i) { ++ ext_debug(tree, "depth %d: num %d, max %d\n", ++ ppos, eh->e_num, eh->e_max); ++ ext3_ext_binsearch_idx(tree, path + ppos, block); ++ path[ppos].p_block = path[ppos].p_idx->e_leaf; ++ path[ppos].p_depth = i; ++ path[ppos].p_ext = NULL; ++ ++ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); ++ if (!bh) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ return ERR_PTR(-EIO); ++ } ++ eh = EXT_BLOCK_HDR(bh); ++ ppos++; ++ EXT_ASSERT(ppos <= depth); ++ path[ppos].p_bh = bh; ++ path[ppos].p_hdr = eh; ++ i--; ++ } ++ ++ path[ppos].p_depth = i; ++ path[ppos].p_hdr = eh; ++ path[ppos].p_ext = NULL; ++ ++ /* find extent */ ++ ext3_ext_binsearch(tree, path + ppos, block); ++ ++ ext3_ext_show_path(tree, path); ++ ++ return path; ++} ++ ++/* ++ * insert new index [logical;ptr] into the block at cupr ++ * it check where to insert: before curp or after curp ++ */ ++static int ext3_ext_insert_index(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *curp, ++ int logical, int ptr) ++{ ++ struct ext3_extent_idx *ix; ++ int len, err; ++ ++ if ((err = ext3_ext_get_access(handle, tree, curp))) ++ return err; ++ ++ EXT_ASSERT(logical != curp->p_idx->e_block); ++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; ++ if (logical > curp->p_idx->e_block) { ++ /* insert after */ ++ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { ++ len = (len - 1) * sizeof(struct ext3_extent_idx); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert new index %d after: %d. " ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ (curp->p_idx + 1), (curp->p_idx + 2)); ++ memmove(curp->p_idx + 2, curp->p_idx + 1, len); ++ } ++ ix = curp->p_idx + 1; ++ } else { ++ /* insert before */ ++ len = len * sizeof(struct ext3_extent_idx); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert new index %d before: %d. 
" ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ curp->p_idx, (curp->p_idx + 1)); ++ memmove(curp->p_idx + 1, curp->p_idx, len); ++ ix = curp->p_idx; ++ } ++ ++ ix->e_block = logical; ++ ix->e_leaf = ptr; ++ curp->p_hdr->e_num++; ++ ++ EXT_ASSERT(curp->p_hdr->e_num <= curp->p_hdr->e_max); ++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); ++ ++ err = ext3_ext_dirty(handle, tree, curp); ++ ext3_std_error(tree->inode->i_sb, err); ++ ++ return err; ++} ++ ++/* ++ * routine inserts new subtree into the path, using free index entry ++ * at depth 'at: ++ * - allocates all needed blocks (new leaf and all intermediate index blocks) ++ * - makes decision where to split ++ * - moves remaining extens and index entries (right to the split point) ++ * into the newly allocated blocks ++ * - initialize subtree ++ */ ++static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext, int at) ++{ ++ struct buffer_head *bh = NULL; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *neh; ++ struct ext3_extent_idx *fidx; ++ struct ext3_extent *ex; ++ int i = at, k, m, a; ++ unsigned long newblock, oldblock, border; ++ int *ablocks = NULL; /* array of allocated blocks */ ++ int err = 0; ++ ++ /* make decision: where to split? */ ++ /* FIXME: now desicion is simplest: at current extent */ ++ ++ /* if current leaf will be splitted, then we should use ++ * border from split point */ ++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); ++ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { ++ border = path[depth].p_ext[1].e_block; ++ ext_debug(tree, "leaf will be splitted." ++ " next leaf starts at %d\n", ++ (int)border); ++ } else { ++ border = newext->e_block; ++ ext_debug(tree, "leaf will be added." ++ " next leaf starts at %d\n", ++ (int)border); ++ } ++ ++ /* ++ * if error occurs, then we break processing ++ * and turn filesystem read-only. 
so, index won't ++ * be inserted and tree will be in consistent ++ * state. next mount will repair buffers too ++ */ ++ ++ /* ++ * get array to track all allocated blocks ++ * we need this to handle errors and free blocks ++ * upon them ++ */ ++ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); ++ if (!ablocks) ++ return -ENOMEM; ++ memset(ablocks, 0, sizeof(unsigned long) * depth); ++ ++ /* allocate all needed blocks */ ++ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); ++ for (a = 0; a < depth - at; a++) { ++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); ++ if (newblock == 0) ++ goto cleanup; ++ ablocks[a] = newblock; ++ } ++ ++ /* initialize new leaf */ ++ newblock = ablocks[--a]; ++ EXT_ASSERT(newblock); ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) ++ goto cleanup; ++ ++ neh = EXT_BLOCK_HDR(bh); ++ neh->e_num = 0; ++ neh->e_max = ext3_ext_space_block(tree); ++ neh->e_magic = EXT3_EXT_MAGIC; ++ neh->e_depth = 0; ++ ex = EXT_FIRST_EXTENT(neh); ++ ++ /* move remain of path[depth] to the new leaf */ ++ EXT_ASSERT(path[depth].p_hdr->e_num == path[depth].p_hdr->e_max); ++ /* start copy from next extent */ ++ /* TODO: we could do it by single memmove */ ++ m = 0; ++ path[depth].p_ext++; ++ while (path[depth].p_ext <= ++ EXT_MAX_EXTENT(path[depth].p_hdr)) { ++ ext_debug(tree, "move %d:%d:%d in new leaf\n", ++ path[depth].p_ext->e_block, ++ path[depth].p_ext->e_start, ++ path[depth].p_ext->e_num); ++ memmove(ex++, path[depth].p_ext++, ++ sizeof(struct ext3_extent)); ++ neh->e_num++; ++ m++; ++ } ++ mark_buffer_uptodate(bh, 1); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto cleanup; ++ brelse(bh); ++ bh = NULL; ++ ++ /* correct old leaf */ ++ if (m) { ++ if ((err = ext3_ext_get_access(handle, tree, path))) ++ goto cleanup; ++ 
path[depth].p_hdr->e_num -= m; ++ if ((err = ext3_ext_dirty(handle, tree, path))) ++ goto cleanup; ++ ++ } ++ ++ /* create intermediate indexes */ ++ k = depth - at - 1; ++ EXT_ASSERT(k >= 0); ++ if (k) ++ ext_debug(tree, "create %d intermediate indices\n", k); ++ /* insert new index into current index block */ ++ /* current depth stored in i var */ ++ i = depth - 1; ++ while (k--) { ++ oldblock = newblock; ++ newblock = ablocks[--a]; ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) ++ goto cleanup; ++ ++ neh = EXT_BLOCK_HDR(bh); ++ neh->e_num = 1; ++ neh->e_magic = EXT3_EXT_MAGIC; ++ neh->e_max = ext3_ext_space_block_idx(tree); ++ neh->e_depth = depth - i; ++ fidx = EXT_FIRST_INDEX(neh); ++ fidx->e_block = border; ++ fidx->e_leaf = oldblock; ++ ++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", ++ i, newblock, border, oldblock); ++ /* copy indexes */ ++ m = 0; ++ path[i].p_idx++; ++ ++ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, ++ EXT_MAX_INDEX(path[i].p_hdr)); ++ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == ++ EXT_LAST_INDEX(path[i].p_hdr)); ++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ++ ext_debug(tree, "%d: move %d:%d in new index\n", ++ i, path[i].p_idx->e_block, ++ path[i].p_idx->e_leaf); ++ memmove(++fidx, path[i].p_idx++, ++ sizeof(struct ext3_extent_idx)); ++ neh->e_num++; ++ EXT_ASSERT(neh->e_num <= neh->e_max); ++ m++; ++ } ++ mark_buffer_uptodate(bh, 1); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto cleanup; ++ brelse(bh); ++ bh = NULL; ++ ++ /* correct old index */ ++ if (m) { ++ err = ext3_ext_get_access(handle, tree, path + i); ++ if (err) ++ goto cleanup; ++ path[i].p_hdr->e_num -= m; ++ err = ext3_ext_dirty(handle, tree, path + i); ++ if (err) ++ goto cleanup; ++ } ++ ++ i--; ++ } ++ ++ /* insert new index */ ++ if (!err) ++ err = 
ext3_ext_insert_index(handle, tree, path + at, ++ border, newblock); ++ ++cleanup: ++ if (bh) { ++ if (buffer_locked(bh)) ++ unlock_buffer(bh); ++ brelse(bh); ++ } ++ ++ if (err) { ++ /* free all allocated blocks in error case */ ++ for (i = 0; i < depth; i++) ++ if (!ablocks[i]) ++ continue; ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ } ++ kfree(ablocks); ++ ++ return err; ++} ++ ++/* ++ * routine implements tree growing procedure: ++ * - allocates new block ++ * - moves top-level data (index block or leaf) into the new block ++ * - initialize new top-level, creating index that points to the ++ * just created block ++ */ ++static int ext3_ext_grow_indepth(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_ext_path *curp = path; ++ struct ext3_extent_header *neh; ++ struct ext3_extent_idx *fidx; ++ struct buffer_head *bh; ++ unsigned long newblock; ++ int err = 0; ++ ++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); ++ if (newblock == 0) ++ return err; ++ ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ ext3_std_error(tree->inode->i_sb, err); ++ return err; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) { ++ unlock_buffer(bh); ++ goto out; ++ } ++ ++ /* move top-level index/leaf into new block */ ++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); ++ ++ /* set size of new block */ ++ neh = EXT_BLOCK_HDR(bh); ++ /* old root could have indexes or leaves ++ * so calculate e_max right way */ ++ if (EXT_DEPTH(tree)) ++ neh->e_max = ext3_ext_space_block_idx(tree); ++ else ++ neh->e_max = ext3_ext_space_block(tree); ++ neh->e_magic = EXT3_EXT_MAGIC; ++ mark_buffer_uptodate(bh, 1); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto out; ++ ++ /* create index in new top-level index: num,max,pointer */ ++ if ((err = ext3_ext_get_access(handle, tree, 
curp))) ++ goto out; ++ ++ curp->p_hdr->e_magic = EXT3_EXT_MAGIC; ++ curp->p_hdr->e_max = ext3_ext_space_root_idx(tree); ++ curp->p_hdr->e_num = 1; ++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); ++ /* FIXME: it works, but actually path[0] can be index */ ++ curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block; ++ curp->p_idx->e_leaf = newblock; ++ ++ neh = EXT_ROOT_HDR(tree); ++ fidx = EXT_FIRST_INDEX(neh); ++ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", ++ neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf); ++ ++ neh->e_depth = path->p_depth + 1; ++ err = ext3_ext_dirty(handle, tree, curp); ++out: ++ brelse(bh); ++ ++ return err; ++} ++ ++/* ++ * routine finds empty index and adds new leaf. if no free index found ++ * then it requests in-depth growing ++ */ ++static int ext3_ext_create_new_leaf(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_ext_path *curp; ++ int depth, i, err = 0; ++ ++repeat: ++ i = depth = EXT_DEPTH(tree); ++ ++ /* walk up to the tree and look for free index entry */ ++ curp = path + depth; ++ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { ++ i--; ++ curp--; ++ } ++ ++ /* we use already allocated block for index block ++ * so, subsequent data blocks should be contigoues */ ++ if (EXT_HAS_FREE_INDEX(curp)) { ++ /* if we found index with free entry, then use that ++ * entry: create all needed subtree and add new leaf */ ++ err = ext3_ext_split(handle, tree, path, newext, i); ++ ++ /* refill path */ ++ ext3_ext_drop_refs(path); ++ path = ext3_ext_find_extent(tree, newext->e_block, path); ++ if (IS_ERR(path)) ++ err = PTR_ERR(path); ++ } else { ++ /* tree is full, time to grow in depth */ ++ err = ext3_ext_grow_indepth(handle, tree, path, newext); ++ ++ /* refill path */ ++ ext3_ext_drop_refs(path); ++ path = ext3_ext_find_extent(tree, newext->e_block, path); ++ if (IS_ERR(path)) ++ err = PTR_ERR(path); ++ ++ /* ++ * only first (depth 0 
-> 1) produces free space ++ * in all other cases we have to split growed tree ++ */ ++ depth = EXT_DEPTH(tree); ++ if (path[depth].p_hdr->e_num == path[depth].p_hdr->e_max) { ++ /* now we need split */ ++ goto repeat; ++ } ++ } ++ ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++/* ++ * returns allocated block in subsequent extent or 0xffffffff ++ * NOTE: it consider block number from index entry as ++ * allocated block. thus, index entries have to be consistent ++ * with leafs ++ */ ++static unsigned long ++ext3_ext_next_allocated_block(struct ext3_ext_path *path) ++{ ++ int depth; ++ ++ EXT_ASSERT(path != NULL); ++ depth = path->p_depth; ++ ++ if (depth == 0 && path->p_ext == NULL) ++ return 0xffffffff; ++ ++ /* FIXME: what if index isn't full ?! */ ++ while (depth >= 0) { ++ if (depth == path->p_depth) { ++ /* leaf */ ++ if (path[depth].p_ext != ++ EXT_LAST_EXTENT(path[depth].p_hdr)) ++ return path[depth].p_ext[1].e_block; ++ } else { ++ /* index */ ++ if (path[depth].p_idx != ++ EXT_LAST_INDEX(path[depth].p_hdr)) ++ return path[depth].p_idx[1].e_block; ++ } ++ depth--; ++ } ++ ++ return 0xffffffff; ++} ++ ++/* ++ * returns first allocated block from next leaf or 0xffffffff ++ */ ++static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int depth; ++ ++ EXT_ASSERT(path != NULL); ++ depth = path->p_depth; ++ ++ /* zero-tree has no leaf blocks at all */ ++ if (depth == 0) ++ return 0xffffffff; ++ ++ /* go to index block */ ++ depth--; ++ ++ while (depth >= 0) { ++ if (path[depth].p_idx != ++ EXT_LAST_INDEX(path[depth].p_hdr)) ++ return path[depth].p_idx[1].e_block; ++ depth--; ++ } ++ ++ return 0xffffffff; ++} ++ ++/* ++ * if leaf gets modified and modified extent is first in the leaf ++ * then we have to correct all indexes above ++ * TODO: do we need to correct tree in all cases? 
++ */ ++int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_extent_header *eh; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent *ex; ++ unsigned long border; ++ int k, err = 0; ++ ++ eh = path[depth].p_hdr; ++ ex = path[depth].p_ext; ++ EXT_ASSERT(ex); ++ EXT_ASSERT(eh); ++ ++ if (depth == 0) { ++ /* there is no tree at all */ ++ return 0; ++ } ++ ++ if (ex != EXT_FIRST_EXTENT(eh)) { ++ /* we correct tree if first leaf got modified only */ ++ return 0; ++ } ++ ++ /* ++ * TODO: we need correction if border is smaller then current one ++ */ ++ k = depth - 1; ++ border = path[depth].p_ext->e_block; ++ if ((err = ext3_ext_get_access(handle, tree, path + k))) ++ return err; ++ path[k].p_idx->e_block = border; ++ if ((err = ext3_ext_dirty(handle, tree, path + k))) ++ return err; ++ ++ while (k--) { ++ /* change all left-side indexes */ ++ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) ++ break; ++ if ((err = ext3_ext_get_access(handle, tree, path + k))) ++ break; ++ path[k].p_idx->e_block = border; ++ if ((err = ext3_ext_dirty(handle, tree, path + k))) ++ break; ++ } ++ ++ return err; ++} ++ ++static int inline ++ext3_can_extents_be_merged(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex1, ++ struct ext3_extent *ex2) ++{ ++ if (ex1->e_block + ex1->e_num != ex2->e_block) ++ return 0; ++ ++#ifdef AGRESSIVE_TEST ++ if (ex1->e_num >= 4) ++ return 0; ++#endif ++ ++ if (!tree->mergable) ++ return 1; ++ ++ return tree->mergable(ex1, ex2); ++} ++ ++/* ++ * this routine tries to merge requsted extent into the existing ++ * extent or inserts requested extent as new one into the tree, ++ * creating new leaf in no-space case ++ */ ++int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_extent_header * eh; ++ struct ext3_extent *ex, *fex; ++ struct ext3_extent *nearex; /* nearest 
extent */ ++ struct ext3_ext_path *npath = NULL; ++ int depth, len, err, next; ++ ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; ++ ++ /* try to insert block into found extent and return */ ++ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { ++ ext_debug(tree, "append %d block to %d:%d (from %d)\n", ++ newext->e_num, ex->e_block, ex->e_num, ++ ex->e_start); ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ return err; ++ ex->e_num += newext->e_num; ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ return err; ++ } ++ ++repeat: ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ if (eh->e_num < eh->e_max) ++ goto has_space; ++ ++ /* probably next leaf has space for us? */ ++ fex = EXT_LAST_EXTENT(eh); ++ next = ext3_ext_next_leaf_block(tree, path); ++ if (newext->e_block > fex->e_block && next != 0xffffffff) { ++ ext_debug(tree, "next leaf block - %d\n", next); ++ EXT_ASSERT(!npath); ++ npath = ext3_ext_find_extent(tree, next, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ EXT_ASSERT(npath->p_depth == path->p_depth); ++ eh = npath[depth].p_hdr; ++ if (eh->e_num < eh->e_max) { ++ ext_debug(tree, "next leaf isnt full(%d)\n", ++ eh->e_num); ++ path = npath; ++ goto repeat; ++ } ++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", ++ eh->e_num, eh->e_max); ++ } ++ ++ /* ++ * there is no free space in found leaf ++ * we're gonna add new leaf in the tree ++ */ ++ err = ext3_ext_create_new_leaf(handle, tree, path, newext); ++ if (err) ++ goto cleanup; ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ ++has_space: ++ nearex = path[depth].p_ext; ++ ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ goto cleanup; ++ ++ if (!nearex) { ++ /* there is no extent in this leaf, create first one */ ++ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", ++ newext->e_block, newext->e_start, ++ newext->e_num); ++ path[depth].p_ext = EXT_FIRST_EXTENT(eh); ++ } else if (newext->e_block > 
nearex->e_block) { ++ EXT_ASSERT(newext->e_block != nearex->e_block); ++ if (nearex != EXT_LAST_EXTENT(eh)) { ++ len = EXT_MAX_EXTENT(eh) - nearex; ++ len = (len - 1) * sizeof(struct ext3_extent); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " ++ "move %d from 0x%p to 0x%p\n", ++ newext->e_block, newext->e_start, ++ newext->e_num, ++ nearex, len, nearex + 1, nearex + 2); ++ memmove(nearex + 2, nearex + 1, len); ++ } ++ path[depth].p_ext = nearex + 1; ++ } else { ++ EXT_ASSERT(newext->e_block != nearex->e_block); ++ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " ++ "move %d from 0x%p to 0x%p\n", ++ newext->e_block, newext->e_start, newext->e_num, ++ nearex, len, nearex + 1, nearex + 2); ++ memmove(nearex + 1, nearex, len); ++ path[depth].p_ext = nearex; ++ } ++ ++ eh->e_num++; ++ nearex = path[depth].p_ext; ++ nearex->e_block = newext->e_block; ++ nearex->e_start = newext->e_start; ++ nearex->e_num = newext->e_num; ++ ++ /* time to correct all indexes above */ ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ if (err) ++ goto cleanup; ++ ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ ++cleanup: ++ if (npath) { ++ ext3_ext_drop_refs(npath); ++ kfree(npath); ++ } ++ ++ return err; ++} ++ ++int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, ++ unsigned long num, ext_prepare_callback func) ++{ ++ struct ext3_ext_path *path = NULL; ++ struct ext3_extent *ex, cbex; ++ unsigned long next, start = 0, end = 0; ++ int depth, exists, err = 0; ++ ++ EXT_ASSERT(tree); ++ EXT_ASSERT(func); ++ EXT_ASSERT(tree->inode); ++ EXT_ASSERT(tree->root); ++ ++ while (num > 0 && block != 0xfffffffff) { ++ /* find extent for this block */ ++ path = ext3_ext_find_extent(tree, block, path); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ break; ++ } ++ ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; 
++ next = ext3_ext_next_allocated_block(path); ++ ++ exists = 0; ++ if (!ex) { ++ /* there is no extent yet, so try to allocate ++ * all requested space */ ++ start = block; ++ end = block + num - 1; ++ } else if (ex->e_block > block) { ++ /* need to allocate space before found extent */ ++ start = block; ++ end = ex->e_block - 1; ++ if (block + num - 1 < end) ++ end = block + num - 1; ++ } else if (block >= ex->e_block + ex->e_num) { ++ /* need to allocate space after found extent */ ++ start = block; ++ end = block + num - 1; ++ if (end >= next) ++ end = next - 1; ++ } else if (block >= ex->e_block) { ++ /* ++ * some part of requested space is covered ++ * by found extent ++ */ ++ start = block; ++ end = ex->e_block + ex->e_num - 1; ++ if (block + num - 1 < end) ++ end = block + num - 1; ++ exists = 1; ++ } else { ++ BUG(); ++ } ++ ++ if (!exists) { ++ cbex.e_block = start; ++ cbex.e_num = end - start + 1; ++ cbex.e_start = 0; ++ } else ++ cbex = *ex; ++ ++ err = func(tree, path, &cbex, exists); ++ if (err < 0) ++ break; ++ ++ if (err == EXT_BREAK) { ++ err = 0; ++ break; ++ } ++ ++ if (EXT_DEPTH(tree) != depth) { ++ /* depth was changed. 
we have to realloc path */ ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ path = NULL; ++ } ++ ++ block += cbex.e_num; ++ num -= cbex.e_num; ++ } ++ ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } ++ ++ return err; ++} ++ ++static inline void ++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) ++{ ++ if (tree->cex) ++ tree->cex->e_num = 0; ++} ++ ++static inline void ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++{ ++ if (tree->cex) { ++ EXT_ASSERT(ex); ++ EXT_ASSERT(ex->e_num); ++ tree->cex->e_block = ex->e_block; ++ tree->cex->e_start = ex->e_start; ++ tree->cex->e_num = ex->e_num; ++ } ++} ++ ++/* ++ * this routine calculate boundaries of the gap requested block fits into ++ * and cache this gap ++ */ ++static inline void ++ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ unsigned long block) ++{ ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent *ex, gex; ++ ++ if (!tree->cex) ++ return; ++ ++ ex = path[depth].p_ext; ++ if (ex == NULL) { ++ /* there is no extent yet, so gap is [0;-] */ ++ gex.e_block = 0; ++ gex.e_num = 0xffffffff; ++ ext_debug(tree, "cache gap(whole file):"); ++ } else if (block < ex->e_block) { ++ gex.e_block = block; ++ gex.e_num = ex->e_block - block; ++ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", ++ (unsigned long) block, ++ (unsigned long) ex->e_block, ++ (unsigned long) ex->e_num); ++ } else if (block >= ex->e_block + ex->e_num) { ++ gex.e_block = ex->e_block + ex->e_num; ++ gex.e_num = ext3_ext_next_allocated_block(path); ++ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", ++ (unsigned long) ex->e_block, ++ (unsigned long) ex->e_num, ++ (unsigned long) block); ++ EXT_ASSERT(gex.e_num > gex.e_block); ++ gex.e_num = gex.e_num - gex.e_block; ++ } else { ++ BUG(); ++ } ++ ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.e_block, ++ (unsigned long) gex.e_num); ++ gex.e_start = 0xffffffff; ++ ext3_ext_put_in_cache(tree, 
&gex); ++} ++ ++static inline int ++ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, ++ struct ext3_extent *ex) ++{ ++ struct ext3_extent *cex = tree->cex; ++ ++ /* is there cache storage at all? */ ++ if (!cex) ++ return 0; ++ ++ /* has cache valid data? */ ++ if (cex->e_num == 0) ++ return 0; ++ ++ if (block >= cex->e_block && block < cex->e_block + cex->e_num) { ++ ex->e_block = cex->e_block; ++ ex->e_start = cex->e_start; ++ ex->e_num = cex->e_num; ++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", ++ (unsigned long) block, ++ (unsigned long) ex->e_block, ++ (unsigned long) ex->e_num, ++ (unsigned long) ex->e_start); ++ return 1; ++ } ++ ++ /* not in cache */ ++ return 0; ++} ++ ++/* ++ * routine removes index from the index block ++ * it's used in truncate case only. thus all requests are for ++ * last index in the block only ++ */ ++int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ struct buffer_head *bh; ++ int err; ++ ++ /* free index block */ ++ path--; ++ EXT_ASSERT(path->p_hdr->e_num); ++ if ((err = ext3_ext_get_access(handle, tree, path))) ++ return err; ++ path->p_hdr->e_num--; ++ if ((err = ext3_ext_dirty(handle, tree, path))) ++ return err; ++ ext_debug(tree, "index is empty, remove it, free block %d\n", ++ path->p_idx->e_leaf); ++ bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->e_leaf); ++ ext3_forget(handle, 0, tree->inode, bh, path->p_idx->e_leaf); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1); ++ return err; ++} ++ ++int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int depth = EXT_DEPTH(tree); ++ int needed; ++ ++ if (path) { ++ /* probably there is space in leaf? 
*/ ++ if (path[depth].p_hdr->e_num < path[depth].p_hdr->e_max) ++ return 1; ++ } ++ ++ /* ++ * the worste case we're expecting is creation of the ++ * new root (growing in depth) with index splitting ++ * for splitting we have to consider depth + 1 because ++ * previous growing could increase it ++ */ ++ depth = depth + 1; ++ ++ /* ++ * growing in depth: ++ * block allocation + new root + old root ++ */ ++ needed = EXT3_ALLOC_NEEDED + 2; ++ ++ /* index split. we may need: ++ * allocate intermediate indexes and new leaf ++ * change two blocks at each level, but root ++ * modify root block (inode) ++ */ ++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; ++ ++ return needed; ++} ++ ++static int ++ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) ++{ ++ struct ext3_extent *ex, tex; ++ struct ext3_ext_path *npath; ++ int depth, creds, err; ++ ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; ++ EXT_ASSERT(ex); ++ EXT_ASSERT(end < ex->e_block + ex->e_num - 1); ++ EXT_ASSERT(ex->e_block < start); ++ ++ /* calculate tail extent */ ++ tex.e_block = end + 1; ++ EXT_ASSERT(tex.e_block < ex->e_block + ex->e_num); ++ tex.e_num = ex->e_block + ex->e_num - tex.e_block; ++ ++ creds = ext3_ext_calc_credits_for_insert(tree, path); ++ handle = ext3_ext_journal_restart(handle, creds); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ /* calculate head extent. use primary extent */ ++ err = ext3_ext_get_access(handle, tree, path + depth); ++ if (err) ++ return err; ++ ex->e_num = start - ex->e_block; ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ if (err) ++ return err; ++ ++ /* FIXME: some callback to free underlying resource ++ * and correct e_start? 
*/ ++ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", ++ ex->e_block, ex->e_num, tex.e_block, tex.e_num); ++ ++ npath = ext3_ext_find_extent(tree, ex->e_block, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ depth = EXT_DEPTH(tree); ++ EXT_ASSERT(npath[depth].p_ext->e_block == ex->e_block); ++ EXT_ASSERT(npath[depth].p_ext->e_num == ex->e_num); ++ ++ err = ext3_ext_insert_extent(handle, tree, npath, &tex); ++ ext3_ext_drop_refs(npath); ++ kfree(npath); ++ ++ return err; ++ ++} ++ ++static int ++ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) ++{ ++ struct ext3_extent *ex, *fu = NULL, *lu, *le; ++ int err = 0, correct_index = 0; ++ int depth = EXT_DEPTH(tree), credits; ++ struct ext3_extent_header *eh; ++ unsigned a, b, block, num; ++ ++ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); ++ if (!path[depth].p_hdr) ++ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); ++ eh = path[depth].p_hdr; ++ EXT_ASSERT(eh); ++ EXT_ASSERT(eh->e_num <= eh->e_max); ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); ++ ++ /* find where to start removing */ ++ le = ex = EXT_LAST_EXTENT(eh); ++ while (ex != EXT_FIRST_EXTENT(eh)) { ++ if (ex->e_block <= end) ++ break; ++ ex--; ++ } ++ ++ if (start > ex->e_block && end < ex->e_block + ex->e_num - 1) { ++ /* removal of internal part of the extent requested ++ * tail and head must be placed in different extent ++ * so, we have to insert one more extent */ ++ path[depth].p_ext = ex; ++ return ext3_ext_split_for_rm(handle, tree, path, start, end); ++ } ++ ++ lu = ex; ++ while (ex >= EXT_FIRST_EXTENT(eh) && ++ ex->e_block + ex->e_num > start) { ++ ext_debug(tree, "remove ext %u:%u\n", ex->e_block, ex->e_num); ++ path[depth].p_ext = ex; ++ ++ a = ex->e_block > start ? ex->e_block : start; ++ b = ex->e_block + ex->e_num - 1 < end ? 
++ ex->e_block + ex->e_num - 1 : end; ++ ++ ext_debug(tree, " border %u:%u\n", a, b); ++ ++ if (a != ex->e_block && b != ex->e_block + ex->e_num - 1) { ++ block = 0; ++ num = 0; ++ BUG(); ++ } else if (a != ex->e_block) { ++ /* remove tail of the extent */ ++ block = ex->e_block; ++ num = a - block; ++ } else if (b != ex->e_block + ex->e_num - 1) { ++ /* remove head of the extent */ ++ block = a; ++ num = b - a; ++ } else { ++ /* remove whole extent: excelent! */ ++ block = ex->e_block; ++ num = 0; ++ EXT_ASSERT(a == ex->e_block && ++ b == ex->e_block + ex->e_num - 1); ++ } ++ ++ if (ex == EXT_FIRST_EXTENT(eh)) ++ correct_index = 1; ++ ++ credits = 1; ++ if (correct_index) ++ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; ++ if (tree->remove_extent_credits) ++ credits += tree->remove_extent_credits(tree, ex, a, b); ++ ++ handle = ext3_ext_journal_restart(handle, credits); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out; ++ } ++ ++ err = ext3_ext_get_access(handle, tree, path + depth); ++ if (err) ++ goto out; ++ ++ if (tree->remove_extent) ++ err = tree->remove_extent(tree, ex, a, b); ++ if (err) ++ goto out; ++ ++ if (num == 0) { ++ /* this extent is removed entirely mark slot unused */ ++ ex->e_start = 0; ++ eh->e_num--; ++ fu = ex; ++ } ++ ++ ex->e_block = block; ++ ex->e_num = num; ++ ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ if (err) ++ goto out; ++ ++ ext_debug(tree, "new extent: %u:%u:%u\n", ++ ex->e_block, ex->e_num, ex->e_start); ++ ex--; ++ } ++ ++ if (fu) { ++ /* reuse unused slots */ ++ while (lu < le) { ++ if (lu->e_start) { ++ *fu = *lu; ++ lu->e_start = 0; ++ fu++; ++ } ++ lu++; ++ } ++ } ++ ++ if (correct_index && eh->e_num) ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ ++ /* if this leaf is free, then we should ++ * remove it from index block above */ ++ if (err == 0 && eh->e_num == 0 && path[depth].p_bh != NULL) ++ err = ext3_ext_rm_idx(handle, tree, path + depth); ++ ++out: ++ return err; ++} ++ ++ 
++static struct ext3_extent_idx * ++ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) ++{ ++ struct ext3_extent_idx *ix; ++ ++ ix = EXT_LAST_INDEX(hdr); ++ while (ix != EXT_FIRST_INDEX(hdr)) { ++ if (ix->e_block <= block) ++ break; ++ ix--; ++ } ++ return ix; ++} ++ ++/* ++ * returns 1 if current index have to be freed (even partial) ++ */ ++static int inline ++ext3_ext_more_to_rm(struct ext3_ext_path *path) ++{ ++ EXT_ASSERT(path->p_idx); ++ ++ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) ++ return 0; ++ ++ /* ++ * if truncate on deeper level happened it it wasn't partial ++ * so we have to consider current index for truncation ++ */ ++ if (path->p_hdr->e_num == path->p_block) ++ return 0; ++ return 1; ++} ++ ++int ext3_ext_remove_space(struct ext3_extents_tree *tree, ++ unsigned long start, unsigned long end) ++{ ++ struct inode *inode = tree->inode; ++ struct super_block *sb = inode->i_sb; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_ext_path *path; ++ handle_t *handle; ++ int i = 0, err = 0; ++ ++ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); ++ ++ /* probably first extent we're gonna free will be last in block */ ++ handle = ext3_journal_start(inode, depth + 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ ext3_ext_invalidate_cache(tree); ++ ++ /* ++ * we start scanning from right side freeing all the blocks ++ * after i_size and walking into the deep ++ */ ++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); ++ if (IS_ERR(path)) { ++ ext3_error(sb, "ext3_ext_remove_space", ++ "Can't allocate path array"); ++ ext3_journal_stop(handle, inode); ++ return -ENOMEM; ++ } ++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); ++ path[i].p_hdr = EXT_ROOT_HDR(tree); ++ ++ while (i >= 0 && err == 0) { ++ if (i == depth) { ++ /* this is leaf block */ ++ err = ext3_ext_rm_leaf(handle, tree, path, start, end); ++ /* root level have p_bh == NULL, brelse() eats this */ ++ 
brelse(path[i].p_bh); ++ i--; ++ continue; ++ } ++ ++ /* this is index block */ ++ if (!path[i].p_hdr) { ++ ext_debug(tree, "initialize header\n"); ++ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); ++ } ++ ++ EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max); ++ EXT_ASSERT(path[i].p_hdr->e_magic == EXT3_EXT_MAGIC); ++ ++ if (!path[i].p_idx) { ++ /* this level hasn't touched yet */ ++ path[i].p_idx = ++ ext3_ext_last_covered(path[i].p_hdr, end); ++ path[i].p_block = path[i].p_hdr->e_num + 1; ++ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", ++ path[i].p_hdr, path[i].p_hdr->e_num); ++ } else { ++ /* we've already was here, see at next index */ ++ path[i].p_idx--; ++ } ++ ++ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", ++ i, EXT_FIRST_INDEX(path[i].p_hdr), ++ path[i].p_idx); ++ if (ext3_ext_more_to_rm(path + i)) { ++ /* go to the next level */ ++ ext_debug(tree, "move to level %d (block %d)\n", ++ i + 1, path[i].p_idx->e_leaf); ++ memset(path + i + 1, 0, sizeof(*path)); ++ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->e_leaf); ++ if (!path[i+1].p_bh) { ++ /* should we reset i_size? 
*/ ++ err = -EIO; ++ break; ++ } ++ /* put actual number of indexes to know is this ++ * number got changed at the next iteration */ ++ path[i].p_block = path[i].p_hdr->e_num; ++ i++; ++ } else { ++ /* we finish processing this index, go up */ ++ if (path[i].p_hdr->e_num == 0 && i > 0) { ++ /* index is empty, remove it ++ * handle must be already prepared by the ++ * truncate_leaf() */ ++ err = ext3_ext_rm_idx(handle, tree, path + i); ++ } ++ /* root level have p_bh == NULL, brelse() eats this */ ++ brelse(path[i].p_bh); ++ i--; ++ ext_debug(tree, "return to level %d\n", i); ++ } ++ } ++ ++ /* TODO: flexible tree reduction should be here */ ++ if (path->p_hdr->e_num == 0) { ++ /* ++ * truncate to zero freed all the tree ++ * so, we need to correct e_depth ++ */ ++ err = ext3_ext_get_access(handle, tree, path); ++ if (err == 0) { ++ EXT_ROOT_HDR(tree)->e_depth = 0; ++ err = ext3_ext_dirty(handle, tree, path); ++ } ++ } ++ ++ kfree(path); ++ ext3_journal_stop(handle, inode); ++ ++ return err; ++} ++ ++/* ++ * called at mount time ++ */ ++void ext3_ext_init(struct super_block *sb) ++{ ++ /* ++ * possible initialization would be here ++ */ ++ ++ if (test_opt(sb, EXTENTS)) { ++ printk("EXT3-fs: file extents enabled"); ++#ifdef AGRESSIVE_TEST ++ printk(", agressive tests"); ++#endif ++#ifdef CHECK_BINSEARCH ++ printk(", check binsearch"); ++#endif ++ printk("\n"); ++ } ++} ++ ++/* ++ * called at umount time ++ */ ++void ext3_ext_release(struct super_block *sb) ++{ ++} ++ ++/************************************************************************ ++ * VFS related routines ++ ************************************************************************/ ++ ++static int ext3_get_inode_write_access(handle_t *handle, void *buffer) ++{ ++ /* we use in-core data, not bh */ ++ return 0; ++} ++ ++static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) ++{ ++ struct inode *inode = buffer; ++ ext3_mark_inode_dirty(handle, inode); ++ return 0; ++} ++ ++static int 
ext3_ext_mergable(struct ext3_extent *ex1, ++ struct ext3_extent *ex2) ++{ ++ if (ex1->e_start + ex1->e_num == ex2->e_start) ++ return 1; ++ return 0; ++} ++ ++static int ++ext3_remove_blocks_credits(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) ++{ ++ int needed; ++ ++ /* at present, extent can't cross block group */; ++ needed = 3; /* bitmap + group desc + sb */ ++ ++#ifdef CONFIG_QUOTA ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ return needed; ++} ++ ++static int ++ext3_remove_blocks(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) ++{ ++ int needed = ext3_remove_blocks_credits(tree, ex, from, to); ++ handle_t *handle = ext3_journal_start(tree->inode, needed); ++ ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ if (from >= ex->e_block && to == ex->e_block + ex->e_num - 1) { ++ /* tail removal */ ++ unsigned long num, start; ++ num = ex->e_block + ex->e_num - from; ++ start = ex->e_start + ex->e_num - num; ++ ext_debug(tree, "free last %lu blocks starting %lu\n", ++ num, start); ++ ext3_free_blocks(handle, tree->inode, start, num); ++ } else if (from == ex->e_block && to <= ex->e_block + ex->e_num - 1) { ++ printk("strange request: removal %lu-%lu from %u:%u\n", ++ from, to, ex->e_block, ex->e_num); ++ } else { ++ printk("strange request: removal(2) %lu-%lu from %u:%u\n", ++ from, to, ex->e_block, ex->e_num); ++ } ++ ext3_journal_stop(handle, tree->inode); ++ return 0; ++} ++ ++static int ext3_ext_find_goal(struct inode *inode, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; ++ int depth; ++ ++ if (path) { ++ depth = path->p_depth; ++ /* try to find previous block */ ++ if (path[depth].p_ext) ++ return path[depth].p_ext->e_start + ++ path[depth].p_ext->e_num - 1; ++ ++ /* it looks index is empty ++ * try to find starting from index itself */ ++ if 
(path[depth].p_bh) ++ return path[depth].p_bh->b_blocknr; ++ } ++ ++ /* OK. use inode's group */ ++ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ return bg_start + colour; ++} ++ ++static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int *err) ++{ ++ struct inode *inode = tree->inode; ++ int newblock, goal; ++ ++ EXT_ASSERT(path); ++ EXT_ASSERT(ex); ++ EXT_ASSERT(ex->e_start); ++ EXT_ASSERT(ex->e_num); ++ ++ /* reuse block from the extent to order data/metadata */ ++ newblock = ex->e_start++; ++ ex->e_num--; ++ if (ex->e_num == 0) { ++ ex->e_num = 1; ++ /* allocate new block for the extent */ ++ goal = ext3_ext_find_goal(inode, path); ++ ex->e_start = ext3_new_block(handle, inode, goal, 0, 0, err); ++ if (ex->e_start == 0) { ++ /* error occured: restore old extent */ ++ ex->e_start = newblock; ++ return 0; ++ } ++ } ++ return newblock; ++} ++ ++static void ext3_init_tree_desc(struct ext3_extents_tree *tree, ++ struct inode *inode) ++{ ++ tree->inode = inode; ++ tree->root = (void *) EXT3_I(inode)->i_data; ++ tree->get_write_access = ext3_get_inode_write_access; ++ tree->mark_buffer_dirty = ext3_mark_buffer_dirty; ++ tree->mergable = ext3_ext_mergable; ++ tree->new_block = ext3_new_block_cb; ++ tree->remove_extent = ext3_remove_blocks; ++ tree->remove_extent_credits = ext3_remove_blocks_credits; ++ tree->buffer = (void *) inode; ++ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); ++ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++} ++ ++#if 0 ++static int ++ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newex, int exist) ++{ ++ struct inode *inode = tree->inode; ++ int count, err, goal; ++ loff_t new_i_size; ++ handle_t *handle; ++ 
unsigned long pblock; ++ ++ if (exist) ++ return EXT_CONTINUE; ++ ++ count = ext3_ext_calc_credits_for_insert(tree, path); ++ handle = ext3_journal_start(inode, count + EXT3_ALLOC_NEEDED + 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ goal = ext3_ext_find_goal(inode, path); ++ count = newex->e_num; ++#ifdef EXT3_MULTIBLOCK_ALLOCATOR ++ pblock = ext3_new_block(handle, inode, goal, &count, NULL, &err); ++ EXT_ASSERT(count <= num); ++ /* FIXME: error handling here */ ++ EXT_ASSERT(err == 0); ++#else ++ pblock = 0; ++#endif ++ ++ /* insert new extent */ ++ newex->e_start = pblock; ++ newex->e_num = count; ++ err = ext3_ext_insert_extent(handle, tree, path, newex); ++ if (err) ++ goto out; ++ ++ /* correct on-disk inode size */ ++ if (newex->e_num > 0) { ++ new_i_size = (loff_t) newex->e_block + newex->e_num; ++ new_i_size = new_i_size << inode->i_blkbits; ++ if (new_i_size > i_size_read(inode)) ++ new_i_size = i_size_read(inode); ++ if (new_i_size > EXT3_I(inode)->i_disksize) { ++ EXT3_I(inode)->i_disksize = new_i_size; ++ err = ext3_mark_inode_dirty(handle, inode); ++ } ++ } ++ ++out: ++ ext3_journal_stop(handle, inode); ++ return err; ++} ++ ++ ++int ext3_ext_allocate_nblocks(struct inode *inode, unsigned long block, ++ unsigned long num) ++{ ++ struct ext3_extents_tree tree; ++ int err; ++ ++ ext_debug(&tree, "blocks %lu-%lu requested for inode %u\n", ++ block, block + num,(unsigned) inode->i_ino); ++ ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); ++ ext3_ext_invalidate_cache(&tree); ++ up(&EXT3_I(inode)->truncate_sem); ++ ++ return err; ++} ++#endif ++ ++int ext3_ext_get_block(handle_t *handle, struct inode *inode, ++ long iblock, struct buffer_head *bh_result, ++ int create, int extend_disksize) ++{ ++ struct ext3_ext_path *path = NULL; ++ struct ext3_extent newex; ++ struct ext3_extent *ex; ++ int goal, newblock, err = 0, depth; ++ struct 
ext3_extents_tree tree; ++ ++ clear_bit(BH_New, &bh_result->b_state); ++ ext3_init_tree_desc(&tree, inode); ++ ext_debug(&tree, "block %d requested for inode %u\n", ++ (int) iblock, (unsigned) inode->i_ino); ++ down_write(&EXT3_I(inode)->truncate_sem); ++ ++ /* check in cache */ ++ if (ext3_ext_in_cache(&tree, iblock, &newex)) { ++ if (newex.e_start == 0xffffffff && !create) { ++ /* block isn't allocated yet and ++ * user don't want to allocate it */ ++ goto out2; ++ } else if (newex.e_start) { ++ /* block is already allocated */ ++ newblock = iblock - newex.e_block + newex.e_start; ++ goto out; ++ } ++ } ++ ++ /* find extent for this block */ ++ path = ext3_ext_find_extent(&tree, iblock, NULL); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ goto out2; ++ } ++ ++ depth = EXT_DEPTH(&tree); ++ ++ /* ++ * consistent leaf must not be empty ++ * this situations is possible, though, _during_ tree modification ++ * this is why assert can't be put in ext3_ext_find_extent() ++ */ ++ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); ++ ++ if ((ex = path[depth].p_ext)) { ++ /* if found exent covers block, simple return it */ ++ if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) { ++ newblock = iblock - ex->e_block + ex->e_start; ++ ext_debug(&tree, "%d fit into %d:%d -> %d\n", ++ (int) iblock, ex->e_block, ex->e_num, ++ newblock); ++ ext3_ext_put_in_cache(&tree, ex); ++ goto out; ++ } ++ } ++ ++ /* ++ * requested block isn't allocated yet ++ * we couldn't try to create block if create flag is zero ++ */ ++ if (!create) { ++ /* put just found gap into cache to speedup subsequest reqs */ ++ ext3_ext_put_gap_in_cache(&tree, path, iblock); ++ goto out2; ++ } ++ ++ /* allocate new block */ ++ goal = ext3_ext_find_goal(inode, path); ++ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); ++ if (!newblock) ++ goto out2; ++ ext_debug(&tree, "allocate new block: goal %d, found %d\n", ++ goal, newblock); ++ ++ /* try to insert new extent into found leaf and 
return */ ++ newex.e_block = iblock; ++ newex.e_start = newblock; ++ newex.e_num = 1; ++ err = ext3_ext_insert_extent(handle, &tree, path, &newex); ++ if (err) ++ goto out2; ++ ++ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ++ /* previous routine could use block we allocated */ ++ newblock = newex.e_start; ++ set_bit(BH_New, &bh_result->b_state); ++ ++ ext3_ext_put_in_cache(&tree, &newex); ++out: ++ ext3_ext_show_leaf(&tree, path); ++ set_bit(BH_Mapped, &bh_result->b_state); ++ bh_result->b_dev = inode->i_sb->s_dev; ++ bh_result->b_blocknr = newblock; ++out2: ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } ++ up_write(&EXT3_I(inode)->truncate_sem); ++ ++ return err; ++} ++ ++void ext3_ext_truncate(struct inode * inode) ++{ ++ struct address_space *mapping = inode->i_mapping; ++ struct super_block *sb = inode->i_sb; ++ struct ext3_extents_tree tree; ++ unsigned long last_block; ++ handle_t *handle; ++ int err = 0; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ /* ++ * probably first extent we're gonna free will be last in block ++ */ ++ err = ext3_writepage_trans_blocks(inode) + 3; ++ handle = ext3_journal_start(inode, err); ++ if (IS_ERR(handle)) ++ return; ++ ++ ext3_block_truncate_page(handle, mapping, inode->i_size); ++ ++ down_write(&EXT3_I(inode)->truncate_sem); ++ ext3_ext_invalidate_cache(&tree); ++ ++ /* ++ * TODO: optimization is possible here ++ * probably we need not scaning at all, ++ * because page truncation is enough ++ */ ++ if (ext3_orphan_add(handle, inode)) ++ goto out_stop; ++ ++ /* we have to know where to truncate from in crash case */ ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ext3_mark_inode_dirty(handle, inode); ++ ++ last_block = (inode->i_size + sb->s_blocksize - 1) ++ >> EXT3_BLOCK_SIZE_BITS(sb); ++ err = ext3_ext_remove_space(&tree, last_block, 0xffffffff); ++ ++ /* In a multi-transaction truncate, we only make the final ++ * transaction 
synchronous */ ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ ++out_stop: ++ /* ++ * If this was a simple ftruncate(), and the file will remain alive ++ * then we need to clear up the orphan record which we created above. ++ * However, if this was a real unlink then we were called by ++ * ext3_delete_inode(), and we allow that function to clean up the ++ * orphan info for us. ++ */ ++ if (inode->i_nlink) ++ ext3_orphan_del(handle, inode); ++ ++ up_write(&EXT3_I(inode)->truncate_sem); ++ ext3_journal_stop(handle, inode); ++} ++ ++/* ++ * this routine calculate max number of blocks we could modify ++ * in order to allocate new block for an inode ++ */ ++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) ++{ ++ struct ext3_extents_tree tree; ++ int needed; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); ++ ++ /* caller want to allocate num blocks */ ++ needed *= num; ++ ++#ifdef CONFIG_QUOTA ++ /* ++ * FIXME: real calculation should be here ++ * it depends on blockmap format of qouta file ++ */ ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ ++ return needed; ++} ++ ++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ext3_extent_tree_init(handle, &tree); ++} ++ ++static int ++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newex, int exist) ++{ ++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; ++ ++ if (!exist) ++ return EXT_CONTINUE; ++ if (buf->err < 0) ++ return EXT_BREAK; ++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) ++ return EXT_BREAK; ++ ++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { ++ buf->err++; ++ buf->cur += sizeof(*newex); ++ } else { ++ buf->err = -EFAULT; ++ return EXT_BREAK; ++ } ++ return EXT_CONTINUE; ++} ++ ++static int 
++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int exist) ++{ ++ struct ext3_extent_tree_stats *buf = ++ (struct ext3_extent_tree_stats *) tree->private; ++ int depth; ++ ++ if (!exist) ++ return EXT_CONTINUE; ++ ++ depth = EXT_DEPTH(tree); ++ buf->extents_num++; ++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) ++ buf->leaf_num++; ++ return EXT_CONTINUE; ++} ++ ++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err = 0; ++ ++ if (cmd == EXT3_IOC_GET_EXTENTS) { ++ struct ext3_extent_buf buf; ++ struct ext3_extents_tree tree; ++ ++ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) ++ return -EFAULT; ++ ++ ext3_init_tree_desc(&tree, inode); ++ buf.cur = buf.buffer; ++ buf.err = 0; ++ tree.private = &buf; ++ err = ext3_ext_walk_space(&tree, buf.start, 0xffffffff, ++ ext3_ext_store_extent_cb); ++ if (err == 0) ++ err = buf.err; ++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { ++ struct ext3_extent_tree_stats buf; ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ buf.depth = EXT_DEPTH(&tree); ++ buf.extents_num = 0; ++ buf.leaf_num = 0; ++ tree.private = &buf; ++ err = ext3_ext_walk_space(&tree, 0, 0xffffffff, ++ ext3_ext_collect_stats_cb); ++ if (!err) ++ err = copy_to_user((void *) arg, &buf, sizeof(buf)); ++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { ++ struct ext3_extents_tree tree; ++ ext3_init_tree_desc(&tree, inode); ++ err = EXT_DEPTH(&tree); ++ } ++ ++ return err; ++} ++ +Index: linux-2.4.21-suse2/fs/ext3/ialloc.c +=================================================================== +--- linux-2.4.21-suse2.orig/fs/ext3/ialloc.c 2004-01-23 19:04:17.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/ialloc.c 2004-01-24 20:10:25.000000000 +0300 +@@ -592,6 +592,10 @@ + iloc.bh = NULL; + goto fail; + } ++ if (test_opt(sb, EXTENTS)) { ++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ 
ext3_extents_initialize_blockmap(handle, inode); ++ } + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + if (err) goto fail; + +Index: linux-2.4.21-suse2/fs/ext3/inode.c +=================================================================== +--- linux-2.4.21-suse2.orig/fs/ext3/inode.c 2004-01-23 19:04:17.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/inode.c 2004-01-24 20:10:25.000000000 +0300 +@@ -853,6 +853,18 @@ + goto reread; + } + ++static inline int ++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, ++ struct buffer_head *bh, int create, int extend_disksize) ++{ ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_get_block(handle, inode, ++ block, bh, create, ++ extend_disksize); ++ return ext3_get_block_handle(handle, inode, ++ block, bh, create, extend_disksize); ++} ++ + /* + * The BKL is not held on entry here. + */ +@@ -866,7 +878,7 @@ + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } +- ret = ext3_get_block_handle(handle, inode, iblock, ++ ret = ext3_get_block_wrap(handle, inode, iblock, + bh_result, create, 1); + return ret; + } +@@ -893,7 +905,7 @@ + } + } + if (ret == 0) +- ret = ext3_get_block_handle(handle, inode, iblock, ++ ret = ext3_get_block_wrap(handle, inode, iblock, + bh_result, create, 0); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); +@@ -915,7 +927,7 @@ + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); +- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); ++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); +@@ -1502,7 +1514,7 @@ + * This required during truncate. We need to physically zero the tail end + * of that block so it doesn't yield old data if the file is later grown. 
+ */ +-static int ext3_block_truncate_page(handle_t *handle, ++int ext3_block_truncate_page(handle_t *handle, + struct address_space *mapping, loff_t from) + { + unsigned long index = from >> PAGE_CACHE_SHIFT; +@@ -1987,6 +1999,9 @@ + + ext3_discard_prealloc(inode); + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_truncate(inode); ++ + handle = start_transaction(inode); + if (IS_ERR(handle)) + return; /* AKPM: return what? */ +@@ -2663,6 +2678,9 @@ + int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; + int ret; + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_writepage_trans_blocks(inode, bpp); ++ + if (ext3_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else +@@ -3099,7 +3117,7 @@ + + /* alloc blocks one by one */ + for (i = 0; i < nblocks; i++) { +- ret = ext3_get_block_handle(handle, inode, blocks[i], ++ ret = ext3_get_block_wrap(handle, inode, blocks[i], + &bh_tmp, 1, 1); + if (ret) + break; +@@ -3175,7 +3193,7 @@ + if (blocks[i] != 0) + continue; + +- rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1); ++ rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1, 1); + if (rc) { + printk(KERN_INFO "ext3_map_inode_page: error %d " + "allocating block %ld\n", rc, iblock); +Index: linux-2.4.21-suse2/fs/ext3/Makefile +=================================================================== +--- linux-2.4.21-suse2.orig/fs/ext3/Makefile 2004-01-13 17:45:20.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/Makefile 2004-01-24 20:10:25.000000000 +0300 +@@ -12,7 +12,8 @@ + export-objs := ext3-exports.o + + obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o ++ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ ++ extents.o + obj-m := $(O_TARGET) + + export-objs += xattr.o +Index: linux-2.4.21-suse2/fs/ext3/super.c +=================================================================== +--- 
linux-2.4.21-suse2.orig/fs/ext3/super.c 2004-01-23 19:04:17.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/super.c 2004-01-24 20:10:25.000000000 +0300 +@@ -624,6 +624,7 @@ + int i; + + J_ASSERT(sbi->s_delete_inodes == 0); ++ ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { +@@ -829,6 +830,10 @@ + return 0; + } + } ++ else if (!strcmp (this_char, "extents")) ++ set_opt (*mount_options, EXTENTS); ++ else if (!strcmp (this_char, "extdebug")) ++ set_opt (*mount_options, EXTDEBUG); + else if (!strcmp (this_char, "grpid") || + !strcmp (this_char, "bsdgroups")) + set_opt (*mount_options, GRPID); +@@ -1523,6 +1528,8 @@ + test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": + "writeback"); + ++ ext3_ext_init(sb); ++ + return sb; + + failed_mount3: +Index: linux-2.4.21-suse2/fs/ext3/ioctl.c +=================================================================== +--- linux-2.4.21-suse2.orig/fs/ext3/ioctl.c 2004-01-13 17:45:18.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/ioctl.c 2004-01-24 20:10:25.000000000 +0300 +@@ -174,6 +174,10 @@ + return ret; + } + #endif ++ case EXT3_IOC_GET_EXTENTS: ++ case EXT3_IOC_GET_TREE_STATS: ++ case EXT3_IOC_GET_TREE_DEPTH: ++ return ext3_ext_ioctl(inode, filp, cmd, arg); + default: + return -ENOTTY; + } +Index: linux-2.4.21-suse2/include/linux/ext3_fs.h +=================================================================== +--- linux-2.4.21-suse2.orig/include/linux/ext3_fs.h 2004-01-23 19:04:17.000000000 +0300 ++++ linux-2.4.21-suse2/include/linux/ext3_fs.h 2004-01-24 20:10:25.000000000 +0300 +@@ -184,6 +184,7 @@ + #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ + #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ + #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ ++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ + + #define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */ + #define 
EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */ +@@ -208,6 +209,9 @@ + #ifdef CONFIG_JBD_DEBUG + #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) + #endif ++#define EXT3_IOC_GET_EXTENTS _IOR('f', 5, long) ++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 6, long) ++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 7, long) + + /* + * Structure of an inode on the disk +@@ -328,6 +332,8 @@ + #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ + #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ ++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ ++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H +@@ -689,6 +695,7 @@ + extern unsigned long ext3_count_free (struct buffer_head *, unsigned); + + /* inode.c */ ++extern int ext3_block_truncate_page(handle_t *, struct address_space *, loff_t); + extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); + extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +@@ -770,6 +777,14 @@ + extern struct inode_operations ext3_symlink_inode_operations; + extern struct inode_operations ext3_fast_symlink_inode_operations; + ++/* extents.c */ ++extern int ext3_ext_writepage_trans_blocks(struct inode *, int); ++extern int ext3_ext_get_block(handle_t *, struct inode *, long, ++ struct buffer_head *, int, int); ++extern void ext3_ext_truncate(struct inode *); ++extern void ext3_ext_init(struct super_block *); ++extern void ext3_ext_release(struct super_block *); ++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); + + #endif /* __KERNEL__ */ + +Index: linux-2.4.21-suse2/include/linux/ext3_extents.h 
+===================================================================
+--- linux-2.4.21-suse2.orig/include/linux/ext3_extents.h	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.21-suse2/include/linux/ext3_extents.h	2004-01-24 20:10:25.000000000 +0300
+@@ -0,0 +1,212 @@
++/*
++ * Copyright (C) 2003 Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public Licens
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
++ */
++
++#ifndef _LINUX_EXT3_EXTENTS
++#define _LINUX_EXT3_EXTENTS
++
++/*
++ * with AGRESSIVE_TEST defined capacity of index/leaf blocks
++ * becomes very small, so index split, in-depth growing and
++ * other hard changes happen much more often
++ * this is for debug purposes only
++ */
++#define AGRESSIVE_TEST_
++
++/*
++ * if CHECK_BINSEARCH defined, then results of binary search
++ * will be checked by linear search
++ */
++#define CHECK_BINSEARCH_
++
++/*
++ * if EXT_DEBUG is defined, the 'extdebug' mount option enables verbose
++ * tracing; ext_debug() is one statement, the caller supplies the ';'
++ */
++#define EXT_DEBUG
++#ifdef EXT_DEBUG
++#define ext_debug(tree,fmt,a...) 			\
++do {							\
++	if (test_opt((tree)->inode->i_sb, EXTDEBUG))	\
++		printk(fmt, ##a);			\
++} while (0)
++#else
++#define ext_debug(tree,fmt,a...)
++#endif
++
++/*
++ * if EXT_STATS is defined then stats numbers are collected
++ * these numbers will be displayed at umount time
++ */
++#define EXT_STATS_
++
++
++#define EXT3_ALLOC_NEEDED	2	/* block bitmap + group descriptor */
++
++/*
++ * ext3_inode has i_block array (total 60 bytes)
++ * it starts with struct ext3_extent_header (8 bytes), which stores:
++ *  - tree depth (0 mean there is no tree yet. all extents in the inode)
++ *  - number of alive extents in the inode
++ */
++
++/*
++ * this is extent on-disk structure
++ * it's used at the bottom of the tree
++ */
++struct ext3_extent {
++	__u32	e_block;	/* first logical block extent covers */
++	__u32	e_start;	/* first physical block extent lives at */
++	__u32	e_num;		/* number of blocks covered by extent */
++};
++
++/*
++ * this is index on-disk structure
++ * it's used at all the levels, but the bottom
++ */
++struct ext3_extent_idx {
++	__u32	e_block;	/* index covers logical blocks from 'block' */
++	__u32	e_leaf;		/* pointer to the physical block of the next *
++				 * level. leaf or next index could be here */
++};
++
++/*
++ * each block (leaves and indexes), even inode-stored has header
++ */
++struct ext3_extent_header {
++	__u16	e_magic;	/* probably will support different formats */
++	__u16	e_num;		/* number of valid entries */
++	__u16	e_max;		/* capacity of store in entries */
++	__u16	e_depth;	/* has tree real underlying blocks? */
++};
++
++#define EXT3_EXT_MAGIC		0xf301
++
++/*
++ * array of ext3_ext_path contains path to some extent
++ * creation/lookup routines use it for traversal/splitting/etc
++ * truncate uses it to simulate recursive walking
++ */
++struct ext3_ext_path {
++	__u32				p_block;
++	__u16				p_depth;
++	struct ext3_extent		*p_ext;
++	struct ext3_extent_idx		*p_idx;
++	struct ext3_extent_header	*p_hdr;
++	struct buffer_head		*p_bh;
++};
++
++/*
++ * structure for external API
++ */
++
++
++/*
++ * ext3_extents_tree is used to pass initial information
++ * to top-level extents API
++ */
++struct ext3_extents_tree {
++	struct inode *inode;	/* inode which tree belongs to */
++	void *root;		/* ptr to data top of tree resides at */
++	void *buffer;		/* will be passed as arg to routines below */
++	int buffer_len;
++	void *private;
++	struct ext3_extent *cex;/* last found extent */
++	int (*get_write_access)(handle_t *h, void *buffer);
++	int (*mark_buffer_dirty)(handle_t *h, void *buffer);
++	int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2);
++	int (*remove_extent_credits)(struct ext3_extents_tree *,
++					struct ext3_extent *, unsigned long,
++					unsigned long);
++	int (*remove_extent)(struct ext3_extents_tree *,
++				struct ext3_extent *, unsigned long,
++				unsigned long);
++	int (*new_block)(handle_t *, struct ext3_extents_tree *,
++				struct ext3_ext_path *, struct ext3_extent *,
++				int *);
++};
++
++/*
++ * to be called by ext3_ext_walk_space()
++ * negative retcode - error
++ * positive retcode - signal for ext3_ext_walk_space(), see below
++ * callback must return valid extent (passed or newly created)
++ */
++typedef int (*ext_prepare_callback)(struct ext3_extents_tree *,
++					struct ext3_ext_path *,
++					struct ext3_extent *, int);
++
++#define EXT_CONTINUE	0
++#define EXT_BREAK	1
++
++
++#define EXT_FIRST_EXTENT(__hdr__) \
++	((struct ext3_extent *) (((char *) (__hdr__)) +		\
++				 sizeof(struct ext3_extent_header)))
++#define EXT_FIRST_INDEX(__hdr__) \
++	((struct ext3_extent_idx *) (((char *) (__hdr__)) +	\
++				     sizeof(struct ext3_extent_header)))
++#define EXT_HAS_FREE_INDEX(__path__) \
++	((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max)
++#define EXT_LAST_EXTENT(__hdr__) \
++	(EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1)
++#define EXT_LAST_INDEX(__hdr__) \
++	(EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1)
++#define EXT_MAX_EXTENT(__hdr__) \
++	(EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1)
++#define EXT_MAX_INDEX(__hdr__) \
++	(EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_max - 1)
++
++#define EXT_ROOT_HDR(tree) \
++	((struct ext3_extent_header *) (tree)->root)
++#define EXT_BLOCK_HDR(bh) \
++	((struct ext3_extent_header *) (bh)->b_data)
++#define EXT_DEPTH(_t_)	\
++	(((struct ext3_extent_header *)((_t_)->root))->e_depth)
++
++/* one statement, safe in unbraced if/else; the caller supplies the ';' */
++#define EXT_ASSERT(__x__) do { if (!(__x__)) BUG(); } while (0)
++
++
++/*
++ * this structure is used to gather extents from the tree via ioctl
++ */
++struct ext3_extent_buf {
++	unsigned long start;
++	int buflen;
++	void *buffer;
++	void *cur;
++	int err;
++};
++
++/*
++ * this structure is used to collect stats info about the tree
++ */
++struct ext3_extent_tree_stats {
++	int depth;
++	int extents_num;
++	int leaf_num;
++};
++
++extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *);
++extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *);
++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *);
++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback);
++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long);
++extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *);
++
++#endif /* _LINUX_EXT3_EXTENTS */
++
+Index: linux-2.4.21-suse2/include/linux/ext3_fs_i.h
+=================================================================== +--- linux-2.4.21-suse2.orig/include/linux/ext3_fs_i.h 2004-01-23 19:04:17.000000000 +0300 ++++ linux-2.4.21-suse2/include/linux/ext3_fs_i.h 2004-01-24 20:10:25.000000000 +0300 +@@ -90,6 +90,8 @@ + * by other means, so we have truncate_sem. + */ + struct rw_semaphore truncate_sem; ++ ++ __u32 i_cached_extent[3]; + }; + + #endif /* _LINUX_EXT3_FS_I */