From 86dcbf50773b4429e2710f709fa0fb9ebb6dcd3d Mon Sep 17 00:00:00 2001 From: wangdi <wangdi> Date: Sun, 4 Jan 2004 07:22:40 +0000 Subject: [PATCH] add snapfs to cvs --- lustre/snapfs/cache.c | 145 +++++ lustre/snapfs/clonefs.c | 596 ++++++++++++++++++ lustre/snapfs/dcache.c | 56 ++ lustre/snapfs/dir.c | 777 ++++++++++++++++++++++++ lustre/snapfs/dotsnap.c | 208 +++++++ lustre/snapfs/file.c | 259 ++++++++ lustre/snapfs/filter.c | 433 ++++++++++++++ lustre/snapfs/inode.c | 211 +++++++ lustre/snapfs/journal_ext3.c | 84 +++ lustre/snapfs/psdev.c | 180 ++++++ lustre/snapfs/snap.c | 278 +++++++++ lustre/snapfs/snaptable.c | 1099 ++++++++++++++++++++++++++++++++++ lustre/snapfs/super.c | 714 ++++++++++++++++++++++ lustre/snapfs/symlink.c | 213 +++++++ lustre/snapfs/sysctl.c | 110 ++++ 15 files changed, 5363 insertions(+) create mode 100644 lustre/snapfs/cache.c create mode 100644 lustre/snapfs/clonefs.c create mode 100644 lustre/snapfs/dcache.c create mode 100644 lustre/snapfs/dir.c create mode 100644 lustre/snapfs/dotsnap.c create mode 100644 lustre/snapfs/file.c create mode 100644 lustre/snapfs/filter.c create mode 100644 lustre/snapfs/inode.c create mode 100644 lustre/snapfs/journal_ext3.c create mode 100644 lustre/snapfs/psdev.c create mode 100644 lustre/snapfs/snap.c create mode 100644 lustre/snapfs/snaptable.c create mode 100644 lustre/snapfs/super.c create mode 100644 lustre/snapfs/symlink.c create mode 100644 lustre/snapfs/sysctl.c diff --git a/lustre/snapfs/cache.c b/lustre/snapfs/cache.c new file mode 100644 index 0000000000..d0c8f1bb49 --- /dev/null +++ b/lustre/snapfs/cache.c @@ -0,0 +1,145 @@ +/* + * + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. 
+ *
+ *
+ */
+
+
+#include <stdarg.h>
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/*
+ * XXX - Not sure for snapfs that the cache functions are even needed.
+ * Can't all lookups be done by an inode->superblock->u.generic_sbp
+ * lookup?
+ */
+
+/*
+   This file contains the routines associated with managing a
+   cache of files.  These caches need to be found
+   fast so they are hashed by the device, with an attempt to have
+   collision chains of length 1.
+*/
+
+/* the intent of this hash is to have collision chains of length 1 */
+#define CACHES_BITS 8
+#define CACHES_SIZE (1 << CACHES_BITS)
+/* parenthesized so the mask stays intact wherever the macro is expanded */
+#define CACHES_MASK (CACHES_SIZE - 1)
+static struct list_head snap_caches[CACHES_SIZE];
+
+/*
+ * Bucket index for a device: the low nibble of the device number plus
+ * the nibble in bits 8-11, masked to the table size.
+ */
+static inline int snap_cache_hash(kdev_t dev)
+{
+	return (CACHES_MASK) & ((0x000F & (dev)) + ((0x0F00 & (dev)) >>8));
+}
+
+/* link 'cache' into the bucket for 'dev' and record the device in it */
+inline void snap_cache_add(struct snap_cache *cache, kdev_t dev)
+{
+	list_add(&cache->cache_chain,
+		 &snap_caches[snap_cache_hash(dev)]);
+	cache->cache_dev = dev;
+}
+
+/* make every hash bucket an empty list; must run before any add/find */
+inline void snap_init_cache_hash(void)
+{
+	int i;
+	for ( i = 0; i < CACHES_SIZE; i++ ) {
+		INIT_LIST_HEAD(&snap_caches[i]);
+	}
+}
+
+/*
+ * map a device to a cache
+ *
+ * Returns the snap_cache registered for 'dev', or NULL when the bucket
+ * holds no entry for that device.
+ */
+struct snap_cache *snap_find_cache(kdev_t dev)
+{
+	struct snap_cache *cache;
+	struct list_head *lh, *tmp;
+
+	lh = &(snap_caches[snap_cache_hash(dev)]);
+	/*
+	 * Walk the whole collision chain.  The cursor has to advance from
+	 * the current entry; re-reading lh->next on every pass would look
+	 * at the first entry forever and never terminate as soon as that
+	 * entry belongs to a different device.
+	 */
+	for ( tmp = lh->next; tmp != lh; tmp = tmp->next ) {
+		cache = list_entry(tmp, struct snap_cache, cache_chain);
+		if ( cache->cache_dev == dev )
+			return cache;
+	}
+	return NULL;
+}
+
+
+/* map an inode to a cache; warns and returns NULL when there is none */
+struct snap_cache *snap_get_cache(struct inode *inode)
+{
+	struct snap_cache *cache;
+
+	/* find the correct snap_cache here, based on the device */
+	cache = snap_find_cache(inode->i_dev);
+	if ( !cache ) {
+		printk("WARNING: no cache for dev %d, ino %ld\n",
+		       inode->i_dev, inode->i_ino);
+		return NULL;
+	}
+
+	return cache;
+}
+
+
+/*
+ * another debugging routine: returns non-zero when a snap cache is
+ * registered for the inode's device, 0 for a NULL inode or no cache
+ */
+int snap_ispresto(struct inode *inode)
+{
+	struct snap_cache *cache;
+
+	if ( !inode )
+		return 0;
+	cache = snap_get_cache(inode);
+	if ( !cache )
+		return 0;
+	return (inode->i_dev == cache->cache_dev);
+}
+
+/*
+ * setup a cache structure when we need one
+ *
+ * Returns a zeroed snap_cache with both list heads initialised, or
+ * NULL when the allocation failed.
+ */
+struct snap_cache *snap_init_cache(void)
+{
+	struct snap_cache *cache;
+
+	/* make a snap_cache structure for the hash */
+	SNAP_ALLOC(cache, struct snap_cache *, sizeof(struct snap_cache));
+	if ( cache ) {
+		memset(cache, 0, sizeof(struct snap_cache));
+		INIT_LIST_HEAD(&cache->cache_chain);
+		INIT_LIST_HEAD(&cache->cache_clone_list);
+	}
+	return cache;
+}
+
+
+/*
+ * free a cache structure; only the structure itself is released here,
+ * nothing it points to.  A NULL pointer is ignored.
+ */
+inline void snap_free_cache(struct snap_cache *cache)
+{
+	if (!cache)
+		return;
+
+
+	SNAP_FREE(cache, sizeof(struct snap_cache));
+}
+
diff --git a/lustre/snapfs/clonefs.c b/lustre/snapfs/clonefs.c
new file mode 100644
index 0000000000..1ef8078076
--- /dev/null
+++ b/lustre/snapfs/clonefs.c
@@ -0,0 +1,596 @@
+/*
+ * Super block/filesystem wide operations
+ *
+ * Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and
+ * Michael Callahan <callahan@maths.ox.ac.uk>
+ *
+ * Rewritten for Linux 2.1.
Peter Braam <braam@cs.cmu.edu> + * Copyright (C) Carnegie Mellon University + * + * Copyright (C) 2000, Mountain View Data, Inc, authors + * Peter Braam <braam@mountainviewdata.com>, + * Harrison Xing <harrisonx@mountainviewdata.com> + * + */ + +#define __NO_VERSION__ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/unistd.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <linux/string.h> +#include <asm/uaccess.h> +#include <linux/malloc.h> +#include <linux/vmalloc.h> +#include <asm/segment.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +/* Clone is a simple file system, read only that just follows redirectors + we have placed the entire implementation except clone_read_super in + this file + */ + +struct inode_operations clonefs_dir_inode_operations; +struct inode_operations clonefs_file_inode_operations; +struct inode_operations clonefs_symlink_inode_operations; +struct inode_operations clonefs_special_inode_operations; +struct file_operations clonefs_dir_file_operations; +struct file_operations clonefs_file_file_operations; +struct file_operations clonefs_special_file_operations; + +/* support routines for following redirectors */ + +/* Parameter is clonefs inode, 'inode', and typically this may be + called before read_inode has completed on this clonefs inode, + i.e. we may only assume that i_ino is valid. + + We return an underlying (likely disk) fs inode. This involved + handling any redirector inodes found along the way. + + This function is used by all clone fs interface functions to get an + underlying fs inode. 
+*/
+
+struct inode *clonefs_get_inode(struct inode *inode)
+{
+	struct snap_clone_info *clone_sb;
+	struct inode *cache_inode, *redirected_inode;
+
+	ENTRY;
+
+	/* this only works if snapfs_current does NOT overwrite read_inode */
+	clone_sb = (struct snap_clone_info *) &inode->i_sb->u.generic_sbp;
+
+	/* basic invariant: clone and current ino's are equal */
+	cache_inode = iget(clone_sb->clone_cache->cache_sb, inode->i_ino);
+	if (!cache_inode) {
+		/*
+		 * Nothing to redirect from; every caller in this file
+		 * already treats a NULL return as failure.
+		 */
+		EXIT;
+		return NULL;
+	}
+
+	redirected_inode = snap_redirect(cache_inode, inode->i_sb);
+
+	CDEBUG(D_SNAP, "redirected_inode: %lx, cache_inode %lx\n",
+	       (ulong) redirected_inode, (ulong) cache_inode);
+
+	CDEBUG(D_SNAP, "cache_inode: %lx, ino %ld, sb %lx, count %d\n",
+	       (ulong) cache_inode, cache_inode->i_ino,
+	       (ulong) cache_inode->i_sb, cache_inode->i_count);
+
+	/* drop the reference taken by iget() above; the caller owns the
+	   reference on the inode returned by snap_redirect() */
+	iput(cache_inode);
+	EXIT;
+	return redirected_inode;
+}
+
+
+/* super operations */
+
+/*
+ * Fill in a clonefs inode from the underlying inode it redirects to.
+ * The clone inode is marked bad when no underlying inode can be found.
+ */
+static void clonefs_read_inode(struct inode *inode)
+{
+	struct inode *cache_inode;
+
+	ENTRY;
+
+	CDEBUG(D_SNAP, "inode: %lx, ino %ld, sb %lx, count %d\n",
+	       (ulong) inode , inode->i_ino, (long) inode->i_sb,
+	       inode->i_count);
+
+	/* redirecting inode in the cache */
+	cache_inode = clonefs_get_inode(inode);
+	if (!cache_inode) {
+		make_bad_inode(inode);
+		EXIT;
+		return;
+	}
+	/* copy attrs of that inode to our clone inode */
+	snapfs_cpy_attrs(inode, cache_inode);
+
+	if (S_ISREG(inode->i_mode))
+		inode->i_op = &clonefs_file_inode_operations;
+	else if (S_ISDIR(inode->i_mode))
+		inode->i_op = &clonefs_dir_inode_operations;
+	else if (S_ISLNK(inode->i_mode))
+		inode->i_op = &clonefs_symlink_inode_operations;
+	else if (S_ISCHR(inode->i_mode))
+		inode->i_op = &chrdev_inode_operations;
+	else if (S_ISBLK(inode->i_mode))
+		inode->i_op = &blkdev_inode_operations;
+	else if (S_ISFIFO(inode->i_mode))
+		init_fifo(inode);
+
+	/*
+	 * Log while we still hold our reference: cache_inode must not be
+	 * touched once it has been put.  The count printed therefore
+	 * still includes this function's reference.
+	 */
+	CDEBUG(D_SNAP, "cache_inode: %lx ino %ld, sb %lx, count %d\n",
+	       (ulong) cache_inode, cache_inode->i_ino,
+	       (ulong) cache_inode->i_sb, cache_inode->i_count);
+
+	iput(cache_inode);
+	EXIT;
+	return;
+}
+
+
+/*
+ * Tear down a clone super block: drop the reference on the root dentry
+ * of the underlying cache super block and unlink this clone from its
+ * clone list.
+ */
+static void clonefs_put_super(struct super_block *sb)
+{
+	struct snap_clone_info *clone_sb;
+
+	ENTRY;
+	CDEBUG(D_SUPER, "sb %lx, &sb->u.generic_sbp: %lx\n",
+	       (ulong) sb, (ulong) &sb->u.generic_sbp);
+	clone_sb = (struct snap_clone_info *)&sb->u.generic_sbp;
+	dput( clone_sb->clone_cache->cache_sb->s_root );
+	list_del(&clone_sb->clone_list_entry);
+
+	MOD_DEC_USE_COUNT;
+
+	EXIT;
+}
+
+/*
+ * statfs for a clone: forwarded to the statfs method of the underlying
+ * cache file system.  Returns -EINVAL when the clone has no cache.
+ */
+static int clonefs_statfs(struct super_block *sb, struct statfs *buf,
+			  int bufsiz)
+{
+	struct snap_clone_info *clone_sb;
+	struct snap_cache *cache;
+
+	ENTRY;
+	clone_sb = (struct snap_clone_info *)&sb->u.generic_sbp;
+
+	cache = clone_sb->clone_cache;
+	if (!cache) {
+		printk("clone_statfs: no cache\n");
+		/* keep ENTRY/EXIT tracing balanced on the error path too */
+		EXIT;
+		return -EINVAL;
+	}
+
+	EXIT;
+	return cache->cache_filter->o_caops.cache_sops->statfs
+		(cache->cache_sb, buf, bufsiz);
+}
+
+/* positional initializer: clonefs is read only, so only read_inode,
+   put_super and statfs are provided */
+struct super_operations clone_super_ops =
+{
+	clonefs_read_inode,	/* read_inode */
+	NULL,			/* write_inode */
+	NULL,			/* put_inode */
+	NULL,			/* delete_inode */
+	NULL,			/* notify_change */
+	clonefs_put_super,	/* put_super */
+	NULL,			/* write_super */
+	clonefs_statfs,		/* statfs */
+	NULL			/* remount_fs */
+};
+
+
+/* ***************** end of clonefs super ops *******************  */
+/* ***************** begin clonefs dir ops *******************  */
+
+/* unhash a dentry and drop our reference to it */
+static void d_unalloc(struct dentry *dentry)
+{
+
+	list_del(&dentry->d_hash);
+	INIT_LIST_HEAD(&dentry->d_hash);
+	dput(dentry); /* this will free the dentry memory */
+}
+
+/*
+ * Return the underlying fs dentry with name in 'dentry' that points
+ * to the right inode. 'dir' is the clone fs directory to search for
+ * the 'dentry'.
+ */
+struct dentry *clonefs_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *cache_dir;
+	struct dentry *cache_dentry;
+	struct inode *cache_inode;
+	struct dentry *result;
+	struct inode *inode;
+
+	ENTRY;
+
+	cache_dir = clonefs_get_inode(dir);
+	if (!cache_dir) {
+		/* no underlying directory to search; it would otherwise
+		   be dereferenced when taking i_sem below */
+		EXIT;
+		return ERR_PTR(-ENOENT);
+	}
+
+	cache_dentry = d_alloc(dentry->d_parent, &dentry->d_name);
+	if (!cache_dentry) {
+		iput(cache_dir);
+		EXIT;
+		return ERR_PTR(-ENOENT);
+	}
+
+	/* Lock cache directory inode. */
+	down(&cache_dir->i_sem);
+	/*
+	 * Call underlying fs lookup function to set the 'd_inode' pointer
+	 * to the corresponding directory inode.
+	 *
+	 * Note: If the lookup function does not return NULL, return
+	 * from 'clone_lookup' with an error.
+	 */
+	result = cache_dir->i_op->lookup(cache_dir, cache_dentry);
+	if (result) {
+		dput(cache_dentry);
+		up(&cache_dir->i_sem);
+		iput(cache_dir);
+		dentry->d_inode = NULL;
+		EXIT;
+		return ERR_PTR(-ENOENT);
+	}
+	/* Unlock cache directory inode. */
+	up(&cache_dir->i_sem);
+
+	/*
+	 * If there is no inode pointer in the underlying fs 'cache_dentry'
+	 * then the directory doesn't have an entry with this name.  In fs/ext2
+	 * we see that we return 0 and put dentry->d_inode = NULL;
+	 */
+	cache_inode = cache_dentry->d_inode;
+	if ( cache_inode == NULL ) {
+		inode = NULL;
+	} else {
+		/* note, iget below will follow a redirector, since
+		   it calls into clone_read_inode
+		*/
+		inode = iget(dir->i_sb, cache_inode->i_ino);
+	}
+
+	/* dput(cache_dentry) will not put the dentry away
+	 * immediately, unless we first arrange that its hash list is
+	 * empty.
+	 */
+
+	if ( cache_inode != NULL ) {
+		CDEBUG(D_INODE, "cache ino %ld, count %d, dir %ld, count %d\n",
+		       cache_inode->i_ino, cache_inode->i_count, cache_dir->i_ino,
+		       cache_dir->i_count);
+	}
+
+	d_unalloc(cache_dentry);
+	iput(cache_dir);
+
+	/*
+	 * Add 'inode' to the directory entry 'dentry'.
+	 */
+	d_add(dentry, inode);
+
+	EXIT;
+	return NULL;
+}
+
+
+/*
+ * instantiate a file handle to the cache file
+ *
+ * Copies position, mode, flags, count and owner from the clone file
+ * into the caller-provided 'cache_file', points it at the cache
+ * inode's default file operations and attaches 'cache_dentry' (whose
+ * d_inode is set to 'cache_inode').  'i' is unused.
+ */
+static void clonefs_prepare_snapfile(struct inode *i,
+				     struct file *clone_file,
+				     struct inode *cache_inode,
+				     struct file *cache_file,
+				     struct dentry *cache_dentry)
+{
+	ENTRY;
+	cache_file->f_pos = clone_file->f_pos;
+	cache_file->f_mode = clone_file->f_mode;
+	cache_file->f_flags = clone_file->f_flags;
+	cache_file->f_count = clone_file->f_count;
+	cache_file->f_owner = clone_file->f_owner;
+	cache_file->f_op = cache_inode->i_op->default_file_ops;
+	cache_file->f_dentry = cache_dentry;
+	cache_file->f_dentry->d_inode = cache_inode;
+	EXIT;
+	return ;
+}
+
+/*
+ * update the clonefs file struct after IO in cache file
+ *
+ * Every caller passes (clone inode, clone file, cache inode, cache
+ * file); the parameter names now say so.  The file position and the
+ * size are copied from the cache side back to the clone side, exactly
+ * as before.
+ */
+static void clonefs_restore_snapfile(struct inode *clone_inode,
+				     struct file *clone_file,
+				     struct inode *cache_inode,
+				     struct file *cache_file)
+{
+	ENTRY;
+	clone_file->f_pos = cache_file->f_pos;
+	clone_inode->i_size = cache_inode->i_size;
+	EXIT;
+	return;
+}
+
+/*
+ * readdir on a clone directory: run the underlying directory's readdir
+ * on a temporary file that stands in for the clone file.
+ *
+ * Returns the underlying readdir result, -EINVAL when the file has no
+ * inode, -ENOMEM when no underlying inode is found and -ENOENT when
+ * the underlying inode has no readdir method.
+ */
+static int clonefs_readdir(struct file *file, void *dirent,
+			   filldir_t filldir)
+{
+	int result;
+	struct inode *cache_inode;
+	struct file open_file;
+	struct dentry open_dentry;
+	struct inode *inode=file->f_dentry->d_inode;
+
+	ENTRY;
+
+	if(!inode) {
+		EXIT;
+		return -EINVAL;
+	}
+	cache_inode = clonefs_get_inode(inode);
+
+	if (!cache_inode) {
+		make_bad_inode(inode);
+		EXIT;
+		return -ENOMEM;
+	}
+
+	CDEBUG(D_INODE,"clone ino %ld\n",cache_inode->i_ino);
+
+	clonefs_prepare_snapfile(inode, file, cache_inode, &open_file,
+				 &open_dentry);
+	/* potemkin case: we are handed a directory inode */
+	result = -ENOENT;
+	/* default_file_ops may be NULL, so test f_op before its method */
+	if (open_file.f_op && open_file.f_op->readdir) {
+		down(&cache_inode->i_sem);
+		result = open_file.f_op->readdir(&open_file, dirent, filldir);
+		up(&cache_inode->i_sem);
+	}
+	clonefs_restore_snapfile(inode, file, cache_inode, &open_file);
+	iput(cache_inode);
+	EXIT;
+	return result;
+}
+
+struct file_operations clonefs_dir_file_operations = {
+	NULL,			/* lseek */
+	NULL,			/* read -- bad */
+	NULL,			/* write */
+	clonefs_readdir,	/* readdir */
+	NULL,			/* select */
+	NULL,			/* ioctl */
+	NULL,			/* mmap */
+	NULL,			/* open */
+	NULL,
+	NULL,			/* release */
+	NULL,			/* fsync */
+	NULL,
+	NULL,
+	NULL
+};
+
+struct inode_operations clonefs_dir_inode_operations =
+{
+	&clonefs_dir_file_operations,
+	NULL,			/* create */
+	clonefs_lookup,		/* lookup */
+	NULL,			/* link */
+	NULL,			/* unlink */
+	NULL,			/* symlink */
+	NULL,			/* mkdir */
+	NULL,			/* rmdir */
+	NULL,			/* mknod */
+	NULL,			/* rename */
+	NULL,			/* readlink */
+	NULL,			/* follow_link */
+	NULL,			/* readpage */
+	NULL,			/* writepage */
+	NULL,			/* bmap */
+	NULL,			/* truncate */
+	NULL,			/* permission */
+	NULL,			/* smap */
+	NULL,			/* update page */
+	NULL,			/* revalidate */
+};
+
+
+/* ***************** end of clonefs dir ops *******************  */
+/* ***************** begin clonefs file ops *******************  */
+
+/*
+ * readpage on a clone file: forwarded to the readpage method of the
+ * underlying inode, whose return value is handed back to the caller.
+ * Returns -ENOMEM when no underlying inode is found.
+ */
+int clonefs_readpage(struct file *file, struct page *page)
+{
+	int result = 0;
+	struct inode *cache_inode;
+	struct file open_file;
+	struct dentry open_dentry;
+	struct inode *inode;
+
+	ENTRY;
+
+	inode = file->f_dentry->d_inode;
+	cache_inode = clonefs_get_inode(file->f_dentry->d_inode);
+	if (!cache_inode) {
+		make_bad_inode(file->f_dentry->d_inode);
+		EXIT;
+		return -ENOMEM;
+	}
+
+	clonefs_prepare_snapfile(inode, file, cache_inode, &open_file,
+				 &open_dentry);
+	/* tell currentfs_readpage the primary inode number */
+	open_dentry.d_fsdata = (void*)inode->i_ino;
+
+	/* potemkin case: we are handed a directory inode */
+	down(&cache_inode->i_sem);
+	/* XXX - readpage NULL on directories... */
+	if (cache_inode->i_op->readpage == NULL)
+		/* NOTE(review): this path still reports success although
+		   nothing was read -- confirm the intended error code */
+		printk("Yes, Grigori, directories are a problem.\n");
+	else
+		/* do not drop I/O errors from the underlying fs */
+		result = cache_inode->i_op->readpage(&open_file, page);
+	up(&cache_inode->i_sem);
+	clonefs_restore_snapfile(inode, file, cache_inode, &open_file);
+	iput(cache_inode);
+	EXIT;
+	return result;
+}
+
+
+struct file_operations clonefs_file_file_operations = {
+	NULL,			/* lseek */
+	generic_file_read,	/* read -- bad */
+	NULL,			/* write */
+	NULL,			/* readdir */
+	NULL,			/* select */
+	NULL,			/* ioctl */
+	generic_file_mmap,	/* mmap */
+	NULL,			/* open */
+	NULL,
+	NULL,			/* release */
+	NULL,			/* fsync */
+	NULL,
+	NULL,
+	NULL
+};
+
+struct inode_operations clonefs_file_inode_operations =
+{
+	&clonefs_file_file_operations,
+	NULL,			/* create */
+	NULL,			/* lookup */
+	NULL,			/* link */
+	NULL,			/* unlink */
+	NULL,			/* symlink */
+	NULL,			/* mkdir */
+	NULL,			/* rmdir */
+	NULL,			/* mknod */
+	NULL,			/* rename */
+	NULL,			/* readlink */
+	NULL,			/* follow_link */
+	clonefs_readpage,	/* readpage */
+	NULL,			/* writepage */
+	NULL,			/* bmap */
+	NULL,			/* truncate */
+	NULL,			/* permission */
+	NULL,			/* smap */
+	NULL,			/* update page */
+	NULL,			/* revalidate */
+};
+
+
+
+/* ***************** end of clonefs file ops *******************  */
+/* ***************** begin clonefs symlink ops *******************  */
+
+/*
+ * readlink on a clone symlink: temporarily point the dentry at the
+ * underlying inode and call that inode's readlink.  Returns -ENOENT
+ * when there is no underlying inode or it has no readlink method.
+ */
+int clonefs_readlink(struct dentry *dentry, char *buf, int len)
+{
+	int res;
+	struct inode * cache_inode;
+	struct inode * old_inode;
+
+	ENTRY;
+
+	cache_inode = clonefs_get_inode(dentry->d_inode);
+
+	res = -ENOENT;
+
+	if ( ! cache_inode ) {
+		CDEBUG(D_INODE, "clonefs_get_inode failed, NULL\n");
+		EXIT;
+		return res;
+	}
+
+	/* XXX: shall we allocate a new dentry ?
+	   The following is safe for ext2, etc. because ext2_readlink only
+	   use the inode info */
+
+	/* save the old dentry inode */
+	old_inode = dentry->d_inode;
+	/* set dentry inode to cache inode */
+	dentry->d_inode = cache_inode;
+
+	if ( cache_inode->i_op->readlink ) {
+		res = cache_inode->i_op->readlink(dentry, buf, len);
+	}else {
+		CDEBUG(D_INODE,"NO readlink for ino %lu\n", cache_inode->i_ino);
+	}
+
+	/* restore the old inode */
+	dentry->d_inode = old_inode;
+
+	iput(cache_inode);
+
+	EXIT;
+	return res;
+}
+
+/*
+ * follow_link on a clone symlink: same dentry swap as clonefs_readlink,
+ * calling the underlying inode's follow_link.  Returns ERR_PTR(-ENOENT)
+ * when there is no underlying inode or it has no follow_link method.
+ */
+struct dentry * clonefs_follow_link(struct dentry * dentry,
+				    struct dentry *base,
+				    unsigned int follow)
+{
+	struct dentry * res;
+	struct inode * cache_inode;
+	struct inode * old_inode;
+
+	ENTRY;
+	res = ERR_PTR(-ENOENT);
+
+	cache_inode = clonefs_get_inode(dentry->d_inode);
+	if ( ! cache_inode ) {
+		CDEBUG(D_INODE, "clonefs_get_inode failed, NULL\n");
+		EXIT;
+		return res;
+	}
+
+	/* XXX: shall we allocate a new dentry ?
+	   The following is safe for ext2, etc. because ext2_follow_link
+	   only use the inode info */
+
+	/* save the old dentry inode */
+	old_inode = dentry->d_inode;
+	/* set dentry inode to cache inode */
+	dentry->d_inode = cache_inode;
+
+	if ( cache_inode->i_op->follow_link ) {
+		res = cache_inode->i_op->follow_link(dentry, base, follow);
+	}
+
+	/* restore the old inode */
+	dentry->d_inode = old_inode;
+
+	iput(cache_inode);
+
+	EXIT;
+	return res;
+}
+
+struct inode_operations clonefs_symlink_inode_operations =
+{
+	NULL,			/* no file operations */
+	NULL,			/* create */
+	NULL,			/* lookup */
+	NULL,			/* link */
+	NULL,			/* unlink */
+	NULL,			/* symlink */
+	NULL,			/* mkdir */
+	NULL,			/* rmdir */
+	NULL,			/* mknod */
+	NULL,			/* rename */
+	clonefs_readlink,	/* readlink */
+	clonefs_follow_link,/* follow_link */
+	NULL,			/* readpage */
+	NULL,			/* writepage */
+	NULL,			/* bmap */
+	NULL,			/* truncate */
+	NULL,			/* permission */
+	NULL,			/* smap */
+	NULL,			/* update page */
+	NULL,			/* revalidate */
+};
+
+
diff --git a/lustre/snapfs/dcache.c b/lustre/snapfs/dcache.c
new file mode
100644 index 0000000000..88ce4bb54c --- /dev/null +++ b/lustre/snapfs/dcache.c @@ -0,0 +1,56 @@ +/* + * Directory operations for SnapFS filesystem + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +/* called when a cache lookup succeeds */ + +/* XXX PJB: the intent here is to make sure that inodes which are + currently primary inodes under .snap directories are dropped when + they are COWED. It seems hard to me to get semantics that are equally + good as for mounted snap_clone file systems, but we should try to get + close +*/ +static int currentfs_dentry_revalidate(struct dentry *de, int flag) +{ +// struct inode *inode = de->d_inode; + ENTRY; + + /* unless an ancestor is a .snap directory there is nothing to do */ +#if 0 + if ( !currentfs_is_under_dotsnap(dentry) ) { + EXIT; + return 1; + } + /* XXX PJB get this to work guys! */ + if ( de->d_parent == "dotsnap inode" && + inode_is_newer_than(find_time_by_name(de->d_parent->d_name.name))){ + 1. drop this dentry + 2. make sure the VFS does a new lookup + 3. 
probably all you need to do is + return 0; + } +#else + return 1; +#endif +} + +struct dentry_operations currentfs_dentry_ops = +{ + d_revalidate: currentfs_dentry_revalidate +}; + diff --git a/lustre/snapfs/dir.c b/lustre/snapfs/dir.c new file mode 100644 index 0000000000..0b83fa147c --- /dev/null +++ b/lustre/snapfs/dir.c @@ -0,0 +1,777 @@ +/* + * dir.c + */ + +#define EXPORT_SYMTAB + + +#define __NO_VERSION__ +#include <linux/module.h> +#include <asm/uaccess.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/quotaops.h> +#include <linux/list.h> +#include <linux/file.h> +#include <asm/bitops.h> +#include <asm/byteorder.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +#ifdef CONFIG_SNAPFS_EXT3 +void ext3_orphan_del(handle_t *handle, struct inode *inode); +#endif + +static ino_t get_parent_ino(struct inode * inode) +{ + ino_t ino = 0; + struct dentry * dentry; + + if (list_empty(&inode->i_dentry)) { + printk("snapfs ERROR: no dentry for ino %lu\n", inode->i_ino); + return 0; + } + + dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias)); + + if(dentry->d_parent->d_inode) + ino = dentry->d_parent->d_inode->i_ino; + + dput(dentry); + return ino; + +} + +static void d_unadd_iput(struct dentry *dentry) +{ + list_del(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_alias); + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); + iput(dentry->d_inode); + dentry->d_inode = NULL; +} + +/* XXX check the return values */ +static struct dentry *currentfs_lookup(struct inode * dir,struct dentry *dentry) +{ + struct snap_cache *cache; + struct dentry *rc; + struct inode_operations *iops; + struct inode *cache_inode; + int index; + + ENTRY; + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return ERR_PTR(-EINVAL); + } + + if ( dentry->d_name.len == strlen(".snap") && + (memcmp(dentry->d_name.name, ".snap", 
strlen(".snap")) == 0) ) { + struct inode *snap; + ino_t ino; + + /* Don't permit .snap in clonefs */ + if( dentry->d_sb != cache->cache_sb ) + return ERR_PTR(-ENOENT); + + /* Don't permit .snap under .snap */ + if( currentfs_is_under_dotsnap(dentry) ) + return ERR_PTR(-ENOENT); + + ino = 0xF0000000 | dir->i_ino; + snap = iget(dir->i_sb, ino); + CDEBUG(D_INODE, ".snap inode ino %ld, mode %o\n", snap->i_ino, snap->i_mode); + d_add(dentry, snap); + EXIT; + return NULL; + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || !iops->lookup) { + EXIT; + return ERR_PTR(-EINVAL); + } + + rc = iops->lookup(dir, dentry); + if ( rc || !dentry->d_inode) { + EXIT; + return NULL; + } + + /* + * If we are under dotsnap, we need save extra data into + * dentry->d_fsdata: For dir, we only need _this_ snapshot's index; + * For others, save primary ino, with it we could found index later + * anyway + */ + cache_inode = dentry->d_inode; + if ( (index = currentfs_is_under_dotsnap(dentry)) ) { + struct snapshot_operations *snapops; + struct inode *ind_inode; + ino_t pri_ino, ind_ino; + + pri_ino = cache_inode->i_ino; + snapops = filter_c2csnapops(cache->cache_filter); + if( !snapops ) + goto err_out; + + ind_ino = snapops->get_indirect_ino(cache_inode, index); + if( ind_ino <=0 && ind_ino != -ENOATTR ) + goto err_out; + else if( ind_ino != -ENOATTR ){ + ind_inode = iget(cache_inode->i_sb, ind_ino); + if( !ind_inode ){ + goto err_out; + } + list_del(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_alias); + list_add(&dentry->d_alias, &ind_inode->i_dentry); + dentry->d_inode = ind_inode; + iput(cache_inode); + } + + if( S_ISDIR(dentry->d_inode->i_mode) ) + dentry->d_fsdata = (void*)index; + else + dentry->d_fsdata = (void*)pri_ino; + } + + EXIT; + return NULL; + +#if 0 + /* XXX: PJB these need to be set up again. 
See dcache.c */ + printk("set up dentry ops\n"); + CDEBUG(D_CACHE, "\n"); + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, ¤tfs_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + CDEBUG(D_CACHE, "\n"); +#endif + +err_out: + d_unadd_iput(dentry); + EXIT; + return ERR_PTR(-EINVAL); +} + +static int currentfs_create(struct inode *dir, struct dentry *dentry, int mode) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + void *handle = NULL; + + ENTRY; + + if (currentfs_is_under_dotsnap(dentry)) { + EXIT; + return -EPERM; + } + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + handle = snap_trans_start(cache, dir, SNAP_OP_CREATE); + + if ( snap_needs_cow(dir) != -1 ) { + printk("snap_needs_cow for ino %lu \n",dir->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_CREATE, 1); + snap_do_cow(dir, get_parent_ino(dir), 0); + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || + !iops->create) { + rc = -EINVAL; + goto exit; + } + snap_debug_device_fail(dir->i_dev, SNAP_OP_CREATE, 2); + rc = iops->create(dir, dentry, mode); + + /* XXX now set the correct snap_{file,dir,sym}_iops */ + if ( ! 
dentry->d_inode) { + printk("Error in currentfs_create, dentry->d_inode is NULL\n"); + goto exit; + } + + if ( S_ISDIR(dentry->d_inode->i_mode) ) + dentry->d_inode->i_op = filter_c2udiops(cache->cache_filter); + else if ( S_ISREG(dentry->d_inode->i_mode) ) { + if ( !filter_c2cfiops(cache->cache_filter) ) { + filter_setup_file_ops(cache->cache_filter, + dentry->d_inode->i_op, ¤tfs_file_iops); + } + dentry->d_inode->i_op = filter_c2ufiops(cache->cache_filter); + } + printk("inode %lu, i_op %p\n", dentry->d_inode->i_ino, dentry->d_inode->i_op); + + snap_debug_device_fail(dir->i_dev, SNAP_OP_CREATE, 3); + +exit: + snap_trans_commit(cache, handle); + EXIT; + return rc; +} + +static int currentfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + void *handle = NULL; + + ENTRY; + + if (currentfs_is_under_dotsnap(dentry)) { + EXIT; + return -EPERM; + } + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + handle = snap_trans_start(cache, dir, SNAP_OP_MKDIR); + + if ( snap_needs_cow(dir) != -1 ) { + CDEBUG(D_FILE, "snap_needs_cow for ino %lu \n",dir->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_MKDIR, 1); + snap_do_cow(dir, get_parent_ino(dir), 0); + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || + !iops->mkdir) { + rc = -EINVAL; + goto exit; + } + + snap_debug_device_fail(dir->i_dev, SNAP_OP_MKDIR, 2); + rc = iops->mkdir(dir, dentry, mode); + + if ( rc ) + goto exit; + + /* XXX now set the correct snap_{file,dir,sym}_iops */ + if ( dentry->d_inode) { + dentry->d_inode->i_op = filter_c2udiops(cache->cache_filter); + printk("inode %lu, i_op %p\n", dentry->d_inode->i_ino, dentry->d_inode->i_op); + } else { + printk("Error in currentfs_mkdir, dentry->d_inode is NULL\n"); + } + + snap_debug_device_fail(dir->i_dev, SNAP_OP_MKDIR, 3); + +exit: + snap_trans_commit(cache, handle); + EXIT; + return rc; +} + +static int currentfs_link 
(struct dentry * old_dentry, struct inode * dir, + struct dentry *dentry) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + void *handle = NULL; + + ENTRY; + + if (currentfs_is_under_dotsnap(dentry)) { + EXIT; + return -EPERM; + } + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + handle = snap_trans_start(cache, dir, SNAP_OP_LINK); + + if ( snap_needs_cow(dir) != -1 ) { + printk("snap_needs_cow for ino %lu \n",dir->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 1); + snap_do_cow(dir, get_parent_ino(dir), 0); + } + if ( snap_needs_cow(old_dentry->d_inode) != -1 ) { + printk("snap_needs_cow for ino %lu \n",old_dentry->d_inode->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 2); + snap_do_cow(old_dentry->d_inode, dir->i_ino, 0); + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || + !iops->link) { + rc = -EINVAL; + goto exit; + } + snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 2); + rc = iops->link(old_dentry,dir, dentry); + snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 3); + +exit: + snap_trans_commit(cache, handle); + EXIT; + return rc; +} + +static int currentfs_symlink(struct inode *dir, struct dentry *dentry, + const char * symname) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + void *handle = NULL; + + ENTRY; + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + handle = snap_trans_start(cache, dir, SNAP_OP_SYMLINK); + + if ( snap_needs_cow(dir) != -1 ) { + printk("snap_needs_cow for ino %lu \n",dir->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_SYMLINK, 1); + snap_do_cow(dir, get_parent_ino(dir), 0); + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || + !iops->symlink) { + rc = -EINVAL; + goto exit; + } + snap_debug_device_fail(dir->i_dev, SNAP_OP_SYMLINK, 2); + rc = iops->symlink(dir, dentry, symname); + snap_debug_device_fail(dir->i_dev, SNAP_OP_SYMLINK, 3); 
+ +exit: + snap_trans_commit(cache, handle); + EXIT; + return rc; +} + +static int currentfs_mknod(struct inode *dir, struct dentry *dentry, int mode, + int rdev) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + void *handle = NULL; + + ENTRY; + + if (currentfs_is_under_dotsnap(dentry)) { + EXIT; + return -EPERM; + } + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + handle = snap_trans_start(cache, dir, SNAP_OP_MKNOD); + + if ( snap_needs_cow(dir) != -1 ) { + printk("snap_needs_cow for ino %lu \n",dir->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_MKNOD, 1); + snap_do_cow(dir, get_parent_ino(dir), 0); + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || + !iops->mknod) { + rc = -EINVAL; + goto exit; + } + snap_debug_device_fail(dir->i_dev, SNAP_OP_MKNOD, 2); + rc = iops->mknod(dir, dentry, mode, rdev); + snap_debug_device_fail(dir->i_dev, SNAP_OP_MKNOD, 3); + + /* XXX do we need to set the correct snap_{*}_iops */ + +exit: + snap_trans_commit(cache, handle); + EXIT; + return rc; +} + +static int currentfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + struct inode *inode = NULL; +// time_t i_ctime = 0; + nlink_t i_nlink = 0; + off_t i_size = 0; + ino_t ino = 0; + int keep_inode = 0; +// struct dentry_operations *save_dop = NULL; + void *handle = NULL; + + ENTRY; + + if (currentfs_is_under_dotsnap(dentry)) { + EXIT; + return -EPERM; + } + + cache = snap_find_cache(dir->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + handle = snap_trans_start(cache, dir, SNAP_OP_RMDIR); + + if ( snap_needs_cow(dir) != -1 ) { + printk("snap_needs_cow for ino %lu \n",dir->i_ino); + snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 1); + snap_do_cow(dir, get_parent_ino(dir), 0); + } + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || + !iops->rmdir) { + rc = -EINVAL; + goto exit; + } + + /* XXX : 
there are two cases that we can't remove this inode from disk.
+	   1. the inode needs to be cowed.
+	   2. the inode is a redirector.
+	   then we must keep this inode(dir) so that the inode
+	   will not be deleted after rmdir, will only remove dentry
+	*/
+
+	if( snap_needs_cow(dentry->d_inode) != -1) {
+		snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 2);
+		snap_do_cow (dentry->d_inode, dir->i_ino,
+				SNAP_DEL_PRI_WITHOUT_IND);
+		keep_inode = 1;
+	}
+	else if( snap_is_redirector(dentry->d_inode) ) {
+		snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 3);
+		snap_do_cow(dentry->d_inode, dir->i_ino, SNAP_DEL_PRI_WITH_IND);
+		keep_inode = 1;
+	}
+#if 0
+	if ( keep_inode ) {
+		printk("set up dentry ops, before %p\n",dentry->d_op);
+		save_dop = dentry->d_op;
+
+		filter_setup_dentry_ops(cache->cache_filter,
+					dentry->d_op, &currentfs_dentry_ops);
+		dentry->d_op = filter_c2udops(cache->cache_filter);
+
+		printk("set up dentry ops, after %p\n",dentry->d_op);
+
+	}
+
+#endif
+
+	/*
+	 * Remember the fields of the victim inode that are written back
+	 * after the cached rmdir has run (see the keep_inode block below).
+	 */
+	if( keep_inode && dentry->d_inode ) {
+		ino = dentry->d_inode->i_ino;
+	//	i_ctime = dentry->d_inode->i_ctime;
+		i_nlink = dentry->d_inode->i_nlink;
+		i_size = dentry->d_inode->i_size;
+
+	}
+
+	snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 4);
+	rc = iops->rmdir(dir, dentry);
+	snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 5);
+
+	/* XXX : check this */
+#if 0
+	if ( keep_inode ) {
+		dentry->d_op = save_dop;
+		printk("restore dentry ops, now at %p\n",dentry->d_op);
+	}
+
+#endif
+
+	if( keep_inode && ino) {
+		inode = iget ( dir->i_sb, ino);
+		if( inode) {
+//			inode->i_ctime = i_ctime;
+			inode->i_nlink = i_nlink;
+			inode->i_size = i_size;
+			mark_inode_dirty(inode);
+#ifdef CONFIG_SNAPFS_EXT3
+			/*
+			 * In Ext3, rmdir() will put this inode into
+			 * orphan list, we must remove it out. It's ugly!!
+			 */
+			if( cache->cache_type == FILTER_FS_EXT3 )
+				ext3_orphan_del(handle, inode);
+#endif
+			/*
+			 * Drop the reference taken by iget() only after the
+			 * last use of the inode; it used to be released
+			 * before ext3_orphan_del() touched it.
+			 */
+			iput( inode);
+			snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 6);
+		}
+	}
+
+exit:
+	snap_trans_commit(cache, handle);
+	EXIT;
+	return rc;
+}
+
+/*
+ * unlink entry point for directories of the current (writable) filesystem.
+ *
+ * Refuses with -EPERM under .snap and with -EINVAL when there is no snap
+ * cache or no cached unlink method.  The parent directory is copied on
+ * write when needed; the victim inode may get its link count bumped so
+ * that the cached unlink only removes the directory entry.
+ */
+static int currentfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct snap_cache *cache;
+	int rc;
+	struct inode_operations *iops;
+	void *handle = NULL;
+
+	ENTRY;
+
+	if (currentfs_is_under_dotsnap(dentry)) {
+		EXIT;
+		return -EPERM;
+	}
+
+	cache = snap_find_cache(dir->i_dev);
+	if ( !cache ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	handle = snap_trans_start(cache, dir, SNAP_OP_UNLINK);
+
+	/* the parent directory is about to change: preserve it first */
+	if ( snap_needs_cow(dir) != -1 ) {
+		printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+		snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 1);
+		snap_do_cow(dir, get_parent_ino(dir), 0);
+	}
+
+	iops = filter_c2cdiops(cache->cache_filter);
+	if (!iops ||
+	    !iops->unlink) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+	/* XXX : if nlink for this inode is 1, there are two cases that we
+	   can't remove this inode from disk.
+	   1. the inode needs to be cowed.
+	   2. the inode is a redirector.
+	   then we increment dentry->d_inode->i_nlink so that the inode
+	   will not be deleted after unlink, will only remove dentry
+	*/
+
+	if( snap_needs_cow (inode) != -1) {
+		/* call snap_do_cow with DEL_WITHOUT_IND option */
+		snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 2);
+		snap_do_cow(inode, dir->i_ino,SNAP_DEL_PRI_WITHOUT_IND);
+		/* last link: bump it so the cached unlink keeps the inode */
+		if( inode->i_nlink == 1 )
+			inode->i_nlink++;
+	}
+	else if( snap_is_redirector (inode) && inode->i_nlink == 1 ) {
+		/* call snap_do_cow with DEL_WITH_IND option
+		 * just free the blocks of inode, not really delete it
+		 */
+		snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 3);
+		snap_do_cow (inode, dir->i_ino, SNAP_DEL_PRI_WITH_IND);
+		inode->i_nlink++;
+	}
+
+	snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 4);
+	rc = iops->unlink(dir, dentry);
+	snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 5);
+
+exit:
+	snap_trans_commit(cache, handle);
+	EXIT;
+	return rc;
+}
+
+/*
+ * rename entry point for directories of the current (writable) filesystem.
+ *
+ * Refuses with -EPERM when either dentry is under .snap and with -EINVAL
+ * when there is no snap cache for old_dir->i_dev or no cached rename
+ * method.  Before forwarding to the cached rename it copies-on-write
+ * old_dir, the inode being renamed and (when different) new_dir, and
+ * protects an existing target inode whose link count is 1 from being
+ * freed.  The whole sequence runs inside one snap transaction.
+ */
+static int currentfs_rename (struct inode * old_dir, struct dentry *old_dentry,
+			     struct inode * new_dir, struct dentry *new_dentry)
+{
+	struct snap_cache *cache;
+	int rc;
+	struct inode_operations *iops;
+	void *handle = NULL;
+
+	ENTRY;
+
+	if (currentfs_is_under_dotsnap(old_dentry) ||
+	    currentfs_is_under_dotsnap(new_dentry)) {
+		EXIT;
+		return -EPERM;
+	}
+
+	cache = snap_find_cache(old_dir->i_dev);
+	if ( !cache ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	handle = snap_trans_start(cache, old_dir, SNAP_OP_RENAME);
+
+	/* Always cow the old dir and old dentry->d_inode */
+	if ( snap_needs_cow(old_dir) != -1 ) {
+		printk("rename: needs_cow for old_dir %lu\n",old_dir->i_ino);
+		snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 1);
+		snap_do_cow(old_dir, get_parent_ino(old_dir), 0);
+	}
+	if( snap_needs_cow (old_dentry->d_inode) != -1) {
+		printk("rename: needs_cow for old_dentry, ino %lu\n",
+			old_dentry->d_inode->i_ino);
+		snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 2);
+		snap_do_cow(old_dentry->d_inode, old_dir->i_ino,0);
+	}
+
+	/* If it's not
in the same dir, whether the new_dentry is NULL or not,
+	 * we should cow the new_dir. Because rename will use the ino of
+	 * old_dentry as the ino of the new_dentry in new_dir.
+	 */
+	if(( old_dir != new_dir) ) {
+		if( snap_needs_cow(new_dir) !=-1 ){
+			printk("rename:snap_needs_cow for new_dir %lu\n",
+				new_dir->i_ino);
+			snap_debug_device_fail(old_dir->i_dev,SNAP_OP_RENAME,3);
+			snap_do_cow(new_dir, get_parent_ino(new_dir),0);
+		}
+	}
+
+#if 0
+	if( ( old_dir != new_dir) && ( new_dentry->d_inode )) {
+		if(snap_needs_cow(new_dentry->d_inode) !=-1 ){
+			printk("rename:needs_cow for new_entry ,ino %lu\n",
+				new_dentry->d_inode->i_ino);
+			snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 4);
+			snap_do_cow (new_dentry->d_inode,
+				new_dentry->d_parent->d_inode->i_ino, 0);
+		}
+	}
+#endif
+	/* The inode for the new_dentry will be freed for normal rename option.
+	 * But we should keep this inode since we need to keep it available
+	 * for the clone and for snap rollback
+	 */
+	if( new_dentry->d_inode && new_dentry->d_inode->i_nlink == 1 ) {
+		/*
+		 * Both branches bump i_nlink so that the cached rename only
+		 * replaces the directory entry.
+		 * NOTE(review): both branches pass step number 4 to
+		 * snap_debug_device_fail() - confirm this is intended.
+		 */
+		if( snap_needs_cow (new_dentry->d_inode) != -1) {
+			/* call snap_do_cow with DEL_WITHOUT_IND option */
+			snap_debug_device_fail(old_dir->i_dev,SNAP_OP_RENAME,4);
+			snap_do_cow(new_dentry->d_inode, new_dir->i_ino,
+				    SNAP_DEL_PRI_WITHOUT_IND);
+			new_dentry->d_inode->i_nlink++;
+		}
+		else if( snap_is_redirector (new_dentry->d_inode) ) {
+			/* call snap_do_cow with DEL_WITH_IND option
+			 * just free the blocks of inode, not really delete it
+			 */
+			snap_debug_device_fail(old_dir->i_dev,SNAP_OP_RENAME,4);
+			snap_do_cow (new_dentry->d_inode, new_dir->i_ino,
+				     SNAP_DEL_PRI_WITH_IND);
+			new_dentry->d_inode->i_nlink++;
+		}
+	}
+
+	/* the COW work above is done even if the lookup below fails */
+	iops = filter_c2cdiops(cache->cache_filter);
+	if (!iops ||
+	    !iops->rename) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+	snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 5);
+	rc = iops->rename(old_dir, old_dentry, new_dir, new_dentry);
+	snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 6);
+
+exit:
+	
snap_trans_commit(cache, handle);
+	EXIT;
+	return rc;
+}
+
+/*
+ * readdir for directories of the current filesystem.
+ *
+ * Forwards to the cached directory readdir.  When the cache is configured
+ * to show ".snap", the file belongs to the cache's own superblock and the
+ * directory is not itself under .snap, a synthetic ".snap" entry is emitted
+ * first (inode number = directory ino with the 0xF0000000 bits set).  That
+ * entry occupies strlen(".snap")+1 positions of f_pos, so f_pos is shifted
+ * down by that amount around the call to the cached readdir and shifted
+ * back up afterwards.
+ *
+ * Returns the cached readdir result, or -EINVAL on a bad file, a missing
+ * cache or readdir method, or when filldir rejects the ".snap" entry.
+ */
+static int currentfs_readdir(struct file *filp, void *dirent,
+			     filldir_t filldir)
+{
+	struct snap_cache *cache;
+	struct file_operations *fops;
+	int rc;
+
+	ENTRY;
+	if( !filp || !filp->f_dentry || !filp->f_dentry->d_inode ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	cache = snap_find_cache(filp->f_dentry->d_inode->i_dev);
+	if ( !cache ) {
+		EXIT;
+		return -EINVAL;
+	}
+	/* check the method as well, like the other directory operations do */
+	fops = filter_c2cdfops( cache->cache_filter );
+	if( !fops || !fops->readdir ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	/*
+	 * no action if we are under clonefs or .snap
+	 */
+	if( cache->cache_show_dotsnap &&
+	    (filp->f_dentry->d_sb == cache->cache_sb) &&
+	    !currentfs_is_under_dotsnap(filp->f_dentry) ){
+		if( filp->f_pos == 0 ){
+			if( filldir(dirent, ".snap",
+				    strlen(".snap")+1, filp->f_pos,
+				    0xF0000000|filp->f_dentry->d_inode->i_ino) ){
+				EXIT;
+				return -EINVAL;
+			}
+			filp->f_pos += strlen(".snap")+1;
+		}
+		filp->f_pos -= strlen(".snap")+1;
+		rc = fops->readdir(filp, dirent, filldir);
+		filp->f_pos += strlen(".snap")+1;
+	}else
+		rc = fops->readdir(filp, dirent, filldir);
+
+	EXIT;
+	return rc;
+}
+
+/* file operations installed on current-filesystem directories */
+struct file_operations currentfs_dir_fops = {
+	readdir: currentfs_readdir
+};
+
+/* inode operations installed on current-filesystem directories */
+struct inode_operations currentfs_dir_iops = {
+	default_file_ops: &currentfs_dir_fops,
+	create: currentfs_create,
+	mkdir: currentfs_mkdir,
+	link: currentfs_link,
+	symlink: currentfs_symlink,
+	mknod: currentfs_mknod,
+	rmdir: currentfs_rmdir,
+	unlink: currentfs_unlink,
+	rename: currentfs_rename,
+	lookup: currentfs_lookup
+};
diff --git a/lustre/snapfs/dotsnap.c b/lustre/snapfs/dotsnap.c
new file mode 100644
index 0000000000..19dfc1ca2d
--- /dev/null
+++ b/lustre/snapfs/dotsnap.c
@@ -0,0 +1,208 @@
+/*
+ * dotsnap.c - support for .snap directories
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include 
<linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/* tentative definitions; the initialised tables are at the end of the file */
+struct inode_operations dotsnap_inode_operations;
+struct file_operations dotsnap_file_operations;
+
+/*
+ * Walk from de towards the root looking for a dentry whose inode number has
+ * any of the 0xF0000000 bits set (the marker used for .snap inodes).
+ *
+ * Returns the d_fsdata value (cast to int) of the dentry visited just
+ * before that marked ancestor, i.e. the value dotsnap_lookup() stored on
+ * the snapshot directory.  Returns 0 when no marked ancestor exists and
+ * also when de itself is the marked dentry (index is still 0 then).
+ */
+int currentfs_is_under_dotsnap(struct dentry *de)
+{
+	int index = 0;
+
+	/* the root dentry is its own parent, which terminates the walk */
+	while(de && de->d_parent != de) {
+		if ( de->d_inode && de->d_inode->i_ino & 0xF0000000 ) {
+			EXIT;
+			return index;
+		}
+		index = (int)de->d_fsdata;
+		de = de->d_parent;
+	}
+
+	EXIT;
+	return 0;
+}
+
+/*
+ * Fill in the in-core inode of a .snap directory from the snapshot table
+ * selected by cache->cache_snap_tableno: a root-owned 0755 directory using
+ * dotsnap_inode_operations, sized by the number of table entries minus one.
+ */
+void currentfs_dotsnap_read_inode(struct snap_cache *cache,
+				  struct inode *inode)
+{
+	int tableno = cache->cache_snap_tableno;
+	struct snap_table *table;
+	ENTRY;
+
+	table = &snap_tables[tableno];
+
+	inode->i_mode = S_IFDIR | 0755 ;
+	inode->i_op = &dotsnap_inode_operations;
+	inode->i_size = table->tbl_count - 1;
+	/* all except current form a subdirectory and . and .. */
+	inode->i_nlink = table->tbl_count -1 + 2;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	EXIT;
+}
+
+/*
+ * lookup method of the .snap directory: resolve a snapshot name to the
+ * inode that represents the parent directory in that snapshot.
+ *
+ * The name is matched against table entries 1..tbl_count-1; entry 0 is
+ * never compared.
+ */
+struct dentry *dotsnap_lookup(struct inode *dir, struct dentry *dentry)
+{
+	struct snap_table *table;
+	struct snap_cache *cache;
+	int i;
+	int index;
+	int tableno;
+	ino_t ino;
+	struct inode *inode;
+	struct snapshot_operations *snapops;
+
+	ENTRY;
+
+	cache = snap_find_cache(dir->i_dev);
+	if ( !cache ) {
+		printk("dotsnap_readdir: cannot find cache\n");
+		make_bad_inode(dir);
+		EXIT;
+		return ERR_PTR(-EINVAL);
+	}
+
+	snapops = filter_c2csnapops(cache->cache_filter);
+	if (!snapops || !snapops->get_indirect_ino) {
+		EXIT;
+		return ERR_PTR(-EINVAL);
+	}
+
+	tableno = cache->cache_snap_tableno;
+	table = &snap_tables[tableno];
+
+	/* nothing but entry 0 in the table: there is no name to match */
+	if( table->tbl_count <= 1 )
+		return NULL;
+
+	index = table->tbl_index[0];
+	for ( i = 1 ; i < table->tbl_count ; i++ ) {
+		if ( (dentry->d_name.len == strlen(table->tbl_name[i])) &&
+		     (memcmp(dentry->d_name.name, table->tbl_name[i],
+			     dentry->d_name.len) == 0) ) {
+			index = table->tbl_index[i];
+			break;
+		}
+	}
+
+	/* the loop ran to completion: no snapshot has this name */
+	if( i >= table->tbl_count )
+		return ERR_PTR(-ENOENT);
+
+	/* strip the .snap marker bits to get the real directory inode */
+	inode = iget(dir->i_sb, dir->i_ino & (~0xF0000000));
+
+	if ( !inode )
+		return ERR_PTR(-EINVAL);
+
+	ino = snapops->get_indirect_ino(inode, index);
+	iput(inode);
+
+	/* no indirect inode for this snapshot: fall back to the directory */
+	if ( ino == -ENOATTR || ino == 0 ) {
+		ino = dir->i_ino & (~0xF0000000);
+	}
+
+	if ( ino == -EINVAL ) {
+		return ERR_PTR(-EINVAL);
+	}
+	CDEBUG(D_INODE, "index %d, ino is %lu\n",index, ino);
+
+	inode = iget(dir->i_sb, ino);
+	/*
+	 * The inode is dereferenced below, so a failed iget() must be
+	 * reported instead of being hashed as a negative dentry.
+	 */
+	if ( !inode ) {
+		EXIT;
+		return ERR_PTR(-EINVAL);
+	}
+	d_add(dentry, inode);
+	/* remembered so currentfs_is_under_dotsnap() can report the index */
+	dentry->d_fsdata = (void*)index;
+	inode->i_op = dentry->d_parent->d_parent->d_inode->i_op;
+	return NULL;
+}
+
+
+/*
+ * readdir method of the .snap directory: list one entry per snapshot.
+ *
+ * f_pos counts emitted entries; position i maps to table entry i+1
+ * (entry 0 is skipped).  Returns 0 when the listing stops normally and
+ * -EINVAL on a missing cache, a missing get_indirect_ino method or when
+ * that method reports -EINVAL.
+ */
+static int dotsnap_readdir(struct file * filp,
+			   void * dirent, filldir_t filldir)
+{
+	unsigned int i;
+	int tableno;
+	struct snap_cache *cache;
+	struct snap_table *table;
+	struct snapshot_operations *snapops;
+
+	ENTRY;
+
+	cache = snap_find_cache(filp->f_dentry->d_inode->i_dev);
+	if ( !cache ) {
+		printk("dotsnap_readdir: cannot find cache\n");
+		make_bad_inode(filp->f_dentry->d_inode);
+		EXIT;
+		return -EINVAL;
+	}
+
+	snapops = filter_c2csnapops(cache->cache_filter);
+	if (!snapops || !snapops->get_indirect_ino) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	tableno = cache->cache_snap_tableno;
+	table = &snap_tables[tableno];
+	CDEBUG(D_INODE, "\n");
+	for (i = filp->f_pos ; i < table->tbl_count -1 ; i++) {
+		int index;
+		struct inode *inode;
+		ino_t ino;
+
+		CDEBUG(D_INODE, "%d\n",i);
+
+		inode = filp->f_dentry->d_inode;
+		index = table->tbl_index[i+1];
+		ino = snapops->get_indirect_ino
+			(filp->f_dentry->d_inode, index);
+
+		CDEBUG(D_INODE, "\n");
+
+		/* no indirect inode: report the parent directory's ino */
+		if ( ino == -ENOATTR || ino == 0 ) {
+			ino = filp->f_dentry->d_parent->d_inode->i_ino;
+		}
+
+		CDEBUG(D_INODE, "\n");
+		if ( ino == -EINVAL ) {
+			return -EINVAL;
+		}
+
+		CDEBUG(D_INODE, "Listing %s\n", table->tbl_name[i+1]);
+		if (filldir(dirent, table->tbl_name[i+1],
+			    strlen(table->tbl_name[i+1]),
+			    filp->f_pos, ino) < 0){
+			CDEBUG(D_INODE, "\n");
+			break;
+		}
+		filp->f_pos++;
+	}
+	EXIT;
+	return 0;
+}
+
+
+struct 
file_operations dotsnap_file_operations = {
+	readdir: dotsnap_readdir, /* readdir */
+};
+
+/* inode operations of the synthetic .snap directory */
+struct inode_operations dotsnap_inode_operations =
+{
+	default_file_ops: &dotsnap_file_operations,
+	lookup: dotsnap_lookup
+};
diff --git a/lustre/snapfs/file.c b/lustre/snapfs/file.c
new file mode 100644
index 0000000000..38c5652c50
--- /dev/null
+++ b/lustre/snapfs/file.c
@@ -0,0 +1,259 @@
+/*
+ * file.c
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/* instantiate a file handle to the cache file */
+/*
+ * Copies f_pos, f_mode, f_flags, f_count and f_owner from clone_file into
+ * cache_file, takes f_op from cache_inode's default file operations and
+ * points cache_file at cache_dentry, whose d_inode is set to cache_inode.
+ * The inode argument is not used.
+ */
+static void currentfs_prepare_snapfile(struct inode *inode,
+				       struct file *clone_file,
+				       struct inode *cache_inode,
+				       struct file *cache_file,
+				       struct dentry *cache_dentry)
+{
+	ENTRY;
+	cache_file->f_pos = clone_file->f_pos;
+	cache_file->f_mode = clone_file->f_mode;
+	cache_file->f_flags = clone_file->f_flags;
+	cache_file->f_count = clone_file->f_count;
+	cache_file->f_owner = clone_file->f_owner;
+	cache_file->f_op = cache_inode->i_op->default_file_ops;
+	cache_file->f_dentry = cache_dentry;
+	cache_file->f_dentry->d_inode = cache_inode;
+	EXIT;
+	return ;
+}
+
+/* update the currentfs file struct after IO in cache file */
+/*
+ * Only f_pos is copied, from clone_file into cache_file.  Note that the
+ * caller in currentfs_readpage() passes its own file as cache_file and
+ * the temporary file as clone_file, so at that call site the parameter
+ * names read the opposite way round from currentfs_prepare_snapfile().
+ * Neither inode argument is used.
+ */
+static void currentfs_restore_snapfile(struct inode *cache_inode,
+				       struct file *cache_file,
+				       struct inode *clone_inode,
+				       struct file *clone_file)
+{
+	ENTRY;
+	cache_file->f_pos = clone_file->f_pos;
+	EXIT;
+	return;
+}
+
+
+/*
+ * write method for regular files of the current filesystem.
+ *
+ * Refuses with -ENOSPC under .snap.  Copies the inode on write when
+ * needed, then, for the partially written first and last blocks that are
+ * not mapped in the current inode, copies the block back from a snapshot
+ * inode that has it, and finally forwards to the cached write method.
+ */
+static ssize_t currentfs_write (struct file *filp, const char *buf,
+				size_t count, loff_t *ppos)
+{
+	struct snap_cache *cache;
+	struct inode *inode = filp->f_dentry->d_inode;
+	ssize_t rc;
+	struct file_operations *fops;
+	loff_t pos;
+	
long block[2]={-1,-1}, mask, i;
+	struct snap_table *table;
+	int slot = 0;
+	int index = 0;
+	struct inode_operations *ciops;
+	struct inode *cache_inode = NULL;
+	struct snapshot_operations *snapops;
+
+	ENTRY;
+
+	/* snapshots are read-only */
+	if (currentfs_is_under_dotsnap(filp->f_dentry)) {
+		EXIT;
+		return -ENOSPC;
+	}
+
+	cache = snap_find_cache(inode->i_dev);
+	if ( !cache ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	if ( snap_needs_cow(inode) != -1 ) {
+		CDEBUG(D_FILE, "snap_needs_cow for ino %lu \n",inode->i_ino);
+		snap_do_cow(inode, filp->f_dentry->d_parent->d_inode->i_ino, 0);
+	}
+
+	fops = filter_c2cffops(cache->cache_filter);
+	if (!fops ||
+	    !fops->write) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	if (filp->f_flags & O_APPEND)
+		pos = inode->i_size;
+	else {
+		pos = *ppos;
+		if (pos != *ppos)
+			return -EINVAL;
+	}
+
+	/*
+	 * we only need to copy back the first and last blocks
+	 */
+	/*
+	 * block[0] is set when the write starts inside a block, block[1]
+	 * when it ends inside a block; -1 means "nothing to copy back".
+	 */
+	mask = inode->i_sb->s_blocksize-1;
+	if( pos & mask )
+		block[0] = pos >> inode->i_sb->s_blocksize_bits;
+	pos += count - 1;
+	if( (pos+1) & mask )
+		block[1] = pos >> inode->i_sb->s_blocksize_bits;
+	if( block[0] == block[1] )
+		block[1] = -1;
+
+	ciops = filter_c2cfiops(cache->cache_filter);
+	snapops = filter_c2csnapops(cache->cache_filter);
+
+	for( i=0; i<2; i++ ){
+		/* only blocks that the current inode does not map itself */
+		if( block[i]!=-1 && !ciops->bmap(inode, block[i]) ) {
+			table = &snap_tables[cache->cache_snap_tableno];
+			/*
+			 * NOTE(review): the scan starts at tbl_index[tbl_count],
+			 * whereas dotsnap.c only reads entries below tbl_count;
+			 * confirm the start index against the table definition.
+			 */
+			for (slot = table->tbl_count ; slot >= 1; slot--)
+			{
+				cache_inode = NULL;
+				index = table->tbl_index[slot];
+				cache_inode = snap_get_indirect(inode, NULL, index);
+
+				if ( !cache_inode )  continue;
+
+				/* first snapshot inode that maps the block wins */
+				if (cache_inode->i_op->bmap(cache_inode, block[i])) {
+					CDEBUG(D_FILE, "find cache_ino %lu\n",
+						cache_inode->i_ino);
+					if( snapops && snapops->copy_block) {
+						snapops->copy_block( inode,
+							cache_inode, block[i]);
+					}
+
+					iput(cache_inode);
+					break;
+				}
+				iput(cache_inode);
+			}
+		}
+	}
+
+	rc = fops->write(filp, buf, count, ppos);
+
+	EXIT;
+	return rc;
+}
+
+/*
+ * readpage method for regular files of the current filesystem: serve the
+ * page from the inode itself when it maps the block, otherwise from a
+ * snapshot inode that does.
+ */
+static int currentfs_readpage(struct file *file, struct page *page)
+{
+	int result 
= 0;
+	struct inode *inode = file->f_dentry->d_inode;
+	unsigned long ind_ino = inode->i_ino;
+	struct inode *pri_inode = NULL;
+	struct inode *cache_inode = NULL;
+	struct file open_file;
+	struct dentry open_dentry ;
+	struct inode_operations *ciops;
+	struct snap_cache *cache;
+	long block;
+	struct snap_table *table;
+	int slot = 0;
+	int index = 0;
+	int search_older = 0;
+
+	ENTRY;
+
+	cache = snap_find_cache(inode->i_dev);
+	if ( !cache ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	ciops = filter_c2cfiops(cache->cache_filter);
+
+	block = page->offset >> inode->i_sb->s_blocksize_bits;
+
+	/* if there is a block in the cache, return the cache readpage */
+	if( inode->i_blocks && ciops->bmap(inode, block) ) {
+		CDEBUG(D_FILE, "block %lu in cache, ino %lu\n",
+				block, inode->i_ino);
+		result = ciops->readpage(file, page);
+		EXIT;
+		return result;
+	}
+
+	/*
+	 * clonefs_readpage will fill this with primary ino number
+	 * we need it to follow the cloned chain of primary inode
+	 */
+	if( file->f_dentry->d_fsdata ){
+		pri_inode = iget(inode->i_sb, (unsigned long)file->f_dentry->d_fsdata);
+		if( !pri_inode ) {
+			EXIT;
+			return -EINVAL;
+		}
+		inode = pri_inode;
+		search_older = 1;
+	}
+
+	table = &snap_tables[cache->cache_snap_tableno];
+
+	/*
+	 * Find a snapshot inode that maps the block.  On exit from the
+	 * loop cache_inode is either NULL or a referenced inode.
+	 * NOTE(review): the scan starts at tbl_index[tbl_count]; confirm
+	 * the start index against the table definition.
+	 */
+	for (slot = table->tbl_count ; slot >= 1; slot--)
+	{
+		cache_inode = NULL;
+		index = table->tbl_index[slot];
+		cache_inode = snap_get_indirect(inode, NULL, index);
+
+		if ( !cache_inode )  continue;
+
+		/* we only want slots between cache_inode to the oldest one */
+		if( search_older && cache_inode->i_ino == ind_ino )
+			search_older = 0;
+
+		if ( !search_older && cache_inode->i_op->bmap(cache_inode, block)) {
+			break;
+		}
+		iput(cache_inode);
+		/*
+		 * Forget the released inode: without this a scan that ends
+		 * on a non-matching slot left a stale pointer behind, which
+		 * passed the NULL check below and was used and put again.
+		 */
+		cache_inode = NULL;
+	}
+	if( pri_inode )
+		iput(pri_inode);
+
+	if ( !cache_inode ) {
+		EXIT;
+		return -EINVAL;
+	}
+
+	/* build a temporary file/dentry pair that points at cache_inode */
+	currentfs_prepare_snapfile(inode, file, cache_inode, &open_file,
+				   &open_dentry);
+
+	down(&cache_inode->i_sem);
+
+	if( ciops->readpage ) {
+		CDEBUG(D_FILE, "block %lu NOT in cache, use redirected ino %lu\n", 
block, cache_inode->i_ino );
+		result = ciops->readpage(&open_file, page);
+	}else {
+		/* result keeps its initial value of 0 in this case */
+		CDEBUG(D_FILE, "cache ino %lu, readpage is NULL\n",
+				cache_inode->i_ino);
+	}
+
+	up(&cache_inode->i_sem);
+	/* copy the file position of the temporary file back into file */
+	currentfs_restore_snapfile(inode, file, cache_inode, &open_file);
+	iput(cache_inode);
+	EXIT;
+	return result;
+}
+
+/* file operations installed on current-filesystem regular files */
+struct file_operations currentfs_file_fops = {
+	write:currentfs_write,
+};
+
+/* inode operations installed on current-filesystem regular files */
+struct inode_operations currentfs_file_iops = {
+	default_file_ops: &currentfs_file_fops,
+	readpage: currentfs_readpage,
+};
diff --git a/lustre/snapfs/filter.c b/lustre/snapfs/filter.c
new file mode 100644
index 0000000000..c63caac5cd
--- /dev/null
+++ b/lustre/snapfs/filter.c
@@ -0,0 +1,433 @@
+/*
+ *
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Mountain View Data, Inc.
+ *
+ *
+ */
+
+#include <stdarg.h>
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/filter.h>
+
+int filter_print_entry = 1;
+int filter_debug = 0xfffffff;
+/*
+ * The functions in this file are responsible for setting up the
+ * correct methods for layered file systems like InterMezzo and SnapFS
+ */
+
+
+/* one filter_fs slot per supported cache filesystem type */
+static struct filter_fs filter_oppar[FILTER_FS_TYPES];
+
+/* get to the upper methods (intermezzo, snapfs) */
+inline struct super_operations *filter_c2usops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_sops;
+}
+
+inline struct inode_operations *filter_c2udiops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_dir_iops;
+}
+
+inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_file_iops;
+}
+
+/* upper (filter) symlink inode operations */
+inline struct inode_operations *filter_c2usiops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_sym_iops;
+}
+
+/* upper (filter) directory file operations */
+inline struct file_operations *filter_c2udfops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_dir_fops;
+}
+
+/* upper (filter) regular-file file operations */
+inline struct file_operations *filter_c2uffops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_file_fops;
+}
+
+/* upper (filter) symlink file operations */
+inline struct file_operations *filter_c2usfops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_sym_fops;
+}
+
+/* upper (filter) dentry operations */
+inline struct dentry_operations *filter_c2udops(struct filter_fs *cache)
+{
+	return &cache->o_fops.filter_dentry_ops;
+}
+
+/* get to the cache (lower) methods */
+/*
+ * Unlike the upper accessors, which return the address of a table embedded
+ * in the filter_fs, these return stored pointers.
+ */
+inline struct super_operations *filter_c2csops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_sops;
+}
+
+inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_dir_iops;
+}
+
+inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_file_iops;
+}
+
+inline struct inode_operations *filter_c2csiops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_sym_iops;
+}
+
+inline struct file_operations *filter_c2cdfops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_dir_fops;
+}
+
+inline struct file_operations *filter_c2cffops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_file_fops;
+}
+
+inline struct file_operations *filter_c2csfops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_sym_fops;
+}
+
+inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache)
+{
+	return cache->o_caops.cache_dentry_ops;
+}
+/* snapfs: for snapshot operations */
+inline struct snapshot_operations *filter_c2csnapops(struct filter_fs *cache)
+{
+	return cache->o_snapops;
+}
+
+/* find the cache for this FS */
+/*
+ * Maps the filesystem type name ("ext2", "ext3" or "reiser", exact match)
+ * to its slot in filter_oppar[].  Returns NULL, after logging, for any
+ * other name.  cache_type must not be NULL (it is passed to strlen()).
+ */
+struct filter_fs *filter_get_filter_fs(const char *cache_type)
+{
+	struct filter_fs *ops = NULL;
+	FENTRY;
+
+	if ( strlen(cache_type) == strlen("ext2") &&
+	     memcmp(cache_type, "ext2", 
strlen("ext2")) == 0 ) {
+		ops = &filter_oppar[FILTER_FS_EXT2];
+		FDEBUG(D_SUPER, "ops at %p\n", ops);
+	}
+
+	if ( strlen(cache_type) == strlen("ext3") &&
+	     memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) {
+		ops = &filter_oppar[FILTER_FS_EXT3];
+		FDEBUG(D_SUPER, "ops at %p\n", ops);
+	}
+	if ( strlen(cache_type) == strlen("reiser") &&
+	     memcmp(cache_type, "reiser", strlen("reiser")) == 0 ) {
+		ops = &filter_oppar[FILTER_FS_REISER];
+		FDEBUG(D_SUPER, "ops at %p\n", ops);
+	}
+
+	/* only logged here; the NULL is returned to the caller */
+	if (ops == NULL) {
+		printk("prepare to die: unrecognized cache type for Filter\n");
+	}
+	FEXIT;
+	return ops;
+}
+
+
+/*
+ *  Frobnicate the InterMezzo/SnapFS operations
+ *    this establishes the link between the InterMezzo/SnapFS file system
+ *    and the underlying file system used for the cache.
+ */
+
+/*
+ * Record cache_sops as the lower superblock operations and build the upper
+ * table as a copy of it with selected methods replaced from filter_sops.
+ * Runs once per filter_fs: later calls return early because
+ * FILTER_DID_SUPER_OPS is already set in o_flags.
+ */
+void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops)
+{
+	/* Get ptr to the shared struct snapfs_ops structure. */
+	struct filter_ops *uops = &cache->o_fops;
+	/* Get ptr to the shared struct cache_ops structure. */
+	struct cache_ops *caops = &cache->o_caops;
+
+	FENTRY;
+
+	if ( cache->o_flags & FILTER_DID_SUPER_OPS ) {
+		FEXIT;
+		return;
+	}
+	cache->o_flags |= FILTER_DID_SUPER_OPS;
+
+	/* Set the cache superblock operations to point to the
+	   superblock operations of the underlying file system.  */
+	caops->cache_sops = cache_sops;
+
+	/*
+	 * Copy the cache (real fs) superblock ops to the "filter"
+	 * superblock ops as defaults. 
Some will be changed below
+	 */
+	memcpy(&uops->filter_sops, cache_sops, sizeof(*cache_sops));
+
+	/* now overwrite with filtering ops */
+	/*
+	 * A method is replaced only when the cache filesystem has it AND the
+	 * filter supplies one.  The second test used to look at the copy
+	 * made just above (always equal to the first), so a filter without
+	 * the method overwrote the cache method with NULL.
+	 */
+	if (cache_sops->put_super && filter_sops->put_super) {
+		uops->filter_sops.put_super = filter_sops->put_super;
+	}
+	if (cache_sops->read_inode && filter_sops->read_inode) {
+		uops->filter_sops.read_inode = filter_sops->read_inode;
+		FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n",
+		      cache, cache, uops->filter_sops.read_inode);
+	}
+	if (cache_sops->notify_change && filter_sops->notify_change)
+		uops->filter_sops.notify_change = filter_sops->notify_change;
+	if (cache_sops->remount_fs && filter_sops->remount_fs)
+		uops->filter_sops.remount_fs = filter_sops->remount_fs;
+	FEXIT;
+}
+
+
+/*
+ * Record cache_iops (and its default file operations) as the lower
+ * directory operations and build the upper directory tables as copies
+ * with selected methods replaced from filter_iops.  Runs once per
+ * filter_fs (FILTER_DID_DIR_OPS).
+ */
+void filter_setup_dir_ops(struct filter_fs *cache, struct inode_operations *cache_iops, struct inode_operations *filter_iops)
+{
+	struct inode_operations *u_iops;
+	struct file_operations *u_fops, *c_fops, *f_fops;
+	FENTRY;
+
+	if ( cache->o_flags & FILTER_DID_DIR_OPS ) {
+		FEXIT;
+		return;
+	}
+	FDEBUG(D_SUPER, "\n");
+	cache->o_flags |= FILTER_DID_DIR_OPS;
+
+	/* steal the old ops */
+	cache->o_caops.cache_dir_iops = cache_iops;
+	cache->o_caops.cache_dir_fops =
+		cache_iops->default_file_ops;
+
+	FDEBUG(D_SUPER, "\n");
+	/* abbreviate */
+	u_iops = &cache->o_fops.filter_dir_iops;
+
+	/* setup our dir iops: copy and modify */
+	memcpy(u_iops, cache_iops, sizeof(*cache_iops));
+	FDEBUG(D_SUPER, "\n");
+
+	/* methods that filter if cache filesystem has these ops */
+	if ( cache_iops->lookup && filter_iops->lookup ) {
+		FDEBUG(D_SUPER, "\n");
+		u_iops->lookup = filter_iops->lookup;
+		FDEBUG(D_SUPER, "lookup at %p\n", &filter_iops->lookup);
+	}
+	if (cache_iops->create && filter_iops->create)
+		u_iops->create = filter_iops->create;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->link && filter_iops->link)
+		u_iops->link = filter_iops->link;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->unlink && 
filter_iops->unlink)
+		u_iops->unlink = filter_iops->unlink;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->mkdir && filter_iops->mkdir)
+		u_iops->mkdir = filter_iops->mkdir;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->rmdir && filter_iops->rmdir)
+		u_iops->rmdir = filter_iops->rmdir;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->symlink && filter_iops->symlink)
+		u_iops->symlink = filter_iops->symlink;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->rename && filter_iops->rename)
+		u_iops->rename = filter_iops->rename;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->mknod && filter_iops->mknod)
+		u_iops->mknod = filter_iops->mknod;
+	FDEBUG(D_SUPER, "\n");
+	if (cache_iops->permission && filter_iops->permission)
+		u_iops->permission = filter_iops->permission;
+
+	/* copy dir fops */
+	FDEBUG(D_SUPER, "\n");
+	u_fops = &cache->o_fops.filter_dir_fops;
+	c_fops = cache_iops->default_file_ops;
+	f_fops = filter_iops->default_file_ops;
+
+	memcpy(u_fops, c_fops, sizeof(*c_fops));
+
+	/*
+	 * The filter may come without file operations; the "open" override
+	 * below already allows for that, so do the same here instead of
+	 * dereferencing a NULL f_fops.
+	 */
+	if( f_fops && c_fops->readdir && f_fops->readdir )
+		u_fops->readdir = f_fops->readdir;
+
+	/* assign */
+	FDEBUG(D_SUPER, "\n");
+	filter_c2udiops(cache)->default_file_ops = filter_c2udfops(cache);
+	FDEBUG(D_SUPER, "\n");
+
+	/* unconditional filtering operations */
+	if ( filter_iops->default_file_ops &&
+	     filter_iops->default_file_ops->open )
+		filter_c2udfops(cache)->open =
+			filter_iops->default_file_ops->open;
+
+	FEXIT;
+}
+
+
+/*
+ * Record cache_iops (and its default file operations) as the lower
+ * regular-file operations and build the upper file tables as copies with
+ * open, release, write and readpage replaced when the filter supplies
+ * them.  Runs once per filter_fs (FILTER_DID_FILE_OPS).
+ */
+void filter_setup_file_ops(struct filter_fs *cache, struct inode_operations *cache_iops, struct inode_operations *filter_iops)
+{
+	struct inode_operations *u_iops;
+	FENTRY;
+
+	if ( cache->o_flags & FILTER_DID_FILE_OPS ) {
+		FEXIT;
+		return;
+	}
+	cache->o_flags |= FILTER_DID_FILE_OPS;
+
+	/* steal the old ops */
+	cache->o_caops.cache_file_iops = cache_iops;
+	cache->o_caops.cache_file_fops =
+		cache_iops->default_file_ops;
+
+	/* abbreviate */
+	u_iops = filter_c2ufiops(cache);
+
+	/* setup our file iops: copy and modify */
+	memcpy(u_iops, cache_iops, sizeof(*cache_iops));
+
+	/* 
copy file fops */
+	memcpy(filter_c2uffops(cache), cache_iops->default_file_ops,
+	       sizeof(*cache_iops->default_file_ops));
+	/* assign */
+	filter_c2ufiops(cache)->default_file_ops = filter_c2uffops(cache);
+
+	/* unconditional filtering operations */
+	/* installed whenever the filter has them, whatever the cache offers */
+	if (filter_iops->default_file_ops &&
+	    filter_iops->default_file_ops->open )
+		filter_c2uffops(cache)->open =
+			filter_iops->default_file_ops->open;
+	if (filter_iops->default_file_ops &&
+	    filter_iops->default_file_ops->release )
+		filter_c2uffops(cache)->release =
+			filter_iops->default_file_ops->release;
+	if (filter_iops->default_file_ops &&
+	    filter_iops->default_file_ops->write )
+		filter_c2uffops(cache)->write =
+			filter_iops->default_file_ops->write;
+
+	/* set up readpage */
+	if (filter_iops->readpage)
+		filter_c2ufiops(cache)->readpage = filter_iops->readpage;
+
+	FEXIT;
+}
+
+/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */
+/*
+ * Record cache_iops (and its default file operations, which may be NULL)
+ * as the lower symlink operations and build the upper symlink tables as
+ * copies with readlink and follow_link replaced when both the cache and
+ * the filter have them.  Runs once per filter_fs (FILTER_DID_SYMLINK_OPS).
+ */
+void filter_setup_symlink_ops(struct filter_fs *cache, struct inode_operations *cache_iops, struct inode_operations *filter_iops)
+{
+	struct inode_operations *u_iops;
+	FENTRY;
+
+	if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) {
+		FEXIT;
+		return;
+	}
+	cache->o_flags |= FILTER_DID_SYMLINK_OPS;
+
+	/* steal the old ops */
+	cache->o_caops.cache_sym_iops = cache_iops;
+	cache->o_caops.cache_sym_fops =
+		cache_iops->default_file_ops;
+
+	/* abbreviate */
+	u_iops = filter_c2usiops(cache);
+
+	/* setup our symlink iops: copy and modify */
+	memcpy(u_iops, cache_iops, sizeof(*cache_iops));
+
+	/* copy fops - careful for symlinks they might be NULL */
+	if ( cache_iops->default_file_ops ) {
+		memcpy(filter_c2usfops(cache), cache_iops->default_file_ops,
+		       sizeof(*cache_iops->default_file_ops));
+	}
+
+	/* assign */
+	filter_c2usiops(cache)->default_file_ops = filter_c2usfops(cache);
+
+	if (cache_iops->readlink && filter_iops->readlink)
+		u_iops->readlink = filter_iops->readlink;
+	if (cache_iops->follow_link && filter_iops->follow_link)
+		u_iops->follow_link = 
filter_iops->follow_link;
+
+	FEXIT;
+}
+
+/*
+ * Record cache_dop as the lower dentry operations and copy filter_dop
+ * wholesale into the upper dentry operations table.  Runs once per
+ * filter_fs (FILTER_DID_DENTRY_OPS).  A warning is printed when the cache
+ * filesystem has its own d_revalidate, because the copy replaces it.
+ * NOTE(review): this function calls FEXIT on the early return without a
+ * matching FENTRY, unlike its siblings.
+ */
+void filter_setup_dentry_ops(struct filter_fs *cache,
+			     struct dentry_operations *cache_dop,
+			     struct dentry_operations *filter_dop)
+{
+	if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) {
+		FEXIT;
+		return;
+	}
+	cache->o_flags |= FILTER_DID_DENTRY_OPS;
+
+	cache->o_caops.cache_dentry_ops = cache_dop;
+	memcpy(&cache->o_fops.filter_dentry_ops,
+	       filter_dop, sizeof(*filter_dop));
+
+	if (cache_dop &&  cache_dop != filter_dop && cache_dop->d_revalidate){
+		printk("WARNING: filter overriding revalidation!\n");
+	}
+	return;
+}
+/* snapfs : for snapshot operations */
+/*
+ * Store the snapshot operations pointer of the cache filesystem.  Runs
+ * once per filter_fs (FILTER_DID_SNAPSHOT_OPS).
+ */
+void filter_setup_snapshot_ops (struct filter_fs *cache,
+				struct snapshot_operations *cache_snapops)
+{
+	FENTRY;
+
+	if ( cache->o_flags & FILTER_DID_SNAPSHOT_OPS ) {
+		FEXIT;
+		return;
+	}
+	cache->o_flags |= FILTER_DID_SNAPSHOT_OPS;
+
+	cache->o_snapops = cache_snapops;
+
+	FEXIT;
+}
+
+/*
+ * Store the journal operations pointer of the cache filesystem.  Runs
+ * once per filter_fs (FILTER_DID_JOURNAL_OPS).
+ */
+void filter_setup_journal_ops (struct filter_fs *cache,
+			       struct journal_ops *cache_journal_ops)
+{
+	FENTRY;
+
+	if( cache->o_flags & FILTER_DID_JOURNAL_OPS ){
+		FEXIT;
+		return;
+	}
+	cache->o_flags |= FILTER_DID_JOURNAL_OPS;
+
+	cache->o_trops = cache_journal_ops;
+
+	FEXIT;
+}
diff --git a/lustre/snapfs/inode.c b/lustre/snapfs/inode.c
new file mode 100644
index 0000000000..d6434f3540
--- /dev/null
+++ b/lustre/snapfs/inode.c
@@ -0,0 +1,211 @@
+/*
+ *  fs/snap/snap.c
+ *
+ *  A snap shot file system.
+ * + */ + +#define EXPORT_SYMTAB + + +#define __NO_VERSION__ +#include <linux/module.h> +#include <asm/uaccess.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/quotaops.h> +#include <linux/list.h> +#include <linux/file.h> +#include <asm/bitops.h> +#include <asm/byteorder.h> + +#ifdef CONFIG_SNAPFS_EXT2 +#include <linux/ext2_fs.h> +#endif +#ifdef CONFIG_SNAPFS_EXT3 +#include <linux/ext3_fs.h> +#endif + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + + +extern int currentfs_remount(struct super_block * sb, int *flags, char *data); + +/* XXX PJB: this is exactly what we need to put things under + filters - we don't want the ext2 methods hardcoded, we want them + in the filter (in read_super) and then call those methods. + See how InterMezzo gets the journal operations . +*/ + +extern void currentfs_dotsnap_read_inode(struct snap_cache *, struct inode *); + +/* Superblock operations. 
*/ +static void currentfs_read_inode(struct inode *inode) +{ + struct snap_cache *cache; + ENTRY; + + if( !inode ) + { + EXIT; + return; + } + + CDEBUG(D_INODE, "read_inode ino %lu\n", inode->i_ino); + + cache = snap_find_cache(inode->i_dev); + if ( !cache ) { + printk("currentfs_read_inode: cannot find cache\n"); + make_bad_inode(inode); + EXIT; + return ; + } + + if ( inode->i_ino & 0xF0000000 ) { + CDEBUG(D_INODE, "\n"); + currentfs_dotsnap_read_inode(cache, inode); + EXIT; + return ; + } + + if( filter_c2csops(cache->cache_filter) ) + filter_c2csops(cache->cache_filter)->read_inode(inode); + + /* XXX now set the correct snap_{file,dir,sym}_iops */ + if ( S_ISDIR(inode->i_mode) ) + inode->i_op = filter_c2udiops(cache->cache_filter); + else if ( S_ISREG(inode->i_mode) ) { + if ( !filter_c2cfiops(cache->cache_filter) ) { + filter_setup_file_ops(cache->cache_filter, + inode->i_op, ¤tfs_file_iops); + } + inode->i_op = filter_c2ufiops(cache->cache_filter); + printk("inode %lu, i_op at %p\n", inode->i_ino, inode->i_op); + } + else if ( S_ISLNK(inode->i_mode) ) { + if ( !filter_c2csiops(cache->cache_filter) ) { + filter_setup_symlink_ops(cache->cache_filter, + inode->i_op, ¤tfs_sym_iops); + } + inode->i_op = filter_c2usiops(cache->cache_filter); + printk("inode %lu, i_op at %p\n", inode->i_ino, inode->i_op); + } + + EXIT; + return; +} + + +static int currentfs_notify_change(struct dentry *dentry, struct iattr *iattr) +{ + struct snap_cache *cache; + int rc; + struct super_operations *sops; + + ENTRY; + + if (currentfs_is_under_dotsnap(dentry)) { + EXIT; + return -EPERM; + } + + cache = snap_find_cache(dentry->d_inode->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + /* XXX better alloc a new dentry */ + + if ( snap_needs_cow(dentry->d_inode) != -1 ) { + printk("notify_change:snap_needs_cow for ino %lu \n", + dentry->d_inode->i_ino); + snap_do_cow(dentry->d_inode, + dentry->d_parent->d_inode->i_ino, 0); + } + + sops = filter_c2csops(cache->cache_filter); + if 
(!sops || + !sops->notify_change) { + EXIT; + return -EINVAL; + } + rc = sops->notify_change(dentry, iattr); + + EXIT; + return rc; +} + + +static void currentfs_put_super(struct super_block *sb) +{ + + struct snap_cache *cache; + ENTRY; + + CDEBUG(D_SUPER, "sb %lx, sb->u.generic_sbp: %lx\n", + (ulong) sb, (ulong) sb->u.generic_sbp); + cache = snap_find_cache(sb->s_dev); + if (!cache) { + EXIT; + goto exit; + } + /* handle COMPAT_FEATUREs */ +#ifdef CONFIG_SNAPFS_EXT2 + else if( cache->cache_type == FILTER_FS_EXT2 ){ + if( !EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_SNAPFS) ){ + sb->u.ext2_sb.s_feature_compat &= + ~EXT2_FEATURE_COMPAT_BLOCKCOW; + sb->u.ext2_sb.s_es->s_feature_compat &= + cpu_to_le32(~EXT2_FEATURE_COMPAT_BLOCKCOW); + } + } +#endif +#ifdef CONFIG_SNAPFS_EXT3 + else if( cache->cache_type == FILTER_FS_EXT3 ){ + if( !EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_SNAPFS) ){ + sb->u.ext3_sb.s_es->s_feature_compat &= + cpu_to_le32(~EXT3_FEATURE_COMPAT_BLOCKCOW); + } + } +#endif + /* + * If there is a saved 'put_super' function for the underlying + * fs then call it. 
+ */ + if (cache->cache_filter->o_caops.cache_sops->put_super) { + cache->cache_filter->o_caops.cache_sops->put_super(sb); + } + + if (!list_empty(&cache->cache_clone_list)) { + printk("Warning: snap_put_super: clones exist!\n"); + } + + list_del(&cache->cache_chain); + snap_free_cache(cache); + + CDEBUG(D_SUPER, "sb %lx, sb->u.generic_sbp: %lx\n", + (ulong) sb, (ulong) sb->u.generic_sbp); +exit: + CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + MOD_DEC_USE_COUNT; + EXIT; + return ; +} + +struct super_operations currentfs_super_ops = { + currentfs_read_inode, + NULL, /* write inode */ + NULL, /* put inode */ + NULL, /* delete inode */ + currentfs_notify_change, + currentfs_put_super, + NULL, /* write super */ + NULL, + NULL, /* remount */ +}; diff --git a/lustre/snapfs/journal_ext3.c b/lustre/snapfs/journal_ext3.c new file mode 100644 index 0000000000..8171842e53 --- /dev/null +++ b/lustre/snapfs/journal_ext3.c @@ -0,0 +1,84 @@ + +/* + * Snapfs. (C) 2000 Peter J. 
Braam + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/malloc.h> +#include <linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/errno.h> +#include <linux/locks.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/string.h> +#ifdef CONFIG_SNAPFS_EXT3 +#include <linux/ext3_jfs.h> +#endif +#include "linux/filter.h" +#include "linux/snapfs.h" +#include "linux/snapsupport.h" + +#ifdef CONFIG_SNAPFS_EXT3 + +#define EXT3_EA_TRANS_BLOCKS EXT3_DATA_TRANS_BLOCKS + +/* + * must follow the changes of ext3_create_indirect() in fs/ext3/snap.c + */ +#define COW_CREDITS (2 * EXT3_EA_TRANS_BLOCKS + 17 + 2 * EXT3_DATA_TRANS_BLOCKS ) + +/* start the filesystem journal operations */ +static void *snap_e3_trans_start(struct inode *inode, int op) +{ + int jblocks; + + /* XXX needs to be fixed up when we get reiserfs support */ + switch (op) { + case SNAP_OP_CREATE: + jblocks = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 3; + break; + case SNAP_OP_LINK: + jblocks = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS; + break; + case SNAP_OP_UNLINK: + jblocks = COW_CREDITS + EXT3_DELETE_TRANS_BLOCKS; + break; + case SNAP_OP_SYMLINK: + jblocks = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 5; + break; + case SNAP_OP_MKDIR: + jblocks = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 4; + break; + case SNAP_OP_RMDIR: + jblocks = 2 * COW_CREDITS + EXT3_DELETE_TRANS_BLOCKS; + break; + case SNAP_OP_MKNOD: + jblocks = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 3; + break; + case SNAP_OP_RENAME: + jblocks = 4 * COW_CREDITS + 2 * EXT3_DATA_TRANS_BLOCKS + 2; + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); + return journal_start(EXT3_JOURNAL(inode), jblocks); +} + +static void snap_e3_trans_commit(void *handle) +{ + journal_stop(current->j_handle); +} + +struct journal_ops snap_ext3_journal_ops = { + 
snap_e3_trans_start, + snap_e3_trans_commit +}; + +#endif /* CONFIG_EXT3_FS */ diff --git a/lustre/snapfs/psdev.c b/lustre/snapfs/psdev.c new file mode 100644 index 0000000000..424d32557d --- /dev/null +++ b/lustre/snapfs/psdev.c @@ -0,0 +1,180 @@ +/* + * A file system filter driver in the style of InterMezzo + * to manage file system snapshots + * + * Author: Peter J. Braam <braam@mountainviewdata.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define EXPORT_SYMTAB + +#include <linux/config.h> /* for CONFIG_PROC_FS */ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/major.h> +/* #include <linux/kmod.h> for request_module() */ +#include <linux/sched.h> +#include <linux/lp.h> +#include <linux/malloc.h> +#include <linux/ioport.h> +#include <linux/fcntl.h> +#include <linux/delay.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/init.h> +#include <linux/list.h> +#include <asm/io.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/poll.h> +#include <asm/uaccess.h> +#include <linux/miscdevice.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +#if 1 /* XXX - enable for debug messages */ +int snap_print_entry = 1; +int snap_debug_level = ~D_INFO; +#else +int snap_print_entry = 0; +int snap_debug_level = 0; +#endif +int snap_inodes = 0; +long snap_memory = 0; + +struct snap_control_device snap_dev; + +extern int snap_ioctl (struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); + +/* called when opening /dev/device */ +static int snap_psdev_open(struct inode * inode, struct file * file) +{ + int dev; + ENTRY; + + if 
(!inode) + return -EINVAL; + dev = MINOR(inode->i_rdev); + if (dev != SNAP_PSDEV_MINOR) + return -ENODEV; + + MOD_INC_USE_COUNT; + EXIT; + return 0; +} + +/* called when closing /dev/device */ +static int snap_psdev_release(struct inode * inode, struct file * file) +{ + int dev; + ENTRY; + + if (!inode) + return -EINVAL; + dev = MINOR(inode->i_rdev); + if (dev != SNAP_PSDEV_MINOR) + return -ENODEV; + + MOD_DEC_USE_COUNT; + + EXIT; + return 0; +} + +/* XXX need ioctls here to do snap_delete and snap_restore, snap_backup */ + + +/* declare character device */ +static struct file_operations snapcontrol_fops = { + NULL, /* llseek */ + NULL, /* read */ + NULL, /* write */ + NULL, /* presto_psdev_readdir */ + NULL, /* poll */ + snap_ioctl, /* ioctl */ + NULL, /* presto_psdev_mmap */ + snap_psdev_open, /* open */ + NULL, + snap_psdev_release, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL /* lock */ +}; + + + +#define SNAPFS_MINOR 240 + +static struct miscdevice snapcontrol_dev = { + SNAPFS_MINOR, + "snapcontrol", + &snapcontrol_fops +}; + +int init_snap_psdev(void) +{ + printk(KERN_INFO "SNAP psdev driver v0.01, braam@mountainviewdata.com\n"); + + misc_register( &snapcontrol_dev ); + + return 0; +} + +void snap_cleanup_psdev(void) +{ + ENTRY; + misc_deregister(&snapcontrol_dev); + EXIT; +} + +#ifdef MODULE +MODULE_AUTHOR("Peter J. 
Braam <braam@cs.cmu.edu>"); +MODULE_DESCRIPTION("Snapfs file system filters v0.01"); + +extern int init_snapfs(void); +extern int cleanup_snapfs(void); +extern int init_clonefs(void); +extern int init_snap_sysctl(void); + +int init_module(void) +{ + int err; + if ( (err = init_snap_psdev()) ) { + printk("Error initializing snap_psdev, %d\n", err); + return -EINVAL; + } + + if ( (err = init_snapfs()) ) { + printk("Error initializing snapfs, %d\n", err); + return -EINVAL; + } + + if ( (err = init_snapfs_proc_sys()) ) { + printk("Error initializing snapfs proc sys, %d\n", err); + return -EINVAL; + } + + + return 0; +} + +void cleanup_module(void) +{ + + cleanup_snapfs(); + snap_cleanup_psdev(); + +} +#endif + diff --git a/lustre/snapfs/snap.c b/lustre/snapfs/snap.c new file mode 100644 index 0000000000..463e0faae7 --- /dev/null +++ b/lustre/snapfs/snap.c @@ -0,0 +1,278 @@ + +/* + * fs/snap/snap.c + * + * A snap shot file system. + * + */ + +#define EXPORT_SYMTAB + + +#define __NO_VERSION__ +#include <linux/module.h> +#include <asm/uaccess.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/quotaops.h> +#include <linux/list.h> +#include <linux/file.h> +#include <asm/bitops.h> +#include <asm/byteorder.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +/* + * Return true if the inode is a redirector inode. + */ +int snap_is_redirector(struct inode *cache_inode) +{ + struct snap_cache *cache; + struct snapshot_operations *snapops; + + cache = snap_find_cache(cache_inode->i_dev); + if (!cache) { + EXIT; + return 0; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->is_redirector) { + EXIT; + return 0; + } + + CDEBUG(D_SNAP, "ino %ld\n", cache_inode->i_ino); + return snapops->is_redirector(cache_inode); +} + +/* + * Using a cache inode and clone super block find the real one. 
+ */ +struct inode *snap_redirect(struct inode *cache_inode, + struct super_block *clone_sb) +{ + struct snap_clone_info *clone_info; + struct snap_table *table; + struct inode *redirected; + struct snap_cache *cache; + struct snapshot_operations *snapops; + int slot = 0; + int my_table[SNAP_MAX]; + int clone_slot; + + ENTRY; + + cache = snap_find_cache(cache_inode->i_dev); + if (!cache) { + EXIT; + return NULL; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->get_indirect) { + EXIT; + return NULL; + } + + CDEBUG(D_SNAP, "cache ino %ld\n", cache_inode->i_ino); + clone_info = (struct snap_clone_info *)&clone_sb->u.generic_sbp; + + table = &snap_tables[clone_info->clone_cache->cache_snap_tableno]; + + /* first find if there are indirected at the clone_index */ + redirected = snapops->get_indirect(cache_inode, NULL, + clone_info->clone_index); + /* if not found, get the FIRST index after this and before NOW */ + /* XXX fix this later, now use tbl_count, not NOW */ + if(!redirected) { + clone_slot = snap_index2slot(table, clone_info->clone_index); + for(slot = table->tbl_count; slot >= clone_slot; slot --) + { + my_table[slot-clone_slot+1] = table->tbl_index[slot]; + } + redirected = snapops->get_indirect + (cache_inode, my_table, table->tbl_count - clone_slot + 1); + } + /* old version + redirected = snapops->get_indirect + (cache_inode, table->tbl_index, + snap_index2slot(table, clone_info->clone_index)); + */ + if(redirected) CDEBUG(D_SNAP,"redirected ino %ld\n",redirected->i_ino); + EXIT; + return redirected; +} + +/* + * Make a copy of the data and plug a redirector in between if there + * is no redirector yet. 
+ */ +int snap_do_cow(struct inode *inode, ino_t parent_ino, int del) +{ + struct snap_cache *cache; + struct snap snap; + struct inode *ind = NULL; + struct snapshot_operations *snapops; + + ENTRY; + CDEBUG(D_SNAP, "snap_do_cow, ino %ld\n", inode->i_ino); + + cache = snap_find_cache(inode->i_dev); + if (!cache) { + EXIT; + return -EINVAL; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->create_indirect) { + EXIT; + return -EINVAL; + } + snap_last(cache, &snap); + ind = snapops->create_indirect(inode, parent_ino, snap.index, del); + EXIT; + if(ind) { + iput(ind); + return 0; + } + else + return -EINVAL; +} + +int snap_iterate(struct super_block *sb, + int (*repeat)(struct inode *inode, void *priv), + struct inode **start, void *priv, int flag) +{ + struct inode *inode = sb->s_root->d_inode; + struct snap_cache *cache; + struct snapshot_operations *snapops; + + ENTRY; + + cache = snap_find_cache(inode->i_dev); + if (!cache) { + EXIT; + return 0; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->iterate) { + EXIT; + return 0; + } + + EXIT; + return snapops->iterate(sb, repeat, start, priv, flag); +} + +int snap_destroy_indirect(struct inode *pri, int index, struct inode *next_ind ) +{ + struct snap_cache *cache; + struct snapshot_operations *snapops; + + ENTRY; + cache = snap_find_cache(pri->i_dev); + if (!cache) { + EXIT; + return 0; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->destroy_indirect) { + EXIT; + return 0; + } + + EXIT; + return snapops->destroy_indirect(pri, index, next_ind); +} + +int snap_restore_indirect(struct inode *pri, int index ) +{ + struct snap_cache *cache; + struct snapshot_operations *snapops; + + ENTRY; + + cache = snap_find_cache(pri->i_dev); + if (!cache) { + EXIT; + return 0; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->restore_indirect) { + EXIT; + return 0; + } + + EXIT; + return 
snapops->restore_indirect(pri, index); +} + +struct inode *snap_get_indirect(struct inode *pri, int *table, int slot) +{ + struct snap_cache *cache; + struct snapshot_operations *snapops; + + ENTRY; + + cache = snap_find_cache(pri->i_dev); + if (!cache) { + EXIT; + return NULL; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->get_indirect) { + EXIT; + return NULL; + } + + EXIT; + return snapops->get_indirect(pri, table, slot); +} + +int snap_migrate_data(struct inode *dst, struct inode *src) +{ + struct snap_cache *cache; + struct snapshot_operations *snapops; + + ENTRY; + + cache = snap_find_cache(src->i_dev); + if (!cache) { + EXIT; + return 0; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->migrate_data) { + EXIT; + return 0; + } + + EXIT; + return snapops->migrate_data(dst, src); +} + +int snap_set_indirect(struct inode *pri, ino_t ind_ino, int index, ino_t parent_ino) +{ + struct snap_cache *cache; + struct snapshot_operations *snapops; + + ENTRY; + + cache = snap_find_cache(pri->i_dev); + if (!cache) { + EXIT; + return -EINVAL; + } + snapops = filter_c2csnapops(cache->cache_filter); + if (!snapops || !snapops->set_indirect) { + EXIT; + return -EINVAL; + } + + EXIT; + return snapops->set_indirect(pri, ind_ino, index, parent_ino); +} + + diff --git a/lustre/snapfs/snaptable.c b/lustre/snapfs/snaptable.c new file mode 100644 index 0000000000..058164b349 --- /dev/null +++ b/lustre/snapfs/snaptable.c @@ -0,0 +1,1099 @@ + +/* + * snaptable.c + * + * Manipulate snapshot tables + * + */ + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/malloc.h> +#include <linux/locks.h> +#include <linux/errno.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/vmalloc.h> +#include <linux/blkdev.h> +#include <linux/sysrq.h> +#include <linux/file.h> +#include <linux/init.h> +#include <linux/quotaops.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/bitops.h> 
+#include <asm/mmu_context.h> + +#include <linux/filter.h> +#include <linux/snapsupport.h> +#include <linux/snapfs.h> + +struct snap_table snap_tables[SNAP_MAX_TABLES]; + +#if 0 +static void snap_lock_table(int table_no) +{ + + spin_lock(snap_tables[table_no].tbl_lock); + +} + +static void snap_unlock_table(int table_no) +{ + + spin_unlock(snap_tables[table_no].tbl_lock); + +} +#endif + +int snap_index2slot(struct snap_table *snap_table, int snap_index) +{ + int slot; + + for ( slot=0 ; slot<snap_table->tbl_count ; slot++ ) + if ( snap_table->tbl_index[slot] == snap_index ) + return slot; + return -1; +} + + + +/* latest snap: returns + - the index of the latest snapshot before NOW + - hence it returns 0 in case all the volume snapshots lie in the future + - this is the index where a COW will land (will be created) + */ +void snap_last(struct snap_cache *info, struct snap *snap) +{ + int i ; + time_t now = CURRENT_TIME; + struct snap_table *table; + int tableno = info->cache_snap_tableno; + + ENTRY; + if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid table no %d\n", tableno); + snap->index = -1; + } + table = &snap_tables[tableno]; + + /* start at the highest index in the superblock + snaptime array */ + i = table->tbl_count - 1; + + /* NOTE: i>0 is an unnecessary check */ + while ( table->tbl_times[i] > now && i > 0) { + CDEBUG(D_SNAP, "time: %ld, i: %d\n", table->tbl_times[i], i); + i--; + } + + snap->index = table->tbl_index[i]; + snap->time = table->tbl_times[i]; + CDEBUG(D_SNAP, "index: %d, time[i]: %ld, now: %ld\n", + snap->index, snap->time, now); + EXIT; + return; +} + +/* return -1 if no COW is needed, otherwise the index of the + clone to COW to is returned +*/ + +int snap_needs_cow(struct inode *inode) +{ + struct snap snap; + struct snap_cache *cache; + int index = -1; + ENTRY; + + cache = snap_find_cache(inode->i_dev); + if ( !cache ) { + EXIT; + return -1; + } + + /* here we find the time of the last snap to compare 
with */ + snap_last(cache, &snap); + + /* decision .... if the snapshot is more recent than the object, + * then any change to the object should cause a COW. + */ + if (inode->i_mtime <= snap.time && inode->i_ctime <= snap.time) { + index = snap.index; + } + printk("snap_needs_cow, ino %lu , get index %d\n",inode->i_ino, index); + + EXIT; + return index; +} /* snap_needs_cow */ + +#if 0 +int snap_obd2snap(struct snap_clone_info *info, struct snap *snap) +{ + struct snap_table *table; + int tableno = info->clone_cache->cache_snap_tableno; + int index = info->clone_index; + int slot; + + ENTRY; + if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid table no %d\n", tableno); + snap->index = -1; + } + table = &snap_tables[tableno]; + slot = snap_index2slot(table, index); + + snap->index = index; + snap->time = table->tbl_times[slot]; + EXIT; + return slot; +} +#endif + +/* at what index is the current snapshot located */ +int snap_current(struct snap_cache *cache) +{ + int tableno = cache->cache_snap_tableno; + + return snap_tables[tableno].tbl_index[0]; +} + +int snap_is_used(int table_no, int snap_index) + +{ + /* ENTRY; */ + if ( snap_index < 0 || snap_index >= SNAP_MAX ) { + printk(__FUNCTION__ ": invalid snapno %d,table %d\n", + snap_index, table_no); + EXIT; + return -1; + } + if ( table_no < 0 || table_no > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid snapno %d,table %d\n", + snap_index, table_no); + EXIT; + return -1; + } + + /* EXIT; */ + return snap_tables[table_no].tbl_used & (1<<snap_index); +} + +void snap_use(int table_no, int snap_index) +{ + if ( snap_index < 0 || snap_index >= SNAP_MAX ) { + printk(__FUNCTION__ ": invalid snapno %d,table %d\n", + snap_index, table_no); + return; + } + if ( table_no < 0 || table_no > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid snapno %d,table %d\n", + snap_index, table_no); + return; + } + if ( snap_index2slot(&snap_tables[table_no], snap_index) < 0 ) + return; + + 
snap_tables[table_no].tbl_used |= (1<<snap_index); +} + +void snap_unuse(int table_no, int snap_index) +{ + if ( snap_index < 0 || snap_index >= SNAP_MAX ) { + printk(__FUNCTION__ ": invalid snapno %d,table %d\n", + snap_index, table_no); + return; + } + if ( table_no < 0 || table_no > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid snapno %d,table %d\n", + snap_index, table_no); + return; + } + if ( snap_index2slot(&snap_tables[table_no], snap_index) < 0 ) + return; + + snap_tables[table_no].tbl_used &= ~(1<<snap_index); +} + +static int nprint_buf(char *buf, int buflen, char *fmt, ...) +{ + va_list args; + int n; + char local_buf[1024]; + + va_start(args, fmt); + n = vsprintf(local_buf, fmt, args); + va_end(args); + + if( n > buflen ) { + if( buflen > 1024) buflen=1024; + memcpy(buf, local_buf, buflen); + return buflen; + } + else { + memcpy(buf, local_buf, n); + return n; + } +} + +int snap_print_table(struct snap_table_data *data, char *buf, int *buflen) +{ + int tableno = data->tblcmd_no; + int i; + struct snap_table *table; + char *buf_ptr; + int nleft = (*buflen); + int nprint = 0; + + if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid table number %d\n", tableno); + EXIT; + return -EINVAL; + } + + table = &snap_tables[tableno]; + + printk("------- snap table %d\n", tableno); + printk(" -- snap count %d\n", table->tbl_count); + printk(" -- snap used 0x%x\n", table->tbl_used); + for ( i = 0 ; i < SNAP_MAX ; i++ ) { + printk(" -- slot %d, idx %d, time %ld, name %s\n", + i, table->tbl_index[i], table->tbl_times[i], + table->tbl_name[i]); + } + + buf_ptr = buf; + nprint= nprint_buf(buf_ptr, nleft, "------- snap table %d\n", tableno); + nleft -= nprint; + if( nleft > 0 ) buf_ptr += nprint; + else goto exit; + nprint = nprint_buf(buf_ptr, nleft, " -- snap count %d\n", table->tbl_count); + nleft -= nprint; + if( nleft > 0 ) buf_ptr += nprint; + else goto exit; + nprint = nprint_buf(buf_ptr, nleft, " -- snap used 0x%x\n", 
table->tbl_used); + nleft -= nprint; + if( nleft > 0 ) buf_ptr += nprint; + else goto exit; + for ( i = 0 ; i < SNAP_MAX ; i++ ) { + nprint = nprint_buf( buf_ptr, nleft, + " -- slot %d, idx %d, time %ld, name %s\n", + i, table->tbl_index[i], table->tbl_times[i], + table->tbl_name[i]); + nleft -= nprint; + if( nleft > 0 ) buf_ptr += nprint; + else goto exit; + } + +exit: + if(nleft > 0) (*buflen) = (*buflen) - nleft; + + return 0; +} + +int snap_install_table(int len, struct snap_table_data *data) +{ + int i, j; + int tableno = data->tblcmd_no; +// int found_current; + struct snap_table *table; + + if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid table number %d\n", tableno); + EXIT; + return -EINVAL; + } + table = &snap_tables[tableno]; + + /* for each index that is used by the current table + we need to make sure that the new table we are about + to put in contains that index too + */ + for ( i = 0; i < SNAP_MAX ; i++ ) { + int foundit; + int err; + + if ((err = snap_is_used(tableno, i)) < 0 ) { + printk(__FUNCTION__ ": table %d not used\n", tableno); + EXIT; + return -EINVAL; + } else if (err == 0) { + continue; + } + + foundit = 0; + for (j = 0 ; j<= data->tblcmd_count ; j++) { + if ( i == data->tblcmd_snaps[j].index ) { + foundit = 1; + break; + } + } + if ( !foundit ) { + printk(__FUNCTION__ ": index %d not in table %d\n", + i, tableno); + return -EINVAL; + } + } + + /* we must have: + - valid indices + - a current snapshot in the table + - increasing snapshot times + */ +// found_current = 0; + CDEBUG(D_SNAP, "snaplist: tblcmd_count %d\n", data->tblcmd_count); + for (i = 0 ; i < data->tblcmd_count ; i++) { + + if ( (data->tblcmd_snaps[i].index < 0) || + (data->tblcmd_snaps[i].index >= SNAP_MAX) ) { + printk(__FUNCTION__ ": snap_index out of range!\n"); + return -EINVAL; + } + + if (i>0 && data->tblcmd_snaps[i].time <= + data->tblcmd_snaps[i-1].time) { + printk(__FUNCTION__ ": times not increasing\n"); + return -EINVAL; + } + 
+// if ( 0 == data->tblcmd_snaps[i].time ) { +// found_current = 1; +// break; +// } + } +// if ( !found_current ) { +// printk(__FUNCTION__ "no current snapshot in table\n"); +// return -EINVAL; +// } + + /* ready to go: over write the table */ +/* + for (i = 0 ; i < data->tblcmd_count ; i++) { + + table->tbl_times[i] = data->tblcmd_snaps[i].time; + table->tbl_index[i] = data->tblcmd_snaps[i].index; + memcpy(table->tbl_name[i], data->tblcmd_snaps[i].name, + SNAP_MAX_NAMELEN); + table->tbl_name[i][SNAP_MAX_NAMELEN - 1] = '\0'; + + CDEBUG(D_SNAP, "snaplist: i %d, time %ld, idx %d, name %s\n", + i, table->tbl_times[i], table->tbl_index[i], + table->tbl_name[i]); + } +*/ + /* below : new, we don't need current snapshot for data + * current snapshot always has slot 0, index 0, name "current" + */ + table->tbl_times[0] = 0; + table->tbl_index[0] = 0; + strcpy(table->tbl_name[0], "current"); + + i=0; + CDEBUG(D_SNAP, "snaplist: i %d, time %ld, idx %d, name %s\n", + i, table->tbl_times[i], table->tbl_index[i], + table->tbl_name[i]); + + for (i = 0 ; i < data->tblcmd_count ; i++) { + + table->tbl_times[i+1] = data->tblcmd_snaps[i].time; + table->tbl_index[i+1] = data->tblcmd_snaps[i].index; + memcpy(table->tbl_name[i+1], data->tblcmd_snaps[i].name, + SNAP_MAX_NAMELEN); + table->tbl_name[i+1][SNAP_MAX_NAMELEN - 1] = '\0'; + + CDEBUG(D_SNAP, "snaplist: i %d, time %ld, idx %d, name %s\n", + i+1, table->tbl_times[i+1], table->tbl_index[i+1], + table->tbl_name[i+1]); + } + + for ( i = data->tblcmd_count + 1 ; i < SNAP_MAX ; i++ ) { + table->tbl_times[i] = 0; + table->tbl_index[i] = 0; + memset(table->tbl_name[i], 0, SNAP_MAX_NAMELEN); + } + + /* set the table count */ +// table->tbl_count = data->tblcmd_count; + table->tbl_count = data->tblcmd_count + 1; + return 0; +} + + +int snap_table_attach(int tableno, int snap_index) +{ + struct snap_table *table; + + if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid table number %d\n", tableno); + EXIT; 
+ return -EINVAL; + } + table = &snap_tables[tableno]; + + if ( snap_index2slot(table, snap_index) < 0 ) { + printk(__FUNCTION__ ": snap index %d not present in table %d\n", + snap_index, tableno); + return -EINVAL; + } + + snap_use(tableno, snap_index); + return 0; +} + +static int getdata(int len, void **data) +{ + void *tmp = NULL; + + if (!len) { + *data = NULL; + return 0; + } + + SNAP_ALLOC(tmp, void *, len); + if ( !tmp ) + return -ENOMEM; + + CDEBUG(D_MALLOC, "snap_alloc:len %d, add %p\n", len, tmp); + + memset(tmp, 0, len); + if ( copy_from_user(tmp, *data, len)) { + SNAP_FREE(tmp, len); + CDEBUG(D_MALLOC, "snap_free:len %d, add %p\n", len, tmp); + return -EFAULT; + } + *data = tmp; + + return 0; +} + +static void freedata(void *data, int len) { + SNAP_FREE(data, len); + CDEBUG(D_MALLOC, "snap_free:len %d, add %p\n", len, data); +} + +static int get_next_inode(struct inode *pri, void *ino) +{ + static ino_t prev_ino = -1 ; + ino_t this_ino = pri->i_ino; + ino_t find_ino = *(ino_t *)ino; + ino_t *found = ino; + + if( find_ino == 0) { + (*found) = this_ino; + return -1; + } + + if( find_ino == prev_ino ) { + (*found) = this_ino; + return -1; + } + else { + prev_ino = this_ino; + } + return 0; +} + + +static int snap_get_next_inode(struct snap_ino_list_data *data, ino_t *found_ino, ino_t *parent_ino) +{ + kdev_t dev = data->dev; + ino_t this_ino = data->ino; + + struct snap_cache *cache; + + struct inode *inode; + struct dentry * dentry; + + ENTRY; + + cache = snap_find_cache(dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + snap_iterate( cache->cache_sb, &get_next_inode, NULL, &(data->ino), + SNAP_ITERATE_COWED_INODE); + + if( data->ino == this_ino ) { + data->ino = 0; + } + + *found_ino = data->ino; + + if( !(*found_ino) ) return 0; + + *parent_ino = 0; + inode = iget (cache->cache_sb, *found_ino); + if (list_empty(&inode->i_dentry)) { + printk("No dentry for ino %lu, Error(XXX)! 
\n", inode->i_ino); + iput(inode); + return 0; + } + else { + dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias)); + } + if( dentry->d_parent->d_inode) + *parent_ino = dentry->d_parent->d_inode->i_ino; + else + *parent_ino = 0; + + dput(dentry); + iput(inode); + + return 0; +} +/* +static int snap_get_inode_info(struct snap_ino_list_data *data, int index) +{ + kdev_t dev = data->dev; + ino_t pri = data->ino; + int index = data->index; + + struct snap_cache *cache; + + struct inode *pri; + struct inode *ind; + ino_t ind_ino = 0; + + ENTRY; + + cache = snap_find_cache(dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + pri = iget(cache->cache->sb, pri_ino); + ind = snap_get_indirect(pri, NULL, index); + if(ind) { + ind_ino = ind->i_ino; + iput(ind); + } + return ind_ino; +} +*/ + +static int print_inode(struct inode *pri,void *param) +{ + CDEBUG(D_SNAP, "cowed inode list: ino %lu \n", pri->i_ino); + return 0; +} + +static int snap_print(struct super_block *sb, void *data) +{ + snap_iterate(sb, &print_inode, NULL, data, SNAP_ITERATE_COWED_INODE); + return 0; +} + +static int delete_inode(struct inode *primary, void *param) +{ + struct snap_iterdata * data; + int tableno = 0; + int index = 0; + int rc = 0; + + struct inode *redirect; + ino_t old_ind = 0; + struct snap_table *table; + int slot; + int delete_slot; + int this_index; + struct inode *next_ind = NULL; + int my_table[SNAP_MAX]; + + if(!primary) return 0; + + data = (struct snap_iterdata*) param; + + if(data) { + index = data->index; + tableno = data->tableno; + } + + printk("delete_inode ino %lu, index %d\n", primary->i_ino, index); + + table = &snap_tables[tableno]; + + redirect = snap_get_indirect(primary, NULL, index); + + if(!redirect) + return 0; + + old_ind = redirect->i_ino; + iput(redirect); + slot = snap_index2slot(table, index) - 1; + if( slot > 0 ) { + this_index = table->tbl_index[slot]; + redirect = snap_get_indirect(primary, NULL, this_index); + if(redirect) + 
iput(redirect); + else { + snap_set_indirect(primary, old_ind, this_index, 0); + snap_set_indirect(primary, 0, index, 0); + return 0; + } + } + + /* get the FIRST index after this and before NOW */ + /* used for destroy_indirect and block level cow */ + /* XXX fix this later, now use tbl_count, not NOW */ + delete_slot = snap_index2slot(table, index); + for(slot = table->tbl_count; slot > delete_slot; slot --) + { + my_table[slot - delete_slot] = table->tbl_index[slot]; + } + next_ind = snap_get_indirect + (primary, my_table, table->tbl_count - delete_slot ); + if( next_ind && (next_ind->i_ino == primary->i_ino) ) { + iput(next_ind); + next_ind = NULL; + } + + if( next_ind && (next_ind->i_ino == old_ind) ) { + iput(next_ind); + next_ind = NULL; + } + + rc = snap_destroy_indirect(primary, index, next_ind); + + if(next_ind) iput(next_ind); + + if(rc != 0) + printk("ERROR:snap_destroy_indirect(ino %lu,index %d),ret %d\n", primary->i_ino, index, rc); + return 0; +} + +static int snap_delete(struct super_block *sb, struct snap_iterdata *data) +//static int snap_delete(struct super_block *sb, void *data) +{ + CDEBUG(D_SNAP, "dev %d, tableno %d, index %d, time %lu\n", + data->dev, data->tableno, data->index, data->time ); + + snap_iterate(sb,&delete_inode,NULL, data, SNAP_ITERATE_COWED_INODE); + return 0; +} + +static int delete_new_inode(struct inode *pri, void *param) +{ + struct snap_iterdata * data; + + int index = 1; + time_t restore_time = 0xFFFFFFFF; + + ENTRY; + + if(!pri) return 0; + + if(snap_is_redirector(pri)){ + EXIT; + return 0; + } + + data = (struct snap_iterdata*) param; + + if(data) { + index = data->index; + restore_time = data->time; + } + + CDEBUG(D_SNAP, "ino %lu, index=%d, time=%lu\n", + pri->i_ino, index, restore_time); + + + if( pri->i_mtime > restore_time || pri->i_ctime > restore_time ) { + struct list_head *head = &pri->i_dentry, *pos; + + CDEBUG(D_SNAP, "snap_restore ino %lu is newer, delete \n",pri->i_ino); + for( pos = head->next; pos != 
head; pos = pos->next ){ +// d_invalidate( list_entry(pos, struct dentry, d_alias) ); + d_drop( list_entry(pos, struct dentry, d_alias) ); + } + pri->i_nlink = 0; + } + return 0; + +} + +static int restore_inode(struct inode *pri, void *param) +{ + struct snap_iterdata * data; +// struct snap_cache *cache; + int tableno = 0; + + int index = 1; + time_t restore_time = 0xFFFFFFFF; + + struct inode *ind = NULL; + int slot; + int restore_slot; + struct snap_table *table; +// int my_table[SNAP_MAX]; + int restore_index; + + ENTRY; + + if(!pri) return 0; + + data = (struct snap_iterdata*) param; + + if(data) { + index = data->index; + tableno = data->tableno; + restore_time = data->time; + } + + CDEBUG(D_SNAP, "ino %lu, index=%d, time=%lu, tableno %d\n", + pri->i_ino, index, restore_time, tableno); + + /* XXX: should we have = here? */ + if( pri->i_mtime > restore_time || pri->i_ctime > restore_time ) + { + restore_index = index; + table = &snap_tables[tableno]; + /* first find if there are indirected at the index */ + ind = snap_get_indirect(pri, NULL, index); + /* if not found, get the FIRST index after this and before NOW*/ + /* XXX fix this later, now use tbl_count, not NOW */ + if(!ind) { + restore_slot = snap_index2slot(table, index); + for(slot = restore_slot; slot <= table->tbl_count; + slot++) { + ind = snap_get_indirect (pri, NULL, + table->tbl_index[slot]); + if(ind) { + restore_index = table->tbl_index[slot]; + break; + } + } +/* for(slot = table->tbl_count; slot >= restore_slot; + slot --) + { + my_table[slot - restore_slot + 1] = + table->tbl_index[slot]; + } + ind = snap_get_indirect (pri, my_table, + table->tbl_count - restore_slot + 1); + + if( ind && (ind->i_ino == pri->i_ino) ) { + iput(ind); + ind = NULL; + } +*/ + } + + if(ind) { + CDEBUG(D_SNAP, "restore ino %lu with index %d\n", + pri->i_ino, restore_index); + iput(ind); +// snap_restore_indirect(pri, index); + snap_restore_indirect(pri, restore_index); + /* XXX */ + //delete_inode(pri, param); + 
snap_destroy_indirect(pri, restore_index, NULL); + } + else { + CDEBUG(D_SNAP, "ERROR:restore ino %lu\n", pri->i_ino); + } + } + else { + printk("ino %lu is older, don't restore\n",pri->i_ino); + } + EXIT; + return 0; +} + +//int snap_restore(struct super_block *sb, void *data) +static int snap_restore(struct super_block *sb, struct snap_iterdata *data) +{ + CDEBUG(D_SNAP, "dev %d, tableno %d, index %d, time %lu\n", + data->dev, data->tableno, data->index, data->time ); + + snap_iterate(sb, &delete_new_inode, NULL, data, SNAP_ITERATE_ALL_INODE); + snap_iterate(sb, &restore_inode, NULL, data, SNAP_ITERATE_COWED_INODE ); + return 0; +} + +/* return the index number of a name in a table */ +int snap_get_index_from_name(int tableno, char *name) +{ + struct snap_table *table; + int slot; + + if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) { + printk(__FUNCTION__ ": invalid table number %d\n", tableno); + return -EINVAL; + } + + table = &snap_tables[tableno]; + + for ( slot = 0 ; slot < SNAP_MAX ; slot++ ) { +/* if(memcmp (table->tbl_name[slot], name, + strlen(table->tbl_name[slot]) ) == 0 ) { + return table->tbl_index[slot]; + } +*/ + if(strncmp (table->tbl_name[slot], name, + SNAP_MAX_NAMELEN) == 0 ) { + return table->tbl_index[slot]; + } + } + return -EINVAL; +} + +int snap_iterate_func(int len, struct snap_ioc_data *ioc_data, unsigned int cmd) +{ + struct snap_iterdata data; + + kdev_t dev ; + char name[SNAP_MAX_NAMELEN]; + + int index ; + int tableno; + int name_len; + int slot; + + struct super_block *sb; + struct snap_cache *cache; + struct snap_table *table; + + ENTRY; + + dev = ioc_data->dev; + cache = snap_find_cache(dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + sb = cache->cache_sb; + tableno = cache->cache_snap_tableno; + table = &snap_tables[tableno]; + + name_len = len - sizeof(kdev_t); + memset(name, 0, SNAP_MAX_NAMELEN); + if(name_len > SNAP_MAX_NAMELEN) + name_len = SNAP_MAX_NAMELEN; + if(name_len < 0 ) + name_len = 0; + memcpy(name, 
ioc_data->name, name_len); + if ( (index = snap_get_index_from_name (tableno, name)) < 0 ) { + EXIT; + return -EINVAL; + } + + data.dev = dev; + data.index = index; + data.tableno = tableno; + slot = snap_index2slot (table, index); + if( slot < 0 ) { + EXIT; + return -EINVAL; + } + data.time = table->tbl_times[slot]; + + CDEBUG(D_SNAP, "dev %d, tableno %d, index %d, time %lu\n", + data.dev, data.tableno, data.index, data.time ); + + switch (cmd) { + case IOC_SNAP_DEBUG: + snap_print(sb, &data); + break; + case IOC_SNAP_DELETE: + snap_delete(sb, &data); + break; + case IOC_SNAP_RESTORE: + snap_restore(sb, &data); + break; + default: + return -EINVAL; + } + + EXIT; + + return 0; +} + +int snap_ioctl (struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + void *uarg, *karg; + int len; + int err; + kdev_t dev; + struct { + int len; + char *data; + }input; + int rc = 0; + + ENTRY; + + dev = MINOR(inode->i_rdev); + if (dev != SNAP_PSDEV_MINOR) + return -ENODEV; + + if (!inode) { + CDEBUG(D_IOCTL, "invalid inode\n"); + return -EINVAL; + } + + if ( _IOC_TYPE(cmd) != IOC_SNAP_TYPE || + _IOC_NR(cmd) < IOC_SNAP_MIN_NR || + _IOC_NR(cmd) > IOC_SNAP_MAX_NR ) { + CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); + EXIT; + return -EINVAL; + } + + /* get data structures */ + err = copy_from_user(&input, (void *)arg, sizeof(input)); + if ( err ) { + EXIT; + return err; + } + uarg = input.data; + len = input.len; + + karg = input.data; + err = getdata(input.len, &karg); + if ( err ) { + EXIT; + return err; + } + + switch (cmd) { + case IOC_SNAP_SETTABLE: + rc = snap_install_table(len, karg); + break; + case IOC_SNAP_PRINTTABLE: { + struct output_data{ + int len; + char buf[1024]; + }output; + output.len = sizeof(output.buf); + snap_print_table(karg, output.buf, &(output.len)); + rc = copy_to_user((char *)arg, &output, output.len+sizeof(int)); + break; + } + case IOC_SNAP_GETINDEXFROMNAME: { + 
int index = 0; + char name[SNAP_MAX_NAMELEN]; + int tableno = 0; + struct snap_cache *cache; + kdev_t dev; + int name_len; + + struct get_index_struct { + kdev_t dev; + char name[SNAP_MAX_NAMELEN]; + }; + + struct get_index_struct *data = karg; + + name_len = len - sizeof(kdev_t); + dev = data->dev; + memset(name, 0, SNAP_MAX_NAMELEN); + if(name_len > SNAP_MAX_NAMELEN) + name_len = SNAP_MAX_NAMELEN; + if(name_len < 0 ) + name_len = 0; + /*for(i=0 ; i< name_len; i++) { + name[i] = data->name[i]; + } + */ + memcpy(name, data->name, name_len); + printk("dev %d , len %d, name_len %d, find name is [%s]\n", dev, len, name_len, name); + cache = snap_find_cache(dev); + if ( !cache ) { + EXIT; + rc = -EINVAL; + break; + } + tableno = cache->cache_snap_tableno; + + index = snap_get_index_from_name(tableno, name); + rc = copy_to_user((char *)arg, &index, sizeof(index)); + break; + } + case IOC_SNAP_GET_NEXT_INO: { + struct get_ino_struct{ + ino_t found_ino; + ino_t parent_ino; + }get_ino; + get_ino.found_ino = 0; + get_ino.parent_ino = 0; + rc = snap_get_next_inode(karg, &get_ino.found_ino, &get_ino.parent_ino); + rc = copy_to_user((char *)arg, &get_ino, sizeof(get_ino)); + break; + } + case IOC_SNAP_GET_INO_INFO: { + struct ioc_ino_info{ + kdev_t dev; + ino_t ino; + int index; + }; + struct snap_cache *cache; + struct inode *pri; + struct inode *ind; + struct ioc_ino_info *data = karg; + ino_t ind_ino = 0; + + cache = snap_find_cache(data->dev); + if ( !cache ) { + EXIT; + rc = -EINVAL; + break; + } + printk("get_ino_info, dev %d, ino %lu, index %d\n", + data->dev, data->ino, data->index); + pri = iget(cache->cache_sb, data->ino); + ind = snap_get_indirect(pri, NULL, data->index); + if(ind) { + ind_ino = ind->i_ino; + iput(ind); + } + iput(pri); + printk("get_ino_info, get ind %lu\n", ind_ino); + rc = copy_to_user((char *)arg, &ind_ino, sizeof(ino_t)); + break; + } + case IOC_SNAP_DELETE: + case IOC_SNAP_RESTORE: + case IOC_SNAP_DEBUG: + rc = snap_iterate_func(len, karg, 
cmd); + break; + case IOC_SNAP_DEVFAIL: + snap_debug_failcode = (unsigned int)arg; + break; + case IOC_SNAP_SHOW_DOTSNAP: { + struct ioc_show_info{ + kdev_t dev; + int show; + }; + struct snap_cache *cache; + struct ioc_show_info *data = karg; + + cache = snap_find_cache(data->dev); + if( !cache ) { + EXIT; + rc = -EINVAL; + break; + } + cache->cache_show_dotsnap = (char)data->show; + CDEBUG(D_IOCTL, "Set show dotsnap: %s\n", + data->show ? "Yes" : "No"); + + break; + } + + default: + rc = -EINVAL; + break; + } + + freedata(karg, input.len); + EXIT; + return rc; +} diff --git a/lustre/snapfs/super.c b/lustre/snapfs/super.c new file mode 100644 index 0000000000..f1019771a8 --- /dev/null +++ b/lustre/snapfs/super.c @@ -0,0 +1,714 @@ +/* + * snap_current + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Mountain View Data, Inc. + * + * Author: Peter J. Braam <braam@mountainviewdata.com> + */ + + +#include <stdarg.h> + +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/dcache.h> + +#ifdef CONFIG_SNAPFS_EXT2 +#include <linux/ext2_fs.h> +#endif +#ifdef CONFIG_SNAPFS_EXT3 +#include <linux/ext3_fs.h> +#endif + +#include <linux/malloc.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#define __NO_VERSION__ +#include <linux/module.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +#ifdef SNAP_DEBUG +long snap_vmemory = 0; +long snap_kmemory = 0; +unsigned int snap_debug_failcode = 0; +#endif + +extern struct snap_cache *snap_init_cache(void); +extern inline void snap_cache_add(struct snap_cache *, kdev_t); +extern inline void snap_init_cache_hash(void); + +extern int snap_get_index_from_name (int tableno, 
char *name); + +#ifdef CONFIG_SNAPFS_EXT2 +extern struct snapshot_operations ext2_snap_operations; +extern struct journal_ops snap_ext2_journal_ops; +#endif + +#ifdef CONFIG_SNAPFS_EXT3 +extern struct snapshot_operations ext3_snap_operations; +extern struct journal_ops snap_ext3_journal_ops; +#endif + +/* returns an allocated string, copied out from data if opt is found */ +static char *read_opt(const char *opt, char *data) +{ + char *value; + char *retval; + + CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); + if ( strncmp(opt, data, strlen(opt)) ) + return NULL; + + if ( (value = strchr(data, '=')) == NULL ) + return NULL; + + value++; + SNAP_ALLOC(retval, char *, strlen(value) + 1); + if ( !retval ) { + printk("snapfs: Out of memory!\n"); + return NULL; + } + + strcpy(retval, value); + CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); + return retval; +} + +static inline void store_opt(char **dst, char *opt) +{ + if (dst) { + if (*dst) + SNAP_FREE(*dst, strlen(*dst) + 1); + *dst = opt; + } else + SNAP_FREE(opt, strlen(opt) + 1); +} + +/* Find the options for snapfs in "options", saving them into the + * passed pointers. If the pointer is null, the option is discarded. + * Copy out all non-snapfs options into cache_data (to be passed + * to the read_super operation of the cache). The return value will + * be a pointer to the end of the cache_data. 
+ */ +static char *snapfs_options(char *options, char *cache_data, + char **cache_type, char **cow_type, + char **snaptable) +{ + char *this_char; + char *cache_data_end = cache_data; + + /* set the defaults here */ + if (cache_type && !*cache_type) { + SNAP_ALLOC(*cache_type, char *, strlen("ext2") + 1); + strcpy(*cache_type, "ext2"); + } + if (cow_type && !*cow_type) { + SNAP_ALLOC(*cow_type, char *, strlen("block") + 1); + strcpy(*cow_type, "block"); + } + if (snaptable && !*snaptable) { + SNAP_ALLOC(*snaptable, char *, strlen("-1")+1); + strcpy(*snaptable, "-1"); + } + + if (!options || !cache_data) + return cache_data_end; + + CDEBUG(D_SUPER, "parsing options\n"); + for (this_char = strtok (options, ","); + this_char != NULL; + this_char = strtok (NULL, ",")) { + char *opt; + CDEBUG(D_SUPER, "this_char %s\n", this_char); + + if ( (opt = read_opt("cache_type", this_char)) ) { + store_opt(cache_type, opt); + continue; + } + if ( (opt = read_opt("cow_type", this_char)) ){ + store_opt(cow_type, opt); + continue; + } + if ( (opt = read_opt("table", this_char)) ) { + store_opt(snaptable, opt); + continue; + } + + cache_data_end += sprintf(cache_data_end, "%s%s", + cache_data_end != cache_data ? ",":"", + this_char); + } + + return cache_data_end; +} + +int snapfs_remount(struct super_block * sb, int *flags, char *data) +{ + char *cache_data = NULL; + char *snapno = NULL; + char *cache_data_end; + struct snap_cache *cache = NULL; + struct super_operations *sops; + int err = 0; + + ENTRY; + CDEBUG(D_MALLOC, "before remount: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + CDEBUG(D_SUPER, "remount opts: %s\n", data ? 
(char *)data : "(none)"); + if (data) { + /* reserve space for the cache's data */ + SNAP_ALLOC(cache_data, void *, PAGE_SIZE); + if ( !cache_data ) { + err = -ENOMEM; + EXIT; + goto out_err; + } + } + + cache = snap_find_cache(sb->s_dev); + if (!cache) { + printk(__FUNCTION__ ": cannot find cache on remount\n"); + err = -ENODEV; + EXIT; + goto out_err; + } + + /* If an option has not yet been set, we allow it to be set on + * remount. If an option already has a value, we pass NULL for + * the option pointer, which means that the snapfs option + * will be parsed but discarded. + */ + cache_data_end = snapfs_options(data, cache_data, NULL, NULL, &snapno); + + if (cache_data) { + if (cache_data_end == cache_data) { + SNAP_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } else { + CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, + cache_data); + } + } + + + sops = filter_c2csops(cache->cache_filter); + if (sops->remount_fs) { + err = sops->remount_fs(sb, flags, cache_data); + } + + CDEBUG(D_MALLOC, "after remount: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + EXIT; +out_err: + if (cache_data) + SNAP_FREE(cache_data, PAGE_SIZE); + return err; +} + +/* XXXX remount: needed if snapfs was mounted RO at boot time + without a snaptable +*/ + + +/* + * snapfs super block read. + * + * Allocate a struct snap_cache, determine the underlying fs type, + * read the underlying fs superblock, save the underlying fs ops, + * and then replace them with snapfs ops. + * + * Remove the snapfs options before passing to underlying fs. 
+ */ +struct super_block * +snapfs_read_super ( + struct super_block *sb, + void *data, + int silent) +{ + struct file_system_type *fstype; + struct snap_cache *cache = NULL; + char *cache_data = NULL; + char *cache_data_end; + char *cache_type = NULL; + char *cow_type = NULL; + char *snapno = NULL; + char *endptr; + int tableno; + + ENTRY; + CDEBUG(D_MALLOC, "before parsing: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + + /* reserve space for the cache's data */ + SNAP_ALLOC(cache_data, void *, PAGE_SIZE); + if ( !cache_data ) { + printk("snapfs_read_super: Cannot allocate data page.\n"); + EXIT; + goto out_err; + } + + CDEBUG(D_SUPER, "mount opts: %s\n", data ? (char *)data : "(none)"); + + /* read and validate options */ + cache_data_end = snapfs_options(data, cache_data, &cache_type, &cow_type, &snapno); + + /* Need to free cache_type and snapno when it's not in use */ + + /* was there anything for the cache filesystem in the data? */ + if (cache_data_end == cache_data) { + SNAP_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } else { + CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, + cache_data); + } + + /* set up the cache */ + cache = snap_init_cache(); + if ( !cache ) { + printk("snapfs_read_super: failure allocating cache.\n"); + EXIT; + goto out_err; + } + + fstype = get_fs_type(cache_type); + if ( !fstype || !fstype->read_super) { + EXIT; + goto out_err; + } + + cache->cache_filter = filter_get_filter_fs((const char *)cache_type); + /* XXX if cache->cache_filter==NULL?although it's rare ***/ + + + /* + * Read the underlying file system superblock - ext2, ext3, reiser. + * This performs the underlying mount operation. The snapfs options + * have been removed from 'cache_data'. + * + * Note: It's assumed that sb is always returned. 
+ */ + CDEBUG(D_SUPER, "\n"); + if (fstype->read_super(sb, cache_data, silent) != sb) { + printk("snapfs: cache mount failure.\n"); + EXIT; + goto out_err; + } + + /* XXX now look at the flags in the superblock and determine if this + is a block cow file system or a file cow fs. Then assign the + snap operations accordingly. This goes in the sections for ext2/ext3/xfs etc + */ + + /* this might have been freed above */ + CDEBUG(D_SUPER, "\n"); + if (cache_data) { + SNAP_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } + + + /* + * We now know the dev of the cache: hash the cache. + * + * 'cache' is the struct snap_cache allocated for this + * snapfs mount. + */ + CDEBUG(D_SUPER, "\n"); + snap_cache_add(cache, sb->s_dev); + + tableno = simple_strtoul(snapno, &endptr, 0); + cache->cache_snap_tableno = tableno; + + CDEBUG(D_SUPER, "get tableno %d\n", cache->cache_snap_tableno); + + /* + * make sure we have our own super operations + * + * Initialize or re-initialize the cache->cache_ops shared + * struct snap_ops structure set based on the underlying + * file system type. + */ + CDEBUG(D_SUPER, "\n"); + filter_setup_super_ops(cache->cache_filter, sb->s_op, + ¤tfs_super_ops); + CDEBUG(D_SUPER, "\n"); + sb->s_op = filter_c2usops(cache->cache_filter); + /* + * Save pointers in the snap_cache structure to the + * snapfs and underlying file system superblocks. + */ + cache->cache_sb = sb; /* Underlying file system superblock. 
*/ + + /* set up snapshot ops, handle COMPAT_FEATUREs */ + if( 0 ){ + } +#ifdef CONFIG_SNAPFS_EXT2 + else if ( strcmp (cache_type,"ext2") == 0 ){ + cache->cache_type = FILTER_FS_EXT2; + filter_setup_snapshot_ops(cache->cache_filter, + &ext2_snap_operations); + filter_setup_journal_ops(cache->cache_filter, + &snap_ext2_journal_ops); + if( !EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_SNAPFS) ){ + if( strcmp(cow_type, "block")==0 ){ + sb->u.ext2_sb.s_feature_compat |= + EXT2_FEATURE_COMPAT_BLOCKCOW; + sb->u.ext2_sb.s_es->s_feature_compat |= + cpu_to_le32(EXT2_FEATURE_COMPAT_BLOCKCOW); + } + } + sb->u.ext2_sb.s_last_cowed_ino = 0; + } +#endif +#ifdef CONFIG_SNAPFS_EXT3 + else if ( strcmp (cache_type,"ext3") == 0 ){ + cache->cache_type = FILTER_FS_EXT3; + filter_setup_snapshot_ops(cache->cache_filter, + &ext3_snap_operations); + filter_setup_journal_ops(cache->cache_filter, + &snap_ext3_journal_ops); + if( !EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_SNAPFS) ){ + if( strcmp(cow_type, "block")==0 ){ + sb->u.ext3_sb.s_es->s_feature_compat |= + cpu_to_le32(EXT3_FEATURE_COMPAT_BLOCKCOW); + } + } + sb->u.ext3_sb.s_last_cowed_ino = 0; + } +#endif + + CDEBUG(D_SUPER, "\n"); + /* now get our own directory operations */ + if ( sb->s_root && sb->s_root->d_inode ) { + CDEBUG(D_SUPER, "\n"); + filter_setup_dir_ops(cache->cache_filter, + sb->s_root->d_inode->i_op, + ¤tfs_dir_iops); + CDEBUG(D_SUPER, "\n"); + sb->s_root->d_inode->i_op =filter_c2udiops(cache->cache_filter); +// CDEBUG(D_SUPER, "\n"); +// sb->s_root->d_inode->i_snapop = ext2_snapops(); + + CDEBUG(D_SUPER, "lookup at %p\n", + sb->s_root->d_inode->i_op->lookup); +#if 0 + /* XXX is this needed ?? */ + filter_setup_dentry_ops(cache->cache_filter, + sb->s_root->d_op, + ¤tfs_dentry_ops); + sb->s_root->d_op = filter_c2udops(cache->cache_filter); +#endif + } + /* + * Save a pointer to the snap_cache structure in the + * "snap_current" superblock. 
+ */ + (struct snap_cache *) sb->u.generic_sbp = cache; + CDEBUG(D_SUPER, "sb %lx, sb->u.generic_sbp: %lx\n", + (ulong) sb, (ulong) sb->u.generic_sbp); + + /* we can free snapno and cache_type now, because it's not used */ + if (snapno) { + SNAP_FREE(snapno, strlen(snapno) + 1); + snapno = NULL; + } + if (cache_type) { + SNAP_FREE(cache_type, strlen(cache_type) + 1); + snapno = NULL; + } + if (cow_type) { + SNAP_FREE(cow_type, strlen(cow_type) + 1); + cow_type = NULL; + } + + CDEBUG(D_MALLOC, "after mounting: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + + MOD_INC_USE_COUNT; + EXIT; + return sb; + + out_err: + CDEBUG(D_SUPER, "out_err called\n"); + if (cache) + SNAP_FREE(cache, sizeof(struct snap_cache)); + if (cache_data) + SNAP_FREE(cache_data, PAGE_SIZE); + if (snapno) + SNAP_FREE(snapno, strlen(snapno) + 1); + if (cache_type) + SNAP_FREE(cache_type, strlen(cache_type) + 1); + if (cow_type) + SNAP_FREE(cow_type, strlen(cow_type) + 1); + + CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + return NULL; +} + + +struct file_system_type snapfs_current_type = { + "snap_current", + FS_REQUIRES_DEV, /* can use Ibaskets when ext2 does */ + snapfs_read_super, + NULL +}; + + +/* Find the options for the clone. These consist of a cache device + and an index in the snaptable associated with that device. 
+*/ +static char *clonefs_options(char *options, char *cache_data, + char **devstr, char **namestr) +{ + char *this_char; + char *cache_data_end = cache_data; + + if (!options || !cache_data) + return cache_data_end; + + CDEBUG(D_SUPER, "parsing options\n"); + for (this_char = strtok (options, ","); + this_char != NULL; + this_char = strtok (NULL, ",")) { + char *opt; + CDEBUG(D_SUPER, "this_char %s\n", this_char); + + if ( (opt = read_opt("dev", this_char)) ) { + store_opt(devstr, opt); + continue; + } + if ( (opt = read_opt("name", this_char)) ) { + store_opt(namestr, opt); + continue; + } + + cache_data_end += sprintf(cache_data_end, "%s%s", + cache_data_end != cache_data ? ",":"", + this_char); + } + + return cache_data_end; +} + +static int snapfs_path2dev(char *dev_path, kdev_t *dev) +{ + struct dentry *dentry; + + dentry = lookup_dentry(dev_path, NULL, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (!dentry->d_inode) + return -ENODEV; + + if (!S_ISBLK(dentry->d_inode->i_mode)) + return -ENODEV; + + *dev = dentry->d_inode->i_rdev; + + return 0; +} + + +extern struct super_operations clone_super_ops; + +/* + * We always need to remove the snapfs options before passing + * to bottom FS. + */ +struct super_block * +clone_read_super( + struct super_block *sb, + void *data, + int silent) +{ + struct snap_clone_info *clone_sb; + struct snap_cache *snap_cache = NULL; + int err; + char *cache_data = NULL; + char *cache_data_end; + char *devstr = NULL; + kdev_t dev; + char *namestr = NULL; + //char *endptr; + int index; + ino_t root_ino; + struct inode *root_inode; + + ENTRY; + + CDEBUG(D_MALLOC, "before parsing: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + + /* reserve space for the cache's data */ + SNAP_ALLOC(cache_data, void *, PAGE_SIZE); + if ( !cache_data ) { + printk("clone_read_super: Cannot allocate data page.\n"); + EXIT; + goto out_err; + } + + CDEBUG(D_SUPER, "mount opts: %s\n", data ? 
(char *)data : "(none)"); + + /* read and validate options */ + cache_data_end = clonefs_options(data, cache_data, &devstr, &namestr); + + /* was there anything for the cache filesystem in the data? */ + if (cache_data_end == cache_data) { + SNAP_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } else { + printk("clonefs: invalid mount option %s\n", cache_data); + EXIT; + goto out_err; + } + + if (!namestr || !devstr) { + printk("snapfs: mount options name and dev mandatory\n"); + EXIT; + goto out_err; + } + + err = snapfs_path2dev(devstr, &dev); + if ( err ) { + printk("snap: incorrect device option %s\n", devstr); + EXIT; + goto out_err; + } + + snap_cache = snap_find_cache(dev); + if ( !snap_cache ) { + printk("snap: incorrect device option %s\n", devstr); + EXIT; + goto out_err; + } + + /*index = simple_strtoul(indexstr, &endptr, 0); + if ( indexstr == endptr ) { + printk("No valid index passed to mount\n"); + EXIT; + goto out_err; + } + */ + + index = snap_get_index_from_name (snap_cache->cache_snap_tableno, + namestr); + CDEBUG(D_SUPER, "tableno %d, name %s, get index %d\n", + snap_cache->cache_snap_tableno, namestr, index); + + if(index < 0 ) { + printk("No valid index for name %s passed to mount\n",namestr); + EXIT; + goto out_err; + } + + /* + * Force clone fs to be read-only. + * + * XXX - Is there a way to change the mount options too so + * the fs is listed as RO by mount? 
+ */ + sb->s_flags |= MS_RDONLY; + + /* set up the super block */ + clone_sb = (struct snap_clone_info *)&sb->u.generic_sbp; + list_add(&clone_sb->clone_list_entry, &snap_cache->cache_clone_list); + clone_sb->clone_cache = snap_cache; + clone_sb->clone_index = index; + sb->s_op = &clone_super_ops; + + root_ino = snap_cache->cache_sb->s_root->d_inode->i_ino; + root_inode = iget(sb, root_ino); + + CDEBUG(D_SUPER, "readinode %p, root ino %ld, root inode at %p\n", + sb->s_op->read_inode, root_ino, root_inode); + + sb->s_root = d_alloc_root(root_inode, NULL); + if (!sb->s_root) { + list_del(&clone_sb->clone_list_entry); + sb = NULL; + } + + dget( snap_cache->cache_sb->s_root ); + + if (cache_data) + SNAP_FREE(cache_data, PAGE_SIZE); + if (devstr) + SNAP_FREE(devstr, strlen(devstr) + 1); + if (namestr) + SNAP_FREE(namestr, strlen(namestr) + 1); + CDEBUG(D_SUPER, "sb %lx, &sb->u.generic_sbp: %lx\n", + (ulong) sb, (ulong) &sb->u.generic_sbp); + + MOD_INC_USE_COUNT; + EXIT; + return sb; + out_err: + CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n", + snap_kmemory, snap_vmemory); + return NULL; +} + + +struct file_system_type snapfs_clone_type = { + "snap_clone", + 0, + clone_read_super, + NULL +}; + + +int init_snapfs(void) +{ + int status; + + snap_init_cache_hash(); + + status = register_filesystem(&snapfs_current_type); + if (status) { + printk("snapfs: failed in register current filesystem!\n"); + } + status = register_filesystem(&snapfs_clone_type); + if (status) { + unregister_filesystem(&snapfs_current_type); + printk("snapfs: failed in register clone filesystem!\n"); + } + return status; +} + + + +int cleanup_snapfs(void) +{ + int err; + + ENTRY; + + err = unregister_filesystem(&snapfs_clone_type); + if ( err ) { + printk("snapfs: failed to unregister clone filesystem\n"); + } + err = unregister_filesystem(&snapfs_current_type); + if ( err ) { + printk("snapfs: failed to unregister filesystem\n"); + } + + return 0; +} diff --git a/lustre/snapfs/symlink.c 
b/lustre/snapfs/symlink.c new file mode 100644 index 0000000000..41efbd1935 --- /dev/null +++ b/lustre/snapfs/symlink.c @@ -0,0 +1,213 @@ +/* + * fs/snap/snap.c + * + * A snap shot file system. + * + */ + +#define EXPORT_SYMTAB + + +#define __NO_VERSION__ +#include <linux/module.h> +#include <asm/uaccess.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/quotaops.h> +#include <linux/list.h> +#include <linux/file.h> +#include <asm/bitops.h> +#include <asm/byteorder.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + +static inline int inode_has_ea(struct inode *inode) +{ + return (inode->u.ext2_i.i_file_acl != 0); +} + +static int currentfs_readlink(struct dentry * dentry, char * buffer, int buflen) +{ + struct snap_cache *cache; + int rc; + struct inode_operations *iops; + struct inode * inode = dentry->d_inode; + int bpib = inode->i_sb->s_blocksize >> 9; + __u32 save_i_blocks; + + ENTRY; + + cache = snap_find_cache(inode->i_dev); + if ( !cache ) { + EXIT; + return -EINVAL; + } + + iops = filter_c2csiops(cache->cache_filter); + if (!iops || + !iops->readlink) { + rc = -EINVAL; + goto exit; + } + + save_i_blocks = inode->i_blocks; + /* If this link has ea and its i_blocks is ea's block, + * then we should treate it as a fast symlink + */ + if( inode_has_ea(inode) && inode->i_blocks == bpib ) { + inode->i_blocks = 0; + } + rc = iops->readlink(dentry, buffer, buflen); + + if( inode->i_blocks != save_i_blocks ){ + inode->i_blocks = save_i_blocks; + mark_inode_dirty(inode); + } + +exit: + EXIT; + return rc; +} + +static int cat_str_ahead(char *buf, int pos, const char* str) +{ + int len = strlen(str); + + if( pos - len -1 < 0 ) + return pos; + + buf[--pos] = '/'; + memcpy(&buf[pos-len], str, len); + return pos-len; +} + +/* + * Adjust the following path if we are under dotsnap (skip .snap/clonexx...) 
+ * in following two case, we just return null and let caller do + * the normal follow_link: + * (1) we are not lies in .snap + * (2) we are already in the root's .snap + */ +static struct dentry * dotsnap_follow_link(struct dentry *base, + struct dentry *dentry, + int follow) +{ + struct super_block *sb = dentry->d_inode->i_sb; + struct dentry *rc = NULL; + struct dentry *de = dentry, *de_save1=NULL, *de_save2=NULL; + char *buf = NULL; + int pos = D_MAXLEN; + + SNAP_ALLOC(buf, char*, D_MAXLEN); + if( !buf ) + return ERR_PTR(-ENOMEM); + + /* + * iterate upward to construct the path + */ + do { + if( de_save2 ) + pos = cat_str_ahead(buf, pos, de_save2->d_name.name); + + if ( de->d_inode && de->d_inode->i_ino & 0xF0000000 ) + goto lookup; + + de_save2 = de_save1; + de_save1 = de; + de = de->d_parent; + } while (de->d_parent != de); + + /* we are not under dotsnap */ + goto exit; + +lookup: + /* See if we already under root's .snap */ + de = de->d_parent; + if( de == sb->s_root ) + goto exit; + + while( (de->d_parent != de) && (de != sb->s_root) ){ + pos = cat_str_ahead(buf, pos, de->d_name.name); + de = de->d_parent; + } + if( de_save1 ) + pos = cat_str_ahead(buf, pos, de_save1->d_name.name); + + pos = cat_str_ahead(buf, pos, ".snap"); + buf[D_MAXLEN-1] = 0; + CDEBUG(D_FILE, "constructed path: %s\n", &buf[pos]); + + /* FIXME lookup_dentry will never return NULL ?? 
*/ + rc = lookup_dentry(&buf[pos], dget(sb->s_root), follow); + if( !rc ){ + rc = ERR_PTR(-ENOENT); + CDEBUG(D_FILE, "lookup_dentry return NULL~!@#$^&*\n"); + } + dput(base); + +exit: + SNAP_FREE(buf, D_MAXLEN); + return rc; +} + +static struct dentry * currentfs_follow_link ( struct dentry *dentry, + struct dentry *base, + unsigned int follow) +{ + struct snap_cache *cache; + struct dentry * rc; + struct inode_operations *iops; + struct inode * inode = dentry->d_inode; + int bpib = inode->i_sb->s_blocksize >> 9; + __u32 save_i_blocks; + + ENTRY; + + cache = snap_find_cache(inode->i_dev); + if ( !cache ) { + EXIT; + return ERR_PTR(-EINVAL); + } + + iops = filter_c2csiops(cache->cache_filter); + if (!iops || + !iops->follow_link) { + rc = ERR_PTR(-EINVAL); + goto exit; + } + + if( currentfs_is_under_dotsnap(dentry) ){ + rc = dotsnap_follow_link( base, dentry, follow ); + if( rc ) + goto exit; + } + + save_i_blocks = inode->i_blocks; + /* If this link has ea and its i_blocks is ea's block, + * then we should treate it as a fast symlink + */ + if( inode_has_ea(inode) && inode->i_blocks == bpib ) { + inode->i_blocks = 0; + } + rc = iops->follow_link(dentry, base, follow); + + if( inode->i_blocks != save_i_blocks ){ + inode->i_blocks = save_i_blocks; + mark_inode_dirty(inode); + } + +exit: + EXIT; + return rc; +} + +struct inode_operations currentfs_sym_iops = { + readlink: currentfs_readlink, + follow_link: currentfs_follow_link +}; + +struct file_operations currentfs_sym_fops; diff --git a/lustre/snapfs/sysctl.c b/lustre/snapfs/sysctl.c new file mode 100644 index 0000000000..f7f88a6700 --- /dev/null +++ b/lustre/snapfs/sysctl.c @@ -0,0 +1,110 @@ +/* + * Sysctrl entries for Snapfs + */ + +#define __NO_VERSION__ +#include <linux/config.h> /* for CONFIG_PROC_FS */ +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <linux/swapctl.h> +#include <linux/proc_fs.h> +#include <linux/malloc.h> +#include 
<linux/vmalloc.h> +#include <linux/stat.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <asm/bitops.h> +#include <asm/segment.h> +#include <asm/uaccess.h> +#include <linux/utsname.h> +#include <linux/blk.h> + +#include <linux/filter.h> +#include <linux/snapfs.h> +#include <linux/snapsupport.h> + + +/* /proc entries */ + +#ifdef CONFIG_PROC_FS + + +static void snapfs_proc_modcount(struct inode *inode, int fill) +{ + if (fill) + MOD_INC_USE_COUNT; + else + MOD_DEC_USE_COUNT; +} + +struct proc_dir_entry proc_fs_snapfs = { + 0, 10, "snapfs", + S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, + 0, &proc_dir_inode_operations, + NULL, NULL, + NULL, + NULL, NULL +}; + + +#endif + + +/* SYSCTL below */ + +static struct ctl_table_header *snapfs_table_header = NULL; +/* 0x100 to avoid any chance of collisions at any point in the tree with + * non-directories + */ +#define PSDEV_SNAPFS (0x120) + +#define PSDEV_DEBUG 1 /* control debugging */ +#define PSDEV_TRACE 2 /* control enter/leave pattern */ + +/* These are global control options */ +#define ENTRY_CNT 3 + +/* XXX - doesn't seem to be working in 2.2.15 */ +static struct ctl_table snapfs_ctltable[ENTRY_CNT] = +{ + {PSDEV_DEBUG, "debug", &snap_debug_level, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_TRACE, "trace", &snap_print_entry, sizeof(int), 0644, NULL, &proc_dointvec}, + {0} +}; + +static ctl_table snapfs_table[2] = { + {PSDEV_SNAPFS, "snapfs", NULL, 0, 0555, snapfs_ctltable}, + {0} +}; + + +int /* __init */ init_snapfs_proc_sys(void) +{ + +#ifdef CONFIG_SYSCTL + if ( !snapfs_table_header ) + snapfs_table_header = + register_sysctl_table(snapfs_table, 0); +#endif +#ifdef CONFIG_PROC_FS + proc_register(&proc_root_fs, &proc_fs_snapfs); + proc_fs_snapfs.fill_inode = &snapfs_proc_modcount; +#endif + return 0; +} + +void cleanup_snapfs_proc_sys(void) { + +#ifdef CONFIG_SYSCTL + if ( snapfs_table_header ) + unregister_sysctl_table(snapfs_table_header); + snapfs_table_header = NULL; +#endif + +#if CONFIG_PROC_FS 
+ proc_unregister(&proc_root_fs, proc_fs_snapfs.low_ino); +#endif +} + -- GitLab