From 65230291ac3140e29aeb5813d075825a348b45d6 Mon Sep 17 00:00:00 2001
From: yury <yury>
Date: Wed, 10 Mar 2004 07:50:30 +0000
Subject: [PATCH] added tmpfs related stuff

---
 lustre/lvfs/fsfilt_tmpfs.c    | 654 ++++++++++++++++++++++++++++++++++
 lustre/tests/ltmpfs-sanity.sh |  10 +
 lustre/tests/ltmpfs.sh        |   1 +
 3 files changed, 665 insertions(+)
 create mode 100644 lustre/lvfs/fsfilt_tmpfs.c
 create mode 100755 lustre/tests/ltmpfs-sanity.sh
 create mode 100755 lustre/tests/ltmpfs.sh

diff --git a/lustre/lvfs/fsfilt_tmpfs.c b/lustre/lvfs/fsfilt_tmpfs.c
new file mode 100644
index 0000000000..808ee25187
--- /dev/null
+++ b/lustre/lvfs/fsfilt_tmpfs.c
@@ -0,0 +1,654 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/lib/fsfilt_tmpfs.c
+ *  Lustre filesystem abstraction routines
+ *
+ *  Copyright (C) 2002, 2003, 2004 Cluster File Systems, Inc.
+ *   Author: Yury Umanets <umka@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/version.h>
+#include <linux/kp30.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#include <linux/module.h>
+#include <linux/shmem_fs.h>
+
+/* prefix is needed because tmpfs xattr patch deos not support namespaces
+ * yet. */
+#define XATTR_LUSTRE_MDS_LOV_EA         "trusted.lov"
+#define XATTR_LUSTRE_MDS_OBJID          "system.lustre_mds_objid"
+
+/* structure instance of to be returned as a transaction handle. This is not
+ * needed for now, but probably we will need to save something during modifying
+ * an inode and this is useful for us. */
+struct tmpfs_trans {
+        int op;
+};
+
+static kmem_cache_t *trans_cache;
+static atomic_t trans_count = ATOMIC_INIT(0);
+
+/* ext2 directory stuff. It is needed for fs_readpage(), which is used for
+ * reading directoris on MDS. Probably this should be moved to somewhere more
+ * convenient? */
+#define EXT2_NAME_LEN (255)
+
+struct ext2_dirent {
+        __u32   inode;
+        __u16   rec_len;
+        __u8    name_len;
+        __u8    file_type;
+        char    name[0];
+};
+
+typedef struct ext2_dirent ext2_dirent_t;
+
+struct fetch_hint {
+	int stop;
+        int count;
+	__u16 chunk;
+	void *dirent;
+        __u16 rec_len;
+	struct file *file;
+};
+
+typedef struct fetch_hint fetch_hint_t;
+
+#define EXT2_ENT_PAD       4
+#define EXT2_ENT_ROUND     (EXT2_ENT_PAD - 1)
+#define EXT2_ENT_LEN(len)  (((len) + 8 + EXT2_ENT_ROUND) & ~EXT2_ENT_ROUND)
+
+/* starts new transaction on tmpfs for metadata operations. That if for create
+ * file, delete it, etc. That is everything except of read/write data. Returns
+ * pointer to transaction handle to be used later. What we have to do here? 
+ * Seems nothing for a while. */
+static void *
+fsfilt_tmpfs_mtd_start(struct inode *inode, int op, void *desc_private)
+{
+        int rc;
+        struct kstatfs sfs;
+        struct tmpfs_trans *trans;
+
+        CDEBUG(D_INFO, "Metadata operation 0x%x is started on "
+               "inode 0x%lx\n", op, inode->i_ino);
+
+        if ((rc = vfs_statfs(inode->i_sb, &sfs)))
+                return ERR_PTR(rc);
+
+        if (sfs.f_bfree == 0)
+                return ERR_PTR(-ENOSPC);
+        
+        OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS,
+                       sizeof(*trans));
+
+        if (trans == NULL)
+                return NULL;
+
+        atomic_inc(&trans_count);
+
+        trans->op = op;
+        return trans;
+}
+
+/* commits changes on passed @inode using passed transaction @handle. Should we
+ * do something here? */
+static int
+fsfilt_tmpfs_mtd_commit(struct inode *inode, void *handle, int force_sync)
+{
+        struct tmpfs_trans *trans;
+
+        trans = (struct tmpfs_trans *)handle;
+
+        OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans));
+        atomic_dec(&trans_count);
+
+        CDEBUG(D_INFO, "Metadata operation 0x%x is "
+               "finished on inode 0x%lx\n", trans->op,
+               inode->i_ino);
+
+        return 0;
+}
+
+/* starts new transaction for read/write operations. Seems, that here we do
+ * nothing also. */
+static void *
+fsfilt_tmpfs_io_start(int objcount, struct fsfilt_objinfo *fso,
+                      int niocount, struct niobuf_local *nb,
+                      void *desc_private)
+{
+        int rc;
+        struct kstatfs sfs;
+        struct tmpfs_trans *trans;
+
+        ENTRY;
+
+        CDEBUG(D_INFO, "IO operation is started on inode 0x%lx\n",
+               fso->fso_dentry->d_inode->i_ino);
+
+        /* check if we still have free space on filesystem. */
+        if ((rc = vfs_statfs(fso->fso_dentry->d_inode->i_sb, &sfs)))
+                RETURN(ERR_PTR(rc));
+
+        if (sfs.f_bfree == 0)
+                RETURN(ERR_PTR(-ENOSPC));
+        
+        OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS,
+                       sizeof(*trans));
+
+        if (trans == NULL)
+                RETURN(NULL);
+
+        atomic_inc(&trans_count);
+
+        trans->op = 0;
+        RETURN(trans);
+}
+
+/* commits changes on passed @inode using passed transaction @handle. This is
+ * called from direct_io() with handle obtained from brw_start(). */
+static int
+fsfilt_tmpfs_io_commit(struct inode *inode, void *handle, void **wh)
+{
+        struct tmpfs_trans *trans;
+
+        trans = (struct tmpfs_trans *)handle;
+
+        OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans));
+        atomic_dec(&trans_count);
+
+        CDEBUG(D_INFO, "IO operation is finished on inode "
+               "0x%lx\n", inode->i_ino);
+
+        /* wait handle is not used. */
+        *wh = NULL;
+
+        return 0;
+}
+
+/* waits for transaction started by io_commit() to be finished on passed wait
+ * handle. What should we do here? Nothing so far. */
+static int
+fsfilt_tmpfs_commit_wait(struct inode *inode, void *wh)
+{
+        CDEBUG(D_INFO, "commit wait is called\n");
+        return 0;
+}
+
+/* implements additional ioctl fucntions. Nothing do here. */
+static int
+fsfilt_tmpfs_iocontrol(struct inode * inode, struct file *file,
+                       unsigned int cmd, unsigned long arg)
+{
+        int rc = -ENOTTY;
+        
+        ENTRY;
+
+        if (inode->i_fop->ioctl)
+                rc = inode->i_fop->ioctl(inode, file, cmd, arg);
+
+        RETURN(rc);
+}
+
+/* fills @osfs by statfs info for tmpfs. Should we do some correcting 
+   here? Probably later. */
+static int
+fsfilt_tmpfs_statfs(struct super_block *sb, struct obd_statfs *osfs)
+{
+        int rc;
+        struct kstatfs sfs;
+
+        if (!sb->s_op->statfs)
+                return -ENOSYS;
+
+        memset(&sfs, 0, sizeof(sfs));
+
+        /* trying to be consistent with other parts of tmpfs filter and call
+         * sb->s_op->statfs() instead of using vfs_statfs(). */
+        lock_kernel();
+        rc = sb->s_op->statfs(sb, &sfs);
+        unlock_kernel();
+
+        if (rc == 0)
+                statfs_pack(osfs, &sfs);
+
+        return rc;
+}
+
+/* make sure, that all dirty buffers are stored onto device. This is nothing to
+ * do for tmpfs in principle, but we will not aim to be smarter than tmpfs is
+ * and call sb->s_op->sync_fs() is any. */
+static int
+fsfilt_tmpfs_sync(struct super_block *sb)
+{
+        if (sb->s_op->sync_fs)
+                return sb->s_op->sync_fs(sb);
+        
+        return 0;
+}
+
+/* uses inode setattr method if any, or does default actions otherwise. */
+static int fsfilt_tmpfs_setattr(struct dentry *dentry, void *handle,
+                                struct iattr *iattr, int do_trunc)
+{
+        int rc;
+        struct inode *inode = dentry->d_inode;
+
+        lock_kernel();
+
+        /* preventing vmtruncate() to be called on inode_setattr(). */
+        if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
+                iattr->ia_valid &= ~ATTR_SIZE;
+                inode->i_size = iattr->ia_size;
+        }
+
+        iattr->ia_mode = (inode->i_mode & S_IFMT) |
+                (iattr->ia_mode & ~S_IFMT);
+
+        iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+
+        if (inode->i_op->setattr) {
+                rc = inode->i_op->setattr(dentry, iattr);
+        } else {
+                if (!(rc = inode_change_ok(inode, iattr)))
+                        rc = inode_setattr(inode, iattr);
+        }
+
+        unlock_kernel();
+
+        return rc;
+}
+
+/* nothing to do here. */
+static int
+fsfilt_tmpfs_setup(struct super_block *sb)
+{
+        return 0;
+}
+
+/* sets lmm into inode xattrs using passed transaction @handle. */
+static int
+fsfilt_tmpfs_set_md(struct inode *inode, void *handle,
+                    void *lmm, int lmm_size)
+{
+        int rc;
+
+        lock_kernel();
+
+        rc = shmem_xattr_set(inode, XATTR_LUSTRE_MDS_LOV_EA,
+                             lmm, lmm_size, 0);
+
+        unlock_kernel();
+
+        if (rc) {
+                CERROR("error adding MD data to inode %lu: rc = %d\n",
+                       inode->i_ino, rc);
+        }
+        
+        return rc;
+}
+
+/* gets lmm from inode xattrs. */
+static int
+fsfilt_tmpfs_get_md(struct inode *inode, void *lmm,
+                    int lmm_size)
+{
+        int rc;
+
+        LASSERT(down_trylock(&inode->i_sem) != 0);
+
+        lock_kernel();
+
+	/* getting new key first. */
+        rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_LOV_EA,
+                             lmm, lmm_size);
+
+	/* check for old one. */
+        if (rc == -ENODATA) {
+                rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_OBJID,
+                                     lmm, lmm_size);
+        }
+
+        unlock_kernel();
+
+        if (lmm == NULL)
+                return (rc == -ENODATA) ? 0 : rc;
+
+        if (rc < 0) {
+                CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc = %d\n",
+                       XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
+                
+                memset(lmm, 0, lmm_size);
+                return (rc == -ENODATA) ? 0 : rc;
+        }
+
+        return rc;
+}
+
+/* reads data from passed @file to @buf. */
+static ssize_t
+fsfilt_tmpfs_read(struct file *file, char *buf,
+                  size_t count, loff_t *off)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+
+        if (!S_ISREG(inode->i_mode))
+                return -EINVAL;
+        
+    	return file->f_op->read(file, buf, count, off);
+}
+
+/* writes data to regular @file. */
+static ssize_t
+fsfilt_tmpfs_write(struct file *file, char *buf,
+                   size_t count, loff_t *off)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+
+        if (!S_ISREG(inode->i_mode))
+                return -EINVAL;
+        
+        return file->f_op->write(file, buf, count, off);
+}
+
+/* puts passed page to page cache. */
+static int
+fsfilt_tmpfs_putpage(struct inode *inode, struct page *page,
+                     int lazy_cache)
+{
+        struct page *shmem_page;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+
+	down(&info->sem);
+
+        /* getting page from shmem. It may be read from swap. And this is the
+         * reason, why we do not just add passed @page to pacge cache. */
+        shmem_page = shmem_getpage_locked(inode, page->index);
+        
+        if (IS_ERR(shmem_page)) {
+                up(&info->sem);
+                return PTR_ERR(shmem_page);
+        }
+        
+	up(&info->sem);
+        
+        copy_page(kmap(shmem_page), kmap(page));
+        kunmap(page); kunmap(shmem_page);
+
+        /* taking care about possible cache aliasing. */
+        if (inode->i_mapping->i_mmap_shared != NULL)
+                flush_dcache_page(shmem_page);
+        
+        SetPageDirty(shmem_page);
+        UnlockPage(shmem_page);
+        page_cache_release(shmem_page);
+        
+        return 0;
+}
+
+/* returns inode page by its @index. */
+static struct page *
+fsfilt_tmpfs_getpage(struct inode *inode, long int index)
+{
+	struct page *page;
+
+        page = shmem_getpage_unlocked(inode, index);
+        
+	if (IS_ERR(page))
+                return page;
+
+        /* taking care about possible cache aliasing. */
+        if (inode->i_mapping->i_mmap_shared != NULL)
+                flush_dcache_page(page);
+        
+	return page;
+}
+
+/* fills up passed @buf by entry data. Used from readdir(). */
+static int
+fillent(void *buf, const char *name, int namlen, 
+        loff_t offset, ino_t ino, unsigned int d_type)
+{
+        __u16 rec_len;
+	fetch_hint_t *hint = (fetch_hint_t *)buf;
+	ext2_dirent_t *entry = hint->dirent;
+	
+	rec_len = EXT2_ENT_LEN(namlen);
+
+        if ((hint->stop = (hint->chunk < rec_len)))
+		return -ENOENT;
+
+	entry->file_type = 0;
+
+        hint->count++;
+        hint->chunk -= rec_len;
+        hint->rec_len = rec_len;
+        hint->dirent += rec_len;
+
+        entry->name_len = namlen;
+	entry->inode = cpu_to_le32(ino);
+	memcpy(entry->name, name, namlen);
+	entry->rec_len = cpu_to_le16(rec_len);
+
+	return 0;
+}
+
+/* this should be the same as in tmpfs. Should it be not hardcoded? */
+#define BOGO_ENTRY_SIZE (20)
+
+/* mostly needed for reading directory from @file on MDS. */
+static ssize_t
+fsfilt_tmpfs_readpage(struct file *file, char *buf,
+                      size_t count, loff_t *off)
+{
+        int rc = 0;
+        struct inode *inode = file->f_dentry->d_inode;
+
+        if (S_ISREG(inode->i_mode)) {
+    		rc = file->f_op->read(file, buf, count, off);
+        } else if (S_ISDIR(inode->i_mode)) {
+                int error;
+		loff_t offset;
+		fetch_hint_t hint;
+                ext2_dirent_t *dirent;
+
+                /* positioning to passed @off. */
+		offset = *(long int *)off / BOGO_ENTRY_SIZE;
+                
+		if (file->f_op->llseek(file, offset, 0) != offset)
+			return -ENOENT;
+
+                /* reading @count bytesof data. */
+		while (count > 0) {
+                        hint.count = 0;
+                        hint.file = file;
+                        hint.dirent = buf;
+                        hint.chunk = count;
+                        hint.rec_len = count;
+				
+                        if ((error = vfs_readdir(file, fillent, &hint)) < 0)
+                                return error;
+
+                        /* we should have something after vfs_readdir() is
+                         * finished. */
+                        LASSERT(hint.count != 0);
+                        
+                        /* last entry should be extended up to free page
+                         * size. */
+                        if (hint.chunk > 0) {
+                                __u16 rec_len;
+                                
+                                hint.dirent -= hint.rec_len;
+                                dirent = (ext2_dirent_t *)hint.dirent;
+
+                                rec_len = le16_to_cpu(dirent->rec_len);
+                                dirent->rec_len = cpu_to_le16(rec_len + hint.chunk);
+                        }
+		
+			count -= PAGE_CACHE_SIZE;
+			*off += PAGE_CACHE_SIZE;
+			rc += PAGE_CACHE_SIZE;
+		}
+
+                UPDATE_ATIME(inode);
+        } else {
+		rc = -EINVAL;
+	}
+
+        return rc;
+}
+
+static int
+fsfilt_tmpfs_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
+                            void *handle, fsfilt_cb_t cb_func,
+                            void *cb_data)
+{
+        cb_func(obd, last_rcvd, cb_data, 0);
+        return 0;
+}
+
+static int
+fsfilt_tmpfs_prep_san_write(struct inode *inode, long *blocks,
+                            int nblocks, loff_t newsize)
+{
+        /* we do not need block numbers and other stuff, as it will not be
+         * used. */
+        blocks[0] = 0;
+
+        if (newsize > inode->i_size)
+                inode->i_size = newsize;
+        
+        return 0;
+}
+
+/* this is used for reading configuration */
+static int
+fsfilt_tmpfs_read_record(struct file *file, void *buf,
+                         int size, loff_t *off)
+{
+        int error;
+	struct inode *inode = file->f_dentry->d_inode;
+	
+	lock_kernel();
+	
+	if (inode->i_size < *off + size) {
+		size = inode->i_size - *off;
+		unlock_kernel();
+		
+		if (size < 0) {
+			return -EIO;
+		} else if (size == 0) {
+			return 0;
+		}
+	} else {
+		unlock_kernel();
+	}
+        
+        if ((error = fsfilt_tmpfs_read(file, buf, size, off)) < 0)
+                return error;
+        
+        return 0;
+}
+
+/* this is used for writing configuration */
+static int
+fsfilt_tmpfs_write_record(struct file *file, void *buf,
+                          int size, loff_t *off, int sync)
+{
+        int error;
+        
+        if ((error = fsfilt_tmpfs_write(file, buf, size, off)) < 0)
+                return error;
+        
+        return 0;
+}
+
+static struct fsfilt_operations fsfilt_tmpfs_ops = {
+        fs_type:                "tmpfs",
+        fs_owner:               THIS_MODULE,
+        fs_start:               fsfilt_tmpfs_mtd_start,
+        fs_commit:              fsfilt_tmpfs_mtd_commit,
+        fs_brw_start:           fsfilt_tmpfs_io_start,
+        fs_commit_async:        fsfilt_tmpfs_io_commit,
+        fs_commit_wait:         fsfilt_tmpfs_commit_wait,
+        fs_iocontrol:           fsfilt_tmpfs_iocontrol,
+        fs_set_md:              fsfilt_tmpfs_set_md,
+        fs_get_md:              fsfilt_tmpfs_get_md,
+        fs_readpage:            fsfilt_tmpfs_readpage,
+        fs_getpage:             fsfilt_tmpfs_getpage,
+        fs_putpage:             fsfilt_tmpfs_putpage,
+        fs_add_journal_cb:      fsfilt_tmpfs_add_journal_cb,
+        fs_statfs:              fsfilt_tmpfs_statfs,
+        fs_sync:                fsfilt_tmpfs_sync,
+        fs_prep_san_write:      fsfilt_tmpfs_prep_san_write,
+        fs_write_record:        fsfilt_tmpfs_write_record,
+        fs_read_record:         fsfilt_tmpfs_read_record,
+	fs_setattr:             fsfilt_tmpfs_setattr,
+        fs_setup:               fsfilt_tmpfs_setup,
+};
+
+static int __init
+fsfilt_tmpfs_init(void)
+{
+        int rc;
+
+        trans_cache = kmem_cache_create("fsfilt_tmpfs_trans",
+                                        sizeof(struct tmpfs_trans),
+                                        0, 0, NULL, NULL);
+        if (!trans_cache) {
+                CERROR("error allocating fsfilt transaction handle cache\n");
+                GOTO(out, rc = -ENOMEM);
+        }
+
+        if ((rc = fsfilt_register_ops(&fsfilt_tmpfs_ops)))
+                kmem_cache_destroy(trans_cache);
+out:
+        return rc;
+}
+
+static void __exit
+fsfilt_tmpfs_exit(void)
+{
+        int rc;
+
+        fsfilt_unregister_ops(&fsfilt_tmpfs_ops);
+        rc = kmem_cache_destroy(trans_cache);
+
+        if (rc || atomic_read(&trans_count)) {
+                CERROR("can't free fsfilt trans cache: count %d, rc = %d\n",
+                       atomic_read(&trans_count), rc);
+        }
+}
+
+module_init(fsfilt_tmpfs_init);
+module_exit(fsfilt_tmpfs_exit);
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre tmpfs Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
diff --git a/lustre/tests/ltmpfs-sanity.sh b/lustre/tests/ltmpfs-sanity.sh
new file mode 100755
index 0000000000..a6d921eb06
--- /dev/null
+++ b/lustre/tests/ltmpfs-sanity.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Currently the following tests do not pass:
+# 47 - due to unresolvable symbol in UML local libc
+# 52a, 52b - due to not implemented ioctl() in tmpfs
+# 57a - due to inability to be supplied to tmpfs
+# 56 - due to some unknown reason yet.
+
+NAME=local FSTYPE=tmpfs MDSDEV=tmpfs OSTDEV=tmpfs sh llmount.sh && \
+START=: CLEAN=: EXCEPT="47 52a 52b 56 57a" sh sanity.sh
diff --git a/lustre/tests/ltmpfs.sh b/lustre/tests/ltmpfs.sh
new file mode 100755
index 0000000000..c561572700
--- /dev/null
+++ b/lustre/tests/ltmpfs.sh
@@ -0,0 +1 @@
+NAME=local FSTYPE=tmpfs MDSDEV=tmpfs OSTDEV=tmpfs sh llmount.sh
-- 
GitLab