Skip to content
Snippets Groups Projects
Commit 65230291 authored by Yury Umanets's avatar Yury Umanets
Browse files

added tmpfs related stuff

parent c832f10d
No related branches found
No related tags found
No related merge requests found
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* lustre/lib/fsfilt_tmpfs.c
* Lustre filesystem abstraction routines
*
* Copyright (C) 2002, 2003, 2004 Cluster File Systems, Inc.
* Author: Yury Umanets <umka@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
* Lustre is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Lustre is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_FILTER
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#include <linux/version.h>
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
#include <linux/obd_class.h>
#include <linux/module.h>
#include <linux/shmem_fs.h>
/* names of the extended attributes used by fsfilt_tmpfs_set_md() and
 * fsfilt_tmpfs_get_md(). The prefix is spelled out in full because the tmpfs
 * xattr patch does not support namespaces yet. */
#define XATTR_LUSTRE_MDS_LOV_EA "trusted.lov"
#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid"
/* transaction handle returned by fsfilt_tmpfs_mtd_start() and
 * fsfilt_tmpfs_io_start(). tmpfs does not need any transaction state for now,
 * but having a real handle leaves room to save something while an inode is
 * being modified. */
struct tmpfs_trans {
/* operation code passed to fsfilt_tmpfs_mtd_start(); set to 0 by
 * fsfilt_tmpfs_io_start(). Only used for debug logging. */
int op;
};
/* slab cache the struct tmpfs_trans handles are allocated from; created in
 * fsfilt_tmpfs_init() and destroyed in fsfilt_tmpfs_exit(). */
static kmem_cache_t *trans_cache;
/* number of handles currently allocated: incremented by the start methods,
 * decremented by the commit methods and reported at module exit. */
static atomic_t trans_count = ATOMIC_INIT(0);
/* ext2 directory stuff. It is needed for fs_readpage(), which is used for
 * reading directories on MDS. Probably this should be moved to somewhere more
 * convenient? */
#define EXT2_NAME_LEN (255)
/* one directory record as produced by fillent(): an 8 byte fixed header
 * followed by the name bytes.
 * NOTE(review): presumably mirrors the ext2 on-disk directory entry layout --
 * confirm against the ext2 headers before changing any field. */
struct ext2_dirent {
/* inode number, stored little-endian by fillent(). */
__u32 inode;
/* length of this record in bytes, stored little-endian by fillent(). */
__u16 rec_len;
/* number of bytes in name[]. */
__u8 name_len;
/* always written as 0 by fillent(). */
__u8 file_type;
/* name bytes; fillent() copies name_len bytes and adds no terminator. */
char name[0];
};
typedef struct ext2_dirent ext2_dirent_t;
/* state shared between fsfilt_tmpfs_readpage() and its vfs_readdir() callback
 * fillent() while one buffer is being filled with directory records. */
struct fetch_hint {
/* set non-zero by fillent() when the next entry does not fit. */
int stop;
/* number of entries stored so far. */
int count;
/* bytes still free in the buffer. */
__u16 chunk;
/* where the next record is going to be written. */
void *dirent;
/* length of the record stored last. */
__u16 rec_len;
/* directory being read; set by fsfilt_tmpfs_readpage(). */
struct file *file;
};
typedef struct fetch_hint fetch_hint_t;
/* record length arithmetic for struct ext2_dirent: a record takes the 8 byte
 * fixed header (inode, rec_len, name_len, file_type) plus the name, rounded up
 * to the next multiple of EXT2_ENT_PAD bytes. */
#define EXT2_ENT_PAD            4
#define EXT2_ENT_ROUND          (EXT2_ENT_PAD - 1)
#define EXT2_ENT_HDR_LEN        8
#define EXT2_ENT_LEN(len)                                               \
        (((len) + EXT2_ENT_HDR_LEN + EXT2_ENT_ROUND) & ~EXT2_ENT_ROUND)
/* starts new transaction on tmpfs for metadata operations. That if for create
* file, delete it, etc. That is everything except of read/write data. Returns
* pointer to transaction handle to be used later. What we have to do here?
* Seems nothing for a while. */
static void *
fsfilt_tmpfs_mtd_start(struct inode *inode, int op, void *desc_private)
{
int rc;
struct kstatfs sfs;
struct tmpfs_trans *trans;
CDEBUG(D_INFO, "Metadata operation 0x%x is started on "
"inode 0x%lx\n", op, inode->i_ino);
if ((rc = vfs_statfs(inode->i_sb, &sfs)))
return ERR_PTR(rc);
if (sfs.f_bfree == 0)
return ERR_PTR(-ENOSPC);
OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS,
sizeof(*trans));
if (trans == NULL)
return NULL;
atomic_inc(&trans_count);
trans->op = op;
return trans;
}
/* finishes the metadata transaction @handle obtained from
 * fsfilt_tmpfs_mtd_start(). There is nothing to commit on tmpfs, so the handle
 * is just logged and released.
 *
 * @inode:      inode the operation was performed on (used for logging only).
 * @handle:     handle returned by fsfilt_tmpfs_mtd_start(); must be a valid
 *              pointer, it is dereferenced and freed here.
 * @force_sync: not used.
 *
 * Always returns 0. */
static int
fsfilt_tmpfs_mtd_commit(struct inode *inode, void *handle, int force_sync)
{
        struct tmpfs_trans *trans = (struct tmpfs_trans *)handle;

        /* log before releasing the handle: trans->op must not be read after
         * the object has been given back to the slab cache. */
        CDEBUG(D_INFO, "Metadata operation 0x%x is "
               "finished on inode 0x%lx\n", trans->op,
               inode->i_ino);

        OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans));
        atomic_dec(&trans_count);
        return 0;
}
/* starts new transaction for read/write operations. Seems, that here we do
* nothing also. */
static void *
fsfilt_tmpfs_io_start(int objcount, struct fsfilt_objinfo *fso,
int niocount, struct niobuf_local *nb,
void *desc_private)
{
int rc;
struct kstatfs sfs;
struct tmpfs_trans *trans;
ENTRY;
CDEBUG(D_INFO, "IO operation is started on inode 0x%lx\n",
fso->fso_dentry->d_inode->i_ino);
/* check if we still have free space on filesystem. */
if ((rc = vfs_statfs(fso->fso_dentry->d_inode->i_sb, &sfs)))
RETURN(ERR_PTR(rc));
if (sfs.f_bfree == 0)
RETURN(ERR_PTR(-ENOSPC));
OBD_SLAB_ALLOC(trans, trans_cache, GFP_NOFS,
sizeof(*trans));
if (trans == NULL)
RETURN(NULL);
atomic_inc(&trans_count);
trans->op = 0;
RETURN(trans);
}
/* finishes the IO transaction @handle. According to the original author's
 * note this is called from direct_io() with the handle obtained from
 * brw_start().
 *
 * @inode:  inode the IO was performed on (used for logging only).
 * @handle: handle returned by fsfilt_tmpfs_io_start(); released here.
 * @wh:     out parameter for a wait handle; always set to NULL because tmpfs
 *          has nothing to wait for.
 *
 * Always returns 0. */
static int
fsfilt_tmpfs_io_commit(struct inode *inode, void *handle, void **wh)
{
        struct tmpfs_trans *trans = handle;

        OBD_SLAB_FREE(trans, trans_cache, sizeof(*trans));
        atomic_dec(&trans_count);

        CDEBUG(D_INFO, "IO operation is finished on inode "
               "0x%lx\n", inode->i_ino);

        /* no wait handle is handed out. */
        *wh = NULL;
        return 0;
}
/* waits on wait handle @wh for a transaction committed through
 * fsfilt_tmpfs_io_commit() to be finished. That method never hands out a wait
 * handle, so the call is only logged. Both parameters are unused.
 *
 * Always returns 0. */
static int fsfilt_tmpfs_commit_wait(struct inode *inode, void *wh)
{
        CDEBUG(D_INFO, "commit wait is called\n");

        return 0;
}
/* implements additional ioctl fucntions. Nothing do here. */
static int
fsfilt_tmpfs_iocontrol(struct inode * inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
int rc = -ENOTTY;
ENTRY;
if (inode->i_fop->ioctl)
rc = inode->i_fop->ioctl(inode, file, cmd, arg);
RETURN(rc);
}
/* fills @osfs by statfs info for tmpfs. Should we do some correcting
here? Probably later. */
static int
fsfilt_tmpfs_statfs(struct super_block *sb, struct obd_statfs *osfs)
{
int rc;
struct kstatfs sfs;
if (!sb->s_op->statfs)
return -ENOSYS;
memset(&sfs, 0, sizeof(sfs));
/* trying to be consistent with other parts of tmpfs filter and call
* sb->s_op->statfs() instead of using vfs_statfs(). */
lock_kernel();
rc = sb->s_op->statfs(sb, &sfs);
unlock_kernel();
if (rc == 0)
statfs_pack(osfs, &sfs);
return rc;
}
/* make sure, that all dirty buffers are stored onto device. This is nothing to
* do for tmpfs in principle, but we will not aim to be smarter than tmpfs is
* and call sb->s_op->sync_fs() is any. */
static int
fsfilt_tmpfs_sync(struct super_block *sb)
{
if (sb->s_op->sync_fs)
return sb->s_op->sync_fs(sb);
return 0;
}
/* uses inode setattr method if any, or does default actions otherwise. */
static int fsfilt_tmpfs_setattr(struct dentry *dentry, void *handle,
struct iattr *iattr, int do_trunc)
{
int rc;
struct inode *inode = dentry->d_inode;
lock_kernel();
/* preventing vmtruncate() to be called on inode_setattr(). */
if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
iattr->ia_valid &= ~ATTR_SIZE;
inode->i_size = iattr->ia_size;
}
iattr->ia_mode = (inode->i_mode & S_IFMT) |
(iattr->ia_mode & ~S_IFMT);
iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
if (inode->i_op->setattr) {
rc = inode->i_op->setattr(dentry, iattr);
} else {
if (!(rc = inode_change_ok(inode, iattr)))
rc = inode_setattr(inode, iattr);
}
unlock_kernel();
return rc;
}
/* setup hook for the super block @sb. tmpfs needs no preparation, so this
 * does nothing and always returns 0. */
static int fsfilt_tmpfs_setup(struct super_block *sb)
{
        return 0;
}
/* stores @lmm_size bytes of @lmm in the XATTR_LUSTRE_MDS_LOV_EA extended
 * attribute of @inode. The transaction @handle is not used.
 *
 * Returns the result of shmem_xattr_set(); a non-zero result is also
 * logged. */
static int
fsfilt_tmpfs_set_md(struct inode *inode, void *handle,
                    void *lmm, int lmm_size)
{
        int rc;

        lock_kernel();
        rc = shmem_xattr_set(inode, XATTR_LUSTRE_MDS_LOV_EA,
                             lmm, lmm_size, 0);
        unlock_kernel();

        if (rc == 0)
                return 0;

        CERROR("error adding MD data to inode %lu: rc = %d\n",
               inode->i_ino, rc);
        return rc;
}
/* reads the lmm of @inode from its extended attributes into @lmm. The new key
 * (XATTR_LUSTRE_MDS_LOV_EA) is tried first, the old one
 * (XATTR_LUSTRE_MDS_OBJID) only if the new one reports -ENODATA.
 *
 * The assertion below checks that inode->i_sem is already taken when this is
 * called.
 *
 * @lmm may be NULL; then nothing is stored or logged and only the result of
 * the lookup is returned. Otherwise, on failure @lmm is zeroed.
 *
 * Returns the non-negative result of shmem_xattr_get() on success, 0 if
 * neither attribute exists, or a negative error code. */
static int
fsfilt_tmpfs_get_md(struct inode *inode, void *lmm,
                    int lmm_size)
{
        int rc;

        LASSERT(down_trylock(&inode->i_sem) != 0);

        lock_kernel();
        rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_LOV_EA,
                             lmm, lmm_size);
        if (rc == -ENODATA)
                rc = shmem_xattr_get(inode, XATTR_LUSTRE_MDS_OBJID,
                                     lmm, lmm_size);
        unlock_kernel();

        if (rc < 0 && lmm != NULL) {
                CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc = %d\n",
                       XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
                memset(lmm, 0, lmm_size);
        }

        /* a missing attribute is not an error for the caller. */
        return (rc == -ENODATA) ? 0 : rc;
}
/* reads up to @count bytes at *@off from regular @file into @buf through the
 * file's own read method.
 *
 * Returns the result of that method, or -EINVAL if @file is not a regular
 * file. */
static ssize_t
fsfilt_tmpfs_read(struct file *file, char *buf,
                  size_t count, loff_t *off)
{
        if (S_ISREG(file->f_dentry->d_inode->i_mode))
                return file->f_op->read(file, buf, count, off);

        return -EINVAL;
}
/* writes up to @count bytes from @buf at *@off to regular @file through the
 * file's own write method.
 *
 * Returns the result of that method, or -EINVAL if @file is not a regular
 * file. */
static ssize_t
fsfilt_tmpfs_write(struct file *file, char *buf,
                   size_t count, loff_t *off)
{
        if (S_ISREG(file->f_dentry->d_inode->i_mode))
                return file->f_op->write(file, buf, count, off);

        return -EINVAL;
}
/* puts the contents of @page into the page cache of @inode: the shmem page
 * with the same index is looked up, @page is copied into it and it is marked
 * dirty. @lazy_cache is not used.
 *
 * Returns 0 on success or the error from shmem_getpage_locked(). */
static int
fsfilt_tmpfs_putpage(struct inode *inode, struct page *page,
                     int lazy_cache)
{
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct page *cached;

        /* getting page from shmem. It may be read from swap. And this is the
         * reason, why we do not just add passed @page to page cache. */
        down(&info->sem);
        cached = shmem_getpage_locked(inode, page->index);
        up(&info->sem);

        if (IS_ERR(cached))
                return PTR_ERR(cached);

        copy_page(kmap(cached), kmap(page));
        kunmap(page);
        kunmap(cached);

        /* taking care about possible cache aliasing. */
        if (inode->i_mapping->i_mmap_shared != NULL)
                flush_dcache_page(cached);

        SetPageDirty(cached);
        UnlockPage(cached);
        page_cache_release(cached);
        return 0;
}
/* returns the page of @inode at @index as obtained from
 * shmem_getpage_unlocked(), or the ERR_PTR() value that call returned. */
static struct page *
fsfilt_tmpfs_getpage(struct inode *inode, long int index)
{
        struct page *page = shmem_getpage_unlocked(inode, index);

        /* taking care about possible cache aliasing. */
        if (!IS_ERR(page) && inode->i_mapping->i_mmap_shared != NULL)
                flush_dcache_page(page);

        return page;
}
/* vfs_readdir() callback used by fsfilt_tmpfs_readpage(): appends one
 * directory record for @name to the buffer described by the fetch_hint_t
 * passed in @buf.
 *
 * @buf:    fetch_hint_t describing the output buffer; updated in place.
 * @name:   entry name, @namlen bytes long.
 * @offset: not used.
 * @ino:    inode number of the entry.
 * @d_type: not used, file_type is always stored as 0.
 *
 * Returns 0 if the record was stored, or -ENOENT (with hint->stop set) if it
 * does not fit into the space left. */
static int
fillent(void *buf, const char *name, int namlen,
        loff_t offset, ino_t ino, unsigned int d_type)
{
        fetch_hint_t *hint = buf;
        ext2_dirent_t *entry = hint->dirent;
        __u16 rec_len = EXT2_ENT_LEN(namlen);

        hint->stop = (hint->chunk < rec_len);
        if (hint->stop)
                return -ENOENT;

        /* fill in the record itself... */
        entry->inode = cpu_to_le32(ino);
        entry->rec_len = cpu_to_le16(rec_len);
        entry->name_len = namlen;
        entry->file_type = 0;
        memcpy(entry->name, name, namlen);

        /* ...and account for it in the hint. */
        hint->count++;
        hint->chunk -= rec_len;
        hint->rec_len = rec_len;
        hint->dirent += rec_len;
        return 0;
}
/* this should be the same as in tmpfs. Should it be not hardcoded? */
#define BOGO_ENTRY_SIZE (20)

/* reads @count bytes of @file at *@off into @buf. Mostly needed for reading
 * directories on MDS.
 *
 * For a regular file this is a plain read through the file's read method. For
 * a directory, @buf is filled with struct ext2_dirent records, one chunk of at
 * most PAGE_CACHE_SIZE bytes at a time; within every chunk the last record is
 * extended so that the records cover the chunk completely. *@off is divided by
 * BOGO_ENTRY_SIZE to find the position to seek the directory to, and is
 * advanced by the number of bytes produced.
 *
 * Returns the number of bytes produced, -ENOENT if the directory cannot be
 * positioned, the vfs_readdir() error, or -EINVAL if @file is neither a
 * regular file nor a directory. */
static ssize_t
fsfilt_tmpfs_readpage(struct file *file, char *buf,
                      size_t count, loff_t *off)
{
        int rc = 0;
        struct inode *inode = file->f_dentry->d_inode;

        if (S_ISREG(inode->i_mode)) {
                rc = file->f_op->read(file, buf, count, off);
        } else if (S_ISDIR(inode->i_mode)) {
                int error;
                loff_t offset;
                fetch_hint_t hint;
                ext2_dirent_t *dirent;

                /* positioning to passed @off. The value is converted to long
                 * before dividing (no 64 bit division is needed), rather than
                 * reading the loff_t through a long pointer, which picks the
                 * wrong half on 32 bit big-endian machines. */
                offset = (long int)*off / BOGO_ENTRY_SIZE;
                if (file->f_op->llseek(file, offset, 0) != offset)
                        return -ENOENT;

                /* reading @count bytes of data. */
                while (count > 0) {
                        /* never more than one page, and never more than what
                         * is left: subtracting a whole page from a smaller
                         * unsigned @count would wrap around. */
                        size_t chunk = count < PAGE_CACHE_SIZE ?
                                       count : PAGE_CACHE_SIZE;

                        /* NOTE(review): hint.chunk and hint.rec_len are __u16,
                         * so this assumes PAGE_CACHE_SIZE < 64K -- confirm for
                         * the supported architectures. */
                        hint.stop = 0;
                        hint.count = 0;
                        hint.file = file;
                        hint.dirent = buf;
                        hint.chunk = chunk;
                        hint.rec_len = chunk;

                        if ((error = vfs_readdir(file, fillent, &hint)) < 0)
                                return error;

                        /* we should have something after vfs_readdir() is
                         * finished. */
                        LASSERT(hint.count != 0);

                        /* last entry should be extended up to free page
                         * size. */
                        if (hint.chunk > 0) {
                                __u16 rec_len;

                                hint.dirent -= hint.rec_len;
                                dirent = (ext2_dirent_t *)hint.dirent;
                                rec_len = le16_to_cpu(dirent->rec_len);
                                dirent->rec_len = cpu_to_le16(rec_len + hint.chunk);
                        }

                        /* the next chunk goes after this one instead of
                         * overwriting it. */
                        buf += chunk;
                        count -= chunk;
                        *off += chunk;
                        rc += chunk;
                }
                UPDATE_ATIME(inode);
        } else {
                rc = -EINVAL;
        }
        return rc;
}
/* registers @cb_func to be called for the transaction @handle. There is no
 * journal on tmpfs to attach the callback to, so it is invoked right away with
 * @obd, @last_rcvd, @cb_data and 0 as the last argument. @handle is not used.
 *
 * Always returns 0. */
static int
fsfilt_tmpfs_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
                            void *handle, fsfilt_cb_t cb_func,
                            void *cb_data)
{
        cb_func(obd, last_rcvd, cb_data, 0);

        return 0;
}
/* prepares @inode for a SAN write: grows i_size to @newsize if that is larger
 * than the current size. We do not need block numbers and other stuff, as it
 * will not be used, so only blocks[0] is set (to 0) and @nblocks is ignored.
 *
 * Always returns 0. */
static int
fsfilt_tmpfs_prep_san_write(struct inode *inode, long *blocks,
                            int nblocks, loff_t newsize)
{
        if (inode->i_size < newsize)
                inode->i_size = newsize;

        blocks[0] = 0;
        return 0;
}
/* reads a record of @size bytes at *@off from @file into @buf. This is used
 * for reading configuration.
 *
 * If the record would run past the end of the file, the read is shortened to
 * what the file still holds.
 *
 * Returns 0 on success (including when nothing is left to read), -EIO if *@off
 * is beyond the end of the file, or the error from fsfilt_tmpfs_read(). */
static int
fsfilt_tmpfs_read_record(struct file *file, void *buf,
                         int size, loff_t *off)
{
        struct inode *inode = file->f_dentry->d_inode;
        int past_eof;
        int rc;

        lock_kernel();
        past_eof = (inode->i_size < *off + size);
        if (past_eof)
                size = inode->i_size - *off;
        unlock_kernel();

        /* only a shortened request is checked; a request which fits into the
         * file is passed on as it is. */
        if (past_eof) {
                if (size < 0)
                        return -EIO;
                if (size == 0)
                        return 0;
        }

        rc = fsfilt_tmpfs_read(file, buf, size, off);
        return (rc < 0) ? rc : 0;
}
/* writes a record of @size bytes from @buf at *@off to @file. This is used
 * for writing configuration. @sync is not used.
 *
 * Returns 0 on success or the error from fsfilt_tmpfs_write(). */
static int
fsfilt_tmpfs_write_record(struct file *file, void *buf,
                          int size, loff_t *off, int sync)
{
        int rc = fsfilt_tmpfs_write(file, buf, size, off);

        return (rc < 0) ? rc : 0;
}
/* method table registered with the fsfilt layer for filesystem type
 * "tmpfs". */
static struct fsfilt_operations fsfilt_tmpfs_ops = {
        .fs_type           = "tmpfs",
        .fs_owner          = THIS_MODULE,
        .fs_start          = fsfilt_tmpfs_mtd_start,
        .fs_commit         = fsfilt_tmpfs_mtd_commit,
        .fs_brw_start      = fsfilt_tmpfs_io_start,
        .fs_commit_async   = fsfilt_tmpfs_io_commit,
        .fs_commit_wait    = fsfilt_tmpfs_commit_wait,
        .fs_iocontrol      = fsfilt_tmpfs_iocontrol,
        .fs_set_md         = fsfilt_tmpfs_set_md,
        .fs_get_md         = fsfilt_tmpfs_get_md,
        .fs_readpage       = fsfilt_tmpfs_readpage,
        .fs_getpage        = fsfilt_tmpfs_getpage,
        .fs_putpage        = fsfilt_tmpfs_putpage,
        .fs_add_journal_cb = fsfilt_tmpfs_add_journal_cb,
        .fs_statfs         = fsfilt_tmpfs_statfs,
        .fs_sync           = fsfilt_tmpfs_sync,
        .fs_prep_san_write = fsfilt_tmpfs_prep_san_write,
        .fs_write_record   = fsfilt_tmpfs_write_record,
        .fs_read_record    = fsfilt_tmpfs_read_record,
        .fs_setattr        = fsfilt_tmpfs_setattr,
        .fs_setup          = fsfilt_tmpfs_setup,
};
/* module init: creates the slab cache for transaction handles and registers
 * the tmpfs method table with the fsfilt layer.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created, or the error
 * from fsfilt_register_ops() (the cache is destroyed again in that case). */
static int __init
fsfilt_tmpfs_init(void)
{
        int rc;

        trans_cache = kmem_cache_create("fsfilt_tmpfs_trans",
                                        sizeof(struct tmpfs_trans),
                                        0, 0, NULL, NULL);
        if (trans_cache == NULL) {
                CERROR("error allocating fsfilt transaction handle cache\n");
                GOTO(out, rc = -ENOMEM);
        }

        rc = fsfilt_register_ops(&fsfilt_tmpfs_ops);
        if (rc != 0)
                kmem_cache_destroy(trans_cache);
out:
        return rc;
}
/* module exit: unregisters the tmpfs method table and destroys the handle
 * cache. A failure to destroy the cache, or handles which are still accounted
 * in trans_count, are reported. */
static void __exit
fsfilt_tmpfs_exit(void)
{
        int rc;

        fsfilt_unregister_ops(&fsfilt_tmpfs_ops);

        rc = kmem_cache_destroy(trans_cache);
        if (rc == 0 && atomic_read(&trans_count) == 0)
                return;

        CERROR("can't free fsfilt trans cache: count %d, rc = %d\n",
               atomic_read(&trans_count), rc);
}
/* module entry and exit points, followed by the module description. */
module_init(fsfilt_tmpfs_init);
module_exit(fsfilt_tmpfs_exit);
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre tmpfs Filesystem Helper v0.1");
MODULE_LICENSE("GPL");
#!/bin/bash
# Runs llmount.sh with tmpfs as FSTYPE, MDSDEV and OSTDEV and, if that
# succeeds, runs sanity.sh with START and CLEAN set to ":" and the tests
# listed in EXCEPT excluded.
#
# Currently the following tests do not pass:
# 47 - due to an unresolvable symbol in the UML local libc
# 52a, 52b - due to ioctl() not being implemented in tmpfs
# 57a - due to inability to be supplied to tmpfs
# 56 - for a reason that is not known yet.
NAME=local FSTYPE=tmpfs MDSDEV=tmpfs OSTDEV=tmpfs sh llmount.sh && \
START=: CLEAN=: EXCEPT="47 52a 52b 56 57a" sh sanity.sh
# Runs llmount.sh with tmpfs as FSTYPE, MDSDEV and OSTDEV.
NAME=local FSTYPE=tmpfs MDSDEV=tmpfs OSTDEV=tmpfs sh llmount.sh
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment