-
Andreas Dilger authored
Add "abort_recov" mount option to mount.lustre usage message.
Andreas Dilger authored: Add "abort_recov" mount option to mount.lustre usage message.
mount_lustre.c 24.62 KiB
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/utils/mount_lustre.c
*
* Author: Robert Read <rread@clusterfs.com>
* Author: Nathan Rutman <nathan@clusterfs.com>
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <sys/mount.h>
#include <mntent.h>
#include <getopt.h>
#include "obdctl.h"
#include <lustre_ver.h>
#include <glob.h>
#include <ctype.h>
#include <limits.h>
#include "mount_utils.h"
/* sysfs attribute, relative to a /sys/block/<dev> directory, that
 * set_tunables() reads the hardware request-size limit from */
#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
/* sysfs attribute, relative to a /sys/block/<dev> directory, that
 * set_tunables() writes the value read above into */
#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
/* upper bound applied to the "retry=<num>" mount option */
#define MAX_RETRIES 99
/* incremented once per -v/--verbose; non-zero enables extra diagnostics */
int verbose = 0;
/* incremented by -n/--nomtab and on remount; non-zero skips the mtab update */
int nomtab = 0;
/* incremented by -f/--fake; non-zero skips the mount(2) call itself */
int fake = 0;
/* incremented by --force; non-zero skips the "already in mtab" check */
int force = 0;
/* retry count taken from the "retry=<num>" mount option */
int retry = 0;
/* last path component of argv[0]; set at the start of main() and used as
 * the prefix of diagnostic messages */
char *progname = NULL;
/*
 * Print the version banner and the command-line summary to 'out', then
 * terminate the process.  The exit status is 0 when the text goes to
 * stdout (help was asked for) and EINVAL for any other stream.
 * This function does not return.
 */
void usage(FILE *out)
{
        /* description of the positional arguments, flags and mount options */
        static const char option_help[] =
                "\t<device>: the disk device, or for a client:\n"
                "\t\t<mgmtnid>[:<altmgtnid>...]:/<filesystem>-client\n"
                "\t<filesystem>: name of the Lustre filesystem (e.g. lustre1)\n"
                "\t<mountpt>: filesystem mountpoint (e.g. /mnt/lustre)\n"
                "\t-f|--fake: fake mount (updates /etc/mtab)\n"
                "\t--force: force mount even if already in /etc/mtab\n"
                "\t-h|--help: print this usage message\n"
                "\t-n|--nomtab: do not update /etc/mtab after mount\n"
                "\t-v|--verbose: print verbose config settings\n"
                "\t<mntopt>: one or more comma separated of:\n"
                "\t\t(no)flock,(no)user_xattr,(no)acl\n"
                "\t\tabort_recov: abort server recovery handling\n"
                "\t\tnosvc: only start MGC/MGS obds\n"
                "\t\tnomgs: only start target obds, using existing MGS\n"
                "\t\texclude=<ostname>[:<ostname>] : colon-separated list of "
                "inactive OSTs (e.g. lustre-OST0001)\n"
                "\t\tretry=<num>: number of times mount is retried by client\n";
        int status = 0;

        if (out != stdout)
                status = EINVAL;

        fprintf(out, "%s v" LUSTRE_VERSION_STRING "\n", progname);
        fputs("\nThis mount helper should only be invoked via the "
              "mount (8) command,\ne.g. mount -t lustre dev dir\n\n", out);
        fprintf(out, "usage: %s [-fhnv] [-o <mntopt>] <device> <mountpt>\n",
                progname);
        fputs(option_help, out);

        exit(status);
}
/*
 * Scan the mounted-filesystems table (MOUNTED) for a record whose device
 * name equals spec1 or spec2, whose mount directory equals mtpt and whose
 * filesystem type equals type.
 *
 * Returns EEXIST when such a record is present, 0 when it is not or when
 * the table cannot be opened.
 */
static int check_mtab_entry(char *spec1, char *spec2, char *mtpt, char *type)
{
        struct mntent *entry;
        FILE *mtab;
        int found = 0;

        mtab = setmntent(MOUNTED, "r");
        if (mtab == NULL)
                return 0;

        for (entry = getmntent(mtab); entry != NULL; entry = getmntent(mtab)) {
                int same_dev = !strcmp(entry->mnt_fsname, spec1) ||
                               !strcmp(entry->mnt_fsname, spec2);

                if (!same_dev)
                        continue;
                if (strcmp(entry->mnt_dir, mtpt) != 0)
                        continue;
                if (strcmp(entry->mnt_type, type) != 0)
                        continue;

                found = EEXIST;
                break;
        }

        endmntent(mtab);
        return found;
}
/*
 * Append one record to the mounted-filesystems table (MOUNTED).
 *
 * spec/mtpt/type/opts/freq/pass fill the corresponding mntent fields; a
 * NULL opts is recorded as an empty option string.  The flags argument is
 * accepted but not used.
 *
 * Returns 0 on success, 16 when the table cannot be opened or the record
 * cannot be added (a diagnostic is printed to stderr in both cases).
 */
static int
update_mtab_entry(char *spec, char *mtpt, char *type, char *opts,
                  int flags, int freq, int pass)
{
        struct mntent entry;
        FILE *mtab;
        int status = 0;

        entry.mnt_fsname = spec;
        entry.mnt_dir = mtpt;
        entry.mnt_type = type;
        entry.mnt_opts = opts;
        if (entry.mnt_opts == NULL)
                entry.mnt_opts = "";
        entry.mnt_freq = freq;
        entry.mnt_passno = pass;

        mtab = setmntent(MOUNTED, "a+");
        if (mtab == NULL) {
                fprintf(stderr, "%s: setmntent(%s): %s:",
                        progname, MOUNTED, strerror (errno));
                return 16;
        }

        /* addmntent() reports failure with a return value of 1 */
        if (addmntent(mtab, &entry) == 1) {
                fprintf(stderr, "%s: addmntent: %s:",
                        progname, strerror (errno));
                status = 16;
        }
        endmntent(mtab);

        return status;
}
/* Get rid of symbolic hostnames for tcp, since kernel can't do lookups */
#define MAXNIDSTR 1024
/*
 * Build a copy of the device string s1 in which every NID in front of the
 * first '/' has been passed through libcfs_str2nid() and written back with
 * libcfs_nid2str().  NIDs are delimited by ',' or ':'; each delimiter is
 * copied through unchanged, and everything from the first '/' onwards is
 * appended verbatim.  A string that already starts with '/' (for example a
 * block device path) is therefore copied as-is.
 *
 * s1 is modified temporarily while each NID is parsed and is restored
 * before the function returns.
 *
 * Returns a malloc()ed string of at most MAXNIDSTR bytes that the caller
 * must free(), or NULL (after printing a diagnostic) when a NID cannot be
 * parsed, memory is exhausted, or the result does not fit in MAXNIDSTR
 * bytes.
 */
static char *convert_hostnames(char *s1)
{
        char *converted, *s2, *c;
        char sep;
        int left = MAXNIDSTR;
        int len;
        lnet_nid_t nid;

        converted = malloc(left);
        if (converted == NULL) {
                fprintf(stderr, "%s: out of memory converting '%s'\n",
                        progname, s1);
                return NULL;
        }
        c = converted;

        while (*s1 != '/') {
                s2 = strpbrk(s1, ",:");
                if (!s2)
                        goto out_parse;

                /* terminate the current NID just long enough to parse it */
                sep = *s2;
                *s2 = '\0';
                nid = libcfs_str2nid(s1);
                *s2 = sep;                      /* back to original string */
                if (nid == LNET_NID_ANY)
                        goto out_parse;

                /* snprintf() returns the length it WANTED to write; never
                 * advance the cursor past the end of the buffer */
                len = snprintf(c, left, "%s%c", libcfs_nid2str(nid), sep);
                if (len < 0 || len >= left)
                        goto out_long;
                c += len;
                left -= len;
                s1 = s2 + 1;
        }

        /* copy the "/<fsname>" (or device path) tail */
        len = snprintf(c, left, "%s", s1);
        if (len < 0 || len >= left)
                goto out_long;

        return converted;

out_long:
        fprintf(stderr, "%s: device string too long (limit %d bytes)\n",
                progname, MAXNIDSTR);
        free(converted);
        return NULL;
out_parse:
        fprintf(stderr, "%s: Can't parse NID '%s'\n", progname, s1);
        free(converted);
        return NULL;
}
/*****************************************************************************
*
* This part was cribbed from util-linux/mount/mount.c. There was no clear
* license information, but many other files in the package are identified as
* GNU GPL, so it's a pretty safe bet that was their intent.
*
****************************************************************************/
struct opt_map {
        const char *opt;        /* option name */
        int inv;                /* true if flag value should be inverted */
        int mask;               /* flag mask value */
};

/*
 * Options that are consumed by this helper instead of being handed to
 * Lustre.  Entries with a zero mask are recognised and dropped without
 * touching the mount flags (their 'inv' value is then irrelevant).
 */
static const struct opt_map opt_map[] = {
  /*"optname", inv,ms_mask */
  /* These flags are parsed by mount, not lustre */
  { "defaults", 0, 0         },      /* default options */
  { "remount",  0, MS_REMOUNT},      /* remount with different options */
  { "rw",       1, MS_RDONLY },      /* read-write */
  { "ro",       0, MS_RDONLY },      /* read-only */
  { "exec",     1, MS_NOEXEC },      /* permit execution of binaries */
  { "noexec",   0, MS_NOEXEC },      /* don't execute binaries */
  { "suid",     1, MS_NOSUID },      /* honor suid executables */
  { "nosuid",   0, MS_NOSUID },      /* don't honor suid executables */
  { "dev",      1, MS_NODEV  },      /* interpret device files  */
  { "nodev",    0, MS_NODEV  },      /* don't interpret devices */
  { "sync",     0, MS_SYNCHRONOUS},  /* synchronous I/O */
  { "async",    1, MS_SYNCHRONOUS},  /* asynchronous I/O */
  { "atime",    1, MS_NOATIME  },    /* set file access time on read */
  { "noatime",  0, MS_NOATIME  },    /* do not set file access time on read */
#ifdef MS_NODIRATIME
  { "diratime", 1, MS_NODIRATIME },  /* update directory access times */
  { "nodiratime",0,MS_NODIRATIME },  /* do not update directory access times */
#endif
#ifdef MS_RELATIME
  { "relatime", 0, MS_RELATIME },    /* request relative access-time updates */
  { "norelatime",1,MS_RELATIME },    /* clear the relative access-time flag */
#endif
  { "auto",     0, 0         },      /* Can be mounted using -a */
  { "noauto",   0, 0         },      /* Can only be mounted explicitly */
  { "nousers",  1, 0         },      /* Forbid ordinary user to mount */
  { "nouser",   1, 0         },      /* Forbid ordinary user to mount */
  { "noowner",  1, 0         },      /* Device owner has no special privs */
  { "_netdev",  0, 0         },      /* Device accessible only via network */
  { "loop",     0, 0         },
  { NULL,       0, 0         }
};
/****************************************************************************/

/*
 * Classify a single mount option and fold it into the mount flags.
 *
 * 'check' is one option without the surrounding commas.  It is compared
 * against opt_map; an entry matches when 'check' is exactly the entry name
 * or the entry name followed by "=<value>" (e.g. "loop=/dev/loop0").  A
 * bare prefix is NOT a match, so options that merely start with a known
 * name ("nouser_xattr", "rootcontext=...") are left alone.
 *
 * On a match with a non-zero mask, the mask is set in *flagp, or cleared
 * when the entry is marked inverted.
 *
 * Returns:
 *   1 = don't pass on to lustre
 *   0 = pass on to lustre
 */
static int parse_one_option(const char *check, int *flagp)
{
        const struct opt_map *opt;

        for (opt = &opt_map[0]; opt->opt != NULL; opt++) {
                size_t len = strlen(opt->opt);

                if (strncmp(check, opt->opt, len) != 0)
                        continue;
                /* the first len bytes matched, so check[len] is in bounds;
                 * require the option name to end here */
                if (check[len] != '\0' && check[len] != '=')
                        continue;

                if (opt->mask) {
                        if (opt->inv)
                                *flagp &= ~(opt->mask);
                        else
                                *flagp |= opt->mask;
                }
                return 1;
        }
        /* Assume any unknown options are valid and pass them on.  The mount
           will fail if lmd_parse, ll_options or ldiskfs doesn't recognize it.*/
        return 0;
}
/*
 * Replace options with subset of Lustre-specific options, and
 * fill in mount flags.
 *
 * orig_options is a comma-separated option string that is rewritten in
 * place; the result is never longer than the input.  Each option is
 * handled as follows:
 *   - empty options are dropped;
 *   - "retry=<num>" sets the global retry count, clamped to
 *     [0, MAX_RETRIES], and is dropped;
 *   - options consumed by parse_one_option() update *flagp and are dropped;
 *   - everything else is kept, in its original order.
 *
 * *flagp is always reset to 0 first.
 *
 * Returns 0 on success or ENOMEM when the scratch buffer cannot be
 * allocated (orig_options is left untouched in that case).
 */
int parse_options(char *orig_options, int *flagp)
{
        char *options, *opt, *nextopt, *val;

        *flagp = 0;

        options = calloc(strlen(orig_options) + 1, 1);
        if (options == NULL)
                return ENOMEM;

        nextopt = orig_options;
        while ((opt = strsep(&nextopt, ","))) {
                if (!*opt)
                        /* empty option */
                        continue;

                /* Handle retries in a slightly different
                 * manner: the option name must be exactly "retry" */
                val = strchr(opt, '=');
                if (val == opt + 5 && strncmp(opt, "retry", 5) == 0) {
                        /* strtol() saturates on overflow, so the clamp
                         * below is well defined for any input */
                        long count = strtol(val + 1, NULL, 10);

                        if (count > MAX_RETRIES)
                                count = MAX_RETRIES;
                        else if (count < 0)
                                count = 0;
                        retry = (int)count;
                } else if (parse_one_option(opt, flagp) == 0) {
                        /* pass this on as an option */
                        if (*options)
                                strcat(options, ",");
                        strcat(options, opt);
                }
        }

        strcpy(orig_options, options);
        free(options);
        return 0;
}
/*
 * Read the first line of the file at 'path' into 'buf' (at most size - 1
 * bytes plus the terminating NUL; a trailing newline is kept).
 *
 * Returns 0 on success.  An empty file is a success and yields an empty
 * string, so on a 0 return buf always holds a valid string that came from
 * this file.  Returns the errno value when the file cannot be opened or a
 * read error occurs.
 */
int read_file(char *path, char *buf, int size)
{
        FILE *fd;
        int rc = 0;

        fd = fopen(path, "r");
        if (fd == NULL)
                return errno;

        if (fgets(buf, size, fd) == NULL) {
                /* distinguish a real read error from plain end-of-file */
                if (ferror(fd))
                        rc = errno ? errno : EIO;
                /* never hand stale or indeterminate data back to the caller */
                if (size > 0)
                        buf[0] = '\0';
        }

        fclose(fd);
        return rc;
}
/*
 * Replace the contents of the file at 'path' with the string 'buf'.
 *
 * Returns 0 on success, or the errno value when the file cannot be opened
 * or the data cannot be written.  Output is buffered, so a rejected write
 * may only be reported when the stream is flushed by fclose(); both steps
 * are therefore checked.
 */
int write_file(char *path, char *buf)
{
        FILE *fd;
        int rc = 0;

        fd = fopen(path, "w");
        if (fd == NULL)
                return errno;

        if (fputs(buf, fd) == EOF)
                rc = errno ? errno : EIO;

        /* always close; keep the first error if there already was one */
        if (fclose(fd) == EOF && rc == 0)
                rc = errno ? errno : EIO;

        return rc;
}
/* This is to tune the kernel for good SCSI performance.
 * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
 * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb
 *
 * source is the device path; it is resolved with realpath() first.  The
 * src_len argument is ignored: the length of the resolved path is used
 * instead.
 *
 * Returns 0 when the value was copied or when there is nothing to do
 * (loop devices, empty hardware limit).  Any non-zero value is a failure;
 * depending on which step failed it is -EINVAL, a positive errno value
 * from read_file()/write_file(), or the raw result of stat()/glob().
 * Warnings are printed only when 'verbose' is set.
 */
int set_tunables(char *source, int src_len)
{
        glob_t glob_info;
        struct stat stat_buf;
        char *chk_major, *chk_minor;
        char *savept, *dev;
        char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
        char real_path[PATH_MAX] = {'\0'};
        size_t i, len;
        int rc = 0;
        int dev_major, dev_minor;

        if (!source)
                return -EINVAL;

        if (realpath(source, real_path) == NULL) {
                if (verbose)
                        fprintf(stderr, "warning: %s: cannot resolve: %s\n",
                                source, strerror(errno));
                return -EINVAL;
        }

        /* work from the end of the resolved STRING, not the end of the
         * buffer: bytes past the terminating NUL are not ours to interpret */
        src_len = (int)strlen(real_path);

        if (strncmp(real_path, "/dev/loop", 9) == 0)
                return 0;

        if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL))
                return 0;

        /* First try: cut the last path component at its first digit
         * ("/dev/sda1" -> "/sda") and look that name up under /sys/block. */
        dev = real_path + src_len - 1;
        while (dev > real_path && (*dev != '/')) {
                if (isdigit((unsigned char)*dev))
                        *dev = 0;
                dev--;
        }
        snprintf(path, sizeof(path), "/sys/block%s/%s", dev,
                 MAX_HW_SECTORS_KB_PATH);
        buf[0] = '\0';
        rc = read_file(path, buf, sizeof(buf));
        /* more than a lone newline means there is a value to copy */
        if (rc == 0 && strlen(buf) > 1) {
                snprintf(path, sizeof(path), "/sys/block%s/%s", dev,
                         MAX_SECTORS_KB_PATH);
                rc = write_file(path, buf);
                if (rc && verbose)
                        fprintf(stderr, "warning: opening %s: %s\n",
                                path, strerror(rc));
                return rc;
        }

        if (rc != ENOENT)
                return rc;

        /* The name of the device say 'X' specified in /dev/X may not
         * match any entry under /sys/block/. In that case we need to
         * match the major/minor number to find the entry under
         * sys/block corresponding to /dev/X */

        /* NOTE(review): this cuts the path at the first digit found anywhere
         * in it, not only in the last component; kept as-is, but it looks
         * wrong for device names such as md0 or dm-3 -- confirm intent. */
        dev = real_path + src_len - 1;
        while (dev > real_path) {
                if (isdigit((unsigned char)*dev))
                        *dev = 0;
                dev--;
        }

        rc = stat(dev, &stat_buf);
        if (rc) {
                if (verbose)
                        fprintf(stderr, "warning: %s, device %s stat failed\n",
                                strerror(errno), dev);
                return rc;
        }

        dev_major = major(stat_buf.st_rdev);
        dev_minor = minor(stat_buf.st_rdev);
        rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
        if (rc) {
                if (verbose)
                        fprintf(stderr, "warning: failed to read entries under "
                                "/sys/block\n");
                return rc;
        }

        /* each /sys/block/<name>/dev holds "<major>:<minor>\n" */
        for (i = 0; i < glob_info.gl_pathc; i++){
                snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]);

                buf[0] = '\0';
                rc = read_file(path, buf, sizeof(buf));
                if (rc)
                        continue;

                len = strlen(buf);
                if (len == 0)
                        continue;       /* nothing to parse */
                if (buf[len - 1] == '\n')
                        buf[len - 1] = '\0';

                chk_major = strtok_r(buf, ":", &savept);
                if (chk_major == NULL)
                        continue;       /* no major number present */
                chk_minor = savept;
                if (dev_major == atoi(chk_major) &&
                    dev_minor == atoi(chk_minor))
                        break;
        }

        if (i == glob_info.gl_pathc) {
                if (verbose)
                        fprintf(stderr,"warning: device %s does not match any "
                                "entry under /sys/block\n", real_path);
                rc = -EINVAL;
                goto out;
        }

        snprintf(path, sizeof(path), "%s/%s", glob_info.gl_pathv[i],
                 MAX_HW_SECTORS_KB_PATH);
        buf[0] = '\0';
        rc = read_file(path, buf, sizeof(buf));
        if (rc) {
                if (verbose)
                        fprintf(stderr, "warning: opening %s: %s\n",
                                path, strerror(rc));
                goto out;
        }

        if (strlen(buf) > 1) {
                snprintf(path, sizeof(path), "%s/%s",
                         glob_info.gl_pathv[i], MAX_SECTORS_KB_PATH);
                rc = write_file(path, buf);
                if (rc && verbose)
                        fprintf(stderr, "warning: writing to %s: %s\n",
                                path, strerror(rc));
        }

out:
        globfree(&glob_info);
        return rc;
}
/*
 * Explain a failed mount on stderr.  'err' is the errno value saved right
 * after mount(2) failed; it is passed in because the many stdio calls made
 * here are free to overwrite errno.
 */
static void report_mount_failure(int err, char *usource, char *target)
{
        char *cli;

        /* for "<nids>:/<fsname>" sources, point at the fsname */
        cli = strrchr(usource, ':');
        if (cli && (strlen(cli) > 2))
                cli += 2;
        else
                cli = NULL;

        fprintf(stderr, "%s: mount %s at %s failed: %s\n", progname,
                usource, target, strerror(err));
        if (err == ENODEV)
                fprintf(stderr, "Are the lustre modules loaded?\n"
                        "Check /etc/modprobe.conf and /proc/filesystems"
                        "\nNote 'alias lustre llite' should be removed"
                        " from modprobe.conf\n");
        if (err == ENOTBLK)
                fprintf(stderr, "Do you need -o loop?\n");
        if (err == ENOMEDIUM)
                fprintf(stderr,
                        "This filesystem needs at least 1 OST\n");
        if (err == ENOENT) {
                fprintf(stderr, "Is the MGS specification correct?\n");
                fprintf(stderr, "Is the filesystem name correct?\n");
                fprintf(stderr, "If upgrading, is the copied client log"
                        " valid? (see upgrade docs)\n");
        }
        if (err == EALREADY)
                fprintf(stderr, "The target service is already running."
                        " (%s)\n", usource);
        if (err == ENXIO)
                fprintf(stderr, "The target service failed to start "
                        "(bad config log?) (%s).  "
                        "See /var/log/messages.\n", usource);
        if (err == EIO)
                fprintf(stderr, "Is the MGS running?\n");
        if (err == EADDRINUSE)
                fprintf(stderr, "The target service's index is already "
                        "in use. (%s)\n", usource);
        if (err == EINVAL) {
                fprintf(stderr, "This may have multiple causes.\n");
                if (cli)
                        fprintf(stderr, "Is '%s' the correct filesystem"
                                " name?\n", cli);
                fprintf(stderr, "Are the mount options correct?\n");
                fprintf(stderr, "Check the syslog for more info.\n");
        }
}

/*
 * Best-effort detach of a loop device after a failed mount.  Does nothing
 * unless usource starts with "/dev/loop".
 */
static void detach_loop_device(const char *usource)
{
        char cmd[256];
        const char *p;
        int len;

        if (strncmp(usource, "/dev/loop", 9) != 0)
                return;

        /* The name comes from the command line and is handed to a shell:
         * refuse anything that is not a plain device path. */
        for (p = usource; *p != '\0'; p++) {
                if (!isalnum((unsigned char)*p) && *p != '/' &&
                    *p != '_' && *p != '-' && *p != '.')
                        return;
        }

        len = snprintf(cmd, sizeof(cmd), "/sbin/losetup -d %s", usource);
        if (len < 0 || (size_t)len >= sizeof(cmd))
                return;

        /* failure to detach is not reported */
        system(cmd);
}

/*
 * mount.lustre entry point: mount <device> on <mountpt> as type "lustre".
 *
 * Steps: parse the command line, convert the device string with
 * convert_hostnames(), split the -o string into mount flags and
 * Lustre options, consult the mtab (unless --force), append
 * ",device=<source>" to the options, tune the block device for
 * non-client mounts, call mount(2) (unless --fake) with optional retries,
 * and finally record the mount in the mtab (unless disabled).
 *
 * The exit status is 0 on success, otherwise an errno-style value
 * describing the first failure (or the update_mtab_entry() result).
 */
int main(int argc, char *const argv[])
{
        char default_options[] = "";
        char *usource, *source, *target, *ptr;
        char *options = NULL, *optcopy = NULL;
        char *orig_options = default_options;
        int i, opt, rc, flags = 0, optlen;
        static struct option long_opt[] = {
                {"fake", 0, 0, 'f'},
                {"force", 0, 0, 1},
                {"help", 0, 0, 'h'},
                {"nomtab", 0, 0, 'n'},
                {"options", 1, 0, 'o'},
                {"verbose", 0, 0, 'v'},
                {0, 0, 0, 0}
        };

        progname = strrchr(argv[0], '/');
        progname = progname ? progname + 1 : argv[0];

        while ((opt = getopt_long(argc, argv, "fhno:v",
                                  long_opt, NULL)) != EOF){
                switch (opt) {
                case 1:
                        ++force;
                        printf("force: %d\n", force);
                        break;
                case 'f':
                        ++fake;
                        printf("fake: %d\n", fake);
                        break;
                case 'h':
                        usage(stdout);
                        break;
                case 'n':
                        ++nomtab;
                        printf("nomtab: %d\n", nomtab);
                        break;
                case 'o':
                        orig_options = optarg;
                        break;
                case 'v':
                        ++verbose;
                        break;
                default:
                        fprintf(stderr, "%s: unknown option '%c'\n",
                                progname, opt);
                        usage(stderr);
                        break;
                }
        }

        if (optind + 2 > argc) {
                fprintf(stderr, "%s: too few arguments\n", progname);
                usage(stderr);
        }

        usource = argv[optind];
        source = convert_hostnames(usource);
        target = argv[optind + 1];

        /* strip trailing slashes from the mount point, but keep a lone "/" */
        if (*target != '\0') {
                ptr = target + strlen(target) - 1;
                while ((ptr > target) && (*ptr == '/')) {
                        *ptr = 0;
                        ptr--;
                }
        }

        if (!usource || !source) {
                usage(stderr);
        }

        if (verbose) {
                for (i = 0; i < argc; i++)
                        printf("arg[%d] = %s\n", i, argv[i]);
                printf("source = %s (%s), target = %s\n", usource, source,
                       target);
                printf("options = %s\n", orig_options);
        }

        /* parse a private copy: orig_options is recorded in the mtab later */
        options = malloc(strlen(orig_options) + 1);
        if (options == NULL) {
                fprintf(stderr, "%s: out of memory\n", progname);
                rc = ENOMEM;
                goto out;
        }
        strcpy(options, orig_options);
        rc = parse_options(options, &flags);
        if (rc) {
                fprintf(stderr, "%s: can't parse options: %s\n",
                        progname, options);
                rc = EINVAL;
                goto out;
        }

        if (!force) {
                rc = check_mtab_entry(usource, source, target, "lustre");
                if (rc && !(flags & MS_REMOUNT)) {
                        fprintf(stderr, "%s: according to %s %s is "
                                "already mounted on %s\n",
                                progname, MOUNTED, usource, target);
                        rc = EEXIST;
                        goto out;
                }
                if (!rc && (flags & MS_REMOUNT)) {
                        fprintf(stderr, "%s: according to %s %s is "
                                "not already mounted on %s\n",
                                progname, MOUNTED, usource, target);
                        rc = ENOENT;
                        goto out;
                }
        }
        /* a remount must not add a second mtab record */
        if (flags & MS_REMOUNT)
                nomtab++;

        rc = access(target, F_OK);
        if (rc) {
                rc = errno;
                fprintf(stderr, "%s: %s inaccessible: %s\n", progname, target,
                        strerror(rc));
                goto out;
        }

        /* In Linux 2.4, the target device doesn't get passed to any of our
           functions.  So we'll stick it on the end of the options. */
        optlen = strlen(options) + strlen(",device=") + strlen(source) + 1;
        optcopy = malloc(optlen);
        if (optcopy == NULL) {
                fprintf(stderr, "%s: out of memory\n", progname);
                rc = ENOMEM;
                goto out;
        }
        strcpy(optcopy, options);
        if (*optcopy)
                strcat(optcopy, ",");
        strcat(optcopy, "device=");
        strcat(optcopy, source);

        if (verbose)
                printf("mounting device %s at %s, flags=%#x options=%s\n",
                       source, target, flags, optcopy);

        /* only local block devices are tuned, not "<nid>:/<fsname>" clients */
        if (!strstr(usource, ":/") && set_tunables(source, strlen(source)) &&
            verbose)
                fprintf(stderr, "%s: unable to set tunables for %s"
                                " (may cause reduced IO performance)\n",
                                argv[0], source);

        register_service_tags(usource, source, target);

        /* rc is 0 here, so a fake mount falls through to the mtab update */
        if (!fake) {
                /* flags and target get to lustre_get_sb, but not
                   lustre_fill_super.  Lustre ignores the flags, but mount
                   does not. */
                for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) {
                        rc = mount(source, target, "lustre", flags,
                                   (void *)optcopy);
                        if (rc == 0)
                                break;

                        /* save the reason now: the calls below may
                         * overwrite errno */
                        rc = errno;
                        if (verbose) {
                                fprintf(stderr, "%s: mount %s at %s "
                                        "failed: %s retries left: "
                                        "%d\n", progname,
                                        usource, target,
                                        strerror(rc), retry - i);
                        }

                        /* back off only when another attempt follows:
                         * 1, 1, 2, 2, 4, 4, ... capped at 32 seconds */
                        if (i < retry) {
                                int shift = i / 2;

                                if (shift > 5)
                                        shift = 5;
                                sleep(1 << shift);
                        }
                }
        }

        if (rc) {
                report_mount_failure(rc, usource, target);

                /* May as well try to clean up loop devs */
                detach_loop_device(usource);
        } else if (!nomtab) {
                rc = update_mtab_entry(usource, target, "lustre", orig_options,
                                       0,0,0);
        }

out:
        free(optcopy);
        free(options);
        free(source);
        return rc;
}