From aef0bdabb2fc90014787ed35613f338c08019d32 Mon Sep 17 00:00:00 2001 From: adilger <adilger> Date: Tue, 2 Aug 2005 23:29:58 +0000 Subject: [PATCH] Merge b1_4_bug3389 from b1_4 (20050729_0312) Description: Mounting a Lustre file system on a node running as an OST could lead to deadlocks Details : OSTs now allocate memory needed to write out data at startup, instead of when needed, to avoid having to allocate memory in possibly low memory situations. Specifically, if the file system is mounted on an OST, memory pressure could force it to try to write out data, which it needed to allocate memory to do. Due to the low memory, it would be unable to do so and the node would become unresponsive. b=6514, b=5137 Description: Addition of lconf --service command line option Details : lconf now accepts a '--service <arg>' option, which is shorthand for 'lconf --group <arg> --select <arg>=<hostname>' b=7015 Description: Failover mode is now the default for OSTs. Details : By default, OSTs will now run in failover mode. To return to the old behaviour, add '--failout' to the lmc line for OSTs. b=6101 Description: Health checks are now provided for MDS and OSTs Details : Additional detailed health check information on MDS and OSTs is now provided through the procfs health_check value. b=1693 Description: Disk fragmentation on the OSTs could eventually cause slowdowns after numerous create/delete cycles Details : The ext3 inode allocation policy would not allocate new inodes very well on the OSTs because there are no new directories being created. Instead we look for groups with free space if the parent directories are nearly full. b=4466 Description: Network or server problems during mount may cause partially mounted clients instead of returning an error. Details : The config llog parsing code may overwrite the error return code during mount error handling, returning success instead of an error.
b=6302 --- lnet/libcfs/tracefile.c | 30 ++++----- lnet/utils/acceptor.c | 134 ++++++++++++++++++++++++++-------------- 2 files changed, 102 insertions(+), 62 deletions(-) diff --git a/lnet/libcfs/tracefile.c b/lnet/libcfs/tracefile.c index d54e1ae556..e93ff1b702 100644 --- a/lnet/libcfs/tracefile.c +++ b/lnet/libcfs/tracefile.c @@ -82,12 +82,14 @@ static void tage_to_tail(struct trace_page *tage, struct list_head *queue) list_move_tail(&tage->linkage, queue); } -static int tage_invariant(struct trace_page *tage) +static void LASSERT_TAGE_INVARIANT(struct trace_page *tage) { - return (tage != NULL && - tage->page != NULL && - tage->used <= CFS_PAGE_SIZE && - cfs_page_count(tage->page) > 0); + LASSERT(tage != NULL); + LASSERT(tage->page != NULL); + LASSERTF(tage->used <= CFS_PAGE_SIZE, "used = %u, PAGE_SIZE %lu\n", + tage->used, CFS_PAGE_SIZE); + LASSERTF(cfs_page_count(tage->page) > 0, "count = %d\n", + cfs_page_count(tage->page)); } /* return a page that has 'len' bytes left at the end */ @@ -287,7 +289,7 @@ static void put_pages_back_on_cpu(void *info) spin_lock(&pc->pc_lock); list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); if (tage->cpu != smp_processor_id()) continue; @@ -324,7 +326,7 @@ static void put_pages_on_daemon_list_on_cpu(void *info) spin_lock(&pc->pc_lock); list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); if (tage->cpu != smp_processor_id()) continue; @@ -338,7 +340,7 @@ static void put_pages_on_daemon_list_on_cpu(void *info) LASSERT(!list_empty(&tcd->tcd_daemon_pages)); victim = tage_from_list(tcd->tcd_daemon_pages.next); - LASSERT(tage_invariant(victim)); + LASSERT_TAGE_INVARIANT(victim); list_del(&victim->linkage); tage_free(victim); @@ -370,7 +372,7 @@ void trace_debug_print(void) char *p, *file, *fn; cfs_page_t *page; - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); page = 
tage->page; p = cfs_page_address(page); @@ -425,7 +427,7 @@ int tracefile_dump_all_pages(char *filename) CFS_MMSPACE_OPEN; list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); rc = cfs_filp_write(filp, cfs_page_address(tage->page), tage->used, cfs_filp_poff(filp)); @@ -461,7 +463,7 @@ void trace_flush_pages(void) collect_pages(&pc); list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); list_del(&tage->linkage); tage_free(tage); @@ -556,7 +558,7 @@ static int tracefiled(void *arg) /* mark the first header, so we can sort in chunks */ tage = tage_from_list(pc.pc_pages.next); - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); hdr = cfs_page_address(tage->page); hdr->ph_flags |= PH_FLAG_FIRST_RECORD; @@ -564,7 +566,7 @@ static int tracefiled(void *arg) list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { static loff_t f_pos; - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); if (f_pos >= tracefile_size) f_pos = 0; @@ -657,7 +659,7 @@ static void trace_cleanup_on_cpu(void *info) tcd->tcd_shutting_down = 1; list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - LASSERT(tage_invariant(tage)); + LASSERT_TAGE_INVARIANT(tage); list_del(&tage->linkage); tage_free(tage); diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c index f34a1fe373..03b69ce60b 100644 --- a/lnet/utils/acceptor.c +++ b/lnet/utils/acceptor.c @@ -12,6 +12,8 @@ #include <sys/ioctl.h> #include <unistd.h> #include <syslog.h> +#include <stdarg.h> +#include <signal.h> #include <errno.h> #ifdef HAVE_LIBWRAP #include <arpa/inet.h> @@ -29,7 +31,8 @@ #define PIDFILE_DIR "/var/run" #endif -#define PROGNAME "acceptor" +char progname[] = "acceptor"; +char name_port[40]; /* for signal handler */ #ifdef HAVE_LIBWRAP /* needed because libwrap declares these as externs */ @@ -37,66 +40,87 @@ int allow_severity = LOG_INFO; int 
deny_severity = LOG_WARNING; #endif -void usage(char *myname) +void usage(char *progname) { fprintf(stderr, "usage: %s [-N nal_id] [-p] [-l] port\n\n" " -l\tKeep stdin/stdout open\n" - " -p\tAllow connections from non-privileged ports\n", myname); + " -p\tAllow connections from non-privileged ports\n", progname); exit (1); } -char *pidfile_name(char *name, int port) +void errlog(int level, const char *fmt, ...) +{ + va_list arg; + FILE *out; + + switch (level) { + case LOG_DEBUG: + case LOG_INFO: + case LOG_NOTICE: + out = stdout; + break; + default: + out = stderr; + break; + } + va_start(arg, fmt); + fprintf(out, "%s: ", name_port); + vfprintf(out, fmt, arg); + va_end(arg); + va_start(arg, fmt); + vsyslog(level, fmt, arg); + va_end(arg); +} + +char *pidfile_name(char *name_port) { static char pidfile[1024]; - snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", - PIDFILE_DIR, name, port); + snprintf(pidfile, sizeof(pidfile), "%s/%s.pid", PIDFILE_DIR, name_port); return pidfile; } -void pidfile_create(char *name, int port) +void pidfile_create(char *name_port) { - char *pidfile = pidfile_name(name, port); + char *pidfile = pidfile_name(name_port); FILE *fp; if ((fp = fopen(pidfile, "w"))) { fprintf(fp, "%d\n", getpid()); fclose(fp); } else { - syslog(LOG_DAEMON|LOG_ERR, "%s: %s\n", pidfile,strerror(errno)); + errlog(LOG_ERR, " error creating %s: %s\n", + pidfile, strerror(errno)); } } -int pidfile_cleanup(char *name, int port) +int pidfile_cleanup(char *name_port) { - char *pidfile = pidfile_name(name, port); + char *pidfile = pidfile_name(name_port); int rc; rc = unlink(pidfile); if (rc && errno != -ENOENT) fprintf(stderr, "%s: error removing %s: %s\n", - PROGNAME, pidfile, strerror(errno)); + progname, pidfile, strerror(errno)); return errno; } -int pidfile_exists(char *name, int port) +int pidfile_exists(char *name_port) { - char *pidfile = pidfile_name(name, port); + char *pidfile = pidfile_name(name_port); FILE *fpid; int pid, rc; - snprintf(pidfile, 
sizeof(pidfile), "%s/%s-%d.pid", - PIDFILE_DIR, name, port); - fpid = fopen(pidfile, "r+"); if (fpid == NULL) { if (errno == ENOENT) return 0; fprintf(stderr, "%s: error opening %s: %s.\n", - PROGNAME, pidfile, strerror(errno)); + progname, pidfile, strerror(errno)); return (1); } @@ -104,25 +128,30 @@ int pidfile_exists(char *name, int port) fclose(fpid); if (rc != 1) { fprintf(stderr,"%s: %s didn't contain a valid pid, removing.\n", - PROGNAME, pidfile); + progname, pidfile); goto stale; } if (kill(pid, 0) == 0) { fprintf(stderr, "%s: %s exists, acceptor pid %d running.\n", - PROGNAME, pidfile, pid); + progname, pidfile, pid); return (1); } fprintf(stderr, "%s: stale %s exists, pid %d doesn't, removing.\n", - PROGNAME, pidfile, pid); + progname, pidfile, pid); stale: - pidfile_cleanup(name, port); + pidfile_cleanup(name_port); return (0); } -void -show_connection (int fd, __u32 net_ip) +void handler(int sig) +{ + pidfile_cleanup(name_port); + exit(sig); +} + +void show_connection(int fd, __u32 net_ip) { static long last_time; static __u32 host_ip; @@ -146,7 +175,7 @@ show_connection (int fd, __u32 net_ip) else snprintf(host, sizeof(host), "%s", h->h_name); - syslog(LOG_DAEMON | LOG_INFO, "Accepted host: %s\n", host); + syslog(LOG_INFO, "accepted host: %s\n", host); } int main(int argc, char **argv) @@ -183,8 +212,10 @@ int main(int argc, char **argv) port = atol(argv[optind++]); - if (pidfile_exists(PROGNAME, port)) + snprintf(name_port, sizeof(name_port) - 1, "%s-%d", progname, port); + if (pidfile_exists(name_port)) return(EEXIST); + openlog(name_port, LOG_PID, LOG_DAEMON); memset(&srvaddr, 0, sizeof(srvaddr)); srvaddr.sin_family = AF_INET; @@ -194,21 +225,23 @@ int main(int argc, char **argv) fd = socket(PF_INET, SOCK_STREAM, 0); if (fd < 0) { rc = errno; - perror("opening socket"); + errlog(LOG_ERR, "error opening socket: %s\n", strerror(errno)); return(rc); } o = 1; if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) { rc = errno; - perror("Cannot 
set REUSEADDR socket opt"); + errlog(LOG_ERR, "cannot set REUSEADDR socket opt: %s\n", + strerror(errno)); return(rc); } rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); if (rc == -1) { rc = errno; - perror("bind: "); + errlog(LOG_ERR, "error binding to socket: %s\n", + strerror(errno)); return(rc); } @@ -217,25 +250,29 @@ int main(int argc, char **argv) perror("listen: "); return(rc); } - fprintf(stderr, "listening on port %d\n", port); + printf("listening on port %d\n", port); pfd = open("/dev/portals", O_RDWR); if (pfd < 0) { rc = errno; - perror("opening portals device"); + errlog(LOG_ERR, "opening portals device: %s\n",strerror(errno)); return(rc); } rc = daemon(0, noclose); if (rc < 0) { rc = errno; - perror("daemon(): "); + errlog(LOG_ERR, "error daemonizing: %s\n", strerror(errno)); return(rc); } - openlog(PROGNAME, LOG_PID, LOG_DAEMON); - syslog(LOG_DAEMON | LOG_INFO, "started, listening on port %d\n", port); - pidfile_create(PROGNAME, port); + signal(SIGHUP, SIG_IGN); + signal(SIGINT, handler); + signal(SIGQUIT, handler); + signal(SIGTERM, handler); + + errlog(LOG_NOTICE, "started, listening on port %d\n", port); + pidfile_create(name_port); while (1) { struct sockaddr_in clntaddr; @@ -249,11 +286,11 @@ int main(int argc, char **argv) char addrstr[INET_ADDRSTRLEN]; cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); - if ( cfd < 0 ) { - perror("accept"); - pidfile_cleanup(PROGNAME, port); - return(0); - continue; + if (cfd < 0) { + errlog(LOG_ERR, "error accepting connection: %s\n", + strerror(errno)); + break; + //continue; } #ifdef HAVE_LIBWRAP @@ -263,19 +300,19 @@ int main(int argc, char **argv) if (!hosts_access(&request)) { inet_ntop(AF_INET, &clntaddr.sin_addr, addrstr, INET_ADDRSTRLEN); - syslog(LOG_DAEMON | LOG_WARNING, - "Unauthorized access from %s:%hd\n", + errlog(LOG_WARNING, "unauthorized access from %s:%hd\n", addrstr, ntohs(clntaddr.sin_port)); close (cfd); continue; } #endif - if (require_privports && 
ntohs(clntaddr.sin_port) >= IPPORT_RESERVED) { + if (require_privports && + ntohs(clntaddr.sin_port) >= IPPORT_RESERVED) { inet_ntop(AF_INET, &clntaddr.sin_addr, addrstr, INET_ADDRSTRLEN); - syslog(LOG_DAEMON | LOG_ERR, - "Closing non-privileged connection from %s:%d\n", + errlog(LOG_ERR, + "closing non-privileged connection from %s:%d\n", addrstr, ntohs(clntaddr.sin_port)); rc = close(cfd); if (rc) @@ -295,9 +332,10 @@ int main(int argc, char **argv) data.ioc_plen1 = sizeof(pcfg); if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) { - perror("ioctl failed"); + errlog(LOG_ERR, + "portals ioctl failed: %s\n", strerror(errno)); } else { - printf("client registered\n"); + errlog(LOG_DEBUG, "client registered\n"); } rc = close(cfd); if (rc) @@ -305,7 +343,7 @@ int main(int argc, char **argv) } closelog(); - pidfile_cleanup(PROGNAME, port); + pidfile_cleanup(name_port); return (0); } -- GitLab