From da63a6e1b2fcbb780271548a898f00acb5f50cf5 Mon Sep 17 00:00:00 2001 From: bobijam <bobijam> Date: Wed, 7 May 2008 01:50:50 +0000 Subject: [PATCH] Branch b1_6 b=15575 i=wangdi, johann Description: Stack overflow during MDS log replay Details : ease stack pressure by using a thread dealing llog_process. --- lustre/ChangeLog | 5 ++ lustre/obdclass/llog.c | 82 ++++++++++++++++++++++++++------- lustre/obdclass/llog_internal.h | 11 +++++ 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 0967934c99..cb5df0aeaa 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -25,6 +25,11 @@ tbd Sun Microsystems, Inc. 'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'. For more information, please refer to bugzilla 13904. +Severity : major +Bugzilla : 15575 +Description: Stack overflow during MDS log replay +Details : ease stack pressure by using a thread dealing llog_process. + Severity : normal Bugzilla : 15278 Description: fix build on ppc32 diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 977b489367..ec44fde1fb 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -42,6 +42,7 @@ #include <obd_class.h> #include <lustre_log.h> #include <libcfs/list.h> +#include "llog_internal.h" /* Allocate a new log or catalog handle */ struct llog_handle *llog_alloc_handle(void) @@ -204,22 +205,30 @@ int llog_close(struct llog_handle *loghandle) } EXPORT_SYMBOL(llog_close); -int llog_process(struct llog_handle *loghandle, llog_cb_t cb, - void *data, void *catdata) +/* Worker body for llog_process(): arg points to a struct llog_process_info carrying the log handle, callback, callback data and catalog data. The outcome is stored in lpi->lpi_rc and, in __KERNEL__ builds, lpi->lpi_completion is signalled before returning; the function itself always returns 0. */ static int llog_process_thread(void *arg) { - struct llog_log_hdr *llh = loghandle->lgh_hdr; - struct llog_process_cat_data *cd = catdata; - char *buf; - __u64 cur_offset = LLOG_CHUNK_SIZE, last_offset; - int rc = 0, index = 1, last_index; - int saved_index = 0, last_called_index = 0; - ENTRY; + struct llog_process_info *lpi = (struct llog_process_info *)arg; + struct llog_handle *loghandle = lpi->lpi_loghandle; + struct llog_log_hdr *llh =
loghandle->lgh_hdr; + struct llog_process_cat_data *cd = lpi->lpi_catdata; + char *buf; + __u64 cur_offset = LLOG_CHUNK_SIZE; + __u64 last_offset; + int rc = 0, index = 1, last_index; + int saved_index = 0, last_called_index = 0; LASSERT(llh); OBD_ALLOC(buf, LLOG_CHUNK_SIZE); - if (!buf) - RETURN(-ENOMEM); + if (!buf) { /* report the failure through lpi_rc and still signal the completion, because llog_process() waits on it in __KERNEL__ builds */ + lpi->lpi_rc = -ENOMEM; +#ifdef __KERNEL__ + complete(&lpi->lpi_completion); +#endif + return 0; + } + + cfs_daemonize("llog_process"); if (cd != NULL) { last_called_index = cd->first_idx; @@ -267,7 +276,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n", rec->lrh_type, rec->lrh_index); - + if (rec->lrh_index == 0) GOTO(out, 0); /* no more records */ @@ -284,18 +293,18 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, continue; } - CDEBUG(D_OTHER, + CDEBUG(D_OTHER, "lrh_index: %d lrh_len: %d (%d remains)\n", rec->lrh_index, rec->lrh_len, (int)(buf + LLOG_CHUNK_SIZE - (char *)rec)); - loghandle->lgh_cur_idx = rec->lrh_index; + loghandle->lgh_cur_idx = rec->lrh_index; loghandle->lgh_cur_offset = (char *)rec - (char *)buf + - last_offset; + last_offset; /* if set, process the callback on this record */ if (ext2_test_bit(index, llh->llh_bitmap)) { - rc = cb(loghandle, rec, data); + rc = lpi->lpi_cb(loghandle, rec, lpi->lpi_cbdata); last_called_index = index; if (rc == LLOG_PROC_BREAK) { CDEBUG(D_HA, "recovery from log: "LPX64 @@ -304,7 +313,8 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, loghandle->lgh_id.lgl_ogen); GOTO(out, rc); } else if (rc == LLOG_DEL_RECORD) { - llog_cancel_rec(loghandle, rec->lrh_index); + llog_cancel_rec(loghandle, + rec->lrh_index); rc = 0; } if (rc) @@ -325,6 +335,44 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, cd->last_idx = last_called_index; if (buf) OBD_FREE(buf, LLOG_CHUNK_SIZE); + lpi->lpi_rc = rc; +#ifdef __KERNEL__ + complete(&lpi->lpi_completion); +#endif + return 0; +} + +/* Entry point kept for existing callers. Packs loghandle, cb, data and catdata into a heap-allocated struct llog_process_info. In __KERNEL__ builds llog_process_thread() is started through cfs_kernel_thread() and this function waits on lpi_completion; otherwise the worker is called directly. Returns the lpi_rc stored by the worker, -ENOMEM if lpi cannot be allocated, or the negative value from cfs_kernel_thread() if the thread cannot be started. Per the patch description this eases stack pressure during MDS log replay (bugzilla 15575). */ int llog_process(struct
llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata) +{ + struct llog_process_info *lpi; + int rc; + ENTRY; + + OBD_ALLOC_PTR(lpi); + if (lpi == NULL) { + CERROR("cannot alloc pointer\n"); + RETURN(-ENOMEM); + } + lpi->lpi_loghandle = loghandle; + lpi->lpi_cb = cb; + lpi->lpi_cbdata = data; + lpi->lpi_catdata = catdata; + +#ifdef __KERNEL__ + init_completion(&lpi->lpi_completion); + rc = cfs_kernel_thread(llog_process_thread, lpi, CLONE_VM | CLONE_FILES); + if (rc < 0) { + CERROR("cannot start thread: %d\n", rc); + OBD_FREE_PTR(lpi); + RETURN(rc); + } + /* lpi is freed below, so wait until the worker has stored lpi_rc and signalled the completion */ wait_for_completion(&lpi->lpi_completion); +#else + llog_process_thread(lpi); +#endif + rc = lpi->lpi_rc; + OBD_FREE_PTR(lpi); RETURN(rc); } EXPORT_SYMBOL(llog_process); diff --git a/lustre/obdclass/llog_internal.h b/lustre/obdclass/llog_internal.h index d4be19c282..82bb2e3e99 100644 --- a/lustre/obdclass/llog_internal.h +++ b/lustre/obdclass/llog_internal.h @@ -1,6 +1,17 @@ #ifndef __LLOG_INTERNAL_H__ #define __LLOG_INTERNAL_H__ +#include <lustre_log.h> + +/* Arguments and result exchanged between llog_process() and llog_process_thread(). */ struct llog_process_info { + struct llog_handle *lpi_loghandle; + llog_cb_t lpi_cb; /* callback invoked on a record, together with lpi_cbdata */ + void *lpi_cbdata; + void *lpi_catdata; /* read by the worker as a struct llog_process_cat_data pointer; may be NULL */ + int lpi_rc; /* result stored by llog_process_thread() */ + struct completion lpi_completion; /* signalled by the worker in __KERNEL__ builds */ +}; + int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, char *name, int count, struct llog_catid *idarray); int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, -- GitLab