diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 1c43b1af701069ec23ff656eb6ea3780d069323f..762279d7374c31304a2494c067ebced408b3b6cd 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -10,84 +10,10 @@ tbd Cluster File Systems, Inc. <info@clusterfs.com> (see http://wiki.lustre.org/index.php?title=Patchless_Client) 2.6.9-42.0.10.EL (RHEL 4) 2.6.16 - 2.6.22 vanilla (kernel.org) - * Recommended e2fsprogs version: 1.39.cfs7 + * Recommended e2fsprogs version: 1.39.cfs8 * Note that reiserfs quotas are disabled on SLES 10 in this kernel. * bug fixes -Severity : enhancement -Bugzilla : 11039 -Description: 2.6.18 server support (lustre 1.6.1) -Details : Support for 2.6.18 kernels on the server side. - -Severity : normal -Frequency : rare -Bugzilla : 12696 -Description: ASSERTION(imp->imp_conn_current) failed -Details : an assertion failure is hit if a client node boots and attempts to - mount a lustre filesystem faster than RECONNECT_INTERVAL seconds. - -Severity : normal -Frequency : only for i686 -Bugzilla : 12695 -Description: 1.4.11 RC1 build fails for RHEL 4, i686 -Details : Fixed config variable for build. - -Severity : normal -Frequency : rare -Bugzilla : 12415 -Description: Updated patchess for new RHEL4 kernel -Details : Updated patch inode-nr_unused-2.6.9-rhel4.patch - Updated patch jbd-stats-2.6.9.patch - Updated patch qsnet-rhel4-2.6.patch - Updated patch quota-deadlock-on-pagelock-core.patch - Updated patch vfs_intent-2.6-rhel4.patch - Updated patch vfs_races-2.6-rhel4.patch - Updated series file 2.6-rhel4-titech.series - Updated series file 2.6-rhel4.series - Updated kernel config files - -Severity : normal -Frequency : rare -Bugzilla : 12374 -Description: lquota slave complains LBUG when reconnecting with mds - or failover in mds. -Details : quota slave depends qctxt->lqc_import to send its quota request. - This pointer will be invalid if mds did failover or broke its - connect to osts, which leads to LBUG. 
- -Severity : normal -Frequency : when qunit size is too small(less than 20M) -Bugzilla : 12588 -Description: write is stopped by improper -EDQUOT -Details : If the master is busy and qunit size is small enough(let's say 1M), - the slave can not get quota from master on time, which will lead - slave to trigger a -EQUOTA to client. - -Severity : normal -Frequency : rare -Bugzilla : 12629 -Description: Deadlock during metadata tests -Details : in prune_dir_dentries(), shrink_dcache_parent() should not be - called with the per-dentry lock held. - -Severity : normal -Frequency : SLES9 only -Bugzilla : 12744 -Description: Lustre patched kernel for SLES9 SP3 has NR_CPUS set to 8 -Details : set CONFIG_NR_CPUS to 128 instead of 8. - -Severity : enhancement -Bugzilla : 12678 -Description: remove fs_prep_san_write operation and related patches -Details : remove the ext3-san-jdike patches which are no longer useful. - -Severity : normal -Frequency : rare -Bugzilla : 11324 -Description: LDISKFS-fs error (device sdc): ldiskfs_free_blocks -Details : a disk corruption can cause the mballoc code to assert on a - double free. - Severity : enhancement Bugzilla : 12194 Description: add optional extra BUILD_VERSION info @@ -351,9 +277,7 @@ Details : dev_clear_rdonly(bdev) must be called in kill_bdev() instead of Severity : minor Bugzilla : 11706 Description: service threads may hog cpus when there are a lot of requests - coming -Details : Insert cond_resched to give other threads a chance to use some of - the cpu +Details : Insert cond_resched to give other threads a chance to use some CPU Severity : normal Frequency : rare @@ -369,42 +293,42 @@ Severity : normal Bugzilla : 12597 Description: brw_stats were being printed incorrectly Details : brw_stats were being printed as log2 but all of them were not - recorded as log2. Also remove some code duplication arising from - filter_tally_{read,write}. + recorded as log2. 
Also remove some code duplication arising from + filter_tally_{read,write}. Severity : normal Bugzilla : 11674 Frequency : rare, only in recovery. Description: ASSERTION(req->rq_type != LI_POISON) failed Details : imp_lock should be held while iterating over imp_sending_list for - prevent destroy request after get timeout in ptlrpc_queue_wait. + prevent destroy request after get timeout in ptlrpc_queue_wait. Severity : normal Bugzilla : 12689 Description: replay-single.sh test 52 fails Details : A lock's skiplist need to be cleanup when it being unlinked - from its resource list. + from its resource list. Severity : normal Bugzilla : 11737 Frequency : always Description: Short directio read returns full requested size rather than - actual amount read. + actual amount read. Details : Direct I/O operations should return actual amount of bytes - transferred rather than requested size. + transferred rather than requested size. Severity : enhancement Bugzilla : 10589 Description: metadata RPC reduction (e.g. for rm performance) Details : decrease the amount of synchronous RPC between clients and servers - by canceling conflicing lock before the operation on the client side - and packing thier handles into the main operation RPC to server. + by canceling conflicting locks before the operation on the client + and packing their handles into the main operation RPC to server. Severity : enhancement Bugzilla : 12605 Description: add #ifdef HAVE_KERNEL_CONFIG_H -Details : kernels from 2.6.19 not need include linux/config.h, but add include - linux/autoconf.h in commpiler command line. +Details : kernels from 2.6.19 not need include linux/config.h, but add + include linux/autoconf.h in compiler command line. Severity : enhancement Bugzilla : 12764 @@ -429,24 +353,24 @@ Severity : minor Bugzilla : 10419 Frequency : always Description: Correct condition for output debug message.
-Details : inode i_nlink equal zero is not enough for output message about disk - corruption, i_ctime and i_mode should be also checked. +Details : inode i_nlink equal zero is not enough for output message about + disk corruption, i_ctime and i_mode should be also checked. Severity : minor Bugzilla : 12415 Frequency : always in patchless client Description: add configure check for truncate_complete_page Details : improve checks for exported symbols. This allow run check without - sources, but with Module.symvers shipped with kernel distribution. - add check for truncate_complete_page used by patchless client. + sources, but with Module.symvers shipped with kernel distribution. + add check for truncate_complete_page used by patchless client. Severity : major Bugzilla : 11710 Frequency : always Description: improve handling recoverable errors -Details : if request processig with error which can be recoverable on server side - request should be resend, otherwise page released from cache and marked - as error. +Details : if request processing with error which can be recoverable on server + request should be resent, otherwise page released from cache and + marked as error. Severity : normal Bugzilla : 12646 @@ -462,20 +386,103 @@ Severity : normal Bugzilla : 11815 Description: replace obdo_alloc() with OBDO_ALLOC macro Details : nothing special is done in obdo_alloc() function, and for - debugging purpose, it needs to be replaced with macros. + debugging purpose, it needs to be replaced with macros. Severity : normal Bugzilla : 12784 Description: bad return value and errno from fcntl call Details : In liblustre API, errno should be a negative value if error - happens. + happens. Severity : normal Bugzilla : 11544 Description: ptlrpc_check_set() LBUG Details : In case of positive reply from server and failed client bulk - callback after bulk transfer shouldn't LBUG, but process this - request as erroneous.
+ callback after bulk transfer shouldn't LBUG, but process this + request as erroneous. + +Severity : enhancement +Bugzilla : 10968 +Description: VFS operations stats tool. +Details : Tool which collects stats by tracking value written in pid, + ppid, gid and uses llstat to generate output to plot graph using + plot-llstat + Updated lustre/utils/Makefile.am + Added lustre/utils/ltrack_stats.c + +Severity : enhancement +Bugzilla : 11039 +Description: 2.6.18 server support (lustre 1.6.1) +Details : Support for 2.6.18 kernels on the server side. + +Severity : normal +Frequency : rare +Bugzilla : 12696 +Description: ASSERTION(imp->imp_conn_current) failed +Details : an assertion failure is hit if a client node boots and attempts to + mount a lustre filesystem faster than RECONNECT_INTERVAL seconds. + +Severity : normal +Frequency : only for i686 +Bugzilla : 12695 +Description: 1.4.11 RC1 build fails for RHEL 4, i686 +Details : Fixed config variable for build. + +Severity : normal +Frequency : rare +Bugzilla : 12415 +Description: Updated patches for new RHEL4 kernel +Details : Updated patch inode-nr_unused-2.6.9-rhel4.patch + Updated patch jbd-stats-2.6.9.patch + Updated patch qsnet-rhel4-2.6.patch + Updated patch quota-deadlock-on-pagelock-core.patch + Updated patch vfs_intent-2.6-rhel4.patch + Updated patch vfs_races-2.6-rhel4.patch + Updated series file 2.6-rhel4-titech.series + Updated series file 2.6-rhel4.series + Updated kernel config files + +Severity : normal +Frequency : rare +Bugzilla : 12374 +Description: lquota slave complains LBUG when reconnecting with mds + or failover in mds. +Details : quota slave depends on qctxt->lqc_import to send its quota request. + This pointer will be invalid if mds did failover or broke its + connection to osts, which leads to LBUG.
+ +Severity : normal +Frequency : when qunit size is too small(less than 20M) +Bugzilla : 12588 +Description: write is stopped by improper -EDQUOT +Details : If the master is busy and qunit size is small enough(let's say 1M), + the slave can not get quota from master on time, which will lead + slave to trigger a -EDQUOT to client. + +Severity : normal +Frequency : rare +Bugzilla : 12629 +Description: Deadlock during metadata tests +Details : in prune_dir_dentries(), shrink_dcache_parent() should not be + called with the per-dentry lock held. + +Severity : normal +Frequency : SLES9 only +Bugzilla : 12744 +Description: Lustre patched kernel for SLES9 SP3 has NR_CPUS set to 8 +Details : set CONFIG_NR_CPUS to 128 instead of 8. + +Severity : enhancement +Bugzilla : 12678 +Description: remove fs_prep_san_write operation and related patches +Details : remove the ext3-san-jdike patches which are no longer useful. + +Severity : normal +Frequency : rare +Bugzilla : 11324 +Description: LDISKFS-fs error (device sdc): ldiskfs_free_blocks +Details : a disk corruption can cause the mballoc code to assert on a + double free.
-------------------------------------------------------------------------------- diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index e19088b954f128b4033af2cb09ac5dfd395fe36f..d354eb19b115249981cfe7e01005e0d28b40d170 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -363,19 +363,19 @@ typedef enum { } ost_cmd_t; #define OST_FIRST_OPC OST_REPLY -typedef uint64_t obd_id; -typedef uint64_t obd_gr; -typedef uint64_t obd_time; -typedef uint64_t obd_size; -typedef uint64_t obd_off; -typedef uint64_t obd_blocks; -typedef uint32_t obd_blksize; -typedef uint32_t obd_mode; -typedef uint32_t obd_uid; -typedef uint32_t obd_gid; -typedef uint32_t obd_flag; -typedef uint64_t obd_valid; -typedef uint32_t obd_count; +typedef __u64 obd_id; +typedef __u64 obd_gr; +typedef __u64 obd_time; +typedef __u64 obd_size; +typedef __u64 obd_off; +typedef __u64 obd_blocks; +typedef __u64 obd_valid; +typedef __u32 obd_blksize; +typedef __u32 obd_mode; +typedef __u32 obd_uid; +typedef __u32 obd_gid; +typedef __u32 obd_flag; +typedef __u32 obd_count; #define OBD_FL_INLINEDATA (0x00000001) #define OBD_FL_OBDMDEXISTS (0x00000002) @@ -1342,7 +1342,7 @@ struct llog_cookie { } __attribute__((packed)); /* llog protocol */ -enum llogd_rpc_ops { +typedef enum { LLOG_ORIGIN_HANDLE_CREATE = 501, LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, LLOG_ORIGIN_HANDLE_READ_HEADER = 503, @@ -1352,7 +1352,9 @@ enum llogd_rpc_ops { LLOG_CATINFO = 507, /* for lfs catinfo */ LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ -}; + LLOG_LAST_OPC +} llog_cmd_t; +#define LLOG_FIRST_OPC LLOG_ORIGIN_HANDLE_CREATE struct llogd_body { struct llog_logid lgd_logid; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index ba7f180f95a7d0a4c9c7da6cef3422c7aa5103f1..1a6dbc2db88b1be4fdd13581c0c36880ae3f2c20 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -926,6 +926,7 @@ 
int ldlm_cli_cancel(struct lustre_handle *lockh) LDLM_LOCK_PUT(lock); return rc < 0 ? rc : 0; } + /* - Free space in lru for @count new locks, * redundant unused locks are canceled locally; * - also cancel locally unused aged locks; @@ -961,13 +962,13 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, } if (&lock->l_lru == &ns->ns_unused_list) break; - + if ((added >= count) && (!(flags & LDLM_CANCEL_AGED) || cfs_time_before_64(cur, (__u64)ns->ns_max_age + lock->l_last_used))) break; - + LDLM_LOCK_GET(lock); /* dropped by bl thread */ spin_unlock(&ns->ns_unused_lock); diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index dd8415aec482a0236641c16edb499caae0eb3ef8..5ce488bb015bd1a96c40eedae428359148315db0 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -614,15 +614,11 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, void *ptr = NULL; if (type & LPROCFS_TYPE_REGS) ptr = "regs"; - else { - if (type & LPROCFS_TYPE_BYTES) - ptr = "bytes"; - else { - if (type & LPROCFS_TYPE_PAGES) - ptr = "pages"; - } - } - lprocfs_counter_init(sbi->ll_stats, + else if (type & LPROCFS_TYPE_BYTES) + ptr = "bytes"; + else if (type & LPROCFS_TYPE_PAGES) + ptr = "pages"; + lprocfs_counter_init(sbi->ll_stats, llite_opcode_table[id].opcode, (type & LPROCFS_CNTR_AVGMINMAX), llite_opcode_table[id].opname, ptr); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 67247f0dd5751d21545f4a9393bf38026d3d6506..df26df40b86988b9a5d5f9b7d58fc3c693191158 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -55,8 +55,8 @@ struct ll_rpc_opcode { { OST_OPEN, "ost_open" }, { OST_CLOSE, "ost_close" }, { OST_STATFS, "ost_statfs" }, - { 14, NULL }, - { 15, NULL }, + { 14, NULL }, /* formerly OST_SAN_READ */ + { 15, NULL }, /* formerly OST_SAN_WRITE */ { OST_SYNC, "ost_sync" }, { OST_SET_INFO, "ost_set_info" }, { OST_QUOTACHECK, "ost_quotacheck" }, @@ -91,7 +91,17 @@ 
struct ll_rpc_opcode { { MGS_TARGET_REG, "mgs_target_reg" }, { MGS_TARGET_DEL, "mgs_target_del" }, { OBD_PING, "obd_ping" }, - { OBD_LOG_CANCEL, "llog_origin_handle_cancel"}, + { OBD_LOG_CANCEL, "llog_origin_handle_cancel" }, + { OBD_QC_CALLBACK, "obd_quota_callback" }, + { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_create" }, + { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block"}, + { LLOG_ORIGIN_HANDLE_READ_HEADER,"llog_origin_handle_read_header" }, + { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" }, + { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" }, + { LLOG_ORIGIN_CONNECT, "llog_origin_connect" }, + { LLOG_CATINFO, "llog_catinfo" }, + { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" }, + { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" }, }; const char* ll_opcode2str(__u32 opcode) diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 698d9bc0132ba9b3af44879dbbf66b738d018f4a..c1238b933a8f637ce6e1f5cb5e4c034c20c5cdbc 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -92,17 +92,26 @@ static inline int opcode_offset(__u32 opc) { (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < LLOG_LAST_OPC) { + /* LLOG Opcode */ + return (opc - LLOG_FIRST_OPC + + (OBD_LAST_OPC - OBD_FIRST_OPC) + + (MGS_LAST_OPC - MGS_FIRST_OPC) + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); } else { /* Unknown Opcode */ return -1; } } -#define LUSTRE_MAX_OPCODES ((LDLM_LAST_OPC - LDLM_FIRST_OPC) + \ +#define LUSTRE_MAX_OPCODES ((OST_LAST_OPC - OST_FIRST_OPC) + \ (MDS_LAST_OPC - MDS_FIRST_OPC) + \ - (OST_LAST_OPC - OST_FIRST_OPC) + \ + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \ + (MGS_LAST_OPC - MGS_FIRST_OPC) + \ (OBD_LAST_OPC - OBD_FIRST_OPC) + \ - (MGS_LAST_OPC - MGS_FIRST_OPC)) + (LLOG_LAST_OPC - LLOG_FIRST_OPC)) enum { 
PTLRPC_REQWAIT_CNTR = 0,