From 957290e14f9ca9d91687a0a665ac1aca93ae4d3e Mon Sep 17 00:00:00 2001 From: girish <girish> Date: Tue, 13 Nov 2007 17:36:53 +0000 Subject: [PATCH] JBD: Fix NULL pointer bh->b_data on NUMA box with journal checksumming. The current journal checksumming patch failed the fsstress test on NUMA. The bh->b_data passed to the crc32_be() function could be a NULL pointer, which caused a kernel oops immediately when running fsstress with -o journal_checksum. This is because the page is part of highmem on the NUMA box. We need to kmap the page before accessing bh->b_data to calculate the checksums. --- .../jbd-journal-chksum-2.6-sles10.patch | 80 ++++++++------ .../jbd-journal-chksum-2.6.18-vanilla.patch | 103 +++++++++++------- 2 files changed, 106 insertions(+), 77 deletions(-) diff --git a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6-sles10.patch b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6-sles10.patch index f55ca27487..1467b02ba2 100644 --- a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6-sles10.patch +++ b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6-sles10.patch @@ -1,7 +1,7 @@ -Index: linux-2.6.16.46-0.14/fs/jbd/commit.c +Index: linux-2.6.16.53-0.16/fs/jbd/commit.c =================================================================== ---- linux-2.6.16.46-0.14.orig/fs/jbd/commit.c -+++ linux-2.6.16.46-0.14/fs/jbd/commit.c +--- linux-2.6.16.53-0.16.orig/fs/jbd/commit.c ++++ linux-2.6.16.53-0.16/fs/jbd/commit.c @@ -22,6 +22,7 @@ #include <linux/pagemap.h> #include <linux/smp_lock.h> @@ -82,7 +82,7 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c /* is it possible for another commit to fail at roughly * the same time as this one? 
If so, we don't want to * trust the barrier flag in the super, but instead want -@@ -153,15 +171,74 @@ static int journal_write_commit_record(j +@@ -153,12 +171,84 @@ static int journal_write_commit_record(j clear_buffer_ordered(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); @@ -113,9 +113,9 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c + journal_put_journal_head(bh2jh(bh)); + + return ret; - } - - /* ++} ++ ++/* + * Wait for all submitted IO to complete. + */ +static int journal_wait_on_locked_list(journal_t *journal, @@ -156,12 +156,22 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c + return ret; +} + -+ -+/* - * journal_commit_transaction - * - * The primary function for committing a transaction to the log. This -@@ -184,6 +261,8 @@ void journal_commit_transaction(journal_ ++static inline __u32 jbd_checksum_data(__u32 crc32_sum, struct buffer_head *bh) ++{ ++ struct page *page = bh->b_page; ++ char *addr; ++ __u32 checksum; ++ ++ addr = kmap(page); ++ checksum = crc32_be(crc32_sum, ++ (void *)(addr + offset_in_page(bh->b_data)), ++ bh->b_size); ++ kunmap(page); ++ return checksum; + } + + /* +@@ -184,6 +274,8 @@ void journal_commit_transaction(journal_ int first_tag = 0; int tag_flag; int i; @@ -170,7 +180,7 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c /* * First job: lock down the current transaction and wait for -@@ -395,37 +474,14 @@ write_out_data: +@@ -395,37 +487,14 @@ write_out_data: } /* @@ -214,7 +224,7 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c spin_unlock(&journal->j_list_lock); if (err) -@@ -598,6 +654,16 @@ write_out_data: +@@ -598,6 +667,16 @@ write_out_data: start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; @@ -223,15 +233,15 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c + */ + if (JFS_HAS_COMPAT_FEATURE(journal, + JFS_FEATURE_COMPAT_CHECKSUM)) { -+ crc32_sum = crc32_be(crc32_sum, -+ (void *)bh->b_data, -+ bh->b_size); ++ crc32_sum = ++ jbd_checksum_data(crc32_sum, ++ bh); + } + lock_buffer(bh); 
clear_buffer_dirty(bh); set_buffer_uptodate(bh); -@@ -614,6 +680,23 @@ start_journal_io: +@@ -614,6 +693,23 @@ start_journal_io: } } @@ -255,7 +265,7 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to complete. Control buffers being written are on the -@@ -712,9 +795,15 @@ wait_for_iobuf: +@@ -712,9 +808,15 @@ wait_for_iobuf: } jbd_debug(3, "JBD: commit phase 6\n"); @@ -274,10 +284,10 @@ Index: linux-2.6.16.46-0.14/fs/jbd/commit.c if (err) __journal_abort_hard(journal); -Index: linux-2.6.16.46-0.14/include/linux/jbd.h +Index: linux-2.6.16.53-0.16/include/linux/jbd.h =================================================================== ---- linux-2.6.16.46-0.14.orig/include/linux/jbd.h -+++ linux-2.6.16.46-0.14/include/linux/jbd.h +--- linux-2.6.16.53-0.16.orig/include/linux/jbd.h ++++ linux-2.6.16.53-0.16/include/linux/jbd.h @@ -142,6 +142,29 @@ typedef struct journal_header_s __be32 h_sequence; } journal_header_t; @@ -337,10 +347,10 @@ Index: linux-2.6.16.46-0.14/include/linux/jbd.h extern int journal_create (journal_t *); extern int journal_load (journal_t *journal); extern void journal_destroy (journal_t *); -Index: linux-2.6.16.46-0.14/fs/jbd/recovery.c +Index: linux-2.6.16.53-0.16/fs/jbd/recovery.c =================================================================== ---- linux-2.6.16.46-0.14.orig/fs/jbd/recovery.c -+++ linux-2.6.16.46-0.14/fs/jbd/recovery.c +--- linux-2.6.16.53-0.16.orig/fs/jbd/recovery.c ++++ linux-2.6.16.53-0.16/fs/jbd/recovery.c @@ -21,6 +21,7 @@ #include <linux/jbd.h> #include <linux/errno.h> @@ -536,10 +546,10 @@ Index: linux-2.6.16.46-0.14/fs/jbd/recovery.c /* It's really bad news if different passes end up at * different places (but possible due to IO errors). 
*/ if (info->end_transaction != next_commit_ID) { -Index: linux-2.6.16.46-0.14/fs/jbd/journal.c +Index: linux-2.6.16.53-0.16/fs/jbd/journal.c =================================================================== ---- linux-2.6.16.46-0.14.orig/fs/jbd/journal.c -+++ linux-2.6.16.46-0.14/fs/jbd/journal.c +--- linux-2.6.16.53-0.16.orig/fs/jbd/journal.c ++++ linux-2.6.16.53-0.16/fs/jbd/journal.c @@ -64,6 +64,7 @@ EXPORT_SYMBOL(journal_update_format); EXPORT_SYMBOL(journal_check_used_features); EXPORT_SYMBOL(journal_check_available_features); @@ -582,10 +592,10 @@ Index: linux-2.6.16.46-0.14/fs/jbd/journal.c /** * int journal_update_format () - Update on-disk journal structure. -Index: linux-2.6.16.46-0.14/fs/Kconfig +Index: linux-2.6.16.53-0.16/fs/Kconfig =================================================================== ---- linux-2.6.16.46-0.14.orig/fs/Kconfig -+++ linux-2.6.16.46-0.14/fs/Kconfig +--- linux-2.6.16.53-0.16.orig/fs/Kconfig ++++ linux-2.6.16.53-0.16/fs/Kconfig @@ -140,6 +140,7 @@ config EXT3_FS_SECURITY config JBD @@ -594,10 +604,10 @@ Index: linux-2.6.16.46-0.14/fs/Kconfig help This is a generic journaling layer for block devices. 
It is currently used by the ext3 and OCFS2 file systems, but it could -Index: linux-2.6.16.46-0.14/Documentation/filesystems/ext3.txt +Index: linux-2.6.16.53-0.16/Documentation/filesystems/ext3.txt =================================================================== ---- linux-2.6.16.46-0.14.orig/Documentation/filesystems/ext3.txt -+++ linux-2.6.16.46-0.14/Documentation/filesystems/ext3.txt +--- linux-2.6.16.53-0.16.orig/Documentation/filesystems/ext3.txt ++++ linux-2.6.16.53-0.16/Documentation/filesystems/ext3.txt @@ -14,6 +14,16 @@ Options When mounting an ext3 filesystem, the following option are accepted: (*) == default diff --git a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch index b2825e2a3e..356ae37347 100644 --- a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch +++ b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch @@ -1,16 +1,16 @@ -Index: linux-2.6.18-8.1.8/fs/jbd/commit.c +Index: linux-2.6.18.8/fs/jbd/commit.c =================================================================== ---- linux-2.6.18-8.1.8.orig/fs/jbd/commit.c -+++ linux-2.6.18-8.1.8/fs/jbd/commit.c -@@ -21,6 +21,7 @@ +--- linux-2.6.18.8.orig/fs/jbd/commit.c ++++ linux-2.6.18.8/fs/jbd/commit.c +@@ -22,6 +22,7 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/smp_lock.h> +#include <linux/crc32.h> + /* - * Default IO end handler for temporary BJ_IO buffer_heads. 
-@@ -93,19 +94,23 @@ static int inverted_lock(journal_t *jour +@@ -95,19 +96,23 @@ static int inverted_lock(journal_t *jour return 1; } @@ -38,7 +38,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c int barrier_done = 0; if (is_journal_aborted(journal)) -@@ -117,21 +122,34 @@ static int journal_write_commit_record(j +@@ -119,21 +124,34 @@ static int journal_write_commit_record(j bh = jh2bh(descriptor); @@ -82,7 +82,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c /* is it possible for another commit to fail at roughly * the same time as this one? If so, we don't want to * trust the barrier flag in the super, but instead want -@@ -152,14 +170,72 @@ static int journal_write_commit_record(j +@@ -154,12 +172,70 @@ static int journal_write_commit_record(j clear_buffer_ordered(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); @@ -94,8 +94,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c + *cbh = bh; + return ret; +} - -- return (ret == -EIO); ++ +/* + * This function along with journal_submit_commit_record + * allows to write the commit record asynchronously. @@ -113,8 +112,8 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c + journal_put_journal_head(bh2jh(bh)); + + return ret; - } - ++} ++ +/* + * Wait for all submitted IO to complete. 
+ */ @@ -123,7 +122,8 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c +{ + int ret = 0; + struct journal_head *jh; -+ + +- return (ret == -EIO); + while (commit_transaction->t_locked_list) { + struct buffer_head *bh; + @@ -154,12 +154,31 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c + cond_resched_lock(&journal->j_list_lock); + } + return ret; + } + + void journal_do_submit_data(struct buffer_head **wbuf, int bufs) +@@ -273,6 +349,20 @@ write_out_data: + journal_do_submit_data(wbuf, bufs); + } + ++static inline __u32 jbd_checksum_data(__u32 crc32_sum, struct buffer_head *bh) ++{ ++ struct page *page = bh->b_page; ++ char *addr; ++ __u32 checksum; ++ ++ addr = kmap(page); ++ checksum = crc32_be(crc32_sum, ++ (void *)(addr + offset_in_page(bh->b_data)), ++ bh->b_size); ++ kunmap(page); ++ return checksum; +} + - void journal_do_submit_data(struct buffer_head **wbuf, int bufs) - { - int i; -@@ -293,6 +369,8 @@ void journal_commit_transaction(journal_ + /* + * journal_commit_transaction + * +@@ -296,6 +386,8 @@ void journal_commit_transaction(journal_ int first_tag = 0; int tag_flag; int i; @@ -168,7 +187,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c /* * First job: lock down the current transaction and wait for -@@ -428,38 +506,14 @@ void journal_commit_transaction(journal_ +@@ -439,38 +531,14 @@ void journal_commit_transaction(journal_ journal_submit_data_buffers(journal, commit_transaction); /* @@ -213,7 +232,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c spin_unlock(&journal->j_list_lock); if (err) -@@ -627,6 +681,16 @@ void journal_commit_transaction(journal_ +@@ -643,6 +711,16 @@ void journal_commit_transaction(journal_ start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; @@ -222,15 +241,15 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c + */ + if (JFS_HAS_COMPAT_FEATURE(journal, + JFS_FEATURE_COMPAT_CHECKSUM)) { -+ crc32_sum = crc32_be(crc32_sum, -+ (void *)bh->b_data, -+ bh->b_size); ++ crc32_sum = ++ jbd_checksum_data(crc32_sum, ++ bh); + 
} + lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); -@@ -642,6 +706,23 @@ start_journal_io: +@@ -659,6 +737,23 @@ start_journal_io: } } @@ -254,7 +273,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to complete. Control buffers being written are on the -@@ -740,9 +821,15 @@ wait_for_iobuf: +@@ -757,9 +852,15 @@ wait_for_iobuf: } jbd_debug(3, "JBD: commit phase 6\n"); @@ -273,10 +292,10 @@ Index: linux-2.6.18-8.1.8/fs/jbd/commit.c if (err) __journal_abort_hard(journal); -Index: linux-2.6.18-8.1.8/include/linux/jbd.h +Index: linux-2.6.18.8/include/linux/jbd.h =================================================================== ---- linux-2.6.18-8.1.8.orig/include/linux/jbd.h -+++ linux-2.6.18-8.1.8/include/linux/jbd.h +--- linux-2.6.18.8.orig/include/linux/jbd.h ++++ linux-2.6.18.8/include/linux/jbd.h @@ -148,6 +148,29 @@ typedef struct journal_header_s __be32 h_sequence; } journal_header_t; @@ -327,7 +346,7 @@ Index: linux-2.6.18-8.1.8/include/linux/jbd.h #ifdef __KERNEL__ -@@ -967,6 +994,8 @@ extern int journal_check_available_fe +@@ -1053,6 +1080,8 @@ extern int journal_check_available_fe (journal_t *, unsigned long, unsigned long, unsigned long); extern int journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); @@ -336,10 +355,10 @@ Index: linux-2.6.18-8.1.8/include/linux/jbd.h extern int journal_create (journal_t *); extern int journal_load (journal_t *journal); extern void journal_destroy (journal_t *); -Index: linux-2.6.18-8.1.8/fs/jbd/recovery.c +Index: linux-2.6.18.8/fs/jbd/recovery.c =================================================================== ---- linux-2.6.18-8.1.8.orig/fs/jbd/recovery.c -+++ linux-2.6.18-8.1.8/fs/jbd/recovery.c +--- linux-2.6.18.8.orig/fs/jbd/recovery.c ++++ linux-2.6.18.8/fs/jbd/recovery.c @@ -21,6 +21,7 @@ #include <linux/jbd.h> #include <linux/errno.h> @@ -535,11 
+554,11 @@ Index: linux-2.6.18-8.1.8/fs/jbd/recovery.c /* It's really bad news if different passes end up at * different places (but possible due to IO errors). */ if (info->end_transaction != next_commit_ID) { -Index: linux-2.6.18-8.1.8/fs/jbd/journal.c +Index: linux-2.6.18.8/fs/jbd/journal.c =================================================================== ---- linux-2.6.18-8.1.8.orig/fs/jbd/journal.c -+++ linux-2.6.18-8.1.8/fs/jbd/journal.c -@@ -66,6 +66,7 @@ EXPORT_SYMBOL(journal_update_format); +--- linux-2.6.18.8.orig/fs/jbd/journal.c ++++ linux-2.6.18.8/fs/jbd/journal.c +@@ -67,6 +67,7 @@ EXPORT_SYMBOL(journal_update_format); EXPORT_SYMBOL(journal_check_used_features); EXPORT_SYMBOL(journal_check_available_features); EXPORT_SYMBOL(journal_set_features); @@ -547,7 +566,7 @@ Index: linux-2.6.18-8.1.8/fs/jbd/journal.c EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -@@ -1271,6 +1272,33 @@ int journal_set_features (journal_t *jou +@@ -1573,6 +1574,33 @@ int journal_set_features (journal_t *jou return 1; } @@ -581,10 +600,10 @@ Index: linux-2.6.18-8.1.8/fs/jbd/journal.c /** * int journal_update_format () - Update on-disk journal structure. -Index: linux-2.6.18-8.1.8/fs/Kconfig +Index: linux-2.6.18.8/fs/Kconfig =================================================================== ---- linux-2.6.18-8.1.8.orig/fs/Kconfig -+++ linux-2.6.18-8.1.8/fs/Kconfig +--- linux-2.6.18.8.orig/fs/Kconfig ++++ linux-2.6.18.8/fs/Kconfig @@ -140,6 +140,7 @@ config EXT3_FS_SECURITY config JBD @@ -593,10 +612,10 @@ Index: linux-2.6.18-8.1.8/fs/Kconfig help This is a generic journaling layer for block devices. 
It is currently used by the ext3 and OCFS2 file systems, but it could -Index: linux-2.6.18-8.1.8/Documentation/filesystems/ext3.txt +Index: linux-2.6.18.8/Documentation/filesystems/ext3.txt =================================================================== ---- linux-2.6.18-8.1.8.orig/Documentation/filesystems/ext3.txt -+++ linux-2.6.18-8.1.8/Documentation/filesystems/ext3.txt +--- linux-2.6.18.8.orig/Documentation/filesystems/ext3.txt ++++ linux-2.6.18.8/Documentation/filesystems/ext3.txt @@ -14,6 +14,16 @@ Options When mounting an ext3 filesystem, the following option are accepted: (*) == default -- GitLab