From 272e49ce2d5d6883e6ca1b00a9322b3a23b2e55a Mon Sep 17 00:00:00 2001
From: James Simmons <uja.ornl@yahoo.com>
Date: Tue, 20 Mar 2018 16:44:56 -0400
Subject: [PATCH] LU-10157 lnet: make LNET_MAX_IOV dependent on page size

The default behavior of LNet is to always use 256 pages, which is
LNET_MAX_IOV, and to assume LNET_MAX_PAYLOAD is always one megabyte.
This assumes pages are always 4K in size which is not the case.
This causes bulk I/O errors when using platforms like PowerPC or
ARM which tend to use 64K pages. This is resolved by first making
LNET_MAX_PAYLOAD always one megabyte since this is what the
configure script sets it to by default and no one ever changes it.
In theory it could be set as high as 16MB, but that would cause
the I/O errors since the ptlrpc layer expects the packets to be
always 1 megabyte in size. Also it would be better to make the
maximum payload a per-network configuration setting instead of a
global one. Second, we make LNET_MAX_IOV equal to LNET_MAX_PAYLOAD
divided by the PAGE_SIZE. This way packets will always be the
LNET_MAX_PAYLOAD in size but the number of pages used,
LNET_MAX_IOV, will vary depending on the platform it is creating
packets on.

Change-Id: Ie1dcdb195e68b44e2fa2d9b24715216d8aca4c65
Signed-off-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-on: https://review.whamcloud.com/31559
Tested-by: Jenkins
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Ruth Klundt <rklundt@sandia.gov>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 lnet/autoconf/lustre-lnet.m4              | 23 -----------------------
 lnet/include/lnet/lib-types.h             | 11 ++---------
 lnet/include/uapi/linux/lnet/lnet-types.h |  3 ---
 lnet/klnds/o2iblnd/o2iblnd.c              |  8 ++++----
 lnet/utils/lst.c                          |  4 +---
 lustre/include/lustre_net.h               |  1 +
 6 files changed, 8 insertions(+), 42 deletions(-)

diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4
index be388405d6..d3a302e1b0 100644
--- a/lnet/autoconf/lustre-lnet.m4
+++ b/lnet/autoconf/lustre-lnet.m4
@@ -1,25 +1,3 @@
-#
-# LN_CONFIG_MAX_PAYLOAD
-#
-# configure maximum payload
-#
-AC_DEFUN([LN_CONFIG_MAX_PAYLOAD], [
-AC_MSG_CHECKING([for non-default maximum LNET payload])
-AC_ARG_WITH([max-payload-mb],
-	AC_HELP_STRING([--with-max-payload-mb=MBytes],
-		[set maximum lnet payload in MBytes]),
-	[
-		AC_MSG_RESULT([$with_max_payload_mb])
-		CONFIG_LNET_MAX_PAYLOAD_MB=$with_max_payload_mb
-		CONFIG_LNET_MAX_PAYLOAD="(($with_max_payload_mb)<<20)"
-	], [
-		AC_MSG_RESULT([no])
-		CONFIG_LNET_MAX_PAYLOAD="LNET_MTU"
-	])
-AC_DEFINE_UNQUOTED(CONFIG_LNET_MAX_PAYLOAD, $CONFIG_LNET_MAX_PAYLOAD,
-	[Max LNET payload])
-]) # LN_CONFIG_MAX_PAYLOAD
-
 #
 # LN_CHECK_GCC_VERSION
 #
@@ -805,7 +783,6 @@ AS_IF([test "$enable_efence" = yes], [
 ])
 AC_SUBST(LIBEFENCE)
 
-LN_CONFIG_MAX_PAYLOAD
 LN_CONFIG_DLC
 ]) # LN_CONFIGURE
 
diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h
index 953897080b..e96b544883 100644
--- a/lnet/include/lnet/lib-types.h
+++ b/lnet/include/lnet/lib-types.h
@@ -51,16 +51,9 @@
 #include <uapi/linux/lnet/lnetctl.h>
 
 /* Max payload size */
-#ifndef CONFIG_LNET_MAX_PAYLOAD
-# error "CONFIG_LNET_MAX_PAYLOAD must be defined in config.h"
-#endif
+#define LNET_MAX_PAYLOAD	LNET_MTU
 
-#define LNET_MAX_PAYLOAD       CONFIG_LNET_MAX_PAYLOAD
-#if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-#endif
+#define LNET_MAX_IOV		(LNET_MAX_PAYLOAD >> PAGE_SHIFT)
 
 /* forward refs */
 struct lnet_libmd;
diff --git a/lnet/include/uapi/linux/lnet/lnet-types.h b/lnet/include/uapi/linux/lnet/lnet-types.h
index 4daa0beec2..f30f28421a 100644
--- a/lnet/include/uapi/linux/lnet/lnet-types.h
+++ b/lnet/include/uapi/linux/lnet/lnet-types.h
@@ -515,9 +515,6 @@ struct lnet_md {
 #define LNET_MTU_BITS	20
 #define LNET_MTU	(1 << LNET_MTU_BITS)
 
-/** limit on the number of fragments in discontiguous MDs */
-#define LNET_MAX_IOV	256
-
 /**
  * Options for the MD structure. See struct lnet_md::options.
  */
diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index dc7981d62a..343cd2e33a 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1520,7 +1520,7 @@ kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
 static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
 {
 	struct ib_fmr_pool_param param = {
-		.max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
+		.max_pages_per_fmr = LNET_MAX_IOV,
 		.page_shift        = PAGE_SHIFT,
 		.access            = (IB_ACCESS_LOCAL_WRITE |
 				      IB_ACCESS_REMOTE_WRITE),
@@ -1567,7 +1567,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo,
 
 #ifndef HAVE_IB_MAP_MR_SG
 		frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
-							    LNET_MAX_PAYLOAD/PAGE_SIZE);
+							    LNET_MAX_IOV);
 		if (IS_ERR(frd->frd_frpl)) {
 			rc = PTR_ERR(frd->frd_frpl);
 			CERROR("Failed to allocate ib_fast_reg_page_list: %d\n",
@@ -1579,7 +1579,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo,
 
 #ifdef HAVE_IB_ALLOC_FAST_REG_MR
 		frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
-						   LNET_MAX_PAYLOAD/PAGE_SIZE);
+						   LNET_MAX_IOV);
 #else
 		/*
 		 * it is expected to get here if this is an MLX-5 card.
@@ -1597,7 +1597,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo,
 #else
 						IB_MR_TYPE_MEM_REG,
 #endif
-					  LNET_MAX_PAYLOAD/PAGE_SIZE);
+					  LNET_MAX_IOV);
 		if ((*kiblnd_tunables.kib_use_fastreg_gaps == 1) &&
 		    (dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT))
 			CWARN("using IB_MR_TYPE_SG_GAPS, expect a performance drop\n");
diff --git a/lnet/utils/lst.c b/lnet/utils/lst.c
index 99b0e69a5c..0b6d40a513 100644
--- a/lnet/utils/lst.c
+++ b/lnet/utils/lst.c
@@ -2952,8 +2952,6 @@ lst_get_bulk_param(int argc, char **argv, struct lst_test_bulk_param *bulk)
 
 		} else if (strcasestr(argv[i], "size=") == argv[i] ||
 			   strcasestr(argv[i], "s=") == argv[i]) {
-			int max_size = sysconf(_SC_PAGESIZE) * LNET_MAX_IOV;
-
                         tok = strchr(argv[i], '=') + 1;
 
                         bulk->blk_size = strtol(tok, &end, 0);
@@ -2970,7 +2968,7 @@ lst_get_bulk_param(int argc, char **argv, struct lst_test_bulk_param *bulk)
                         else if (*end == 'm' || *end == 'M')
                                 bulk->blk_size *= 1024 * 1024;
 
-			if (bulk->blk_size > max_size) {
+			if (bulk->blk_size > LNET_MTU) {
                                 fprintf(stderr, "Size exceed limitation: %d bytes\n",
                                         bulk->blk_size);
                                 return -1;
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index f9daeaecbd..15f294a2fd 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -54,6 +54,7 @@
 #include <linux/uio.h>
 #include <libcfs/libcfs.h>
 #include <lnet/api.h>
+#include <lnet/lib-types.h>
 #include <uapi/linux/lnet/nidstr.h>
 #include <uapi/linux/lustre/lustre_idl.h>
 #include <lustre_ha.h>
-- 
GitLab