diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h
index 4fa2f66f0b3d9a49aca754b31356933ee14ffdf9..84c6bd0039632247d09a951560655f8f1cc0ea83 100644
--- a/lnet/include/lnet/api.h
+++ b/lnet/include/lnet/api.h
@@ -78,6 +78,7 @@ int LNetNIFini(void);
 int LNetGetId(unsigned int index, struct lnet_process_id *id);
 int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
 lnet_nid_t LNetPrimaryNID(lnet_nid_t nid);
+bool LNetIsPeerLocal(lnet_nid_t nid);
 
 /** @} lnet_addr */
 
diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c
index e912e58eaca519328ef8667f23d2142d8db17da1..aac37c093aad3e8f872752c99a9293c8b6ab247a 100644
--- a/lnet/lnet/api-ni.c
+++ b/lnet/lnet/api-ni.c
@@ -2933,6 +2933,35 @@ void LNetDebugPeer(struct lnet_process_id id)
 }
 EXPORT_SYMBOL(LNetDebugPeer);
 
+/**
+ * Tell whether \a nid is the NID of a network interface on the local node,
+ * by comparing it with each NI of each net on the_lnet.ln_nets.
+ *
+ * \param nid	peer nid to look for
+ * \retval true	if an NI on the local node has this NID, false otherwise
+ */
+bool LNetIsPeerLocal(lnet_nid_t nid)
+{
+	struct lnet_net *net;
+	struct lnet_ni *ni;
+	bool local = false;
+	int cpt;
+
+	cpt = lnet_net_lock_current();
+	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+			if (ni->ni_nid != nid)
+				continue;
+			local = true;
+			goto unlock;
+		}
+	}
+unlock:
+	lnet_net_unlock(cpt);
+	return local;
+}
+EXPORT_SYMBOL(LNetIsPeerLocal);
+
 /**
  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
  * Note that all interfaces share a same PID, as requested by LNetNIInit().
diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h
index c79cc0263a606a89f25f06df8f30f779afd5ef4d..436139fa128c30385e3b40c4e5ea818e56f63c31 100644
--- a/lustre/include/dt_object.h
+++ b/lustre/include/dt_object.h
@@ -1060,6 +1060,13 @@ struct dt_object_operations {
 				const struct lu_buf *buf, struct thandle *th);
 };
 
+enum dt_bufs_type {
+	DT_BUFS_TYPE_READ	= 0,		/* no bits set: plain read */
+	DT_BUFS_TYPE_WRITE	= 1 << 0,	/* buffers are for a write */
+	DT_BUFS_TYPE_READAHEAD	= 1 << 1,	/* prefetch (ladvise) read */
+	DT_BUFS_TYPE_LOCAL	= 1 << 2,	/* peer is on the local node */
+};
+
 /**
  * Per-dt-object operations on "file body" - unstructure raw data.
  */
@@ -1177,7 +1184,7 @@ struct dt_body_operations {
 			    loff_t pos,
 			    ssize_t len,
 			    struct niobuf_local *lb,
-			    int rw);
+			    enum dt_bufs_type rw);
 
 	/**
 	 * Release reference granted by ->dbo_bufs_get().
@@ -2379,7 +2386,7 @@ static inline int dt_ref_del(const struct lu_env *env,
 
 static inline int dt_bufs_get(const struct lu_env *env, struct dt_object *d,
 			      struct niobuf_remote *rnb,
-			      struct niobuf_local *lnb, int rw)
+			      struct niobuf_local *lnb, enum dt_bufs_type rw)
 {
 	LASSERT(d);
 	LASSERT(d->do_body_ops);
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 8c918afc3d170c821574c1f58a51265c39d73e59..2e342ee4d3b12fe0a5801f58ac2d8a6dd8dc9d7a 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -2015,6 +2015,30 @@ int ptlrpc_connection_init(void);
 void ptlrpc_connection_fini(void);
 extern lnet_pid_t ptl_get_pid(void);
 
+/*
+ * Check if the peer connection is on the local node: false for a NULL \a conn,
+ * true when the peer NID equals c_self or LNetIsPeerLocal() accepts it.
+ *
+ * We need to use GFP_NOFS for requests from a local client to avoid recursing
+ * into the filesystem as we might end up waiting on a page sent in the request
+ * we're serving.  Use __GFP_HIGHMEM so that the pages can use all of the
+ * available memory on 32-bit machines.  Use more aggressive GFP_HIGHUSER flags
+ * from non-local clients to be able to generate more memory pressure on the
+ * OSS and allow inactive pages to be reclaimed, since it doesn't have any
+ * other processes or allocations that generate memory reclaim pressure.
+ * See b=17576 (bdf50dc9) and b=19529 (3dcf18d3) for details.
+ */
+static inline bool ptlrpc_connection_is_local(struct ptlrpc_connection *conn)
+{
+	if (!conn)
+		return false;
+
+	if (conn->c_peer.nid == conn->c_self)
+		return true;
+
+	return LNetIsPeerLocal(conn->c_peer.nid);
+}
+
 /* ptlrpc/niobuf.c */
 /**
  * Actual interfacing with LNet to put/get/register/unregister stuff
diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c
index 2476f9e1f599382e8bb4a0f7a29ee5a5b80865d0..aecaa8edc0f830b5504ea888bbdc854d935f51dc 100644
--- a/lustre/ofd/ofd_dev.c
+++ b/lustre/ofd/ofd_dev.c
@@ -2116,13 +2116,13 @@ out:
 static int ofd_ladvise_prefetch(const struct lu_env *env,
 				struct ofd_object *fo,
 				struct niobuf_local *lnb,
-				__u64 start, __u64 end)
+				__u64 start, __u64 end, enum dt_bufs_type dbt)
 {
-	struct ofd_thread_info	*info = ofd_info(env);
-	pgoff_t			 start_index, end_index, pages;
-	struct niobuf_remote	 rnb;
-	unsigned long		 nr_local;
-	int			 rc = 0;
+	struct ofd_thread_info *info = ofd_info(env);
+	pgoff_t start_index, end_index, pages;
+	struct niobuf_remote rnb;
+	unsigned long nr_local;
+	int rc = 0;
 
 	if (end <= start)
 		RETURN(-EINVAL);
@@ -2150,7 +2150,7 @@ static int ofd_ladvise_prefetch(const struct lu_env *env,
 			PTLRPC_MAX_BRW_PAGES;
 		rnb.rnb_offset = start_index << PAGE_SHIFT;
 		rnb.rnb_len = nr_local << PAGE_SHIFT;
-		rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, 0);
+		rc = dt_bufs_get(env, ofd_object_child(fo), &rnb, lnb, dbt);
 		if (unlikely(rc < 0))
 			break;
 		nr_local = rc;
@@ -2188,7 +2188,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
 	struct ptlrpc_thread *svc_thread = req->rq_svc_thread;
 	const struct lu_env *env = svc_thread->t_env;
 	struct tgt_thread_big_cache *tbc = svc_thread->t_data;
-	int rc = 0;
+	enum dt_bufs_type dbt = DT_BUFS_TYPE_READAHEAD;
 	struct lu_ladvise *ladvise;
 	int num_advise;
 	struct ladvise_hdr *ladvise_hdr;
@@ -2199,6 +2199,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
 	struct dt_object *dob;
 	__u64 start;
 	__u64 end;
+	int rc = 0;
 	ENTRY;
 
 	CFS_FAIL_TIMEOUT(OBD_FAIL_OST_LADVISE_PAUSE, cfs_fail_val);
@@ -2247,6 +2248,9 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
 	LASSERT(fo != NULL);
 	dob = ofd_object_child(fo);
 
+	if (ptlrpc_connection_is_local(exp->exp_connection))
+		dbt |= DT_BUFS_TYPE_LOCAL;
+
 	for (i = 0; i < num_advise; i++, ladvise++) {
 		start = ladvise->lla_start;
 		end = ladvise->lla_end;
@@ -2274,7 +2278,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi)
 
 			req->rq_status = ofd_ladvise_prefetch(env, fo,
 							      tbc->local,
-							      start, end);
+							      start, end, dbt);
 			tgt_extent_unlock(&lockh, LCK_PR);
 			break;
 		case LU_LADVISE_DONTNEED:
diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c
index 5ccdca550394c19e124004260266a45e94e0a4de..53136558a5f634a78ddfd5f055866485e34e4ead 100644
--- a/lustre/ofd/ofd_io.c
+++ b/lustre/ofd/ofd_io.c
@@ -453,8 +453,9 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
 			   struct niobuf_remote *rnb, int *nr_local,
 			   struct niobuf_local *lnb, char *jobid)
 {
-	struct ofd_object	*fo;
-	int			 i, j, rc, tot_bytes = 0;
+	struct ofd_object *fo;
+	int i, j, rc, tot_bytes = 0;
+	enum dt_bufs_type dbt = DT_BUFS_TYPE_READ;
 
 	ENTRY;
 	LASSERT(env != NULL);
@@ -474,10 +475,12 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
 			GOTO(unlock, rc);
 	}
 
-	*nr_local = 0;
-	for (i = 0, j = 0; i < niocount; i++) {
+	if (ptlrpc_connection_is_local(exp->exp_connection))
+		dbt |= DT_BUFS_TYPE_LOCAL;
+
+	for (*nr_local = 0, i = 0, j = 0; i < niocount; i++) {
 		rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i,
-				 lnb + j, 0);
+				 lnb + j, dbt);
 		if (unlikely(rc < 0))
 			GOTO(buf_put, rc);
 		LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
@@ -538,8 +541,9 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 			    struct niobuf_remote *rnb, int *nr_local,
 			    struct niobuf_local *lnb, char *jobid)
 {
-	struct ofd_object	*fo;
-	int			 i, j, k, rc = 0, tot_bytes = 0;
+	struct ofd_object *fo;
+	int i, j, k, rc = 0, tot_bytes = 0;
+	enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE;
 
 	ENTRY;
 	LASSERT(env != NULL);
@@ -628,11 +632,13 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 	 * space back if possible */
 	tgt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
 
+	if (ptlrpc_connection_is_local(exp->exp_connection))
+		dbt |= DT_BUFS_TYPE_LOCAL;
+
 	/* parse remote buffers to local buffers and prepare the latter */
-	*nr_local = 0;
-	for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
+	for (*nr_local = 0, i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
 		rc = dt_bufs_get(env, ofd_object_child(fo),
-				 rnb + i, lnb + j, 1);
+				 rnb + i, lnb + j, dbt);
 		if (unlikely(rc < 0))
 			GOTO(err, rc);
 		LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c
index c3545a40643bcc1c5a6b488ccc2cfac1e0c2dec2..56e5231a61603ac8e5732645d0afae7916e833d3 100644
--- a/lustre/osd-ldiskfs/osd_io.c
+++ b/lustre/osd-ldiskfs/osd_io.c
@@ -421,16 +421,18 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages,
         RETURN(0);
 }
 
-static struct page *osd_get_page(struct dt_object *dt, loff_t offset, int rw)
+static struct page *osd_get_page(struct dt_object *dt, loff_t offset,
+				 gfp_t gfp_mask)
 {
-        struct inode      *inode = osd_dt_obj(dt)->oo_inode;
-        struct osd_device *d = osd_obj2dev(osd_dt_obj(dt));
-        struct page       *page;
+	struct inode *inode = osd_dt_obj(dt)->oo_inode;
+	struct osd_device *d = osd_obj2dev(osd_dt_obj(dt));
+	struct page *page;
 
         LASSERT(inode);
 
 	page = find_or_create_page(inode->i_mapping, offset >> PAGE_SHIFT,
-                                   GFP_NOFS | __GFP_HIGHMEM);
+				   gfp_mask);
+
         if (unlikely(page == NULL))
                 lprocfs_counter_add(d->od_stats, LPROC_OSD_NO_PAGE, 1);
 
@@ -504,7 +506,7 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt,
  * \param pos		byte offset of IO start
  * \param len		number of bytes of IO
  * \param lnb		array of extents undergoing IO
- * \param rw		read or write operation?
+ * \param rw		read or write operation, and other flags
  * \param capa		capabilities
  *
  * \retval pages	(zero or more) loaded successfully
@@ -512,17 +514,22 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt,
  */
 static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
 			loff_t pos, ssize_t len, struct niobuf_local *lnb,
-			int rw)
+			enum dt_bufs_type rw)
 {
-	struct osd_object   *obj    = osd_dt_obj(dt);
+	struct osd_object *obj = osd_dt_obj(dt);
 	int npages, i, rc = 0;
+	gfp_t gfp_mask;
 
 	LASSERT(obj->oo_inode);
 
 	osd_map_remote_to_local(pos, len, &npages, lnb);
 
+	/* this could also try less hard for DT_BUFS_TYPE_READAHEAD pages */
+	gfp_mask = rw & DT_BUFS_TYPE_LOCAL ? (GFP_NOFS | __GFP_HIGHMEM) :
+					     GFP_HIGHUSER;
 	for (i = 0; i < npages; i++, lnb++) {
-		lnb->lnb_page = osd_get_page(dt, lnb->lnb_file_offset, rw);
+		lnb->lnb_page = osd_get_page(dt, lnb->lnb_file_offset,
+					     gfp_mask);
 		if (lnb->lnb_page == NULL)
 			GOTO(cleanup, rc = -ENOMEM);
 
diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c
index 082d7ddef3fb8c0d7e8113c53c9155a9e418b7bf..41e6ee7d48afe525dd16a628c00fcc42dc8affc8 100644
--- a/lustre/osd-zfs/osd_io.c
+++ b/lustre/osd-zfs/osd_io.c
@@ -313,7 +313,7 @@ static inline struct page *kmem_to_page(void *addr)
  * \retval		negative error number of failure
  */
 static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj,
-				loff_t off, ssize_t len, struct niobuf_local *lnb)
+			     loff_t off, ssize_t len, struct niobuf_local *lnb)
 {
 	struct osd_device *osd = osd_obj2dev(obj);
 	unsigned long	   start = cfs_time_current();
@@ -420,7 +420,7 @@ static inline arc_buf_t *osd_request_arcbuf(dnode_t *dn, size_t bs)
 }
 
 static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj,
-				loff_t off, ssize_t len, struct niobuf_local *lnb)
+			      loff_t off, ssize_t len, struct niobuf_local *lnb)
 {
 	struct osd_device *osd = osd_obj2dev(obj);
 	int                plen, off_in_block, sz_in_block;
@@ -525,7 +525,7 @@ out_err:
 
 static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
 			loff_t offset, ssize_t len, struct niobuf_local *lnb,
-			int rw)
+			enum dt_bufs_type rw)
 {
 	struct osd_object *obj  = osd_dt_obj(dt);
 	int                rc;
@@ -533,10 +533,10 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
 	LASSERT(dt_object_exists(dt));
 	LASSERT(obj->oo_dn);
 
-	if (rw == 0)
-		rc = osd_bufs_get_read(env, obj, offset, len, lnb);
-	else
+	if (rw & DT_BUFS_TYPE_WRITE)
 		rc = osd_bufs_get_write(env, obj, offset, len, lnb);
+	else
+		rc = osd_bufs_get_read(env, obj, offset, len, lnb);
 
 	return rc;
 }
diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c
index 5ceaaf1d5f666fd346a2d42be89379f34aa241eb..24c37f2321d87d1f86438a361dab06eb39afcd05 100644
--- a/lustre/target/tgt_handler.c
+++ b/lustre/target/tgt_handler.c
@@ -2183,7 +2183,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
 		RETURN(err_serious(-EPROTO));
 
 	if ((remote_nb[0].rnb_flags & OBD_BRW_MEMALLOC) &&
-	    (exp->exp_connection->c_peer.nid == exp->exp_connection->c_self))
+	    ptlrpc_connection_is_local(exp->exp_connection))
 		memory_pressure_set();
 
 	req_capsule_set_size(&req->rq_pill, &RMF_RCS, RCL_SERVER,